克隆策略

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"-322:features","from_node_id":"-331:data"},{"to_node_id":"-322:instruments","from_node_id":"-312:data"},{"to_node_id":"-1136:input_data","from_node_id":"-322:data"},{"to_node_id":"-932:input_functions","from_node_id":"-1111:functions"},{"to_node_id":"-1136:features","from_node_id":"-2149:data"},{"to_node_id":"-1111:input_functions","from_node_id":"-2016:functions"},{"to_node_id":"-322:user_functions","from_node_id":"-932:functions"},{"to_node_id":"-2016:input_functions","from_node_id":"-584:functions"}],"nodes":[{"node_id":"-331","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"_c = close\n_o = open\n_v = volume\n_amt = amount\n_num = num_trades\n_ret = ret_sim(_c, _o)\n\n# 高频偏度\n_RVar = RVar(_ret)\nRSkew = RSkew(_ret, _RVar)\n\n# 下行波动占比\n_down_vol = np.sqrt(np.power(_ret*where(_ret<0, 1, 0), 2).sum())\ndown_vol_ratio = where(_RVar==0, 1, _down_vol / (_RVar))\n\n# 改进反转因子\ninverse_plus = _c.iloc[-1] / _c.loc[100000] - 1\n\n# 尾盘成交量占比\nend_volume_ratio = _v.loc[143000:150000].sum() / _v.sum()\n\n# 量价相关性\n_Volume = _v / (_v.sum())\n_Price = _c\n_corr = _Volume.corr(_Price, method='pearson')\ncorr_PV = where(np.isnan(_corr), 0, _corr)\n\n# 单笔流出金额占比\n_n1 = (_num*where(_ret<0, 1, 0)).sum()\n_FlowOutRatio_1 = where(_n1==0, 1, (_amt*where(_ret<0, 1, 0)).sum() / _n1)\n_FOR_1 = where(np.isnan(_FlowOutRatio_1), 0, _FlowOutRatio_1)\n_FlowOutRatio_2 = _amt.sum() / _num.sum()\nFlowOutRatio_indiv = _FOR_1 / _FlowOutRatio_2\n\n# 大单推动涨幅\nbig_order_ret = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.3-1)], 1, 0))\n\n# 成交委托相关性\n# ask_diff_bid_volume = (bid_volume1.diff().fillna(0)).sub(ask_volume1.diff().fillna(0))\n\n# 收盘前成交委托相关性\n# ret_late = ret.loc[142600:145700]\n# ask_diff_bid_volume_late = (bid_volume1.loc[142600:145700].diff().fillna(0)).sub(ask_volume1.loc[142600:145700].diff().fillna(0))\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-331"}],"output_ports":[{"name":"data","node_id":"-331"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"-312","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2014-03-03","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2021-12-31","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"000001.SZA\n000002.SZA\n000005.SZA","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"-312"}],"output_ports":[{"name":"data","node_id":"-312"}],"cacheable":true,"seq_num":2,"comment":"","comment_collapsed":true},{"node_id":"-322","module_id":"BigQuantSpace.feature_extractor_1m.feature_extractor_1m-v1","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"20","type":"Literal","bound_global_parameter":null},{"name":"workers","value":2,"type":"Literal","bound_global_parameter":null},{"name":"parallel_mode","value":"测试","type":"Literal","bound_global_parameter":null},{"name":"table_1m","value":"level2_bar1m_CN_STOCK_A","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-322"},{"name":"features","node_id":"-322"},{"name":"user_functions","node_id":"-322"}],"output_ports":[{"name":"data","node_id":"-322"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-1111","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RSkew","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 0\n else:\n result = (np.power(ret, 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)\n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1111"}],"output_ports":[{"name":"functions","node_id":"-1111"}],"cacheable":false,"seq_num":6,"comment":"","comment_collapsed":true},{"node_id":"-1136","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"False","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"def nanmean(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).mean())\n\ndef nansum(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).sum())\n\ndef nanprod(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).apply(np.prod, raw=True))\n\nbigquant_run = {\n 'nanmean': nanmean,\n 'nansum': nansum,\n 'nanprod': nanprod\n}","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-1136"},{"name":"features","node_id":"-1136"}],"output_ports":[{"name":"data","node_id":"-1136"}],"cacheable":true,"seq_num":7,"comment":"","comment_collapsed":true},{"node_id":"-2149","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"RSkew = nanmean(RSkew, 20)\ndown_vol_ratio = nanmean(down_vol_ratio, 20)\ninverse_plus = nanmean(inverse_plus, 20)\nend_volume_ratio = nanmean(end_volume_ratio, 20)\ncorr_PV = nanmean(corr_PV, 20)\nFlowOutRatio_indiv = nanmean(FlowOutRatio_indiv, 20)\nbig_order_ret = nanprod(big_order_ret, 20) - 1","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-2149"}],"output_ports":[{"name":"data","node_id":"-2149"}],"cacheable":true,"seq_num":8,"comment":"","comment_collapsed":true},{"node_id":"-2016","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RKurt","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 1\n else:\n result = (np.power(ret, 4).sum()) * (240) / np.power(RVar, 2)\n \n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-2016"}],"output_ports":[{"name":"functions","node_id":"-2016"}],"cacheable":false,"seq_num":4,"comment":"","comment_collapsed":true},{"node_id":"-932","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RVar","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret):\n return np.power(ret, 2).sum()\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-932"}],"output_ports":[{"name":"functions","node_id":"-932"}],"cacheable":false,"seq_num":5,"comment":"","comment_collapsed":true},{"node_id":"-584","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"ret_sim","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, close, op):\n res = close.pct_change()\n res.iloc[0] = close.iloc[0] / op.iloc[0] - 1\n return res\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-584"}],"output_ports":[{"name":"functions","node_id":"-584"}],"cacheable":false,"seq_num":9,"comment":"","comment_collapsed":true}],"node_layout":"<node_postions><node_position Node='-331' Position='-112,-236,200,200'/><node_position Node='-312' Position='-439.43504333496094,-239,200,200'/><node_position Node='-322' Position='-119,-19,200,200'/><node_position Node='-1111' Position='198,-296,200,200'/><node_position Node='-1136' Position='20,110,200,200'/><node_position Node='-2149' Position='245,-20,200,200'/><node_position Node='-2016' Position='197,-356,200,200'/><node_position Node='-932' Position='199,-235,200,200'/><node_position Node='-584' Position='201,-449,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [2]:
    # 本代码由可视化策略环境自动生成 2021年7月9日14:59
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    def m9_func_bigquant_run(df, close, op):
        res = close.pct_change()
        res.iloc[0] = close.iloc[0] / op.iloc[0] - 1
        return res
    
    def m4_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 1
        else:
            result = (np.power(ret, 4).sum()) * (240) / np.power(RVar, 2)
        
        return result
    
    def m6_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 0
        else:
            result = (np.power(ret, 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)
        return result
    
    def m5_func_bigquant_run(df, ret):
        return np.power(ret, 2).sum()
    
    def nanmean(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).mean())
    
    def nansum(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).sum())
    
    def nanprod(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).apply(np.prod, raw=True))
    
    m7_user_functions_bigquant_run = {
        'nanmean': nanmean,
        'nansum': nansum,
        'nanprod': nanprod
    }
    
    m1 = M.input_features.v1(
        features="""_c = close
    _o = open
    _v = volume
    _amt = amount
    _num = num_trades
    _ret = ret_sim(_c, _o)
    
    # 高频偏度
    _RVar = RVar(_ret)
    RSkew = RSkew(_ret, _RVar)
    
    # 下行波动占比
    _down_vol = np.sqrt(np.power(_ret*where(_ret<0, 1, 0), 2).sum())
    down_vol_ratio = where(_RVar==0, 1, _down_vol / (_RVar))
    
    # 改进反转因子
    inverse_plus = _c.iloc[-1] / _c.loc[100000] - 1
    
    # 尾盘成交量占比
    end_volume_ratio = _v.loc[143000:150000].sum() / _v.sum()
    
    # 量价相关性
    _Volume = _v / (_v.sum())
    _Price = _c
    _corr = _Volume.corr(_Price, method='pearson')
    corr_PV = where(np.isnan(_corr), 0, _corr)
    
    # 单笔流出金额占比
    _n1 = (_num*where(_ret<0, 1, 0)).sum()
    _FlowOutRatio_1 = where(_n1==0, 1, (_amt*where(_ret<0, 1, 0)).sum() / _n1)
    _FOR_1 = where(np.isnan(_FlowOutRatio_1), 0, _FlowOutRatio_1)
    _FlowOutRatio_2 = _amt.sum() / _num.sum()
    FlowOutRatio_indiv = _FOR_1 / _FlowOutRatio_2
    
    # 大单推动涨幅
    big_order_ret = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.3-1)], 1, 0))
    
    # 成交委托相关性
    # ask_diff_bid_volume = (bid_volume1.diff().fillna(0)).sub(ask_volume1.diff().fillna(0))
    
    # 收盘前成交委托相关性
    # ret_late = ret.loc[142600:145700]
    # ask_diff_bid_volume_late = (bid_volume1.loc[142600:145700].diff().fillna(0)).sub(ask_volume1.loc[142600:145700].diff().fillna(0))
    """
    )
    
    m2 = M.instruments.v2(
        start_date='2014-03-03',
        end_date='2021-12-31',
        market='CN_STOCK_A',
        instrument_list="""000001.SZA
    000002.SZA
    000005.SZA""",
        max_count=0
    )
    
    m8 = M.input_features.v1(
        features="""RSkew = nanmean(RSkew, 20)
    down_vol_ratio = nanmean(down_vol_ratio, 20)
    inverse_plus = nanmean(inverse_plus, 20)
    end_volume_ratio = nanmean(end_volume_ratio, 20)
    corr_PV = nanmean(corr_PV, 20)
    FlowOutRatio_indiv = nanmean(FlowOutRatio_indiv, 20)
    big_order_ret = nanprod(big_order_ret, 20) - 1"""
    )
    
    m9 = M.feature_extractor_user_function.v1(
        name='ret_sim',
        func=m9_func_bigquant_run
    )
    
    m4 = M.feature_extractor_user_function.v1(
        input_functions=m9.functions,
        name='RKurt',
        func=m4_func_bigquant_run
    )
    
    m6 = M.feature_extractor_user_function.v1(
        input_functions=m4.functions,
        name='RSkew',
        func=m6_func_bigquant_run
    )
    
    m5 = M.feature_extractor_user_function.v1(
        input_functions=m6.functions,
        name='RVar',
        func=m5_func_bigquant_run
    )
    
    m3 = M.feature_extractor_1m.v1(
        instruments=m2.data,
        features=m1.data,
        user_functions=m5.functions,
        start_date='',
        end_date='',
        before_start_days=20,
        workers=2,
        parallel_mode='测试',
        table_1m='level2_bar1m_CN_STOCK_A'
    )
    
    m7 = M.derived_feature_extractor.v3(
        input_data=m3.data,
        features=m8.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions=m7_user_functions_bigquant_run
    )
    
    In [3]:
    dt = m7.data.read()
    dt.shape, dt.date.min(), dt.date.max()
    
    Out[3]:
    ((729, 9), Timestamp('2020-01-02 00:00:00'), Timestamp('2020-12-31 00:00:00'))
    In [4]:
    dt.head().T
    
    Out[4]:
    0 1 2 3 4
    date 2020-01-02 00:00:00 2020-01-02 00:00:00 2020-01-02 00:00:00 2020-01-03 00:00:00 2020-01-03 00:00:00
    instrument 000001.SZA 000002.SZA 000005.SZA 000001.SZA 000002.SZA
    RSkew 1.047927 0.069402 0.052861 1.076504 0.085965
    down_vol_ratio 30.456535 29.380865 21.09432 33.837667 36.822297
    inverse_plus 0.010785 -0.023688 0.003195 0.008026 -0.012312
    end_volume_ratio 0.057526 0.107093 0.142403 0.074372 0.120171
    corr_PV -0.300752 0.336809 -0.068722 -0.140373 0.3731
    FlowOutRatio_indiv 1.08746 1.000544 1.174714 1.016684 0.947484
    big_order_ret 0.013191 0.00238 0.000071 0.039573 -0.026463
    In [5]:
    dt.isna().sum()
    
    Out[5]:
    date                  0
    instrument            0
    RSkew                 0
    down_vol_ratio        0
    inverse_plus          0
    end_volume_ratio      0
    corr_PV               0
    FlowOutRatio_indiv    0
    big_order_ret         0
    dtype: int64
    In [6]:
    dt.describe().T
    
    Out[6]:
    count mean std min 25% 50% 75% max
    RSkew 729.0 0.185820 0.237419 -0.492867 0.037204 0.182241 0.331799 1.076504
    down_vol_ratio 729.0 32.392754 11.335961 16.546408 20.024995 32.660923 41.569017 60.251793
    inverse_plus 729.0 0.000600 0.002914 -0.023688 -0.001394 0.000423 0.002379 0.010785
    end_volume_ratio 729.0 0.136746 0.020717 0.057526 0.122185 0.133771 0.146432 0.213053
    corr_PV 729.0 0.062076 0.062016 -0.300752 0.023917 0.059457 0.100945 0.373100
    FlowOutRatio_indiv 729.0 0.989114 0.026160 0.925070 0.974353 0.986347 1.000033 1.174714
    big_order_ret 729.0 0.007761 0.126244 -0.368740 -0.050243 -0.000282 0.057164 0.502803
    In [7]:
    dt2 = dt.set_index(['date','instrument'])
    dt2.head()
    
    Out[7]:
    RSkew down_vol_ratio inverse_plus end_volume_ratio corr_PV FlowOutRatio_indiv big_order_ret
    date instrument
    2020-01-02 000001.SZA 1.047927 30.456535 0.010785 0.057526 -0.300752 1.087460 0.013191
    000002.SZA 0.069402 29.380865 -0.023688 0.107093 0.336809 1.000544 0.002380
    000005.SZA 0.052861 21.094320 0.003195 0.142403 -0.068722 1.174714 0.000071
    2020-01-03 000001.SZA 1.076504 33.837667 0.008026 0.074372 -0.140373 1.016684 0.039573
    000002.SZA 0.085965 36.822297 -0.012312 0.120171 0.373100 0.947484 -0.026463
    In [8]:
    import seaborn as sns
    import matplotlib.pyplot as plt
    
    In [9]:
    # %%time
    n=4#行数
    m=dt2.shape[1]
    x=m%n
    fig,axes=plt.subplots(n,int((m - x + (n if x else x))/n),figsize=(20, 10))
    axes=axes.reshape((-1,))
    for i in range(dt2.shape[1]):    
        sns.distplot(dt2[dt2.columns[i]],ax=axes[i])
    plt.subplots_adjust(wspace =0.3, hspace =0.3)
    
    In [10]:
    # for i in dt2.columns[:]:
    #     dt2[i].to_hdf(f'XX券商-XXXX研报-{i}.h5', 'fac', mode='w')
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]: