克隆策略

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"-322:features","from_node_id":"-331:data"},{"to_node_id":"-322:instruments","from_node_id":"-312:data"},{"to_node_id":"-1136:input_data","from_node_id":"-322:data"},{"to_node_id":"-932:input_functions","from_node_id":"-1111:functions"},{"to_node_id":"-1136:features","from_node_id":"-2149:data"},{"to_node_id":"-1111:input_functions","from_node_id":"-2016:functions"},{"to_node_id":"-322:user_functions","from_node_id":"-932:functions"},{"to_node_id":"-2016:input_functions","from_node_id":"-584:functions"}],"nodes":[{"node_id":"-331","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"_v = volume\n_amt = amount\n_num_trd = num_trades\n_c = close\n_h = high\n_l = low\n_o = open\n_tbv = total_bid_volume\n_ret = ret_sim(_c, _o)\n\n# 高频偏度\n_RVar = RVar(_ret)\nRSkew = RSkew(_ret, _RVar)\n\n# 平均单笔成交金额\nAmtPerTrd = where(_num_trd.sum()==0, 1, _amt.sum() / _num_trd.sum())\n\n# 平均单笔流入金额\nAmtPerTrd_InFlow = where((_num_trd*where(_ret>0,1,0)).sum()==0, 1, (_amt*where(_ret>0,1,0)).sum() / (_num_trd*where(_ret>0,1,0)).sum())\n\n# 平均单笔流出金额\nAmtPerTrd_OutFlow = where((_num_trd*where(_ret<0,1,0)).sum()==0, 1, (_amt*where(_ret<0,1,0)).sum() / (_num_trd*where(_ret<0,1,0)).sum())\n\n# 平均单笔流入金额占比\nAmtPerTrd_InFlow_ratio = where(AmtPerTrd==0, 1, AmtPerTrd_InFlow / AmtPerTrd)\n\n# 平均单笔流出金额占比\nAmtPerTrd_OutFlow_ratio = where(AmtPerTrd_OutFlow==0, 1, AmtPerTrd_InFlow / AmtPerTrd_OutFlow)\n\n# 大单资金净流入金额\t\n_AmtPerTrd_avg = _amt.div(_num_trd)\n_anb_1 = where(_AmtPerTrd_avg >= _AmtPerTrd_avg.sort_values(ascending=False).iloc[np.int(240*0.1-1)], _amt*where(_ret>0,1,0), 0).sum()\n_anb_2 = where(_AmtPerTrd_avg >= _AmtPerTrd_avg.sort_values(ascending=False).iloc[np.int(240*0.1-1)], _amt*where(_ret<0,1,0), 0).sum()\nApt_NetinFlow_bigorder = _anb_1 - _anb_2\n\n# 大单资金净流入率\nApt_NetinFlow_bigorder_ratio = where(_amt.sum()==0, 1, Apt_NetinFlow_bigorder / (_amt.sum()))\n\n# 大单驱动涨幅(前10%)\nmom_bigorder = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.1-1)], 1, 0))\n# 大单驱动涨幅(前20%)\nmom_bigorder = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.2-1)], 1, 0))\n# 大单驱动涨幅(前30%)\nmom_bigorder = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.3-1)], 1, 0))\n\n# 趋势强度\n_diff_close = _c.diff().fillna(0).iloc[1:].abs().sum()\ntrend_strength = where(_diff_close==0, 1, _c.iloc[-1] - _c.iloc[0] / _diff_close)\n\n# 日内时点涨幅\ntime_incre_1 = _c.loc[93500] / _c.loc[93100]\ntime_incre_2 = _c.loc[150000] / _c.loc[145100]\ntime_incre_3 = _c.loc[140000] / _c.loc[133100]\n\n# N个日内累计时点涨幅\n# cum_time_incre = np.prod([time_incre_1, time_incre_2, time_incre_3]) - 1\n\n# 平均净委买变化率均值\n# avg_bid_volume_delta = (_tbv.loc[93100:145700]).pct_change().fillna(0).mean()\n\n# 平均净委买变化率波动率\n# avg_bid_volume_std = (_tbv.loc[93100:145700]).pct_change().std()\n\n# 平均净委买变化率偏度\n# avg_bid_volume_skewness = (_tbv.loc[93100:145700]).pct_change().skew()\n\n# 均价偏差(二次抽取)\nvolu = _v.sum()\nvwap = where(volu==0, 1, ((_c.add(_h).add(_l).add(_o)) / 4).mul(_v).sum() / (volu))\nvolu_vwap = volu * vwap\n\n# 时间加权平均的相对价格位置(二次抽取)\nhigh1 = _h.max()\nlow1 = _l.min()\ntwap = ((_c.add(_h).add(_l).add(_o)) / 4).mean()\n\n# 聪明钱因子\n_s = (_ret.abs().div(np.sqrt(_v))).sort_values(ascending=False)\n_v1 = _v[_s.index]\n_c1 = _c[_s.index]\n_sm = ((_v1.cumsum() / _v.sum()) >= 0.2)\nsmart_money = ((_v1[_sm]*_c1[_sm]).sum() / (_v1[_sm].sum())) / ((_v*_c).sum() / (_v.sum()))\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-331"}],"output_ports":[{"name":"data","node_id":"-331"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"-312","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2014-03-03","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2021-12-31","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"000001.SZA\n000002.SZA\n000005.SZA","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"-312"}],"output_ports":[{"name":"data","node_id":"-312"}],"cacheable":true,"seq_num":2,"comment":"","comment_collapsed":true},{"node_id":"-322","module_id":"BigQuantSpace.feature_extractor_1m.feature_extractor_1m-v1","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"20","type":"Literal","bound_global_parameter":null},{"name":"workers","value":2,"type":"Literal","bound_global_parameter":null},{"name":"parallel_mode","value":"测试","type":"Literal","bound_global_parameter":null},{"name":"table_1m","value":"level2_bar1m_CN_STOCK_A","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-322"},{"name":"features","node_id":"-322"},{"name":"user_functions","node_id":"-322"}],"output_ports":[{"name":"data","node_id":"-322"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-1111","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RSkew","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 0\n else:\n result = (np.power(ret, 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)\n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1111"}],"output_ports":[{"name":"functions","node_id":"-1111"}],"cacheable":false,"seq_num":6,"comment":"","comment_collapsed":true},{"node_id":"-1136","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"False","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"def nanmean(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).mean())\n\ndef nansum(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).sum())\n\ndef nanhigh(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).max())\n\ndef nanlow(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).min())\n\ndef nanprod(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).apply(np.prod))\n\nbigquant_run = {\n 'nanmean': nanmean,\n 'nansum': nansum,\n 'nanhigh': nanhigh,\n 'nanlow': nanlow,\n 'nanprod': nanprod\n}","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-1136"},{"name":"features","node_id":"-1136"}],"output_ports":[{"name":"data","node_id":"-1136"}],"cacheable":true,"seq_num":7,"comment":"","comment_collapsed":true},{"node_id":"-2149","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"APB = np.log(nanmean(vwap, 5) / (nansum(volu_vwap, 5) / nansum(volu, 5)))\n\nARPP = (twap - nanhigh(high1, 5)) / (nanhigh(high1, 5) - nanlow(low1, 5))\n\n# 过去10个日内累计时点涨幅(%)\ntime_zf1 = nanprod(time_incre_1, 10) - 1 \n\n# 过去10个日内累计时点涨幅(%)\ntime_zf2 = nanprod(time_incre_2, 10) - 1 \n\n# 过去10个日内累计时点涨幅(%)\ntime_zf3 = nanprod(time_incre_3, 10) - 1\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-2149"}],"output_ports":[{"name":"data","node_id":"-2149"}],"cacheable":true,"seq_num":8,"comment":"","comment_collapsed":true},{"node_id":"-2016","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RKurt","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 1\n else:\n result = (np.power(ret, 4).sum()) * (240) / np.power(RVar, 2)\n \n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-2016"}],"output_ports":[{"name":"functions","node_id":"-2016"}],"cacheable":false,"seq_num":4,"comment":"","comment_collapsed":true},{"node_id":"-932","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RVar","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret):\n return np.power(ret, 2).sum()\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-932"}],"output_ports":[{"name":"functions","node_id":"-932"}],"cacheable":false,"seq_num":5,"comment":"","comment_collapsed":true},{"node_id":"-584","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"ret_sim","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, close, op):\n res = close.pct_change()\n res.iloc[0] = close.iloc[0] / op.iloc[0] - 1\n return res\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-584"}],"output_ports":[{"name":"functions","node_id":"-584"}],"cacheable":false,"seq_num":9,"comment":"","comment_collapsed":true}],"node_layout":"<node_postions><node_position Node='-331' Position='-120.8932580947876,-240.43328857421875,200,200'/><node_position Node='-312' Position='-439,-237,200,200'/><node_position Node='-322' Position='-119,-16,200,200'/><node_position Node='-1111' Position='198,-296,200,200'/><node_position Node='-1136' Position='21,114,200,200'/><node_position Node='-2149' Position='250,-19,200,200'/><node_position Node='-2016' Position='197,-357,200,200'/><node_position Node='-932' Position='197,-235,200,200'/><node_position Node='-584' Position='195,-446,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [84]:
    # 本代码由可视化策略环境自动生成 2021年7月9日15:29
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    def m9_func_bigquant_run(df, close, op):
        res = close.pct_change()
        res.iloc[0] = close.iloc[0] / op.iloc[0] - 1
        return res
    
    def m4_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 1
        else:
            result = (np.power(ret, 4).sum()) * (240) / np.power(RVar, 2)
        
        return result
    
    def m6_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 0
        else:
            result = (np.power(ret, 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)
        return result
    
    def m5_func_bigquant_run(df, ret):
        return np.power(ret, 2).sum()
    
    def nanmean(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).mean())
    
    def nansum(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).sum())
    
    def nanhigh(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).max())
    
    def nanlow(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).min())
    
    def nanprod(df, x_name, N):
        return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).apply(np.prod))
    
    m7_user_functions_bigquant_run = {
        'nanmean': nanmean,
        'nansum': nansum,
        'nanhigh': nanhigh,
        'nanlow': nanlow,
        'nanprod': nanprod
    }
    
    m1 = M.input_features.v1(
        features="""_v = volume
    _amt = amount
    _num_trd = num_trades
    _c = close
    _h = high
    _l = low
    _o = open
    _tbv = total_bid_volume
    _ret = ret_sim(_c, _o)
    
    # 高频偏度
    _RVar = RVar(_ret)
    RSkew = RSkew(_ret, _RVar)
    
    # 平均单笔成交金额
    AmtPerTrd = where(_num_trd.sum()==0, 1, _amt.sum() / _num_trd.sum())
    
    # 平均单笔流入金额
    AmtPerTrd_InFlow = where((_num_trd*where(_ret>0,1,0)).sum()==0, 1, (_amt*where(_ret>0,1,0)).sum() / (_num_trd*where(_ret>0,1,0)).sum())
    
    # 平均单笔流出金额
    AmtPerTrd_OutFlow = where((_num_trd*where(_ret<0,1,0)).sum()==0, 1, (_amt*where(_ret<0,1,0)).sum() / (_num_trd*where(_ret<0,1,0)).sum())
    
    # 平均单笔流入金额占比
    AmtPerTrd_InFlow_ratio = where(AmtPerTrd==0, 1, AmtPerTrd_InFlow / AmtPerTrd)
    
    # 平均单笔流出金额占比
    AmtPerTrd_OutFlow_ratio = where(AmtPerTrd_OutFlow==0, 1, AmtPerTrd_InFlow / AmtPerTrd_OutFlow)
    
    # 大单资金净流入金额	
    _AmtPerTrd_avg = _amt.div(_num_trd)
    _anb_1 = where(_AmtPerTrd_avg >= _AmtPerTrd_avg.sort_values(ascending=False).iloc[np.int(240*0.1-1)], _amt*where(_ret>0,1,0), 0).sum()
    _anb_2 = where(_AmtPerTrd_avg >= _AmtPerTrd_avg.sort_values(ascending=False).iloc[np.int(240*0.1-1)], _amt*where(_ret<0,1,0), 0).sum()
    Apt_NetinFlow_bigorder = _anb_1 - _anb_2
    
    # 大单资金净流入率
    Apt_NetinFlow_bigorder_ratio = where(_amt.sum()==0, 1, Apt_NetinFlow_bigorder / (_amt.sum()))
    
    # 大单驱动涨幅(前10%)
    mom_bigorder = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.1-1)], 1, 0))
    # 大单驱动涨幅(前20%)
    mom_bigorder = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.2-1)], 1, 0))
    # 大单驱动涨幅(前30%)
    mom_bigorder = np.prod(1 + _ret*where(_amt >= _amt.sort_values(ascending=False).iloc[np.int(240*0.3-1)], 1, 0))
    
    # 趋势强度
    _diff_close = _c.diff().fillna(0).iloc[1:].abs().sum()
    trend_strength = where(_diff_close==0, 1, _c.iloc[-1] - _c.iloc[0] / _diff_close)
    
    # 日内时点涨幅
    time_incre_1 = _c.loc[93500] / _c.loc[93100]
    time_incre_2 = _c.loc[150000] / _c.loc[145100]
    time_incre_3 = _c.loc[140000] / _c.loc[133100]
    
    # N个日内累计时点涨幅
    # cum_time_incre = np.prod([time_incre_1, time_incre_2, time_incre_3]) - 1
    
    # 平均净委买变化率均值
    # avg_bid_volume_delta = (_tbv.loc[93100:145700]).pct_change().fillna(0).mean()
    
    # 平均净委买变化率波动率
    # avg_bid_volume_std = (_tbv.loc[93100:145700]).pct_change().std()
    
    # 平均净委买变化率偏度
    # avg_bid_volume_skewness = (_tbv.loc[93100:145700]).pct_change().skew()
    
    # 均价偏差(二次抽取)
    volu = _v.sum()
    vwap = where(volu==0, 1, ((_c.add(_h).add(_l).add(_o)) / 4).mul(_v).sum() / (volu))
    volu_vwap = volu * vwap
    
    # 时间加权平均的相对价格位置(二次抽取)
    high1 = _h.max()
    low1 = _l.min()
    twap = ((_c.add(_h).add(_l).add(_o)) / 4).mean()
    
    # 聪明钱因子
    _s = (_ret.abs().div(np.sqrt(_v))).sort_values(ascending=False)
    _v1 = _v[_s.index]
    _c1 = _c[_s.index]
    _sm = ((_v1.cumsum() / _v.sum()) >= 0.2)
    smart_money = ((_v1[_sm]*_c1[_sm]).sum() / (_v1[_sm].sum())) / ((_v*_c).sum() / (_v.sum()))
    """
    )
    
    m2 = M.instruments.v2(
        start_date='2014-03-03',
        end_date='2021-12-31',
        market='CN_STOCK_A',
        instrument_list="""000001.SZA
    000002.SZA
    000005.SZA""",
        max_count=0
    )
    
    m8 = M.input_features.v1(
        features="""APB = np.log(nanmean(vwap, 5) / (nansum(volu_vwap, 5) / nansum(volu, 5)))
    
    ARPP = (twap - nanhigh(high1, 5)) / (nanhigh(high1, 5) - nanlow(low1, 5))
    
    # 过去10个日内累计时点涨幅(%)
    time_zf1 = nanprod(time_incre_1, 10) - 1 
    
    # 过去10个日内累计时点涨幅(%)
    time_zf2 = nanprod(time_incre_2, 10) - 1 
    
    # 过去10个日内累计时点涨幅(%)
    time_zf3 = nanprod(time_incre_3, 10) - 1
    """
    )
    
    m9 = M.feature_extractor_user_function.v1(
        name='ret_sim',
        func=m9_func_bigquant_run
    )
    
    m4 = M.feature_extractor_user_function.v1(
        input_functions=m9.functions,
        name='RKurt',
        func=m4_func_bigquant_run
    )
    
    m6 = M.feature_extractor_user_function.v1(
        input_functions=m4.functions,
        name='RSkew',
        func=m6_func_bigquant_run
    )
    
    m5 = M.feature_extractor_user_function.v1(
        input_functions=m6.functions,
        name='RVar',
        func=m5_func_bigquant_run
    )
    
    m3 = M.feature_extractor_1m.v1(
        instruments=m2.data,
        features=m1.data,
        user_functions=m5.functions,
        start_date='',
        end_date='',
        before_start_days=20,
        workers=2,
        parallel_mode='测试',
        table_1m='level2_bar1m_CN_STOCK_A'
    )
    
    m7 = M.derived_feature_extractor.v3(
        input_data=m3.data,
        features=m8.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions=m7_user_functions_bigquant_run
    )
    
    In [85]:
    dt = m7.data.read()
    dt.shape, dt.date.min(), dt.date.max()
    
    Out[85]:
    ((729, 27), Timestamp('2020-01-02 00:00:00'), Timestamp('2020-12-31 00:00:00'))
    In [86]:
    dt.head().T
    
    Out[86]:
    0 1 2 3 4
    RSkew 1.047927 0.069402 0.052861 1.105082 0.102527
    AmtPerTrd 3798375704869.113281 4984674616497.397461 982002722790.138306 3251796984509.554199 4210276638194.229492
    AmtPerTrd_InFlow 3758339355203.48291 5121173580494.896484 877194456521.73938 3370598244452.932617 4811054511020.068359
    AmtPerTrd_OutFlow 4130580403494.038574 4987388430500.258789 1153572364062.499756 3075902569536.804688 3765773279027.903809
    AmtPerTrd_InFlow_ratio 0.98946 1.027384 0.893271 1.036534 1.142693
    AmtPerTrd_OutFlow_ratio 0.909882 1.026825 0.760416 1.095808 1.277574
    Apt_NetinFlow_bigorder -20864145438000012.0 11316890853999968.0 -106866700000000.0 5299090711000020.0 15874072872000020.0
    Apt_NetinFlow_bigorder_ratio -0.081146 0.033859 -0.03272 0.027679 0.061425
    mom_bigorder 1.013191 1.00238 1.000071 1.026039 0.971225
    trend_strength 12.368649 28.479662 0.040939 11.263328 25.984936
    time_incre_1 1.006587 1.014541 1.003195 1.004108 0.996322
    time_incre_2 1.001187 1.000307 1.0 1.001166 0.998442
    time_incre_3 1.004162 1.001217 1.0 1.005828 1.002823
    volu 153023187.0 101213040.0 10413412.0 111619481.0 80553629.0
    vwap 16.802871 33.027311 3.136721 17.149766 32.082389
    volu_vwap 2571228842.441957 3342794557.714462 32663970.247552 1914247973.968689 2584352889.505314
    high1 16.950001 33.580002 3.15 17.309999 32.810001
    low1 16.559999 32.509998 3.11 16.92 31.780001
    twap 16.837271 32.946728 3.137125 17.153553 32.026375
    smart_money 0.999845 0.999511 1.000156 1.000396 0.999661
    date 2020-01-02 00:00:00 2020-01-02 00:00:00 2020-01-02 00:00:00 2020-01-03 00:00:00 2020-01-03 00:00:00
    instrument 000001.SZA 000002.SZA 000005.SZA 000001.SZA 000002.SZA
    APB 0.0 0.0 0.0 0.0016 -0.001648
    ARPP -0.28905 -0.591843 -0.321869 -0.208595 -0.863126
    time_zf1 0.006587 0.014541 0.003195 0.010722 0.01081
    time_zf2 0.001187 0.000307 0.0 0.002354 -0.001251
    time_zf3 0.004162 0.001217 0.0 0.010014 0.004043
    In [87]:
    dt.isna().sum()
    
    Out[87]:
    RSkew                           0
    AmtPerTrd                       0
    AmtPerTrd_InFlow                0
    AmtPerTrd_OutFlow               0
    AmtPerTrd_InFlow_ratio          0
    AmtPerTrd_OutFlow_ratio         0
    Apt_NetinFlow_bigorder          0
    Apt_NetinFlow_bigorder_ratio    0
    mom_bigorder                    0
    trend_strength                  0
    time_incre_1                    0
    time_incre_2                    0
    time_incre_3                    0
    volu                            0
    vwap                            0
    volu_vwap                       0
    high1                           0
    low1                            0
    twap                            0
    smart_money                     0
    date                            0
    instrument                      0
    APB                             0
    ARPP                            0
    time_zf1                        0
    time_zf2                        0
    time_zf3                        0
    dtype: int64
    In [88]:
    dt.describe().T
    
    Out[88]:
    count mean std min 25% 50% 75% max
    RSkew 729.0 1.818319e-01 1.009579e+00 -5.650973e+00 -1.619114e-01 7.126568e-02 4.775091e-01 9.137768e+00
    AmtPerTrd 729.0 2.175409e+12 9.895664e+11 3.955845e+11 1.027790e+12 2.415818e+12 2.909778e+12 4.987064e+12
    AmtPerTrd_InFlow 729.0 2.271794e+12 1.052790e+12 1.000000e+00 1.092146e+12 2.526029e+12 3.044447e+12 5.538499e+12
    AmtPerTrd_OutFlow 729.0 2.139925e+12 9.679452e+11 1.000000e+00 1.040863e+12 2.394016e+12 2.872153e+12 4.987388e+12
    AmtPerTrd_InFlow_ratio 729.0 1.041056e+00 1.119821e-01 1.038475e-12 1.000086e+00 1.038709e+00 1.088233e+00 1.631763e+00
    AmtPerTrd_OutFlow_ratio 729.0 1.065391e+00 1.726532e-01 5.675684e-01 9.784438e-01 1.050460e+00 1.137883e+00 2.366064e+00
    Apt_NetinFlow_bigorder 729.0 3.211953e+15 1.068630e+16 -3.558523e+16 -1.541434e+14 1.477287e+14 6.066124e+15 9.689822e+16
    Apt_NetinFlow_bigorder_ratio 729.0 1.724218e-02 5.744633e-02 -2.080655e-01 -1.690086e-02 1.664743e-02 4.948400e-02 2.849511e-01
    mom_bigorder 729.0 1.000301e+00 2.446677e-02 9.116218e-01 9.843413e-01 9.992460e-01 1.013693e+00 1.115498e+00
    trend_strength 729.0 1.086825e+01 9.593094e+00 -2.706525e+00 1.646416e-01 1.021219e+01 2.062259e+01 2.908537e+01
    time_incre_1 729.0 9.998436e-01 5.355022e-03 9.777997e-01 9.965230e-01 1.000000e+00 1.003266e+00 1.032258e+00
    time_incre_2 729.0 1.000641e+00 2.781702e-03 9.865038e-01 9.996345e-01 1.000358e+00 1.002208e+00 1.017921e+00
    time_incre_3 729.0 1.000225e+00 4.635163e-03 9.821332e-01 9.969698e-01 1.000000e+00 1.002950e+00 1.037879e+00
    volu 729.0 6.831689e+07 5.879417e+07 2.374964e+06 1.268165e+07 6.424187e+07 9.578686e+07 4.711461e+08
    vwap 729.0 1.538140e+01 1.046962e+01 2.496388e+00 2.934159e+00 1.480402e+01 2.679793e+01 3.302731e+01
    volu_vwap 729.0 1.348274e+09 1.241247e+09 6.210434e+06 3.549785e+07 1.382898e+09 1.975755e+09 7.168741e+09
    high1 729.0 1.559391e+01 1.061565e+01 2.510000e+00 2.980000e+00 1.508000e+01 2.715000e+01 3.358000e+01
    low1 729.0 1.517647e+01 1.033370e+01 2.400000e+00 2.880000e+00 1.459000e+01 2.643000e+01 3.251000e+01
    twap 729.0 1.537552e+01 1.046607e+01 2.497479e+00 2.929281e+00 1.478233e+01 2.677460e+01 3.294673e+01
    smart_money 729.0 9.999439e-01 6.454189e-04 9.952794e-01 9.996920e-01 9.999779e-01 1.000234e+00 1.004406e+00
    APB 729.0 -1.526586e-03 4.536976e-03 -4.204176e-02 -2.531400e-03 -9.060856e-04 2.313322e-04 2.499784e-02
    ARPP 729.0 -5.516152e-01 2.521168e-01 -9.999564e-01 -7.814006e-01 -5.927083e-01 -3.163647e-01 -7.939286e-02
    time_zf1 729.0 -1.540589e-03 1.663200e-02 -4.631601e-02 -1.160197e-02 -1.159986e-03 8.981147e-03 6.711782e-02
    time_zf2 729.0 6.260170e-03 8.802292e-03 -2.226221e-02 5.506050e-04 6.723897e-03 1.217047e-02 3.980015e-02
    time_zf3 729.0 2.521302e-03 1.551592e-02 -4.663265e-02 -7.528366e-03 2.390159e-04 1.044426e-02 6.990938e-02
    In [89]:
    dt2 = dt.set_index(['date','instrument'])
    dt2.head()
    
    Out[89]:
    RSkew AmtPerTrd AmtPerTrd_InFlow AmtPerTrd_OutFlow AmtPerTrd_InFlow_ratio AmtPerTrd_OutFlow_ratio Apt_NetinFlow_bigorder Apt_NetinFlow_bigorder_ratio mom_bigorder trend_strength ... volu_vwap high1 low1 twap smart_money APB ARPP time_zf1 time_zf2 time_zf3
    date instrument
    2020-01-02 000001.SZA 1.047927 3.798376e+12 3.758339e+12 4.130580e+12 0.989460 0.909882 -2.086415e+16 -0.081146 1.013191 12.368649 ... 2.571229e+09 16.950001 16.559999 16.837271 0.999845 0.000000 -0.289050 0.006587 0.001187 0.004162
    000002.SZA 0.069402 4.984675e+12 5.121174e+12 4.987388e+12 1.027384 1.026825 1.131689e+16 0.033859 1.002380 28.479662 ... 3.342795e+09 33.580002 32.509998 32.946728 0.999511 0.000000 -0.591843 0.014541 0.000307 0.001217
    000005.SZA 0.052861 9.820027e+11 8.771945e+11 1.153572e+12 0.893271 0.760416 -1.068667e+14 -0.032720 1.000071 0.040939 ... 3.266397e+07 3.150000 3.110000 3.137125 1.000156 0.000000 -0.321869 0.003195 0.000000 0.000000
    2020-01-03 000001.SZA 1.105082 3.251797e+12 3.370598e+12 3.075903e+12 1.036534 1.095808 5.299091e+15 0.027679 1.026039 11.263328 ... 1.914248e+09 17.309999 16.920000 17.153553 1.000396 0.001600 -0.208595 0.010722 0.002354 0.010014
    000002.SZA 0.102527 4.210277e+12 4.811055e+12 3.765773e+12 1.142693 1.277574 1.587407e+16 0.061425 0.971225 25.984936 ... 2.584353e+09 32.810001 31.780001 32.026375 0.999661 -0.001648 -0.863126 0.010810 -0.001251 0.004043

    5 rows × 25 columns

    In [90]:
    import seaborn as sns
    import matplotlib.pyplot as plt
    
    In [102]:
    # %%time
    n=4#行数
    m=dt2.shape[1]
    x=m%n
    fig,axes=plt.subplots(n,int((m - x + (n if x else x))/n),figsize=(20, 10))
    axes=axes.reshape((-1,))
    for i in range(dt2.shape[1]):    
        sns.distplot(dt2[dt2.columns[i]],ax=axes[i])
    plt.subplots_adjust(wspace =0.3, hspace =0.3)
    
    In [92]:
    # for i in dt2.columns[:]:
    #     dt2[i].to_hdf(f'XX券商-XXXX研报-{i}.h5', 'fac', mode='w')
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]: