复制链接
克隆策略

高频因子抽取案例

(注:普通用户没有level使用权限,确需使用请联系小Q)

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"-726:features","from_node_id":"-331:data"},{"to_node_id":"-726:instruments","from_node_id":"-312:data"},{"to_node_id":"-726:user_functions","from_node_id":"-932:functions"},{"to_node_id":"-932:input_functions","from_node_id":"-937:functions"},{"to_node_id":"-937:input_functions","from_node_id":"-1111:functions"},{"to_node_id":"-1111:input_functions","from_node_id":"-1116:functions"},{"to_node_id":"-1116:input_functions","from_node_id":"-1121:functions"},{"to_node_id":"-1121:input_functions","from_node_id":"-1126:functions"},{"to_node_id":"-1126:input_functions","from_node_id":"-584:functions"},{"to_node_id":"-584:input_functions","from_node_id":"-320:functions"},{"to_node_id":"-320:input_functions","from_node_id":"-671:functions"},{"to_node_id":"-671:input_functions","from_node_id":"-7333:functions"}],"nodes":[{"node_id":"-331","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"# _close = resample(close, '1min') ## 可修改K线粒度参数\n# _open = resample(open, '1min') ## 可修改K线粒度参数\n# _volume = resample_cumsum(volume, '1min') ## 可修改K线粒度参数\n# _amount = resample_cumsum(amount, '1min') ## 可修改K线粒度参数\n_close = close\n_open = open\n_volume = volume\n_amount = amount\n_ret = ret_sim(_close, _open)\n_rk1 = _ret.iloc[:60] ## 开盘后前N分钟\n_rk2 = _ret.iloc[60:-60] ## 除开开盘后前N分钟和收盘前N分钟\n_rk3 = _ret.iloc[-60:] ## 收盘前N分钟\n\n_vr1 = _volume.iloc[:30]\n_vr2 = _volume.iloc[30:60]\n_vr3 = _volume.iloc[90:120]\n_vr4 = _volume.iloc[-120:-90]\n_vr5 = _volume.iloc[-60:-30]\n_vr6 = _volume.iloc[-30:]\n\n# 1 收益方差 ht001\nRetVar1 = RVar1(_ret)\nRetVar2 = RVar2(_ret)\nRetVar3 = (RVar2(_rk1) + RVar2(_rk2) + RVar2(_rk3)) / 3\n\n# 2 收益偏度 ht001\nRetSkew1 = RSkew1(_ret, RetVar1)\nRetSkew2 = RSkew2(_ret, RetVar2)\nRetSkew3 = (RSkew2(_rk1, RetVar2) + RSkew2(_rk2, RetVar2) + RSkew2(_rk3, RetVar2)) / 3\n\n# 3 收益峰度 ht001\nRetKurt1 = RKurt1(_ret, RetVar1)\nRetKurt2 = RKurt2(_ret, RetVar2)\nRetKurt3 = (RKurt2(_rk1, RetVar2) + RKurt2(_rk2, RetVar2) + RKurt2(_rk3, RetVar2)) / 3\n\n# 4 收益波动因子 ht002\nret_vol = np.power(RVar1(_ret), 0.5)\n\n# 5 上行波动因子 ht002\nup_vol = np.sqrt(np.power(_ret*where(_ret>0, 1, 0), 2).sum())\n\n# 6 下行波动因子 ht002\ndown_vol = np.sqrt(np.power(_ret*where(_ret<0, 1, 0), 2).sum())\n\n# 上行波动占比因子 ht002\nup_vol_ratio = nandivided(up_vol, RVar1(_ret))\n\n# 7 下行波动占比因子 ht002\ndown_vol_ratio = nandivided(down_vol, RVar1(_ret))\n\n# 8 成交量占比 ht003\nvolume_ratio_1 = _vr1.sum() / _volume.sum()\nvolume_ratio_2 = _vr2.sum() / _volume.sum()\nvolume_ratio_3 = _vr3.sum() / _volume.sum()\nvolume_ratio_4 = _vr4.sum() / _volume.sum()\nvolume_ratio_5 = _vr5.sum() / _volume.sum()\nvolume_ratio_6 = _vr6.sum() / _volume.sum()\n\n# 9 量价相关性 ht003\n_corr= _volume.corr(_close, method='pearson')\ncorr_PV = where(np.isnan(_corr), 0, _corr)\n\n# 10 资金流入因子 ht003\n_diff_close = _close.diff().fillna(0)\n_amount_sum = _amount.sum()\nflowIn_oneday = _volume.mul(_close).mul(_diff_close).div((_diff_close).abs()).fillna(0).sum()\n\n# 11 趋势强度因子 ht003\ntrend_str = where(_diff_close.iloc[1:].abs().sum()==0, 1, _close.iloc[-1] - _close.iloc[0] / (_diff_close.iloc[1:].abs().sum()))\n\n# 12 改进反转因子 ht003\ninverse_plus = (_close.iloc[-1] - _close.iloc[30]) / _close.iloc[30]\n\n# 13 日内动量因子 ht003\ndaily_mom_1 = (_close.iloc[15] - _close.iloc[0]) / _close.iloc[0]\ndaily_mom_2 = (_close.iloc[30] - _close.iloc[0]) / _close.iloc[0]\ndaily_mom_3 = (_close.iloc[-30] - _close.iloc[0]) / _close.iloc[0]\ndaily_mom_4 = (_close.iloc[-1] - _close.iloc[0]) / _close.iloc[0]","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-331"}],"output_ports":[{"name":"data","node_id":"-331"}],"cacheable":false,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"-312","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2022-01-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2022-06-01","type":"Literal","bound_global_parameter":"交易日期"},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"000001.SZA","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"-312"}],"output_ports":[{"name":"data","node_id":"-312"}],"cacheable":false,"seq_num":2,"comment":"","comment_collapsed":true},{"node_id":"-932","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RVar1","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret):\n return (ret**2).sum()\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-932"}],"output_ports":[{"name":"functions","node_id":"-932"}],"cacheable":false,"seq_num":4,"comment":"","comment_collapsed":true},{"node_id":"-937","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RVar2","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret):\n return np.power(ret - (ret.mean()), 2).sum()\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-937"}],"output_ports":[{"name":"functions","node_id":"-937"}],"cacheable":false,"seq_num":5,"comment":"","comment_collapsed":true},{"node_id":"-1111","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RSkew1","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 0\n else:\n result = (np.power(ret, 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)\n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1111"}],"output_ports":[{"name":"functions","node_id":"-1111"}],"cacheable":false,"seq_num":6,"comment":"","comment_collapsed":true},{"node_id":"-1116","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RSkew2","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 0\n else:\n result = (np.power(ret - (ret.mean()), 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)\n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1116"}],"output_ports":[{"name":"functions","node_id":"-1116"}],"cacheable":false,"seq_num":7,"comment":"","comment_collapsed":true},{"node_id":"-1121","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RKurt1","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 1\n else:\n result = (np.power(ret, 4).sum()) * (240) / np.power(RVar, 2)\n \n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1121"}],"output_ports":[{"name":"functions","node_id":"-1121"}],"cacheable":false,"seq_num":8,"comment":"","comment_collapsed":true},{"node_id":"-1126","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RKurt2","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 1\n else:\n result = (np.power(ret - (ret.mean()), 4).sum()) * (240) / np.power(RVar, 2)\n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1126"}],"output_ports":[{"name":"functions","node_id":"-1126"}],"cacheable":false,"seq_num":9,"comment":"","comment_collapsed":true},{"node_id":"-584","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"ret_sim","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, close, op):\n res = close.pct_change()\n res.iloc[0] = close.iloc[0] / op.iloc[0] - 1\n return res\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-584"}],"output_ports":[{"name":"functions","node_id":"-584"}],"cacheable":false,"seq_num":12,"comment":"","comment_collapsed":true},{"node_id":"-726","module_id":"BigQuantSpace.feature_extractor_1m.feature_extractor_1m-v2","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"0","type":"Literal","bound_global_parameter":null},{"name":"workers","value":"10","type":"Literal","bound_global_parameter":null},{"name":"parallel_mode","value":"单机","type":"Literal","bound_global_parameter":null},{"name":"table_1m","value":"level2_bar1m_CN_STOCK_A","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-726"},{"name":"features","node_id":"-726"},{"name":"user_functions","node_id":"-726"}],"output_ports":[{"name":"data","node_id":"-726"}],"cacheable":false,"seq_num":14,"comment":"","comment_collapsed":true},{"node_id":"-320","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"resample","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, x, freq):\n a = df[['date', x.name]].set_index('date')\n res = a.resample(freq, label='right', closed='right').last().dropna()\n return res[x.name]\n \n \n\n\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-320"}],"output_ports":[{"name":"functions","node_id":"-320"}],"cacheable":false,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-671","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"resample_cumsum","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, x, freq):\n def jugde_time(x):\n if x.strftime('%H:%M:%S') >= '11:31:00' and x.strftime('%H:%M:%S') <= '13:00:00':\n return False\n else:\n return True\n a = df[['date', x.name]].set_index('date')\n res = a.resample(freq, label='right', closed='right').sum()\n res = res.loc[[i for i in res.index if jugde_time(i)==True]]\n return res[x.name]\n \n \n\n\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-671"}],"output_ports":[{"name":"functions","node_id":"-671"}],"cacheable":false,"seq_num":11,"comment":"","comment_collapsed":true},{"node_id":"-7333","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"nandivided","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, x, y):\n if x == 0 or y == 0:\n res = 1\n elif np.isnan(x) or np.isnan(y):\n res = 1\n else:\n res = x / y\n return res\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-7333"}],"output_ports":[{"name":"functions","node_id":"-7333"}],"cacheable":false,"seq_num":15,"comment":"","comment_collapsed":true}],"node_layout":"<node_postions><node_position Node='-331' Position='-212,-1,200,200'/><node_position Node='-312' Position='-537.7727355957031,-3,200,200'/><node_position Node='-932' Position='55,-121,200,200'/><node_position Node='-937' Position='55,-184,200,200'/><node_position Node='-1111' Position='55,-246,200,200'/><node_position Node='-1116' Position='57,-315,200,200'/><node_position Node='-1121' Position='55,-383,200,200'/><node_position Node='-1126' Position='56,-456,200,200'/><node_position Node='-584' Position='361,-127,200,200'/><node_position Node='-726' Position='-160,127,200,200'/><node_position Node='-320' Position='361,-198,200,200'/><node_position Node='-671' Position='361,-274,200,200'/><node_position Node='-7333' Position='362,-361,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [7]:
    # 本代码由可视化策略环境自动生成 2022年6月18日 10:14
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    def m15_func_bigquant_run(df, x, y):
        if x == 0 or y == 0:
            res = 1
        elif np.isnan(x) or np.isnan(y):
            res = 1
        else:
            res = x / y
        return res
    
    def m11_func_bigquant_run(df, x, freq):
        def jugde_time(x):
            if x.strftime('%H:%M:%S') >= '11:31:00' and x.strftime('%H:%M:%S') <= '13:00:00':
                return False
            else:
                return True
        a = df[['date', x.name]].set_index('date')
        res = a.resample(freq, label='right', closed='right').sum()
        res = res.loc[[i for i in res.index if jugde_time(i)==True]]
        return res[x.name]
     
        
    
    
    
    def m3_func_bigquant_run(df, x, freq):
        a = df[['date', x.name]].set_index('date')
        res = a.resample(freq, label='right', closed='right').last().dropna()
        return res[x.name]
     
        
    
    
    
    def m12_func_bigquant_run(df, close, op):
        res = close.pct_change()
        res.iloc[0] = close.iloc[0] / op.iloc[0] - 1
        return res
    
    def m9_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 1
        else:
            result = (np.power(ret - (ret.mean()), 4).sum()) * (240) / np.power(RVar, 2)
        return result
    
    def m8_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 1
        else:
            result = (np.power(ret, 4).sum()) * (240) / np.power(RVar, 2)
        
        return result
    
    def m7_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 0
        else:
            result = (np.power(ret - (ret.mean()), 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)
        return result
    
    def m6_func_bigquant_run(df, ret, RVar):
        if RVar == 0:
            result = 0
        else:
            result = (np.power(ret, 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)
        return result
    
    def m5_func_bigquant_run(df, ret):
        return np.power(ret - (ret.mean()), 2).sum()
    
    def m4_func_bigquant_run(df, ret):
        return (ret**2).sum()
    
    
    m1 = M.input_features.v1(
        features="""# _close = resample(close, '1min')  ## 可修改K线粒度参数
    # _open = resample(open, '1min')  ## 可修改K线粒度参数
    # _volume = resample_cumsum(volume, '1min')  ## 可修改K线粒度参数
    # _amount = resample_cumsum(amount, '1min')  ## 可修改K线粒度参数
    _close = close
    _open = open
    _volume = volume
    _amount = amount
    _ret = ret_sim(_close, _open)
    _rk1 = _ret.iloc[:60]  ## 开盘后前N分钟
    _rk2 = _ret.iloc[60:-60]  ## 除开开盘后前N分钟和收盘前N分钟
    _rk3 = _ret.iloc[-60:]  ## 收盘前N分钟
    
    _vr1 = _volume.iloc[:30]
    _vr2 = _volume.iloc[30:60]
    _vr3 = _volume.iloc[90:120]
    _vr4 = _volume.iloc[-120:-90]
    _vr5 = _volume.iloc[-60:-30]
    _vr6 = _volume.iloc[-30:]
    
    # 1 收益方差 ht001
    RetVar1 = RVar1(_ret)
    RetVar2 = RVar2(_ret)
    RetVar3 = (RVar2(_rk1) + RVar2(_rk2) + RVar2(_rk3)) / 3
    
    # 2 收益偏度 ht001
    RetSkew1 = RSkew1(_ret, RetVar1)
    RetSkew2 = RSkew2(_ret, RetVar2)
    RetSkew3 = (RSkew2(_rk1, RetVar2) + RSkew2(_rk2, RetVar2) + RSkew2(_rk3, RetVar2)) / 3
    
    # 3 收益峰度 ht001
    RetKurt1 = RKurt1(_ret, RetVar1)
    RetKurt2 = RKurt2(_ret, RetVar2)
    RetKurt3 = (RKurt2(_rk1, RetVar2) + RKurt2(_rk2, RetVar2) + RKurt2(_rk3, RetVar2)) / 3
    
    # 4 收益波动因子 ht002
    ret_vol = np.power(RVar1(_ret), 0.5)
    
    # 5 上行波动因子 ht002
    up_vol = np.sqrt(np.power(_ret*where(_ret>0, 1, 0), 2).sum())
    
    # 6 下行波动因子 ht002
    down_vol = np.sqrt(np.power(_ret*where(_ret<0, 1, 0), 2).sum())
    
    # 上行波动占比因子 ht002
    up_vol_ratio = nandivided(up_vol, RVar1(_ret))
    
    # 7 下行波动占比因子 ht002
    down_vol_ratio = nandivided(down_vol, RVar1(_ret))
    
    # 8 成交量占比 ht003
    volume_ratio_1 = _vr1.sum() / _volume.sum()
    volume_ratio_2 = _vr2.sum() / _volume.sum()
    volume_ratio_3 = _vr3.sum() / _volume.sum()
    volume_ratio_4 = _vr4.sum() / _volume.sum()
    volume_ratio_5 = _vr5.sum() / _volume.sum()
    volume_ratio_6 = _vr6.sum() / _volume.sum()
    
    # 9 量价相关性 ht003
    _corr= _volume.corr(_close, method='pearson')
    corr_PV = where(np.isnan(_corr), 0, _corr)
    
    # 10 资金流入因子 ht003
    _diff_close = _close.diff().fillna(0)
    _amount_sum = _amount.sum()
    flowIn_oneday = _volume.mul(_close).mul(_diff_close).div((_diff_close).abs()).fillna(0).sum()
    
    # 11 趋势强度因子 ht003
    trend_str = where(_diff_close.iloc[1:].abs().sum()==0, 1, _close.iloc[-1] - _close.iloc[0] / (_diff_close.iloc[1:].abs().sum()))
    
    # 12 改进反转因子 ht003
    inverse_plus = (_close.iloc[-1] - _close.iloc[30]) / _close.iloc[30]
    
    # 13 日内动量因子 ht003
    daily_mom_1 = (_close.iloc[15] - _close.iloc[0]) / _close.iloc[0]
    daily_mom_2 = (_close.iloc[30] - _close.iloc[0]) / _close.iloc[0]
    daily_mom_3 = (_close.iloc[-30] - _close.iloc[0]) / _close.iloc[0]
    daily_mom_4 = (_close.iloc[-1] - _close.iloc[0]) / _close.iloc[0]""",
        m_cached=False
    )
    
    m2 = M.instruments.v2(
        start_date='2022-01-01',
        end_date=T.live_run_param('trading_date', '2022-06-01'),
        market='CN_STOCK_A',
        instrument_list='000001.SZA',
        m_cached=False
    )
    
    m15 = M.feature_extractor_user_function.v1(
        name='nandivided',
        func=m15_func_bigquant_run
    )
    
    m11 = M.feature_extractor_user_function.v1(
        input_functions=m15.functions,
        name='resample_cumsum',
        func=m11_func_bigquant_run
    )
    
    m3 = M.feature_extractor_user_function.v1(
        input_functions=m11.functions,
        name='resample',
        func=m3_func_bigquant_run
    )
    
    m12 = M.feature_extractor_user_function.v1(
        input_functions=m3.functions,
        name='ret_sim',
        func=m12_func_bigquant_run
    )
    
    m9 = M.feature_extractor_user_function.v1(
        input_functions=m12.functions,
        name='RKurt2',
        func=m9_func_bigquant_run
    )
    
    m8 = M.feature_extractor_user_function.v1(
        input_functions=m9.functions,
        name='RKurt1',
        func=m8_func_bigquant_run
    )
    
    m7 = M.feature_extractor_user_function.v1(
        input_functions=m8.functions,
        name='RSkew2',
        func=m7_func_bigquant_run
    )
    
    m6 = M.feature_extractor_user_function.v1(
        input_functions=m7.functions,
        name='RSkew1',
        func=m6_func_bigquant_run
    )
    
    m5 = M.feature_extractor_user_function.v1(
        input_functions=m6.functions,
        name='RVar2',
        func=m5_func_bigquant_run
    )
    
    m4 = M.feature_extractor_user_function.v1(
        input_functions=m5.functions,
        name='RVar1',
        func=m4_func_bigquant_run
    )
    
    m14 = M.feature_extractor_1m.v2(
        instruments=m2.data,
        features=m1.data,
        user_functions=m4.functions,
        start_date='',
        end_date='',
        before_start_days=0,
        workers=10,
        parallel_mode='单机',
        table_1m='level2_bar1m_CN_STOCK_A',
        m_cached=False
    )
    

    高频数据

    数据

    • OHLC
    • bidprice{i}
    • bidamount{i}

    降频

    • tick到分钟
    • 分钟到日频
    In [10]:
    df = DataSource("level2_bar1m_CN_STOCK_A").read("000001.SZA", start_date="2022-01-01", end_date="2022-06-01")
    df.head()
    
    Out[10]:
    instrument date time trading_day open high low close pre_close amount ... beyond_13_bid_volume beyond_13_bid_amount beyond_13_ask_volume beyond_13_ask_amount initiative_bid_amount initiative_bid_volume initiative_bid_rate initiative_ask_volume initiative_ask_amount initiative_ask_rate
    0 000001.SZA 2022-01-04 09:31:00 93100 20220104 16.480000 16.500000 16.440001 16.459999 16.48 60816578.64 ... 260391.0 4290283.0 1004073.0 1.653366e+07 21085060.0 1279818 0.344963 2431182 40037520.0 0.655036
    1 000001.SZA 2022-01-04 09:32:00 93200 20220104 16.459999 16.459999 16.370001 16.370001 16.48 25554242.00 ... 68100.0 1118511.0 263900.0 4.330755e+06 7065064.0 430100 0.269639 1166500 19136880.0 0.730361
    2 000001.SZA 2022-01-04 09:33:00 93300 20220104 16.350000 16.360001 16.260000 16.270000 16.48 40321701.00 ... 57500.0 938285.0 306700.0 4.995846e+06 14007283.0 859500 0.342729 1648800 26862544.0 0.657271
    3 000001.SZA 2022-01-04 09:34:00 93400 20220104 16.280001 16.309999 16.270000 16.309999 16.48 22162048.60 ... 48200.0 785045.0 70720.0 1.151668e+06 7061149.0 433400 0.336149 856220 13944840.0 0.663851
    4 000001.SZA 2022-01-04 09:35:00 93500 20220104 16.309999 16.350000 16.309999 16.350000 16.48 13450956.00 ... 30700.0 501760.0 162400.0 2.651019e+06 5051724.0 309200 0.367553 532400 8692499.0 0.632447

    5 rows × 145 columns

    高频因子的使用

    将高频因子降频到日频,再加入现有的日频模型中进行训练和预测

    In [11]:
    feature = m14.data.read()
    print(feature.shape)
    feature.head()
    
    (95, 30)
    
    Out[11]:
    RetVar1 RetVar2 RetVar3 RetSkew1 RetSkew2 RetSkew3 RetKurt1 RetKurt2 RetKurt3 ret_vol ... corr_PV flowIn_oneday trend_str inverse_plus daily_mom_1 daily_mom_2 daily_mom_3 daily_mom_4 date instrument
    0 0.000273 0.000272 0.000089 -0.683860 -0.814874 -0.195990 12.412301 12.587360 3.759866 0.016523 ... -0.135974 7.355418e+07 10.128305 0.024600 -0.010936 -0.012151 0.007898 0.012151 2022-01-04 000001.SZA
    1 0.000301 0.000297 0.000097 1.125726 0.769316 0.121811 8.313232 8.095633 2.561294 0.017358 ... -0.408275 7.309825e+08 11.385235 0.005276 0.020408 0.024010 0.027611 0.029412 2022-01-05 000001.SZA
    2 0.000238 0.000238 0.000079 0.925200 0.916396 0.312438 10.696391 10.685749 3.575786 0.015437 ... 0.236379 -5.870872e+07 10.608548 -0.006384 0.005276 0.009965 0.001172 0.003517 2022-01-06 000001.SZA
    3 0.000184 0.000184 0.000061 0.872579 0.789079 0.260318 6.151647 6.067733 2.015853 0.013574 ... 0.133496 1.483394e+08 10.280966 0.000000 0.005266 0.006437 0.007022 0.006437 2022-01-07 000001.SZA
    4 0.000309 0.000309 0.000102 -0.261299 -0.199290 0.012959 7.223917 7.211026 2.372922 0.017592 ... 0.423949 -1.893492e+07 11.619047 0.002332 -0.005211 -0.006949 -0.006949 -0.004632 2022-01-10 000001.SZA

    5 rows × 30 columns

    In [ ]: