复制链接
克隆策略

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"-322:features","from_node_id":"-331:data"},{"to_node_id":"-322:instruments","from_node_id":"-312:data"},{"to_node_id":"-322:user_functions","from_node_id":"-421:functions"},{"to_node_id":"-421:input_functions","from_node_id":"-932:functions"}],"nodes":[{"node_id":"-331","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"_ret_log = np.log(close.pct_change().fillna(method='bfill'))\n_ret = close.pct_change().fillna(method='bfill')\n_rk1 = _ret.loc[93500:100000]\n_rk2 = _ret.loc[110000:113000]\n_rk3 = _ret.loc[143000:145700]\n_bm_ret = (bar1m_000905_HIX__close).pct_change().fillna(method='bfill')\n\n# 高频波动- 收益波动因子\nret_vol = np.power(RVar(_ret), 0.5)\n\n# 高频特征波动-特质波动因子\n_spec_ret = residual_return(_ret, _bm_ret)\nspec_ret_vol = np.power(RVar(_spec_ret), 0.5)\n\n# 高频系统波动-系统波动因子\nsys_vol = np.power(np.power(_ret - _spec_ret, 2).sum(), 0.5)\n\n# 高频特异度-特异度因子\nspeci_ratio = where(RVar(_ret)==0, 1, np.power(_spec_ret, 2).sum() / RVar(_ret))\n\n# 高频上行波动-上行波动因子\nup_vol = np.sqrt(np.power(_ret*where(_ret>0, 1, 0), 2).sum())\n\n# 高频下行波动-下行波动因子\ndown_vol = np.sqrt(np.power(_ret*where(_ret<0, 1, 0), 2).sum())\n\n# 上行波动占比因子\n#up_vol_ratio = where(RVar(_ret)==0, 1, up_vol / RVar(_ret)) # hcd \nup_vol_ratio = where(RVar(_ret)==0, 1, np.power(up_vol,2) / RVar(_ret)) #sst\n\n\n# 下行波动占比因子\n#down_vol_ratio = where(RVar(_ret)==0, 1, down_vol / RVar(_ret)) #hcd\ndown_vol_ratio = where(RVar(_ret)==0, 1, np.power(down_vol,2) / RVar(_ret)) #sst\n\n\n\n\n\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-331"}],"output_ports":[{"name":"data","node_id":"-331"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"-312","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2020-03-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2020-05-01","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"000001.SZA\n000002.SZA\n000005.SZA","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"-312"}],"output_ports":[{"name":"data","node_id":"-312"}],"cacheable":true,"seq_num":2,"comment":"","comment_collapsed":true},{"node_id":"-322","module_id":"BigQuantSpace.feature_extractor_1m.feature_extractor_1m-v1","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"20","type":"Literal","bound_global_parameter":null},{"name":"workers","value":2,"type":"Literal","bound_global_parameter":null},{"name":"parallel_mode","value":"测试","type":"Literal","bound_global_parameter":null},{"name":"table_1m","value":"level2_bar1m_CN_STOCK_A","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-322"},{"name":"features","node_id":"-322"},{"name":"user_functions","node_id":"-322"}],"output_ports":[{"name":"data","node_id":"-322"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-421","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"residual_return","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, bm_ret):\n from sklearn import linear_model\n idx = ret.index\n ret = np.array(ret.fillna(0)).reshape(-1,1)\n bm_ret = np.array(bm_ret.fillna(0)).reshape(-1,1)\n \n model = linear_model.LinearRegression().fit(ret, bm_ret)\n res = ret - model.intercept_ - bm_ret*model.coef_\n \n res = pd.Series(res.ravel(), index=idx)\n return res","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-421"}],"output_ports":[{"name":"functions","node_id":"-421"}],"cacheable":false,"seq_num":4,"comment":"","comment_collapsed":true},{"node_id":"-932","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RVar","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret):\n return np.power(ret, 2).sum()\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-932"}],"output_ports":[{"name":"functions","node_id":"-932"}],"cacheable":false,"seq_num":5,"comment":"","comment_collapsed":true}],"node_layout":"<node_postions><node_position Node='-331' Position='73.10943603515625,-2,200,200'/><node_position Node='-312' Position='-256,-4,200,200'/><node_position Node='-322' Position='68,214.75791931152344,200,200'/><node_position Node='-421' Position='408,-1,200,200'/><node_position Node='-932' Position='229.68304443359375,-117.67169952392578,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [2]:
    # 本代码由可视化策略环境自动生成 2021年12月9日 11:30
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    def m5_func_bigquant_run(df, ret):
        return np.power(ret, 2).sum()
    
    def m4_func_bigquant_run(df, ret, bm_ret):
        from sklearn import linear_model
        idx = ret.index
        ret = np.array(ret.fillna(0)).reshape(-1,1)
        bm_ret = np.array(bm_ret.fillna(0)).reshape(-1,1)
        
        model = linear_model.LinearRegression().fit(ret, bm_ret)
        res = ret - model.intercept_ - bm_ret*model.coef_
        
        res = pd.Series(res.ravel(), index=idx)
        return res
    
    m1 = M.input_features.v1(
        features="""_ret_log = np.log(close.pct_change().fillna(method='bfill'))
    _ret = close.pct_change().fillna(method='bfill')
    _rk1 = _ret.loc[93500:100000]
    _rk2 = _ret.loc[110000:113000]
    _rk3 = _ret.loc[143000:145700]
    _bm_ret = (bar1m_000905_HIX__close).pct_change().fillna(method='bfill')
    
    # 高频波动- 收益波动因子
    ret_vol = np.power(RVar(_ret), 0.5)
    
    # 高频特征波动-特质波动因子
    _spec_ret = residual_return(_ret, _bm_ret)
    spec_ret_vol = np.power(RVar(_spec_ret), 0.5)
    
    # 高频系统波动-系统波动因子
    sys_vol = np.power(np.power(_ret - _spec_ret, 2).sum(), 0.5)
    
    # 高频特异度-特异度因子
    speci_ratio = where(RVar(_ret)==0, 1, np.power(_spec_ret, 2).sum() / RVar(_ret))
    
    # 高频上行波动-上行波动因子
    up_vol = np.sqrt(np.power(_ret*where(_ret>0, 1, 0), 2).sum())
    
    # 高频下行波动-下行波动因子
    down_vol = np.sqrt(np.power(_ret*where(_ret<0, 1, 0), 2).sum())
    
    # 上行波动占比因子
    #up_vol_ratio = where(RVar(_ret)==0, 1, up_vol / RVar(_ret)) # hcd 
    up_vol_ratio = where(RVar(_ret)==0, 1, np.power(up_vol,2) / RVar(_ret)) #sst
    
    
    # 下行波动占比因子
    #down_vol_ratio = where(RVar(_ret)==0, 1, down_vol / RVar(_ret)) #hcd
    down_vol_ratio = where(RVar(_ret)==0, 1, np.power(down_vol,2) / RVar(_ret)) #sst
    
    
    
    
    
    """
    )
    
    m2 = M.instruments.v2(
        start_date='2020-03-01',
        end_date='2020-05-01',
        market='CN_STOCK_A',
        instrument_list="""000001.SZA
    000002.SZA
    000005.SZA""",
        max_count=0
    )
    
    m5 = M.feature_extractor_user_function.v1(
        name='RVar',
        func=m5_func_bigquant_run
    )
    
    m4 = M.feature_extractor_user_function.v1(
        input_functions=m5.functions,
        name='residual_return',
        func=m4_func_bigquant_run
    )
    
    m3 = M.feature_extractor_1m.v1(
        instruments=m2.data,
        features=m1.data,
        user_functions=m4.functions,
        start_date='',
        end_date='',
        before_start_days=20,
        workers=2,
        parallel_mode='测试',
        table_1m='level2_bar1m_CN_STOCK_A'
    )
    
    In [3]:
    dt = m3.data.read()
    dt.shape, dt.date.min(), dt.date.max()
    dt
    
    Out[3]:
    ret_vol spec_ret_vol sys_vol speci_ratio up_vol down_vol up_vol_ratio down_vol_ratio date instrument
    0 0.015570 0.014746 0.003125 0.896883 0.011097 0.010922 0.507950 0.492050 2020-02-10 000001.SZA
    1 0.027859 0.027566 0.001219 0.979073 0.023013 0.015701 0.682372 0.317628 2020-02-10 000002.SZA
    2 0.046506 0.046388 0.000992 0.994917 0.032360 0.033402 0.484169 0.515831 2020-02-10 000005.SZA
    3 0.018749 0.018226 0.001981 0.944918 0.014083 0.012378 0.564189 0.435811 2020-02-11 000001.SZA
    4 0.025979 0.025731 0.001262 0.981011 0.019466 0.017205 0.561418 0.438582 2020-02-11 000002.SZA
    ... ... ... ... ... ... ... ... ... ... ...
    169 0.016196 0.016101 0.000529 0.988243 0.012253 0.010591 0.572347 0.427653 2020-04-29 000002.SZA
    170 0.039274 0.039265 0.000225 0.999552 0.027835 0.027707 0.502316 0.497684 2020-04-29 000005.SZA
    171 0.021652 0.021627 0.001225 0.997707 0.015422 0.015198 0.507341 0.492659 2020-04-30 000001.SZA
    172 0.014198 0.013727 0.001909 0.934818 0.010013 0.010066 0.497342 0.502658 2020-04-30 000002.SZA
    173 0.036632 0.036619 0.001008 0.999253 0.025681 0.026123 0.491458 0.508542 2020-04-30 000005.SZA

    174 rows × 10 columns

    In [45]:
    dt.head().T
    
    Out[45]:
    0 1 2 3 4
    ret_vol 0.01557 0.027859 0.046506 0.018749 0.025979
    spec_ret_vol 0.014746 0.027566 0.046388 0.018226 0.025731
    sys_vol 0.003125 0.001219 0.000992 0.001981 0.001262
    speci_ratio 0.896883 0.979073 0.994917 0.944918 0.981011
    up_vol 0.011097 0.023013 0.03236 0.014083 0.019466
    down_vol 0.010922 0.015701 0.033402 0.012378 0.017205
    up_vol_ratio 45.772846 29.650953 14.961852 40.0616 28.841484
    down_vol_ratio 45.050758 20.22962 15.443329 35.209896 25.491758
    date 2020-02-10 00:00:00 2020-02-10 00:00:00 2020-02-10 00:00:00 2020-02-11 00:00:00 2020-02-11 00:00:00
    instrument 000001.SZA 000002.SZA 000005.SZA 000001.SZA 000002.SZA
    In [46]:
    dt.isna().sum()
    
    Out[46]:
    ret_vol           0
    spec_ret_vol      0
    sys_vol           0
    speci_ratio       0
    up_vol            0
    down_vol          0
    up_vol_ratio      0
    down_vol_ratio    0
    date              0
    instrument        0
    dtype: int64
    In [47]:
    dt.describe().T
    
    Out[47]:
    count mean std min 25% 50% 75% max
    ret_vol 174.0 0.026355 0.010155 0.009907 0.017162 0.024761 0.034587 0.058712
    spec_ret_vol 174.0 0.025609 0.010203 0.009571 0.016751 0.022937 0.034161 0.058633
    sys_vol 174.0 0.002484 0.002775 0.000000 0.000902 0.001685 0.003176 0.018924
    speci_ratio 174.0 0.942030 0.083889 0.476786 0.917441 0.977648 0.997982 1.001193
    up_vol 174.0 0.019071 0.007660 0.007058 0.012720 0.018480 0.025069 0.049393
    down_vol 174.0 0.018069 0.006991 0.006952 0.011857 0.016529 0.024140 0.038609
    up_vol_ratio 174.0 31.828780 12.543239 12.332359 20.708571 29.478662 41.771338 71.915939
    down_vol_ratio 174.0 30.387620 12.370962 9.207603 19.928149 26.486481 38.840605 70.833633
    In [48]:
    dt2 = dt.set_index(['date','instrument'])
    dt2.head()
    
    Out[48]:
    ret_vol spec_ret_vol sys_vol speci_ratio up_vol down_vol up_vol_ratio down_vol_ratio
    date instrument
    2020-02-10 000001.SZA 0.015570 0.014746 0.003125 0.896883 0.011097 0.010922 45.772846 45.050758
    000002.SZA 0.027859 0.027566 0.001219 0.979073 0.023013 0.015701 29.650953 20.229620
    000005.SZA 0.046506 0.046388 0.000992 0.994917 0.032360 0.033402 14.961852 15.443329
    2020-02-11 000001.SZA 0.018749 0.018226 0.001981 0.944918 0.014083 0.012378 40.061600 35.209896
    000002.SZA 0.025979 0.025731 0.001262 0.981011 0.019466 0.017205 28.841484 25.491758
    In [49]:
    import seaborn as sns
    import matplotlib.pyplot as plt
    
    In [50]:
    # %%time
    n=4#行数
    m=dt2.shape[1]
    x=m%n
    fig,axes=plt.subplots(n,int((m - x + (n if x else x))/n),figsize=(20, 10))
    axes=axes.reshape((-1,))
    for i in range(dt2.shape[1]):    
        sns.distplot(dt2[dt2.columns[i]],ax=axes[i])
    
    In [51]:
    # for i in dt2.columns[:]:
    #     dt2[i].to_hdf(f'XX券商-XXXX研报-{i}.h5', 'fac', mode='w')
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]: