克隆策略

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"-322:features","from_node_id":"-331:data"},{"to_node_id":"-322:instruments","from_node_id":"-312:data"},{"to_node_id":"-1136:input_data","from_node_id":"-322:data"},{"to_node_id":"-322:user_functions","from_node_id":"-932:functions"},{"to_node_id":"-932:input_functions","from_node_id":"-937:functions"},{"to_node_id":"-937:input_functions","from_node_id":"-1111:functions"},{"to_node_id":"-1111:input_functions","from_node_id":"-1116:functions"},{"to_node_id":"-1116:input_functions","from_node_id":"-1121:functions"},{"to_node_id":"-1121:input_functions","from_node_id":"-1126:functions"},{"to_node_id":"-1136:features","from_node_id":"-1131:data"},{"to_node_id":"-1126:input_functions","from_node_id":"-584:functions"}],"nodes":[{"node_id":"-331","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"_c = close\n_o = open\n_ret = ret_sim(_c, _o)\n_rk1 = _ret.loc[93500:100000]\n_rk2 = _ret.loc[110000:113000]\n_rk3 = _ret.loc[143000:145700]\n\n# 1 收益方差\nRVar1 = RVar1(_ret)\nRVar2 = RVar2(_ret)\nRVar3 = (RVar2(_rk1) + RVar2(_rk2) + RVar2(_rk3)) / 3\n\n# 2 收益偏度\nRSkew1 = RSkew1(_ret, RVar1)\nRSkew2 = RSkew2(_ret, RVar2)\nRSkew3 = (RSkew2(_rk1, RVar2) + RSkew2(_rk2, RVar2) + RSkew2(_rk3, RVar2)) / 3\n\n\n# 3 收益峰度\nRKurt1 = RKurt1(_ret, RVar1)\nRKurt2 = RKurt2(_ret, RVar2)\nRKurt3 = (RKurt2(_rk1, RVar2) + RKurt2(_rk2, RVar2) + RKurt2(_rk3, RVar2)) / 
3\n\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-331"}],"output_ports":[{"name":"data","node_id":"-331"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"-312","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2014-03-03","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2021-12-31","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"000001.SZA\n000002.SZA\n000005.SZA","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"-312"}],"output_ports":[{"name":"data","node_id":"-312"}],"cacheable":true,"seq_num":2,"comment":"","comment_collapsed":true},{"node_id":"-322","module_id":"BigQuantSpace.feature_extractor_1m.feature_extractor_1m-v1","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"10","type":"Literal","bound_global_parameter":null},{"name":"workers","value":2,"type":"Literal","bound_global_parameter":null},{"name":"parallel_mode","value":"测试","type":"Literal","bound_global_parameter":null},{"name":"table_1m","value":"level2_bar1m_CN_STOCK_A","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-322"},{"name":"features","node_id":"-322"},{"name":"user_functions","node_id":"-322"}],"output_ports":[{"name":"data","node_id":"-322"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-932","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RVar1","type":"Literal","b
ound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret):\n return (ret**2).sum()\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-932"}],"output_ports":[{"name":"functions","node_id":"-932"}],"cacheable":false,"seq_num":4,"comment":"","comment_collapsed":true},{"node_id":"-937","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RVar2","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret):\n return np.power(ret - (ret.mean()), 2).sum()\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-937"}],"output_ports":[{"name":"functions","node_id":"-937"}],"cacheable":false,"seq_num":5,"comment":"","comment_collapsed":true},{"node_id":"-1111","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RSkew1","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 0\n else:\n result = (np.power(ret, 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)\n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1111"}],"output_ports":[{"name":"functions","node_id":"-1111"}],"cacheable":false,"seq_num":6,"comment":"","comment_collapsed":true},{"node_id":"-1116","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RSkew2","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 0\n else:\n result = (np.power(ret - (ret.mean()), 3).sum()) * np.sqrt(240) / np.power(RVar, 1.5)\n return 
result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1116"}],"output_ports":[{"name":"functions","node_id":"-1116"}],"cacheable":false,"seq_num":7,"comment":"","comment_collapsed":true},{"node_id":"-1121","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RKurt1","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 1\n else:\n result = (np.power(ret, 4).sum()) * (240) / np.power(RVar, 2)\n \n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1121"}],"output_ports":[{"name":"functions","node_id":"-1121"}],"cacheable":false,"seq_num":8,"comment":"","comment_collapsed":true},{"node_id":"-1126","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"RKurt2","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, ret, RVar):\n if RVar == 0:\n result = 1\n else:\n result = (np.power(ret - (ret.mean()), 4).sum()) * (240) / np.power(RVar, 2)\n return result\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-1126"}],"output_ports":[{"name":"functions","node_id":"-1126"}],"cacheable":false,"seq_num":9,"comment":"","comment_collapsed":true},{"node_id":"-1131","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"RVar1_mean = nanmean(RVar1, 5)\nRVar2_mean = nanmean(RVar2, 5)\nRVar3_mean = nanmean(RVar3, 5)\n\nRSkew1_mean = nanmean(RSkew1, 5)\nRSkew2_mean = nanmean(RSkew2, 5)\nRSkew3_mean = nanmean(RSkew3, 5)\n\nRKurt1_mean = nanmean(RKurt1, 5)\nRKurt2_mean = nanmean(RKurt2, 5)\nRKurt3_mean = nanmean(RKurt3, 
5)","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-1131"}],"output_ports":[{"name":"data","node_id":"-1131"}],"cacheable":true,"seq_num":10,"comment":"","comment_collapsed":true},{"node_id":"-1136","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"False","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"def nanmean(df, x_name, N):\n return df.groupby('instrument')[x_name.name].apply(lambda x: x.rolling(N,1).mean())\n\nbigquant_run = {\n 'nanmean': nanmean\n}","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-1136"},{"name":"features","node_id":"-1136"}],"output_ports":[{"name":"data","node_id":"-1136"}],"cacheable":true,"seq_num":11,"comment":"","comment_collapsed":true},{"node_id":"-584","module_id":"BigQuantSpace.feature_extractor_user_function.feature_extractor_user_function-v1","parameters":[{"name":"name","value":"ret_sim","type":"Literal","bound_global_parameter":null},{"name":"func","value":"def bigquant_run(df, close, op):\n res = close.pct_change()\n res.iloc[0] = close.iloc[0] / op.iloc[0] - 1\n return res\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_functions","node_id":"-584"}],"output_ports":[{"name":"functions","node_id":"-584"}],"cacheable":false,"seq_num":12,"comment":"","comment_collapsed":true}],"node_layout":"<node_postions><node_position Node='-331' Position='79,-5,200,200'/><node_position Node='-312' Position='-256,-2,200,200'/><node_position Node='-322' Position='64,213,200,200'/><node_position Node='-932' 
Position='64,-114,200,200'/><node_position Node='-937' Position='65,-189,200,200'/><node_position Node='-1111' Position='66,-264,200,200'/><node_position Node='-1116' Position='69,-342,200,200'/><node_position Node='-1121' Position='56,-442,200,200'/><node_position Node='-1126' Position='50,-517,200,200'/><node_position Node='-1131' Position='394,202,200,200'/><node_position Node='-1136' Position='305,333,200,200'/><node_position Node='-584' Position='48.76287841796875,-603.71337890625,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [17]:
    # 本代码由可视化策略环境自动生成 2021年7月9日15:34
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    def m12_func_bigquant_run(df, close, op):
        """Simple per-row returns, with the first row measured against the open.

        Args:
            df: Accepted for the user-function signature; not used here.
            close: Series of close prices.
            op: Series of open prices; only its first element is read.

        Returns:
            Series of ``close.pct_change()`` whose first element (otherwise NaN)
            is replaced by ``close[0] / op[0] - 1``.
        """
        # The first row has no previous close, so fall back to its own open.
        first_bar_ret = close.iloc[0] / op.iloc[0] - 1
        returns = close.pct_change()
        returns.iloc[0] = first_bar_ret
        return returns
    
    def m9_func_bigquant_run(df, ret, RVar):
        """Kurtosis-style ratio computed from mean-centred returns.

        Args:
            df: Accepted for the user-function signature; not used here.
            ret: Series of returns.
            RVar: Scalar variance-style denominator supplied by the caller.

        Returns:
            1 when ``RVar`` equals 0, otherwise
            ``sum((ret - mean(ret)) ** 4) * 240 / RVar ** 2``.
        """
        if RVar == 0:
            return 1
        deviations = ret - ret.mean()
        fourth_power_sum = np.power(deviations, 4).sum()
        # 240: presumably the number of 1-minute bars per trading day — TODO confirm.
        return fourth_power_sum * 240 / np.power(RVar, 2)
    
    def m8_func_bigquant_run(df, ret, RVar):
        """Kurtosis-style ratio computed from raw (uncentred) returns.

        Args:
            df: Accepted for the user-function signature; not used here.
            ret: Series of returns.
            RVar: Scalar variance-style denominator supplied by the caller.

        Returns:
            1 when ``RVar`` equals 0, otherwise ``sum(ret ** 4) * 240 / RVar ** 2``.
        """
        if RVar == 0:
            return 1
        fourth_power_sum = np.power(ret, 4).sum()
        # 240: presumably the number of 1-minute bars per trading day — TODO confirm.
        return fourth_power_sum * 240 / np.power(RVar, 2)
    
    def m7_func_bigquant_run(df, ret, RVar):
        """Skewness-style ratio computed from mean-centred returns.

        Args:
            df: Accepted for the user-function signature; not used here.
            ret: Series of returns.
            RVar: Scalar variance-style denominator supplied by the caller.

        Returns:
            0 when ``RVar`` equals 0, otherwise
            ``sum((ret - mean(ret)) ** 3) * sqrt(240) / RVar ** 1.5``.
        """
        if RVar == 0:
            return 0
        deviations = ret - ret.mean()
        cube_sum = np.power(deviations, 3).sum()
        # 240: presumably the number of 1-minute bars per trading day — TODO confirm.
        return cube_sum * np.sqrt(240) / np.power(RVar, 1.5)
    
    def m6_func_bigquant_run(df, ret, RVar):
        """Skewness-style ratio computed from raw (uncentred) returns.

        Args:
            df: Accepted for the user-function signature; not used here.
            ret: Series of returns.
            RVar: Scalar variance-style denominator supplied by the caller.

        Returns:
            0 when ``RVar`` equals 0, otherwise
            ``sum(ret ** 3) * sqrt(240) / RVar ** 1.5``.
        """
        if RVar == 0:
            return 0
        cube_sum = np.power(ret, 3).sum()
        # 240: presumably the number of 1-minute bars per trading day — TODO confirm.
        return cube_sum * np.sqrt(240) / np.power(RVar, 1.5)
    
    def m5_func_bigquant_run(df, ret):
        """Sum of squared deviations of the returns from their own mean.

        Args:
            df: Accepted for the user-function signature; not used here.
            ret: Series of returns.

        Returns:
            ``sum((ret - mean(ret)) ** 2)``; note this is a sum, not an average.
        """
        deviations = ret - ret.mean()
        return np.power(deviations, 2).sum()
    
    def m4_func_bigquant_run(df, ret):
        """Sum of squared returns (no mean adjustment).

        Args:
            df: Accepted for the user-function signature; not used here.
            ret: Series of returns.

        Returns:
            ``sum(ret ** 2)``.
        """
        squared = ret ** 2
        return squared.sum()
    
    def nanmean(df, x_name, N):
        """Rolling mean of one column, computed separately for each instrument.

        Args:
            df: DataFrame holding an 'instrument' column and the column named by
                ``x_name.name``.
            x_name: Series whose ``.name`` selects the column of ``df`` to average.
            N: Rolling window length, in rows.

        Returns:
            Series indexed exactly like ``df``. Each value is the mean over the
            current row and up to ``N - 1`` preceding rows of the same instrument
            (``min_periods=1``, so the first rows average over what is available).
        """
        # transform() always yields a result aligned to df.index. groupby.apply()
        # prepends the group key to the index on pandas >= 2.0, which would break
        # assigning the result back as a column.
        return df.groupby('instrument')[x_name.name].transform(
            lambda x: x.rolling(N, min_periods=1).mean()
        )
    
    # User-function registry passed to the derived feature extractor (m11):
    # maps the name used inside feature expressions to its Python implementation.
    m11_user_functions_bigquant_run = {
        'nanmean': nanmean
    }
    
    # m1: feature expressions consumed by the minute-bar feature extractor (m3).
    # The expressions call ret_sim / RVar* / RSkew* / RKurt*, which match the
    # `name=` values of the user functions registered in the m12 -> m4 chain.
    # NOTE(review): the `.loc[93500:100000]`-style slices look like HHMMSS
    # time-of-day windows on the intraday index — confirm the index format.
    # NOTE(review): RSkew3 / RKurt3 pass the whole-series RVar2 as the denominator
    # for each sub-window — confirm this is intended rather than a per-window value.
    m1 = M.input_features.v1(
        features="""_c = close
    _o = open
    _ret = ret_sim(_c, _o)
    _rk1 = _ret.loc[93500:100000]
    _rk2 = _ret.loc[110000:113000]
    _rk3 = _ret.loc[143000:145700]
    
    # 1 收益方差
    RVar1 = RVar1(_ret)
    RVar2 = RVar2(_ret)
    RVar3 = (RVar2(_rk1) + RVar2(_rk2) + RVar2(_rk3)) / 3
    
    # 2 收益偏度
    RSkew1 = RSkew1(_ret, RVar1)
    RSkew2 = RSkew2(_ret, RVar2)
    RSkew3 = (RSkew2(_rk1, RVar2) + RSkew2(_rk2, RVar2) + RSkew2(_rk3, RVar2)) / 3
    
    
    # 3 收益峰度
    RKurt1 = RKurt1(_ret, RVar1)
    RKurt2 = RKurt2(_ret, RVar2)
    RKurt3 = (RKurt2(_rk1, RVar2) + RKurt2(_rk2, RVar2) + RKurt2(_rk3, RVar2)) / 3
    
    """
    )
    
    # m2: instrument universe and date range fed to m3 — three explicitly listed
    # instruments on market CN_STOCK_A, from 2014-03-03 through 2021-12-31.
    # NOTE(review): max_count=0 presumably means "no cap" — confirm in the module docs.
    m2 = M.instruments.v2(
        start_date='2014-03-03',
        end_date='2021-12-31',
        market='CN_STOCK_A',
        instrument_list="""000001.SZA
    000002.SZA
    000005.SZA""",
        max_count=0
    )
    
    # m10: derived-feature expressions for m11 — a window-5 `nanmean` (the rolling
    # mean registered in m11_user_functions_bigquant_run) of each of the nine
    # factors produced by m3, stored under a `_mean` suffix.
    m10 = M.input_features.v1(
        features="""RVar1_mean = nanmean(RVar1, 5)
    RVar2_mean = nanmean(RVar2, 5)
    RVar3_mean = nanmean(RVar3, 5)
    
    RSkew1_mean = nanmean(RSkew1, 5)
    RSkew2_mean = nanmean(RSkew2, 5)
    RSkew3_mean = nanmean(RSkew3, 5)
    
    RKurt1_mean = nanmean(RKurt1, 5)
    RKurt2_mean = nanmean(RKurt2, 5)
    RKurt3_mean = nanmean(RKurt3, 5)"""
    )
    
    # User-function chain: each module registers one function under `name` and
    # receives the previous module's `functions` output as `input_functions`.
    # Only the last link (m4.functions) is handed to the extractor m3, so the
    # chain presumably accumulates all seven functions — confirm in the module docs.
    # Registration order: ret_sim -> RKurt2 -> RKurt1 -> RSkew2 -> RSkew1 -> RVar2 -> RVar1.
    m12 = M.feature_extractor_user_function.v1(
        name='ret_sim',
        func=m12_func_bigquant_run
    )
    
    m9 = M.feature_extractor_user_function.v1(
        input_functions=m12.functions,
        name='RKurt2',
        func=m9_func_bigquant_run
    )
    
    m8 = M.feature_extractor_user_function.v1(
        input_functions=m9.functions,
        name='RKurt1',
        func=m8_func_bigquant_run
    )
    
    m7 = M.feature_extractor_user_function.v1(
        input_functions=m8.functions,
        name='RSkew2',
        func=m7_func_bigquant_run
    )
    
    m6 = M.feature_extractor_user_function.v1(
        input_functions=m7.functions,
        name='RSkew1',
        func=m6_func_bigquant_run
    )
    
    m5 = M.feature_extractor_user_function.v1(
        input_functions=m6.functions,
        name='RVar2',
        func=m5_func_bigquant_run
    )
    
    m4 = M.feature_extractor_user_function.v1(
        input_functions=m5.functions,
        name='RVar1',
        func=m4_func_bigquant_run
    )
    
    # m3: minute-bar feature extraction. Combines the instruments/date range (m2),
    # the feature expressions (m1) and the registered user functions (m4.functions),
    # reading from table 'level2_bar1m_CN_STOCK_A'.
    # Empty start_date/end_date presumably defer to the range supplied by m2 — confirm.
    # parallel_mode='测试' is Chinese for "test"; what that mode does is not visible here.
    m3 = M.feature_extractor_1m.v1(
        instruments=m2.data,
        features=m1.data,
        user_functions=m4.functions,
        start_date='',
        end_date='',
        before_start_days=10,
        workers=2,
        parallel_mode='测试',
        table_1m='level2_bar1m_CN_STOCK_A'
    )
    
    # m11: evaluates the m10 expressions on the output of m3, using the `nanmean`
    # user function. drop_na=False and remove_extra_columns=False are passed, and
    # the recorded outputs show the m3 factor columns alongside the `_mean` columns.
    m11 = M.derived_feature_extractor.v3(
        input_data=m3.data,
        features=m10.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions=m11_user_functions_bigquant_run
    )
    
    In [18]:
    # Load the derived-feature output of m11 and report its shape and date span.
    # NOTE(review): the recorded output spans only 2020-01-02..2020-12-31, while m2
    # requests 2014-03-03..2021-12-31 — confirm whether the saved output is stale
    # or the data source only covers that period.
    dt = m11.data.read()
    dt.shape, dt.date.min(), dt.date.max()
    
    Out[18]:
    ((729, 20), Timestamp('2020-01-02 00:00:00'), Timestamp('2020-12-31 00:00:00'))
    In [19]:
    # First five rows, transposed so each column of the wide frame reads as a row.
    dt.head().T
    
    Out[19]:
    0 1 2 3 4
    RVar1 0.000431 0.000587 0.001088 0.00027 0.000276
    RVar2 0.000431 0.000587 0.001087 0.000269 0.000275
    RVar3 0.000038 0.000067 0.000115 0.000025 0.000027
    RSkew1 1.047927 0.069402 0.052861 1.105082 0.102527
    RSkew2 0.926045 0.125816 -0.006688 0.94235 0.340594
    RSkew3 0.035828 -0.1152 0.005461 0.012674 0.02601
    RKurt1 8.230084 8.235238 2.750429 7.394212 7.387867
    RKurt2 8.09438 8.248393 2.750763 7.210986 7.550165
    RKurt3 0.302805 0.880233 0.236959 0.302917 0.434376
    date 2020-01-02 00:00:00 2020-01-02 00:00:00 2020-01-02 00:00:00 2020-01-03 00:00:00 2020-01-03 00:00:00
    instrument 000001.SZA 000002.SZA 000005.SZA 000001.SZA 000002.SZA
    RVar1_mean 0.000431 0.000587 0.001088 0.000351 0.000432
    RVar2_mean 0.000431 0.000587 0.001087 0.00035 0.000431
    RVar3_mean 0.000038 0.000067 0.000115 0.000032 0.000047
    RSkew1_mean 1.047927 0.069402 0.052861 1.076504 0.085965
    RSkew2_mean 0.926045 0.125816 -0.006688 0.934197 0.233205
    RSkew3_mean 0.035828 -0.1152 0.005461 0.024251 -0.044595
    RKurt1_mean 8.230084 8.235238 2.750429 7.812148 7.811553
    RKurt2_mean 8.09438 8.248393 2.750763 7.652683 7.899279
    RKurt3_mean 0.302805 0.880233 0.236959 0.302861 0.657304
    In [20]:
    # Missing-value count per column.
    dt.isna().sum()
    
    Out[20]:
    RVar1          0
    RVar2          0
    RVar3          0
    RSkew1         0
    RSkew2         0
    RSkew3         0
    RKurt1         0
    RKurt2         0
    RKurt3         0
    date           0
    instrument     0
    RVar1_mean     0
    RVar2_mean     0
    RVar3_mean     0
    RSkew1_mean    0
    RSkew2_mean    0
    RSkew3_mean    0
    RKurt1_mean    0
    RKurt2_mean    0
    RKurt3_mean    0
    dtype: int64
    In [21]:
    # Summary statistics, transposed to one row per column.
    dt.describe().T
    
    Out[21]:
    count mean std min 25% 50% 75% max
    RVar1 729.0 0.000755 0.000607 0.000000 0.000273 0.000533 0.001194 0.004743
    RVar2 729.0 0.000754 0.000606 0.000000 0.000273 0.000530 0.001193 0.004740
    RVar3 729.0 0.000092 0.000077 0.000000 0.000034 0.000068 0.000141 0.000830
    RSkew1 729.0 0.181832 1.009579 -5.650973 -0.161911 0.071266 0.477509 9.137768
    RSkew2 729.0 0.181950 0.954902 -5.596322 -0.130481 0.073321 0.414229 8.972824
    RSkew3 729.0 0.018801 0.094182 -0.456704 -0.016899 0.004197 0.042500 0.810618
    RKurt1 729.0 6.856221 7.767365 1.000000 3.610083 4.902572 6.959804 110.387276
    RKurt2 729.0 6.811771 7.681253 1.000000 3.602799 4.911075 6.927811 109.074507
    RKurt3 729.0 0.666789 0.625066 0.026559 0.324698 0.468799 0.764274 6.877303
    RVar1_mean 729.0 0.000754 0.000525 0.000111 0.000295 0.000544 0.001239 0.002458
    RVar2_mean 729.0 0.000752 0.000525 0.000110 0.000295 0.000544 0.001238 0.002447
    RVar3_mean 729.0 0.000092 0.000063 0.000015 0.000036 0.000069 0.000145 0.000314
    RSkew1_mean 729.0 0.182745 0.448256 -1.608074 -0.039612 0.122743 0.375442 1.961031
    RSkew2_mean 729.0 0.183152 0.420955 -1.586971 -0.025254 0.121888 0.368437 1.933729
    RSkew3_mean 729.0 0.018477 0.042737 -0.115200 -0.005227 0.010453 0.034867 0.220185
    RKurt1_mean 729.0 6.869545 3.935674 2.579543 4.347250 5.937566 7.914999 28.760960
    RKurt2_mean 729.0 6.824995 3.897658 2.579571 4.346171 5.892742 7.884892 28.571781
    RKurt3_mean 729.0 0.665326 0.329462 0.236959 0.420258 0.584520 0.828738 2.243435
    In [22]:
    # Move date/instrument into the index so that dt2.columns holds only the
    # factor columns, which the plotting cell iterates over.
    dt2 = dt.set_index(['date','instrument'])
    dt2.head()
    
    Out[22]:
    RVar1 RVar2 RVar3 RSkew1 RSkew2 RSkew3 RKurt1 RKurt2 RKurt3 RVar1_mean RVar2_mean RVar3_mean RSkew1_mean RSkew2_mean RSkew3_mean RKurt1_mean RKurt2_mean RKurt3_mean
    date instrument
    2020-01-02 000001.SZA 0.000431 0.000431 0.000038 1.047927 0.926045 0.035828 8.230084 8.094380 0.302805 0.000431 0.000431 0.000038 1.047927 0.926045 0.035828 8.230084 8.094380 0.302805
    000002.SZA 0.000587 0.000587 0.000067 0.069402 0.125816 -0.115200 8.235238 8.248393 0.880233 0.000587 0.000587 0.000067 0.069402 0.125816 -0.115200 8.235238 8.248393 0.880233
    000005.SZA 0.001088 0.001087 0.000115 0.052861 -0.006688 0.005461 2.750429 2.750763 0.236959 0.001088 0.001087 0.000115 0.052861 -0.006688 0.005461 2.750429 2.750763 0.236959
    2020-01-03 000001.SZA 0.000270 0.000269 0.000025 1.105082 0.942350 0.012674 7.394212 7.210986 0.302917 0.000351 0.000350 0.000032 1.076504 0.934197 0.024251 7.812148 7.652683 0.302861
    000002.SZA 0.000276 0.000275 0.000027 0.102527 0.340594 0.026010 7.387867 7.550165 0.434376 0.000432 0.000431 0.000047 0.085965 0.233205 -0.044595 7.811553 7.899279 0.657304
    In [23]:
    import seaborn as sns
    import matplotlib.pyplot as plt
    
    In [24]:
    # %%time
    n=4#行数
    m=dt2.shape[1]
    x=m%n
    fig,axes=plt.subplots(n,int((m - x + (n if x else x))/n),figsize=(20, 10))
    axes=axes.reshape((-1,))
    for i in range(dt2.shape[1]):    
        sns.distplot(dt2[dt2.columns[i]],ax=axes[i])
    plt.subplots_adjust(wspace =0.3, hspace =0.3)
    
    In [25]:
    # for i in dt2.columns[:]:
    #     dt2[i].to_hdf(f'XX券商-XXXX研报-{i}.h5', 'fac', mode='w')
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]:
     
    
    In [ ]: