克隆策略

    {"description":"实验创建于7/8/2021","graph":{"edges":[{"to_node_id":"-54:instruments","from_node_id":"-6:data"},{"to_node_id":"-65:input_data","from_node_id":"-54:data"},{"to_node_id":"-54:features","from_node_id":"-60:data"},{"to_node_id":"-65:features","from_node_id":"-60:data"},{"to_node_id":"-74:input_data","from_node_id":"-65:data"},{"to_node_id":"-92:input_data","from_node_id":"-65:data"}],"nodes":[{"node_id":"-6","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2021-06-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2021-07-01","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"-6"}],"output_ports":[{"name":"data","node_id":"-6"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"-54","module_id":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-54"},{"name":"features","node_id":"-54"}],"output_ports":[{"name":"data","node_id":"-54"}],"cacheable":true,"seq_num":2,"comment":"","comment_collapsed":true},{"node_id":"-60","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\n\nrk = rank(volume_0)\nbins = 
cbins(volume_0,3)\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-60"}],"output_ports":[{"name":"data","node_id":"-60"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-65","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"False","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"{}","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-65"},{"name":"features","node_id":"-65"}],"output_ports":[{"name":"data","node_id":"-65"}],"cacheable":true,"seq_num":4,"comment":"","comment_collapsed":true},{"node_id":"-74","module_id":"BigQuantSpace.filter.filter-v3","parameters":[{"name":"expr","value":"rk>=0.4 & rk<=0.6","type":"Literal","bound_global_parameter":null},{"name":"output_left_data","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-74"}],"output_ports":[{"name":"data","node_id":"-74"},{"name":"left_data","node_id":"-74"}],"cacheable":true,"seq_num":5,"comment":"","comment_collapsed":true},{"node_id":"-92","module_id":"BigQuantSpace.filter.filter-v3","parameters":[{"name":"expr","value":"bins==1","type":"Literal","bound_global_parameter":null},{"name":"output_left_data","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-92"}],"output_ports":[{"name":"data","node_id":"-92"},{"name":"left_data","node_id":"-92"}],"cacheable":true,"seq_num":6,"comment":"","comment_collapsed":true}],"node_layout":"<node_postions><node_position Node='-6' 
Position='278,-9.068674087524414,200,200'/><node_position Node='-54' Position='532,182,200,200'/><node_position Node='-60' Position='733,-10,200,200'/><node_position Node='-65' Position='508,310,200,200'/><node_position Node='-74' Position='315,447,200,200'/><node_position Node='-92' Position='885,447,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [93]:
    # This code was auto-generated by the visual strategy environment on 2021-07-08 17:37.
    # This code cell can only be edited in visual mode. You may also copy the code
    # into a new code cell or a strategy and modify it there.
    #
    # Pipeline overview (mirrors the module graph serialized above this cell):
    #   m1 instruments -> m2 base feature extraction -> m4 derived features
    #   m3 feature definitions feed both m2 and m4
    #   m4 output is filtered twice, independently: m5 (by rk) and m6 (by bins)
    
    
    # m1: instrument list for market CN_STOCK_A between 2021-06-01 and 2021-07-01.
    # instrument_list='' and max_count=0 presumably mean "no restriction" -- TODO confirm
    # against the platform documentation.
    m1 = M.instruments.v2(
        start_date='2021-06-01',
        end_date='2021-07-01',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # m3: feature definitions, one per line inside the string below.
    #   rk   = rank(volume_0)
    #   bins = cbins(volume_0,3)
    # The '#' lines inside the string are part of the literal passed to the module
    # (they are the platform's own feature-list comments), not Python comments.
    m3 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    
    rk = rank(volume_0)
    bins = cbins(volume_0,3)
    """
    )
    
    # m2: general feature extraction for the m1 instruments using the m3 feature list.
    # start_date/end_date are left empty here; presumably the range is then taken from
    # the instruments input -- TODO confirm.
    m2 = M.general_feature_extractor.v7(
        instruments=m1.data,
        features=m3.data,
        start_date='',
        end_date='',
        before_start_days=0
    )
    
    # m4: derived feature extraction on top of m2's data with the same m3 feature list.
    # Rows with NA are kept (drop_na=False) and extra columns are kept
    # (remove_extra_columns=False); no user-defined functions are supplied.
    m4 = M.derived_feature_extractor.v3(
        input_data=m2.data,
        features=m3.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # m5: keep the rows of m4 whose rk lies in the closed range 0.4..0.6.
    # NOTE(review): in plain Python `&` binds tighter than the comparisons. The recorded
    # output of this notebook only shows rk values inside 0.4..0.6, so the filter module
    # evidently evaluates this as (rk>=0.4) and (rk<=0.6) -- presumably pandas
    # query-style parsing; confirm before reusing the expression elsewhere.
    m5 = M.filter.v3(
        input_data=m4.data,
        expr='rk>=0.4 & rk<=0.6',
        output_left_data=False
    )
    
    # m6: keep the rows of m4 whose bins value equals 1. Reads m4 directly, so it is
    # independent of the m5 filter.
    m6 = M.filter.v3(
        input_data=m4.data,
        expr='bins==1',
        output_left_data=False
    )
    
    In [94]:
    import pandas as pd

    # Widen the print width so all columns of a frame fit on one row.
    pd.set_option('display.width', 5000)

    # Read the derived-feature output of m4 and print it.
    df_m4 = m4.data.read()
    print("抽取的数据:\n", df_m4)

    # Take the cross-section of a single day and order it by ascending volume_0,
    # so the rk / bins values can be read against the raw volume.
    is_target_day = df_m4["date"] == "2021-06-01"
    df = df_m4.loc[is_target_day].sort_values("volume_0")

    print("\n某天所有股票的分组情况\n", df)

    # Note: the cbins feature is exposed under its alias column `bins` (see the
    # printed header), so a per-day summary of the buckets would describe that
    # column rather than one named "cbins(volume_0,3)".
    
    抽取的数据:
                 date  instrument  volume_0        rk  bins
    0     2021-06-01  000001.SZA  62501796  0.939007     2
    1     2021-06-02  000001.SZA  49752702  0.924218     2
    2     2021-06-03  000001.SZA  40090085  0.903659     2
    3     2021-06-04  000001.SZA  75673942  0.962260     2
    4     2021-06-07  000001.SZA  44791410  0.924751     2
    ...          ...         ...       ...       ...   ...
    95334 2021-06-25  689009.SHA   2002814  0.218721     0
    95335 2021-06-28  689009.SHA   3129153  0.325597     0
    95336 2021-06-29  689009.SHA   1957139  0.212121     0
    95337 2021-06-30  689009.SHA   1597740  0.186783     0
    95338 2021-07-01  689009.SHA   1721291  0.174771     0
    
    [95339 rows x 5 columns]
    
    某天所有股票的分组情况
                 date  instrument   volume_0        rk  bins
    58905 2021-06-01  600306.SHA      14800  0.000232     0
    88893 2021-06-01  605499.SHA     119255  0.000464     0
    81098 2021-06-01  603511.SHA     146280  0.000696     0
    25740 2021-06-01  002729.SZA     150300  0.000928     0
    58135 2021-06-01  600265.SHA     179926  0.001160     0
    ...          ...         ...        ...       ...   ...
    73017 2021-06-01  601377.SHA  416552478  0.999072     2
    5803  2021-06-01  000725.SZA  514200957  0.999304     2
    57365 2021-06-01  600221.SHA  544070476  0.999536     2
    53530 2021-06-01  600010.SHA  582612377  0.999768     2
    93549 2021-06-01  688538.SHA  610435036  1.000000     2
    
    [4312 rows x 5 columns]
    
    In [99]:
    # Print both filter outputs in turn: m5 (rk range filter), then m6 (bins filter).
    # `df` is rebound on each pass, so it ends up holding the m6 result.
    for heading, filter_module in (
        ("\n rank过滤结果:\n", m5),
        ("\n cbins过滤结果:\n", m6),
    ):
        df = filter_module.data.read()
        print(heading, df)
    
     rank过滤结果:
                 date  instrument  volume_0        rk  bins
    46    2021-06-03  000004.SZA   5543127  0.462483     1
    48    2021-06-07  000004.SZA   8423939  0.594351     1
    51    2021-06-10  000004.SZA   8370235  0.576647     1
    52    2021-06-11  000004.SZA   5616145  0.444932     1
    58    2021-06-22  000004.SZA   7869339  0.566951     1
    ...          ...         ...       ...       ...   ...
    95278 2021-06-08  688819.SHA   4554925  0.420310     1
    95291 2021-06-28  688819.SHA   7010689  0.551241     1
    95292 2021-06-29  688819.SHA   6139080  0.518825     1
    95293 2021-06-30  688819.SHA   4724901  0.458008     1
    95294 2021-07-01  688819.SHA   5209162  0.452752     1
    
    [19071 rows x 5 columns]
    
     cbins过滤结果:
                 date  instrument  volume_0        rk  bins
    45    2021-06-02  000004.SZA   4712557  0.398841     1
    46    2021-06-03  000004.SZA   5543127  0.462483     1
    47    2021-06-04  000004.SZA   3281800  0.340820     1
    48    2021-06-07  000004.SZA   8423939  0.594351     1
    51    2021-06-10  000004.SZA   8370235  0.576647     1
    ...          ...         ...       ...       ...   ...
    95291 2021-06-28  688819.SHA   7010689  0.551241     1
    95292 2021-06-29  688819.SHA   6139080  0.518825     1
    95293 2021-06-30  688819.SHA   4724901  0.458008     1
    95294 2021-07-01  688819.SHA   5209162  0.452752     1
    95317 2021-06-01  689009.SHA   3885368  0.362941     1
    
    [31781 rows x 5 columns]
    
    In [ ]: