克隆策略

练习

  • 提取并计算 log(volume_0) 因子的历史序列值
  • 按股票分组,对每组数据分别用前值向下填充缺失值
  • 删除填充后仍含 NaN 值的行
  • 按日期截面对 log(volume_0) 做数据标准化

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"-107:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"to_node_id":"-54:input_2","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-243:input_2","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-107:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-114:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-114:input_data","from_node_id":"-107:data"},{"to_node_id":"-243:input_1","from_node_id":"-114:data"},{"to_node_id":"-261:input_data","from_node_id":"-243:data"},{"to_node_id":"-54:input_1","from_node_id":"-261:data"}],"nodes":[{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2015-01-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2016-01-01","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"# #号开始的表示注释\n# 
多个特征,每行一个,可以包含基础特征和衍生特征\nlog(volume_0)","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-107","module_id":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-107"},{"name":"features","node_id":"-107"}],"output_ports":[{"name":"data","node_id":"-107"}],"cacheable":true,"seq_num":15,"comment":"","comment_collapsed":true},{"node_id":"-114","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"False","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-114"},{"name":"features","node_id":"-114"}],"output_ports":[{"name":"data","node_id":"-114"}],"cacheable":true,"seq_num":16,"comment":"","comment_collapsed":true},{"node_id":"-54","module_id":"BigQuantSpace.standardlize.standardlize-v8","parameters":[{"name":"columns_input","value":"[]","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-54"},{"name":"input_2","node_id":"-54"}],"output_ports":[{"name":"data","node_id":"-54"}],"cacheable":true,"seq_
num":5,"comment":"","comment_collapsed":true},{"node_id":"-243","module_id":"BigQuantSpace.fill_nan.fill_nan-v1","parameters":[{"name":"columns_input","value":"","type":"Literal","bound_global_parameter":null},{"name":"group_key","value":"['instrument']","type":"Literal","bound_global_parameter":null},{"name":"method","value":"向下填充","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-243"},{"name":"input_2","node_id":"-243"}],"output_ports":[{"name":"data","node_id":"-243"}],"cacheable":true,"seq_num":11,"comment":"后面各行缺失值使用前值填充","comment_collapsed":false},{"node_id":"-261","module_id":"BigQuantSpace.dropnan.dropnan-v1","parameters":[],"input_ports":[{"name":"input_data","node_id":"-261"}],"output_ports":[{"name":"data","node_id":"-261"}],"cacheable":true,"seq_num":6,"comment":"删除前面含有缺失值的行","comment_collapsed":false}],"node_layout":"<node_postions><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='166,104,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-24' Position='569,80,200,200'/><node_position Node='-107' Position='381,185,200,200'/><node_position Node='-114' Position='388,262,200,200'/><node_position Node='-54' Position='403,506,200,200'/><node_position Node='-243' Position='405,328,200,200'/><node_position Node='-261' Position='394,414,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [4]:
    # Auto-generated by the visual strategy environment on 2021-10-14 21:32.
    # This code cell can only be edited in visual mode. You can also copy the code,
    # paste it into a new code cell or strategy, and then modify it.
    #
    # Pipeline: instruments (m1) + feature list (m3) -> base feature extraction (m15)
    # -> derived feature extraction (m16) -> per-instrument forward fill (m11)
    # -> drop remaining NaN rows (m6) -> standardization (m5).
    
    
    # Step 1: instrument universe for market CN_STOCK_A between 2015-01-01 and 2016-01-01.
    # NOTE(review): empty instrument_list and max_count=0 presumably mean
    # "all instruments, no limit" -- confirm against the platform docs.
    m1 = M.instruments.v2(
        start_date='2015-01-01',
        end_date='2016-01-01',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # Step 2: feature list. Per the text of the string itself, lines starting with '#'
    # are comments and features are listed one per line; the only feature here is
    # log(volume_0). The string is runtime input and must not be edited as a comment.
    m3 = M.input_features.v1(
        features="""# #号开始的表示注释
    # 多个特征,每行一个,可以包含基础特征和衍生特征
    log(volume_0)"""
    )
    
    # Step 3: extract the base data for the instruments in m1 and the features in m3.
    # NOTE(review): start_date/end_date are empty here; presumably the date range is
    # taken from m1 -- confirm.
    m15 = M.general_feature_extractor.v7(
        instruments=m1.data,
        features=m3.data,
        start_date='',
        end_date='',
        before_start_days=0
    )
    
    # Step 4: compute the derived feature expression(s) from m3 on top of m15's output.
    # drop_na=False leaves NaN handling to the explicit fill/drop steps below;
    # remove_extra_columns=False keeps the non-feature columns as well.
    m16 = M.derived_feature_extractor.v3(
        input_data=m15.data,
        features=m3.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False
    )
    
    # Step 5: fill missing values separately within each 'instrument' group using the
    # "fill downward" method, i.e. later missing rows take the preceding value.
    m11 = M.fill_nan.v1(
        input_1=m16.data,
        input_2=m3.data,
        group_key=['instrument'],
        method='向下填充'
    )
    
    # Step 6: drop the rows that still contain NaN after the fill (leading rows of a
    # group that have no earlier value to fill from).
    m6 = M.dropnan.v1(
        input_data=m11.data
    )
    
    # Step 7: standardize the cleaned data.
    # NOTE(review): columns_input is the string '[]'; presumably the columns to
    # standardize then come from the feature list passed as input_2 -- confirm.
    m5 = M.standardlize.v8(
        input_1=m6.data,
        input_2=m3.data,
        columns_input='[]'
    )
    
    In [5]:
    # Preview the first rows of m6 (data after the fill_nan and dropnan steps);
    # the standardized output is held separately in m5.
    '''查看缺失数据处理后的结果'''
    m6.data.read_df().head()
    
    Out[5]:
    date instrument volume_0 log(volume_0)
    0 2015-01-05 000001.SZA 286043643 19.471655
    1 2015-01-06 000001.SZA 216642140 19.193757
    2 2015-01-07 000001.SZA 170012067 18.951380
    3 2015-01-08 000001.SZA 140771421 18.762648
    4 2015-01-09 000001.SZA 250850023 19.340366