抽取数据当作因子参与训练报错

策略分享
标签: #<Tag:0x00007fa199d59ea8>

(developer) #1
克隆策略

利用原始数据和表达式引擎构建因子

1、3年平均净利润增长率

2、3年平均资产负债率

3、3年净利润连续增长

    {"Description":"实验创建于2019/4/22","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-245:input_data","SourceOutputPortId":"-222:data"},{"DestinationInputPortId":"-222:features","SourceOutputPortId":"-232:data"},{"DestinationInputPortId":"-245:features","SourceOutputPortId":"-232:data"},{"DestinationInputPortId":"-2209:features","SourceOutputPortId":"-232:data"},{"DestinationInputPortId":"-222:instruments","SourceOutputPortId":"-236:data"},{"DestinationInputPortId":"-254:input_data","SourceOutputPortId":"-245:data"},{"DestinationInputPortId":"-279:input_ds","SourceOutputPortId":"-254:data"},{"DestinationInputPortId":"-287:input_data","SourceOutputPortId":"-279:sorted_data"},{"DestinationInputPortId":"-300:input_data","SourceOutputPortId":"-287:data"},{"DestinationInputPortId":"-287:features","SourceOutputPortId":"-295:data"},{"DestinationInputPortId":"-62:input_data","SourceOutputPortId":"-300:data"},{"DestinationInputPortId":"-304:input_1","SourceOutputPortId":"-62:data"},{"DestinationInputPortId":"-2209:training_ds","SourceOutputPortId":"-62:data"}],"ModuleNodes":[{"Id":"-222","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"financial_statement_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-222"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-222"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-222","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"Comment":"","CommentCollapsed":true},{"Id":"-232","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# 
#号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\n\n# 财报季度编号\nfs_quarter_index\n# 净利率同比增长率\nfs_net_profit_yoy\n# 净利润\nfs_total_profit\n# 资产负债率\nzcfzl=(fs_current_liabilities+fs_non_current_liabilities)/(fs_current_assets+fs_non_current_assets)","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-232"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-232","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"Comment":"","CommentCollapsed":true},{"Id":"-236","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2012-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2019-04-21","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-236"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-236","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"Comment":"","CommentCollapsed":true},{"Id":"-245","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPorts
Internal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-245"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-245"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-245","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"Comment":"","CommentCollapsed":true},{"Id":"-254","ModuleId":"BigQuantSpace.filter.filter-v3","ModuleParameters":[{"Name":"expr","Value":"fs_quarter_index==4","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-254"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-254","OutputType":null},{"Name":"left_data","NodeId":"-254","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"Comment":"","CommentCollapsed":true},{"Id":"-279","ModuleId":"BigQuantSpace.sort.sort-v4","ModuleParameters":[{"Name":"sort_by","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"group_by","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"keep_columns","Value":"--","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"ascending","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_ds","NodeId":"-279"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"sort_by_ds","NodeId":"-279"}],"OutputPortsInternal":[{"Name":"sorted_data","NodeId":"-279","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":9,"Comment":"","CommentCollapsed":true},{"Id":"-287","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParam
eter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-287"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-287"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-287","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"Comment":"","CommentCollapsed":true},{"Id":"-295","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\n#3年平均净利润增长率\npjjlyzzl=mean(fs_net_profit_yoy,3)\n#3年平均资产负债率\npjzcfzl=mean(zcfzl,3)\n# 
3年净利润连续增长\nlrydz_3year=where((fs_total_profit>shift(fs_total_profit,1))&(shift(fs_total_profit,1)>shift(fs_total_profit,2)),1,0)","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-295"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-295","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"Comment":"","CommentCollapsed":true},{"Id":"-300","ModuleId":"BigQuantSpace.filter.filter-v3","ModuleParameters":[{"Name":"expr","Value":"zcfzl>60","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-300"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-300","OutputType":null},{"Name":"left_data","NodeId":"-300","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"Comment":"","CommentCollapsed":true},{"Id":"-304","ModuleId":"BigQuantSpace.df_to_csv.df_to_csv-v2","ModuleParameters":[{"Name":"name","Value":"财务数据选股.csv","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-304"}],"OutputPortsInternal":[],"UsePreviousResults":true,"moduleIdForCode":13,"Comment":"","CommentCollapsed":true},{"Id":"-62","ModuleId":"BigQuantSpace.filter.filter-v3","ModuleParameters":[{"Name":"expr","Value":"pjjlyzzl>0.15 and lrydz_3year>0 and pjzcfzl<0.25 and fs_net_cash_flow>0 and 
yyjlv>0.3","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-62"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-62","OutputType":null},{"Name":"left_data","NodeId":"-62","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"Comment":"","CommentCollapsed":true},{"Id":"-2209","ModuleId":"BigQuantSpace.stock_ranker_train.stock_ranker_train-v6","ModuleParameters":[{"Name":"learning_algorithm","Value":"排序","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"number_of_leaves","Value":30,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"minimum_docs_per_leaf","Value":1000,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"number_of_trees","Value":20,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"learning_rate","Value":0.1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_bins","Value":1023,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"feature_fraction","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"data_row_fraction","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"ndcg_discount_base","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"m_lazy_run","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-2209"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-2209"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"test_ds","NodeId":"-2209"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"base_model","NodeId":"-2209"}],"OutputPortsInternal":[{"Name":"model","NodeId":"-2209",
"OutputType":null},{"Name":"feature_gains","NodeId":"-2209","OutputType":null},{"Name":"m_lazy_run","NodeId":"-2209","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-222' Position='211.10165405273438,167.4066619873047,200,200'/><NodePosition Node='-232' Position='484.24578857421875,-20.881707191467285,200,200'/><NodePosition Node='-236' Position='81.55084228515625,52.60997772216797,200,200'/><NodePosition Node='-245' Position='243.44918823242188,245.23291015625,200,200'/><NodePosition Node='-254' Position='184,410,200,200'/><NodePosition Node='-279' Position='217,485,200,200'/><NodePosition Node='-287' Position='290,575,200,200'/><NodePosition Node='-295' Position='588.7966918945312,313.8428649902344,200,200'/><NodePosition Node='-300' Position='299,657,200,200'/><NodePosition Node='-304' Position='301.6229248046875,854.8557739257812,200,200'/><NodePosition Node='-62' Position='344,760,200,200'/><NodePosition Node='-2209' Position='468.2606506347656,999.8544311523438,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups 
/></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [9]:
    # This code was auto-generated by the visual strategy environment on 2020-08-25 16:51.
    # This code cell can only be edited in visual mode. You may also copy the code,
    # paste it into a new code cell or strategy, and modify it there.
    
    
    # m2: base feature list; its output (m2.data) is passed to m1, m4 and m8 below.
    # The text is handed to the platform as one string, so the '#' lines inside the
    # triple quotes are part of the runtime value, not Python comments.
    # It lists three plain columns (fs_quarter_index, fs_net_profit_yoy,
    # fs_total_profit) and defines one expression column:
    #   zcfzl = (current liabilities + non-current liabilities)
    #           / (current assets + non-current assets)
    # NOTE(review): the in-string label for fs_total_profit reads "net profit", while
    # the field name suggests total profit -- confirm which figure is intended.
    m2 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    
    # 财报季度编号
    fs_quarter_index
    # 净利率同比增长率
    fs_net_profit_yoy
    # 净利润
    fs_total_profit
    # 资产负债率
    zcfzl=(fs_current_liabilities+fs_non_current_liabilities)/(fs_current_assets+fs_non_current_assets)"""
    )
    
    # m3: instrument universe for market 'CN_STOCK_A' between 2012-01-01 and
    # 2019-04-21. instrument_list is left empty; its output (m3.data) feeds m1.
    # NOTE(review): presumably an empty instrument_list means "all instruments" and
    # max_count=0 means "no cap" -- confirm against the platform documentation.
    m3 = M.instruments.v2(
        start_date='2012-01-01',
        end_date='2019-04-21',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # m1: reads the 'financial_statement_CN_STOCK_A' datasource for the instruments
    # from m3, with the m2 feature list attached. Its output (m1.data) feeds m4.
    # NOTE(review): start_date/end_date are empty strings here; presumably the date
    # range is then taken from m3 -- TODO confirm.
    m1 = M.use_datasource.v1(
        instruments=m3.data,
        features=m2.data,
        datasource_id='financial_statement_CN_STOCK_A',
        start_date='',
        end_date=''
    )
    
    # m4: first derived-feature pass over the raw statement data (m1.data) using the
    # m2 feature list; presumably this is where the zcfzl expression is evaluated.
    # drop_na=False and remove_extra_columns=False are passed, and no user functions
    # are supplied. Its output (m4.data) feeds the quarter filter m5.
    m4 = M.derived_feature_extractor.v3(
        input_data=m1.data,
        features=m2.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # m5: keeps only rows where fs_quarter_index == 4 (presumably the fourth-quarter /
    # annual report rows -- confirm). The left-over rows are not requested
    # (output_left_data=False). Its output (m5.data) feeds the sort step m9.
    m5 = M.filter.v3(
        input_data=m4.data,
        expr='fs_quarter_index==4',
        output_left_data=False
    )
    
    # m9: sorts the filtered rows by 'date' in ascending order, grouped by
    # 'instrument'. The result is read from m9.sorted_data by m10, whose
    # mean()/shift() expressions depend on row order.
    # NOTE(review): keep_columns='--' is passed through unchanged; its meaning is not
    # visible here -- check the sort module documentation.
    m9 = M.sort.v4(
        input_ds=m5.data,
        sort_by='date',
        group_by='instrument',
        keep_columns='--',
        ascending=True
    )
    
    # m11: second feature list, evaluated by m10 on the sorted rows. As in m2, the
    # '#' lines inside the string are runtime text, not Python comments.
    # It defines three expression columns:
    #   pjjlyzzl    = mean(fs_net_profit_yoy, 3)
    #   pjzcfzl     = mean(zcfzl, 3)
    #   lrydz_3year = 1 when fs_total_profit > shift(fs_total_profit, 1) and
    #                 shift(fs_total_profit, 1) > shift(fs_total_profit, 2), else 0
    # NOTE(review): the in-string labels call these 3-year figures. That holds only
    # if each row reaching m10 is one annual report per instrument, so that a window
    # or shift of N rows equals N years -- confirm.
    m11 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    #3年平均净利润增长率
    pjjlyzzl=mean(fs_net_profit_yoy,3)
    #3年平均资产负债率
    pjzcfzl=mean(zcfzl,3)
    # 3年净利润连续增长
    lrydz_3year=where((fs_total_profit>shift(fs_total_profit,1))&(shift(fs_total_profit,1)>shift(fs_total_profit,2)),1,0)"""
    )
    
    # m10: second derived-feature pass; applies the m11 expressions (pjjlyzzl,
    # pjzcfzl, lrydz_3year) to the sorted rows from m9. drop_na=False and
    # remove_extra_columns=False are passed, as in m4. Its output (m10.data)
    # feeds the filter m12.
    m10 = M.derived_feature_extractor.v3(
        input_data=m9.sorted_data,
        features=m11.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # m12: keeps only rows where zcfzl > 60. Its output (m12.data) feeds m6.
    # NOTE(review): zcfzl is defined in m2 as liabilities / assets (a ratio, not a
    # percentage), and the next filter (m6) requires pjzcfzl < 0.25, where pjzcfzl
    # is the 3-row mean of zcfzl. A threshold of 60 on the ratio looks inconsistent
    # with that and may leave no rows for m6/m8 -- confirm the intended scale and
    # direction of this condition.
    m12 = M.filter.v3(
        input_data=m10.data,
        expr='zcfzl>60',
        output_left_data=False
    )
    
    # m6: final screen; keeps rows that satisfy all five conditions in expr.
    # Its output (m6.data) is exported by m13 and used as training data by m8.
    # NOTE(review): pjjlyzzl, lrydz_3year and pjzcfzl come from m11, but
    # fs_net_cash_flow and yyjlv appear in neither the m2 nor the m11 feature list.
    # They can only be present if the upstream data already carries columns with
    # those names -- confirm, otherwise this expression cannot be evaluated.
    m6 = M.filter.v3(
        input_data=m12.data,
        expr='pjjlyzzl>0.15 and lrydz_3year>0 and pjzcfzl<0.25 and fs_net_cash_flow>0  and yyjlv>0.3',
        output_left_data=False
    )
    
    # m13: exports the screened rows (m6.data) to a CSV file with the given name.
    m13 = M.df_to_csv.v2(
        input_1=m6.data,
        name='财务数据选股.csv'
    )
    
    # m8: trains a StockRanker model on the screened rows (m6.data) using the m2
    # feature list, with 20 trees, 30 leaves per tree, at least 1000 docs per leaf
    # and learning rate 0.1.
    # NOTE(review): things to check if this step fails:
    #   * no step in this pipeline visibly adds a label column to the training
    #     data -- confirm whether the trainer requires one;
    #   * features is m2.data (raw statement fields plus zcfzl), not the m11 list,
    #     so pjjlyzzl / pjzcfzl / lrydz_3year are not used as training features;
    #   * the feature names are raw statement fields -- confirm the naming form
    #     the trainer expects for fields that are not pre-computed factors;
    #   * after the m12/m6 filters the training set may be far smaller than
    #     minimum_docs_per_leaf=1000, or empty.
    m8 = M.stock_ranker_train.v6(
        training_ds=m6.data,
        features=m2.data,
        learning_algorithm='排序',
        number_of_leaves=30,
        minimum_docs_per_leaf=1000,
        number_of_trees=20,
        learning_rate=0.1,
        max_bins=1023,
        feature_fraction=1,
        data_row_fraction=1,
        ndcg_discount_base=1,
        m_lazy_run=False
    )
    

    (tgu) #2

    使用非预计算因子时,需要写成"表名__字段名"的形式,比如 financial_statement_CN_STOCK_A__fs_net_profit_yoy;但是这种原始财报数据直接使用,可能会有对齐的问题。
    财报字段一般都有对应的预计算因子,比如你使用的 fs_net_profit_yoy,对应的因子是 fs_net_profit_yoy_0。
    这个因子已经做过清洗和填充,可以直接使用。