抽取数据当作因子报错

策略分享
标签: #<Tag:0x00007fa1aa3977b0>

(developer) #1

老师帮忙看看

克隆策略
In [3]:
DataSource("bar30m_CN_STOCK_A").read("000001.SZA", start_date="2019-01-01", end_date="2020-01-10").head(10)
Out[3]:
date open close high low volume amount instrument
0 2019-01-02 10:00:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
1 2019-01-02 10:30:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
2 2019-01-02 11:00:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
3 2019-01-02 11:30:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
4 2019-01-02 13:30:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
5 2019-01-02 14:00:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
6 2019-01-02 14:30:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
7 2019-01-02 15:00:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
8 2019-01-03 10:00:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA
9 2019-01-03 10:30:00 9.34 9.34 9.34 9.34 0.0 0.0 000001.SZA

    {"Description":"实验创建于2020/8/19","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-301:input_data","SourceOutputPortId":"-283:data"},{"DestinationInputPortId":"-301:features","SourceOutputPortId":"-289:data"},{"DestinationInputPortId":"-240:input_ds","SourceOutputPortId":"-301:data"},{"DestinationInputPortId":"-187:input_data","SourceOutputPortId":"-35:data"},{"DestinationInputPortId":"-54:features","SourceOutputPortId":"-41:data"},{"DestinationInputPortId":"-61:features","SourceOutputPortId":"-41:data"},{"DestinationInputPortId":"-54:instruments","SourceOutputPortId":"-45:data"},{"DestinationInputPortId":"-218:instruments","SourceOutputPortId":"-45:data"},{"DestinationInputPortId":"-61:input_data","SourceOutputPortId":"-54:data"},{"DestinationInputPortId":"-229:input_2","SourceOutputPortId":"-61:data"},{"DestinationInputPortId":"-195:training_ds","SourceOutputPortId":"-187:data"},{"DestinationInputPortId":"-229:input_1","SourceOutputPortId":"-218:data"},{"DestinationInputPortId":"-35:data2","SourceOutputPortId":"-229:data"},{"DestinationInputPortId":"-195:features","SourceOutputPortId":"-235:data"},{"DestinationInputPortId":"-35:data1","SourceOutputPortId":"-240:data"}],"ModuleNodes":[{"Id":"-283","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"dragon_stock","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"2019-1-1","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2020-1-31","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-283"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-283"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-283","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","Comment
Collapsed":true},{"Id":"-289","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\na = in_lhb_10d + 1\nb = kind_code\n\n","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-289"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-289","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-301","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-301"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-301"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-301","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-35","ModuleId":"BigQuantSpace.join.join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformMod
uleId":null,"Name":"data1","NodeId":"-35"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data2","NodeId":"-35"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-35","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-41","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\nreturn_5\n","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-41"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-41","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-45","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2019-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2020-01-31","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-45"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-45","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-54","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter
":null},{"Name":"before_start_days","Value":90,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-54"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-54"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-54","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-61","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-61"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-61"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-61","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-187","ModuleId":"BigQuantSpace.dropnan.dropnan-v2","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-187"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-187"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-187","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":9,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id"
:"-195","ModuleId":"BigQuantSpace.stock_ranker_train.stock_ranker_train-v6","ModuleParameters":[{"Name":"learning_algorithm","Value":"排序","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"number_of_leaves","Value":30,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"minimum_docs_per_leaf","Value":1000,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"number_of_trees","Value":20,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"learning_rate","Value":0.1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_bins","Value":1023,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"feature_fraction","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"data_row_fraction","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"ndcg_discount_base","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"m_lazy_run","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-195"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-195"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"test_ds","NodeId":"-195"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"base_model","NodeId":"-195"}],"OutputPortsInternal":[{"Name":"model","NodeId":"-195","OutputType":null},{"Name":"feature_gains","NodeId":"-195","OutputType":null},{"Name":"m_lazy_run","NodeId":"-195","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-218","ModuleId":"BigQuantSpace.advanced_auto_labeler.advanced_auto_labeler-v2","ModuleParameters":[{"Name":"label_expr","Value":"# #号开始的表示注释\n# 0. 每行一个,顺序执行,从第二个开始,可以使用label字段\n# 1. 
可用数据字段见 https://bigquant.com/docs/develop/datasource/deprecated/history_data.html\n# 添加benchmark_前缀,可使用对应的benchmark数据\n# 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/develop/bigexpr/usage.html>`_\n\n# 计算收益:5日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)\nshift(close, -5) / shift(open, -1)\n\n# 极值处理:用1%和99%分位的值做clip\nclip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))\n\n# 将分数映射到分类,这里使用20个分类\nall_wbins(label, 20)\n\n# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)\nwhere(shift(high, -1) == shift(low, -1), NaN, label)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"000300.SHA","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na_label","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"cast_label_int","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-218"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-218","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-229","ModuleId":"BigQuantSpace.data_join.data_join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-229"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-229"}],"OutputPorts
Internal":[{"Name":"data","NodeId":"-229","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":13,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-235","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\nreturn_5\na\nb","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-235"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-235","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":14,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-240","ModuleId":"BigQuantSpace.select_columns.select_columns-v3","ModuleParameters":[{"Name":"columns","Value":"date,instrument,a,b","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"reverse_select","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_ds","NodeId":"-240"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"columns_ds","NodeId":"-240"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-240","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":15,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-283' Position='726,81,200,200'/><NodePosition Node='-289' Position='1022,-38,200,200'/><NodePosition Node='-301' Position='988,178,200,200'/><NodePosition Node='-35' Position='1219,387,200,200'/><NodePosition Node='-41' Position='1729,53,200,200'/><NodePosition Node='-45' Position='1353,47,200,200'/><NodePosition Node='-54' 
Position='1656,139,200,200'/><NodePosition Node='-61' Position='1652,229,200,200'/><NodePosition Node='-187' Position='1212,469,200,200'/><NodePosition Node='-195' Position='1405,565,200,200'/><NodePosition Node='-218' Position='1288,142,200,200'/><NodePosition Node='-229' Position='1433,304,200,200'/><NodePosition Node='-235' Position='1610,439,200,200'/><NodePosition Node='-240' Position='1010,266,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [4]:
    # This code was auto-generated by the visual strategy environment on 2020-08-27 10:01.
    # This code cell can only be edited in visual mode. You may also copy the code
    # into a new code cell or strategy and modify it there.
    #
    # Pipeline overview (as wired below):
    #   m1 -> m4 -> m15  : custom datasource 'dragon_stock' -> derived features a, b
    #   m6 -> m7 -> m8   : instruments -> feature extraction for return_5
    #   m6 -> m12        : instruments -> label
    #   m12 + m8 -> m13  : join label with return_5 features
    #   m15 + m13 -> m3  : join custom features with the labelled data
    #   m3 -> m9 -> m10  : drop NaN -> StockRanker training
    #
    # NOTE: lines starting with '#' INSIDE the triple-quoted strings are part of
    # the string values passed to the modules and are intentionally left as-is.
    
    
    # Load the 'dragon_stock' datasource for 2019-1-1 .. 2020-1-31.
    m1 = M.use_datasource.v1(
        datasource_id='dragon_stock',
        start_date='2019-1-1',
        end_date='2020-1-31'
    )
    
    # Feature definitions to derive from m1: a = in_lhb_10d + 1, b = kind_code.
    m2 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    a = in_lhb_10d + 1
    b = kind_code
    
    """
    )
    
    # Evaluate the m2 feature expressions against the m1 data.
    # NaN rows and extra columns are kept (drop_na=False, remove_extra_columns=False).
    m4 = M.derived_feature_extractor.v3(
        input_data=m1.data,
        features=m2.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # Keep only the join keys (date, instrument) and the derived columns a, b.
    m15 = M.select_columns.v3(
        input_ds=m4.data,
        columns='date,instrument,a,b',
        reverse_select=False
    )
    
    # Feature list for the extractor branch: return_5 only.
    m5 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    return_5
    """
    )
    
    # Instrument universe: market CN_STOCK_A, 2019-01-01 .. 2020-01-31.
    # presumably an empty instrument_list / max_count=0 means "no restriction" -- TODO confirm
    m6 = M.instruments.v2(
        start_date='2019-01-01',
        end_date='2020-01-31',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # Extract the m5 features for the m6 instruments; start/end dates are left
    # empty here and before_start_days is set to 90.
    m7 = M.general_feature_extractor.v7(
        instruments=m6.data,
        features=m5.data,
        start_date='',
        end_date='',
        before_start_days=90
    )
    
    # Derived-feature pass over the m7 output using the same m5 feature list.
    m8 = M.derived_feature_extractor.v3(
        input_data=m7.data,
        features=m5.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # Build the training label for the m6 instruments from the expression list
    # below (one expression per line inside the string), with benchmark
    # '000300.SHA'; drop_na_label and cast_label_int are both enabled.
    m12 = M.advanced_auto_labeler.v2(
        instruments=m6.data,
        label_expr="""# #号开始的表示注释
    # 0. 每行一个,顺序执行,从第二个开始,可以使用label字段
    # 1. 可用数据字段见 https://bigquant.com/docs/develop/datasource/deprecated/history_data.html
    #   添加benchmark_前缀,可使用对应的benchmark数据
    # 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/develop/bigexpr/usage.html>`_
    
    # 计算收益:5日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
    shift(close, -5) / shift(open, -1)
    
    # 极值处理:用1%和99%分位的值做clip
    clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
    
    # 将分数映射到分类,这里使用20个分类
    all_wbins(label, 20)
    
    # 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
    where(shift(high, -1) == shift(low, -1), NaN, label)
    """,
        start_date='',
        end_date='',
        benchmark='000300.SHA',
        drop_na_label=True,
        cast_label_int=True,
        user_functions={}
    )
    
    # Inner-join the labels (m12) with the return_5 features (m8) on date, instrument.
    m13 = M.data_join.v3(
        input_1=m12.data,
        input_2=m8.data,
        on='date,instrument',
        how='inner',
        sort=False
    )
    
    # Inner-join the custom-datasource columns a, b (m15) with the labelled
    # feature data (m13) on date, instrument.
    m3 = M.join.v3(
        data1=m15.data,
        data2=m13.data,
        on='date,instrument',
        how='inner',
        sort=False
    )
    
    # Drop NaN rows from the joined data before training.
    m9 = M.dropnan.v2(
        input_data=m3.data
    )
    
    # Feature list handed to the trainer: return_5 plus the derived columns a and b.
    m14 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    return_5
    a
    b"""
    )
    
    # Train the StockRanker model on m9 using the m14 features.
    # NOTE(review): the run recorded for this cell fails inside this call with
    # "TypeError: Cannot cast array data from dtype('O') to dtype('float64')".
    # The follow-up reply in the thread attributes this to kind_code (feature b)
    # containing strings; it presumably must be converted to a numeric column
    # (or dropped from the m14 feature list) before training -- TODO confirm.
    m10 = M.stock_ranker_train.v6(
        training_ds=m9.data,
        features=m14.data,
        learning_algorithm='排序',
        number_of_leaves=30,
        minimum_docs_per_leaf=1000,
        number_of_trees=20,
        learning_rate=0.1,
        max_bins=1023,
        feature_fraction=1,
        data_row_fraction=1,
        ndcg_discount_base=1,
        m_lazy_run=False
    )
    
    ---------------------------------------------------------------------------
    TypeError                                 Traceback (most recent call last)
    <ipython-input-4-952ef035fbe9> in <module>()
        138     data_row_fraction=1,
        139     ndcg_discount_base=1,
    --> 140     m_lazy_run=False
        141 )
    
    TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'

    (adhaha111) #2

    您好,报错的原因是 kind_code 这一列中存在字符串(dtype 为 object),训练时无法将其转换为 float64。您需要先用自定义模块把该列处理成数值类型;对于这种情况,建议直接使用自定义模块一步处理到位,这样也不用设置这么多模块了。

    image