克隆策略

    {"Description":"实验创建于2018/1/23","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-43:instruments","SourceOutputPortId":"-4129:data"},{"DestinationInputPortId":"-331:instruments","SourceOutputPortId":"-4129:data"},{"DestinationInputPortId":"-2932:data2","SourceOutputPortId":"-43:data"},{"DestinationInputPortId":"-43:features","SourceOutputPortId":"-49:data"},{"DestinationInputPortId":"-2932:data1","SourceOutputPortId":"-331:data"},{"DestinationInputPortId":"-331:features","SourceOutputPortId":"-337:data"},{"DestinationInputPortId":"-355:features","SourceOutputPortId":"-350:data"},{"DestinationInputPortId":"-3888:data2","SourceOutputPortId":"-2932:data"},{"DestinationInputPortId":"-780:input_1","SourceOutputPortId":"-387:data"},{"DestinationInputPortId":"-556:input_1","SourceOutputPortId":"-780:data_1"},{"DestinationInputPortId":"-474:input_1","SourceOutputPortId":"-380:data"},{"DestinationInputPortId":"-556:input_2","SourceOutputPortId":"-474:data_1"},{"DestinationInputPortId":"-606:input_1","SourceOutputPortId":"-556:data_1"},{"DestinationInputPortId":"-606:input_2","SourceOutputPortId":"-596:data"},{"DestinationInputPortId":"-996:input_data","SourceOutputPortId":"-606:data_1"},{"DestinationInputPortId":"-3888:data1","SourceOutputPortId":"-996:data"},{"DestinationInputPortId":"-355:input_data","SourceOutputPortId":"-3888:data"}],"ModuleNodes":[{"Id":"-4129","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2010-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2018-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"\n\n ","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-4129"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-4129","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-43","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"financial_statement_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-43"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-43"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-43","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-49","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\nfs_net_income_x","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-49"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-49","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-331","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"market_value_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-331"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-331"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-331","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-337","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\nmarket_cap","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-337"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-337","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-350","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释,注释需单独一行\n# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征\nlog(market_cap)\nmarket_cap\nfs_net_income_x/market_cap","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-350"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-350","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-355","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-355"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-355"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-355","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":9,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2932","ModuleId":"BigQuantSpace.join.join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data1","NodeId":"-2932"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data2","NodeId":"-2932"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2932","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-387","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"basic_info_IndustrySw","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"2010-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2018-01-01","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-387"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-387"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-387","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-780","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# 修改数据列名\ndef bigquant_run(input_ds):\n df = input_ds.read_df()\n df['code'] = df['code'].astype(int)\n ds = DataSource().write_df(df)\n return Outputs(data_1=ds)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"input_ds","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"data_1","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-780"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-780"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-780"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-780","OutputType":null},{"Name":"data_2","NodeId":"-780","OutputType":null},{"Name":"data_3","NodeId":"-780","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-380","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"industry_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"2010-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2018-01-01","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-380"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-380"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-380","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-474","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# 修改数据列名\ndef bigquant_run(input_ds):\n df = input_ds.read_df()\n df.rename(columns={'industry_sw_level1':'code'},inplace=True)\n df['code'] = df['code'].astype(int)\n ds = DataSource().write_df(df)\n return Outputs(data_1=ds)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"input_ds","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"data_1","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-474"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-474"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-474"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-474","OutputType":null},{"Name":"data_2","NodeId":"-474","OutputType":null},{"Name":"data_3","NodeId":"-474","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-556","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2):\n # 示例代码如下。在这里编写您的代码\n df_1=input_1.read_df()\n df_2=input_2.read_df()\n data = DataSource.write_df(pd.merge(df_1,df_2,on='code'))\n return Outputs(data_1=data)","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"input_1, input_2","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"data_1","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-556"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-556"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-556"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-556","OutputType":null},{"Name":"data_2","NodeId":"-556","OutputType":null},{"Name":"data_3","NodeId":"-556","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":13,"IsPartOfPartialRun":null,"Comment":"得出行业与股票代码的dataframe","CommentCollapsed":true},{"Id":"-596","ModuleId":"BigQuantSpace.use_datasource.use_datasource-v1","ModuleParameters":[{"Name":"datasource_id","Value":"financial_statement_CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"2010-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2018-01-01","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-596"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-596"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-596","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":14,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-606","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2):\n # 示例代码如下。在这里编写您的代码\n df_1=input_1.read_df()\n df_2=input_2.read_df()\n data = DataSource.write_df(pd.merge(df_1[['name','date','instrument']],df_2,on=['instrument','date']))\n return Outputs(data_1=data)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"input_1, input_2","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"data_1","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-606"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-606"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-606"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-606","OutputType":null},{"Name":"data_2","NodeId":"-606","OutputType":null},{"Name":"data_3","NodeId":"-606","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":15,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-996","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-996"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-996","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":16,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-3888","ModuleId":"BigQuantSpace.join.join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data1","NodeId":"-3888"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data2","NodeId":"-3888"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-3888","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":17,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-4129' Position='183,654,200,200'/><NodePosition Node='-43' Position='266,779,200,200'/><NodePosition Node='-49' Position='463.13421630859375,652.6218872070312,200,200'/><NodePosition Node='-331' Position='-39,777,200,200'/><NodePosition Node='-337' Position='-104,655,200,200'/><NodePosition Node='-350' Position='466,871,200,200'/><NodePosition Node='-355' Position='117,966,200,200'/><NodePosition Node='-2932' Position='40,862,200,200'/><NodePosition Node='-387' Position='-980,436,200,200'/><NodePosition Node='-780' Position='-973,513,200,200'/><NodePosition Node='-380' Position='-647,435,200,200'/><NodePosition Node='-474' Position='-624,522,200,200'/><NodePosition Node='-556' Position='-783,612,200,200'/><NodePosition Node='-596' Position='-469,620,200,200'/><NodePosition Node='-606' Position='-616,706,200,200'/><NodePosition Node='-996' Position='-545,783,200,200'/><NodePosition Node='-3888' Position='-345,892,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [2]:
    # 本代码由可视化策略环境自动生成 2019年1月15日 18:55
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    # 修改数据列名
    def m10_run_bigquant_run(input_ds):
        df = input_ds.read_df()
        df['code'] = df['code'].astype(int)
        ds = DataSource().write_df(df)
        return Outputs(data_1=ds)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m10_post_run_bigquant_run(outputs):
        return outputs
    
    # 修改数据列名
    def m12_run_bigquant_run(input_ds):
        df = input_ds.read_df()
        df.rename(columns={'industry_sw_level1':'code'},inplace=True)
        df['code'] = df['code'].astype(int)
        ds = DataSource().write_df(df)
        return Outputs(data_1=ds)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m12_post_run_bigquant_run(outputs):
        return outputs
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m13_run_bigquant_run(input_1, input_2):
        # 示例代码如下。在这里编写您的代码
        df_1=input_1.read_df()
        df_2=input_2.read_df()
        data = DataSource.write_df(pd.merge(df_1,df_2,on='code'))
        return Outputs(data_1=data)
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m13_post_run_bigquant_run(outputs):
        return outputs
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m15_run_bigquant_run(input_1, input_2):
        # 示例代码如下。在这里编写您的代码
        df_1=input_1.read_df()
        df_2=input_2.read_df()
        data = DataSource.write_df(pd.merge(df_1[['name','date','instrument']],df_2,on=['instrument','date']))
        return Outputs(data_1=data)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m15_post_run_bigquant_run(outputs):
        return outputs
    
    
    m1 = M.instruments.v2(
        start_date='2010-01-01',
        end_date='2018-01-01',
        market='CN_STOCK_A',
        instrument_list="""
    
     """,
        max_count=0
    )
    
    m5 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    fs_net_income_x"""
    )
    
    m3 = M.use_datasource.v1(
        instruments=m1.data,
        features=m5.data,
        datasource_id='financial_statement_CN_STOCK_A',
        start_date='',
        end_date='',
        m_cached=False
    )
    
    m4 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    market_cap"""
    )
    
    m2 = M.use_datasource.v1(
        instruments=m1.data,
        features=m4.data,
        datasource_id='market_value_CN_STOCK_A',
        start_date='',
        end_date=''
    )
    
    m6 = M.join.v3(
        data1=m2.data,
        data2=m3.data,
        on='date,instrument',
        how='inner',
        sort=False
    )
    
    m8 = M.input_features.v1(
        features="""
    # #号开始的表示注释,注释需单独一行
    # 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
    log(market_cap)
    market_cap
    fs_net_income_x/market_cap"""
    )
    
    m7 = M.use_datasource.v1(
        datasource_id='basic_info_IndustrySw',
        start_date='2010-01-01',
        end_date='2018-01-01'
    )
    
    m10 = M.cached.v3(
        input_1=m7.data,
        run=m10_run_bigquant_run,
        post_run=m10_post_run_bigquant_run,
        input_ports='input_ds',
        params="""{}
    """,
        output_ports='data_1'
    )
    
    m11 = M.use_datasource.v1(
        datasource_id='industry_CN_STOCK_A',
        start_date='2010-01-01',
        end_date='2018-01-01'
    )
    
    m12 = M.cached.v3(
        input_1=m11.data,
        run=m12_run_bigquant_run,
        post_run=m12_post_run_bigquant_run,
        input_ports='input_ds',
        params="""{}
    """,
        output_ports='data_1'
    )
    
    m13 = M.cached.v3(
        input_1=m10.data_1,
        input_2=m12.data_1,
        run=m13_run_bigquant_run,
        post_run=m13_post_run_bigquant_run,
        input_ports='input_1, input_2',
        params='{}',
        output_ports='data_1'
    )
    
    m14 = M.use_datasource.v1(
        datasource_id='financial_statement_CN_STOCK_A',
        start_date='2010-01-01',
        end_date='2018-01-01'
    )
    
    m15 = M.cached.v3(
        input_1=m13.data_1,
        input_2=m14.data,
        run=m15_run_bigquant_run,
        post_run=m15_post_run_bigquant_run,
        input_ports='input_1, input_2',
        params='{}',
        output_ports='data_1'
    )
    
    m16 = M.dropnan.v1(
        input_data=m15.data_1
    )
    
    m17 = M.join.v3(
        data1=m16.data,
        data2=m6.data,
        on='date,instrument',
        how='inner',
        sort=False
    )
    
    m9 = M.derived_feature_extractor.v3(
        input_data=m17.data,
        features=m8.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    [2019-01-15 17:59:11.478277] INFO: bigquant: instruments.v2 开始运行..
    [2019-01-15 17:59:11.494980] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:11.496078] INFO: bigquant: instruments.v2 运行完成[0.01788s].
    [2019-01-15 17:59:11.499494] INFO: bigquant: input_features.v1 开始运行..
    [2019-01-15 17:59:11.506338] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:11.507206] INFO: bigquant: input_features.v1 运行完成[0.007675s].
    [2019-01-15 17:59:11.523574] INFO: bigquant: use_datasource.v1 开始运行..
    [2019-01-15 17:59:12.074848] INFO: bigquant: use_datasource.v1 运行完成[0.55126s].
    [2019-01-15 17:59:12.077441] INFO: bigquant: input_features.v1 开始运行..
    [2019-01-15 17:59:12.083406] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:12.084445] INFO: bigquant: input_features.v1 运行完成[0.006978s].
    [2019-01-15 17:59:12.087062] INFO: bigquant: use_datasource.v1 开始运行..
    [2019-01-15 17:59:12.094077] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:12.094879] INFO: bigquant: use_datasource.v1 运行完成[0.007835s].
    [2019-01-15 17:59:12.098266] INFO: bigquant: join.v3 开始运行..
    [2019-01-15 17:59:17.851180] INFO: join: /data, 行数=64682/80558, 耗时=2.107615s
    [2019-01-15 17:59:18.000537] INFO: join: 最终行数: 64682
    [2019-01-15 17:59:18.002575] INFO: bigquant: join.v3 运行完成[5.904373s].
    [2019-01-15 17:59:18.004518] INFO: bigquant: input_features.v1 开始运行..
    [2019-01-15 17:59:18.009055] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.009779] INFO: bigquant: input_features.v1 运行完成[0.005266s].
    [2019-01-15 17:59:18.011648] INFO: bigquant: use_datasource.v1 开始运行..
    [2019-01-15 17:59:18.026834] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.027999] INFO: bigquant: use_datasource.v1 运行完成[0.016347s].
    [2019-01-15 17:59:18.034366] INFO: bigquant: cached.v3 开始运行..
    [2019-01-15 17:59:18.040293] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.041300] INFO: bigquant: cached.v3 运行完成[0.006931s].
    [2019-01-15 17:59:18.043947] INFO: bigquant: use_datasource.v1 开始运行..
    [2019-01-15 17:59:18.092537] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.093488] INFO: bigquant: use_datasource.v1 运行完成[0.049578s].
    [2019-01-15 17:59:18.097528] INFO: bigquant: cached.v3 开始运行..
    [2019-01-15 17:59:18.102904] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.103721] INFO: bigquant: cached.v3 运行完成[0.006216s].
    [2019-01-15 17:59:18.106585] INFO: bigquant: cached.v3 开始运行..
    [2019-01-15 17:59:18.110802] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.111538] INFO: bigquant: cached.v3 运行完成[0.004937s].
    [2019-01-15 17:59:18.113446] INFO: bigquant: use_datasource.v1 开始运行..
    [2019-01-15 17:59:18.118320] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.119175] INFO: bigquant: use_datasource.v1 运行完成[0.005704s].
    [2019-01-15 17:59:18.122619] INFO: bigquant: cached.v3 开始运行..
    [2019-01-15 17:59:18.127252] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.127919] INFO: bigquant: cached.v3 运行完成[0.005311s].
    [2019-01-15 17:59:18.131131] INFO: bigquant: dropnan.v1 开始运行..
    [2019-01-15 17:59:18.135604] INFO: bigquant: 命中缓存
    [2019-01-15 17:59:18.136299] INFO: bigquant: dropnan.v1 运行完成[0.0052s].
    [2019-01-15 17:59:18.138252] INFO: bigquant: join.v3 开始运行..
    [2019-01-15 17:59:18.227538] INFO: join: /data, 行数=3643/64682, 耗时=0.05775s
    [2019-01-15 17:59:18.238390] INFO: join: 最终行数: 3643
    [2019-01-15 17:59:18.240348] INFO: bigquant: join.v3 运行完成[0.102083s].
    [2019-01-15 17:59:18.243747] INFO: bigquant: derived_feature_extractor.v3 开始运行..
    [2019-01-15 17:59:18.260740] WARNING: derived_feature_extractor: 特征 fs_net_income_x/market_cap,找不到依赖的列:fs_net_income_x
    [2019-01-15 17:59:18.266199] INFO: derived_feature_extractor: 提取完成 log(market_cap), 0.001s
    [2019-01-15 17:59:18.267055] INFO: derived_feature_extractor: 提取失败 fs_net_income_x/market_cap: Unknown fs_net_income_x
    [2019-01-15 17:59:18.287992] INFO: derived_feature_extractor: /data, 3643
    [2019-01-15 17:59:18.319331] INFO: bigquant: derived_feature_extractor.v3 运行完成[0.075545s].
    
    In [3]:
    m9.data.read_df().head()
    
    Out[3]:
    date instrument fs_quarter_index_x market_cap name fs_account_payable fs_account_receivable fs_bps fs_capital_reserves fs_paicl_up_capital ... fs_total_profit fs_undistributed_profit fs_common_equity fs_eps_yoy fs_net_profit_yoy fs_operating_revenue_yoy fs_quarter fs_quarter_year fs_quarter_index_y log(market_cap)
    0 2010-01-29 000860.SZA 4 7.718304e+09 食品饮料 380763008.0 249083904.0 5.47 1.208242e+09 4.385400e+08 ... 245111792.0 575305536.0 2.397787e+09 -26.000000 -27.341499 17.613600 20091231 2009 4 7.718304e+09
    1 2010-02-08 600703.SHA 4 1.564477e+10 电子 67495256.0 118568024.0 5.26 1.159715e+09 2.776850e+08 ... 204681120.0 12674759.0 1.461065e+09 153.571426 246.108398 120.628403 20091231 2009 4 1.564477e+10
    2 2010-02-09 000155.SZA 4 3.684800e+09 化工 122345456.0 36059852.0 3.81 8.102781e+08 4.700000e+08 ... 80090016.0 287790688.0 1.788389e+09 -33.333332 -34.674500 -14.791800 20091231 2009 4 3.684800e+09
    3 2010-02-10 000822.SZA 4 6.462564e+09 化工 521201440.0 273611488.0 3.67 1.500998e+09 8.950919e+08 ... -917584384.0 595832512.0 3.282417e+09 -443.478271 -435.690887 -36.361900 20091231 2009 4 6.462564e+09
    4 2010-02-11 600533.SHA 4 6.289500e+09 房地产 389854208.0 31424230.0 2.58 1.026469e+09 1.050000e+09 ... 397046336.0 493017184.0 2.709020e+09 10.850286 18.800301 38.354900 20091231 2009 4 6.289500e+09

    5 rows × 67 columns

    In [93]:
    df = m9.data.read_df()[['fs_net_income','log(market_cap)','market_cap','instrument','fs_operating_revenue','fs_deducted_profit_ttm','fs_roe_ttm','fs_fixed_assets','fs_current_assets']]
    df_0 = df.dropna(how='any').set_index('instrument')
    df_0 = df.groupby(['instrument']).mean().dropna()
    df_0.head()
    
    Out[93]:
    fs_net_income log(market_cap) market_cap fs_operating_revenue fs_deducted_profit_ttm fs_roe_ttm fs_fixed_assets fs_current_assets
    instrument
    000004.SZA 1.381987e+06 7.188404e+08 7.188404e+08 2.898781e+07 1721686.5 2.596900 3.739730e+07 1.165177e+08
    000005.SZA -2.925819e+07 2.550991e+09 2.550991e+09 2.971566e+07 -76030208.0 -7.129900 2.441289e+07 1.628671e+08
    000006.SZA 2.820548e+08 9.069716e+09 9.069716e+09 1.294451e+09 555921792.0 15.222667 4.405602e+06 9.396219e+09
    000009.SZA 1.076491e+08 2.178003e+10 2.178003e+10 1.379832e+09 64158300.0 16.230301 2.190413e+09 1.267394e+10
    000010.SZA -3.724679e+04 3.873420e+09 3.873420e+09 2.131622e+08 -15654562.0 1.010900 3.890702e+07 3.480452e+08
    In [94]:
    import numpy as np
    
    df_0['logmarket_cap'] = np.log10(df_0['market_cap']+1)
    df_0['logmarket_cap'].head()
    
    Out[94]:
    instrument
    000004.SZA     8.856632
    000005.SZA     9.406709
    000006.SZA     9.957594
    000009.SZA    10.338058
    000010.SZA     9.588095
    Name: logmarket_cap, dtype: float32
    In [96]:
    import numpy as np
    import pandas as pd
    from scipy import stats, integrate
    import matplotlib.pyplot as plt
    import seaborn as sns
    def winsorize(df, width=3):
            df = df.copy()
            factor_columns = set(df.columns).difference(['date','instrument'])
            for factor in factor_columns:
                mean = df[factor].mean()
                sigma = df[factor].std()
                df[factor] = df[factor].clip(mean-width*sigma,mean+width*sigma)
            return df
    def standardlize(df):
            df = df.copy()
            factor_columns = set(df.columns).difference(['date','instrument'])
            for factor in factor_columns:
                mean = df[factor].mean()
                sigma = df[factor].std()
                df[factor] = (df[factor]-mean)/sigma
            return df
    df_0 = standardlize(winsorize(df_0))    
        
    sns.distplot(df_0['logmarket_cap'])
    sns.distplot(df_0['logmarket_cap'], label='市值对数', hist=False)
    
    Out[96]:
    <matplotlib.axes._subplots.AxesSubplot at 0x7ff8725dfa58>
    In [ ]: