用户自定义函数构建衍生特征

自定义因子
衍生特征抽取
自定义函数
自定义特征
标签: 自定义因子 衍生特征抽取 自定义函数 自定义特征

(iQuant) #1

在衍生特征抽取的时候实现自定义的函数,或者覆盖同名预定义函数。

克隆策略

    {"Description":"实验创建于2017/9/28","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-311:instruments","SourceOutputPortId":"-303:data"},{"DestinationInputPortId":"-26:input_data","SourceOutputPortId":"-311:data"},{"DestinationInputPortId":"-311:features","SourceOutputPortId":"-315:data"},{"DestinationInputPortId":"-26:features","SourceOutputPortId":"-31:data"}],"ModuleNodes":[{"Id":"-303","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2015-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2017-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-303"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-303","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-311","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v6","ModuleParameters":[{"Name":"start_date","Value":"2015-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2017-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-311"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-311"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-311","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfP
artialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-315","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释\n# 多个特征,每行一个,可以包含基础特征和衍生特征\nreturn_5, # 5日收益\nreturn_10, # 10日收益\nreturn_20, # 20日收益\navg_amount_0/avg_amount_5, # 当日/5日平均交易额\navg_amount_5/avg_amount_20, # 5日/20日平均交易额\nclose_0,\nclose_1,\n","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-315"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-315","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-26","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v2","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"# https://bigquant.com/docs/big_expr.html 自定义函数\ndef timex(df, s, x):\n return s * x\n\nbigquant_run = {\n 'timex': timex\n}\n","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-26"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-26"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-26","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"IsPartOfPartialRun":null,"Comment":"在衍生特征里配置自定义函数就可以了,也可以覆盖同名的预定义函数","CommentCollapsed":false},{"Id":"-31","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"\n# #号开始的表示注释\n# 多个特征,每行一个,可以包含基础特征和衍生特征\ntimex(close_1/close_0, 
100)","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-31"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-31","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-303' Position='-260,2,200,200'/><NodePosition Node='-311' Position='-121,140,200,200'/><NodePosition Node='-315' Position='40,0,200,200'/><NodePosition Node='-26' Position='46,264,200,200'/><NodePosition Node='-31' Position='219,139,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":false}
    In [1]:
    # 本代码由可视化策略环境自动生成 2017年11月14日 16:25
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    # Instrument selection for market 'CN_STOCK_A' between 2015-01-01 and 2017-01-01.
    # NOTE(review): instrument_list='' and max_count=0 presumably mean "no restriction";
    # confirm against the M.instruments documentation.
    m1 = M.instruments.v2(
        start_date='2015-01-01',
        end_date='2017-01-01',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # Feature list consumed by the general feature extractor (m2) below.
    # Per the template text inside the string itself: lines starting with '#' are
    # comments, and features are listed one per line. The string is runtime data
    # and is left untouched.
    m3 = M.input_features.v1(
        features="""
    # #号开始的表示注释
    # 多个特征,每行一个,可以包含基础特征和衍生特征
    return_5,  # 5日收益
    return_10,  # 10日收益
    return_20,  # 20日收益
    avg_amount_0/avg_amount_5,  # 当日/5日平均交易额
    avg_amount_5/avg_amount_20,  # 5日/20日平均交易额
    close_0,
    close_1,
    """
    )
    
    # Run the general feature extractor on the instruments from m1 with the
    # feature list from m3, over the same 2015-01-01 .. 2017-01-01 range.
    # NOTE(review): before_start_days=0 presumably means no extra look-back days
    # before start_date; confirm against the module documentation.
    m2 = M.general_feature_extractor.v6(
        instruments=m1.data,
        features=m3.data,
        start_date='2015-01-01',
        end_date='2017-01-01',
        before_start_days=0
    )
    
    # Derived-feature expression that calls the user-defined function `timex`.
    # The expression passes two arguments (close_1/close_0 and 100), while timex
    # is defined with three parameters (df, s, x).
    m5 = M.input_features.v1(
        features="""
    # #号开始的表示注释
    # 多个特征,每行一个,可以包含基础特征和衍生特征
    timex(close_1/close_0, 100)"""
    )
    
    # https://bigquant.com/docs/big_expr.html 自定义函数
    def timex(df, s, x):
        """Scale ``s`` by the factor ``x``.

        Args:
            df: Not used by this function. The feature expression
                ``timex(close_1/close_0, 100)`` supplies only ``s`` and ``x``,
                so this leading argument appears to be provided by the
                expression engine itself.
            s: Left operand of the multiplication.
            x: Right operand of the multiplication.

        Returns:
            The product ``s * x``.
        """
        scaled = s * x
        return scaled
    
    # Registry of user-defined functions (expression name -> Python callable);
    # it is passed as `user_functions` to the derived feature extractor.
    m4_user_functions_bigquant_run = dict(timex=timex)
    
    # Evaluate the derived-feature expressions from m5 on the output of m2.
    # `user_functions` supplies the custom `timex` implementation; per the post,
    # a function registered this way can also override a predefined function of
    # the same name.
    m4 = M.derived_feature_extractor.v2(
        input_data=m2.data,
        features=m5.data,
        date_col='date',
        instrument_col='instrument',
        user_functions=m4_user_functions_bigquant_run
    )
    
    [2017-11-14 16:24:12.775902] INFO: bigquant: instruments.v2 开始运行..
    [2017-11-14 16:24:12.785685] INFO: bigquant: 命中缓存
    [2017-11-14 16:24:12.787110] INFO: bigquant: instruments.v2 运行完成[0.011268s].
    [2017-11-14 16:24:12.795811] INFO: bigquant: input_features.v1 开始运行..
    [2017-11-14 16:24:12.800493] INFO: bigquant: 命中缓存
    [2017-11-14 16:24:12.801955] INFO: bigquant: input_features.v1 运行完成[0.006135s].
    [2017-11-14 16:24:12.820987] INFO: bigquant: general_feature_extractor.v6 开始运行..
    [2017-11-14 16:24:12.825073] INFO: bigquant: 命中缓存
    [2017-11-14 16:24:12.826399] INFO: bigquant: general_feature_extractor.v6 运行完成[0.005441s].
    [2017-11-14 16:24:12.831858] INFO: bigquant: input_features.v1 开始运行..
    [2017-11-14 16:24:12.851578] INFO: bigquant: 命中缓存
    [2017-11-14 16:24:12.852993] INFO: bigquant: input_features.v1 运行完成[0.021139s].
    [2017-11-14 16:24:12.906136] INFO: bigquant: derived_feature_extractor.v2 开始运行..
    [2017-11-14 16:24:14.594325] INFO: derived_feature_extractor: 提取完成 timex(close_1/close_0, 100), 0.005s
    [2017-11-14 16:24:14.878646] INFO: derived_feature_extractor: /y_2015, 569698
    [2017-11-14 16:24:15.600142] INFO: derived_feature_extractor: /y_2016, 641546
    [2017-11-14 16:24:15.930758] INFO: bigquant: derived_feature_extractor.v2 运行完成[3.024605s].
    
    In [3]:
    # Load the extractor output as a DataFrame and preview its first rows.
    m4.data.read_df().head()
    
    Out[3]:
    avg_amount_0 avg_amount_20 avg_amount_5 close_0 close_1 date instrument return_10 return_20 return_5 timex(close_1/close_0, 100)
    0 4.565388e+09 4.074674e+09 3.743381e+09 1138.280273 1125.490601 2015-01-05 000001.SZA 1.042969 1.222901 1.138593 98.876404
    1 3.453446e+09 4.016539e+09 3.837692e+09 1121.227295 1138.280273 2015-01-06 000001.SZA 1.059772 1.130372 1.074200 101.520920
    2 2.634796e+09 3.840188e+09 3.672543e+09 1099.911255 1121.227295 2015-01-07 000001.SZA 1.032000 1.065382 1.025166 101.937973
    3 2.128003e+09 3.661347e+09 3.363223e+09 1062.963257 1099.911255 2015-01-08 000001.SZA 0.975228 0.982272 1.002681 103.475937
    4 3.835378e+09 3.558131e+09 3.396205e+09 1071.489746 1062.963257 2015-01-09 000001.SZA 1.022373 1.099927 0.972903 99.204239

    用户自定义函数衍生特征抽取:示例中部分代码的使用位置问题
    策略研究常用功能
    (ohmyskyhigh) #2

    你好,在下面这段代码中,特征表达式调用 timex 时只传入了两个参数 (close_1/close_0, 100),而函数定义中却有三个参数 (df, s, x)。为什么调用时可以省略其中一个?

    # Derived-feature expression that calls the user-defined function `timex`
    # with two arguments (close_1/close_0 and 100).
    m5 = M.input_features.v1(
        features="""
    # #号开始的表示注释
    # 多个特征,每行一个,可以包含基础特征和衍生特征
    timex(close_1/close_0, 100)"""
    )
    
    # https://bigquant.com/docs/big_expr.html 自定义函数
    def timex(df, s, x):
        """Scale ``s`` by the factor ``x``.

        Args:
            df: Not used by this function. The feature expression
                ``timex(close_1/close_0, 100)`` supplies only ``s`` and ``x``,
                so this leading argument appears to be provided by the
                expression engine itself.
            s: Left operand of the multiplication.
            x: Right operand of the multiplication.

        Returns:
            The product ``s * x``.
        """
        scaled = s * x
        return scaled
    

    (达达) #3

    第一个参数 df 不需要在表达式里传入:引擎默认会把分组后的输入数据框作为 df 传给自定义函数,所以表达式中只需要写后面的两个参数 (s, x)。


    (quantai) #4

    我来提供一个更简单的模板,告诉大家如何自定义因子,这个功能太赞了。大家直接克隆到自己空间就能看明白

    克隆策略

      {"Description":"实验创建于2017/9/28","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-32:instruments","SourceOutputPortId":"-303:data"},{"DestinationInputPortId":"-39:features","SourceOutputPortId":"-31:data"},{"DestinationInputPortId":"-32:features","SourceOutputPortId":"-31:data"},{"DestinationInputPortId":"-39:input_data","SourceOutputPortId":"-32:data"}],"ModuleNodes":[{"Id":"-303","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2015-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2017-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-303"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-303","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"Comment":"","CommentCollapsed":true},{"Id":"-31","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"close_0\nclose_1\ntimex(close_1/close_0, 100)\naddx(close_0, 
close_1)","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-31"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-31","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"Comment":"","CommentCollapsed":true},{"Id":"-32","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"2015-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2017-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-32"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-32"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-32","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"Comment":"","CommentCollapsed":true},{"Id":"-39","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"# https://bigquant.com/docs/big_expr.html 自定义函数\ndef timex(df, s, x):\n return s * x\n\ndef addx(df, s, x):\n return s + x\n\nbigquant_run = {\n 'timex': timex,\n 
'addx':addx\n}\n","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-39"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-39"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-39","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"Comment":"在衍生特征里配置自定义函数就可以了,也可以覆盖同名的预定义函数","CommentCollapsed":false}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-303' Position='-260,2,200,200'/><NodePosition Node='-31' Position='59.88677978515625,8.855921745300293,200,200'/><NodePosition Node='-32' Position='-121,140,200,200'/><NodePosition Node='-39' Position='47,268.2984619140625,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
      In [4]:
      # 本代码由可视化策略环境自动生成 2020年3月10日 08:41
      # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
      
      
      # https://bigquant.com/docs/big_expr.html 自定义函数
      def timex(df, s, x):
          """Scale ``s`` by the factor ``x``.

          Args:
              df: Not used by this function. The feature expression
                  ``timex(close_1/close_0, 100)`` supplies only ``s`` and ``x``,
                  so this leading argument appears to be provided by the
                  expression engine itself.
              s: Left operand of the multiplication.
              x: Right operand of the multiplication.

          Returns:
              The product ``s * x``.
          """
          scaled = s * x
          return scaled
      
      def addx(df, s, x):
          """Add ``x`` to ``s``.

          Args:
              df: Not used by this function. The feature expression
                  ``addx(close_0, close_1)`` supplies only ``s`` and ``x``,
                  so this leading argument appears to be provided by the
                  expression engine itself.
              s: Left operand of the addition.
              x: Right operand of the addition.

          Returns:
              The sum ``s + x``.
          """
          total = s + x
          return total
      
      # Registry of user-defined functions (expression name -> Python callable);
      # it is passed as `user_functions` to the derived feature extractor.
      m7_user_functions_bigquant_run = dict(timex=timex, addx=addx)
      
      
      # Instrument selection for market 'CN_STOCK_A' between 2015-01-01 and 2017-01-01.
      # NOTE(review): instrument_list='' and max_count=0 presumably mean "no restriction";
      # confirm against the M.instruments documentation.
      m1 = M.instruments.v2(
          start_date='2015-01-01',
          end_date='2017-01-01',
          market='CN_STOCK_A',
          instrument_list='',
          max_count=0
      )
      
      # One feature per line: two base columns (close_0, close_1) and two
      # expressions that call the user-defined functions timex and addx.
      # This same list is passed to both extractors below (m6 and m7).
      m5 = M.input_features.v1(
          features="""close_0
      close_1
      timex(close_1/close_0, 100)
      addx(close_0, close_1)"""
      )
      
      # Run the general feature extractor on the instruments from m1 with the
      # feature list from m5, over the same 2015-01-01 .. 2017-01-01 range.
      # NOTE(review): before_start_days=0 presumably means no extra look-back days
      # before start_date; confirm against the module documentation.
      m6 = M.general_feature_extractor.v7(
          instruments=m1.data,
          features=m5.data,
          start_date='2015-01-01',
          end_date='2017-01-01',
          before_start_days=0
      )
      
      # Evaluate the expressions from m5 on the output of m6, with the custom
      # timex/addx implementations supplied through `user_functions`.
      # NOTE(review): drop_na=False and remove_extra_columns=False presumably keep
      # rows with missing values and columns not named in the feature list;
      # confirm against the derived_feature_extractor documentation.
      m7 = M.derived_feature_extractor.v3(
          input_data=m6.data,
          features=m5.data,
          date_col='date',
          instrument_col='instrument',
          drop_na=False,
          remove_extra_columns=False,
          user_functions=m7_user_functions_bigquant_run
      )
      
      In [5]:
      # Load the extractor output as a DataFrame and preview its first rows.
      m7.data.read_df().head()
      
      Out[5]:
      close_0 close_1 date instrument timex(close_1/close_0, 100) addx(close_0, close_1)
      0 1138.280273 1125.490601 2015-01-05 000001.SZA 98.876404 2263.770996
      1 1121.227295 1138.280273 2015-01-06 000001.SZA 101.520920 2259.507568
      2 1099.911255 1121.227295 2015-01-07 000001.SZA 101.937973 2221.138672
      3 1062.963257 1099.911255 2015-01-08 000001.SZA 103.475937 2162.874512
      4 1071.489746 1062.963257 2015-01-09 000001.SZA 99.204239 2134.453125
      In [ ]:
       
      

      怎么自定义因子表达式引擎中的函数?