复制链接
克隆策略

本例子介绍如何构建自定义因子。

本例子希望:

  • 构建一个收益率因子

  • 在每个交易日的横截面上,将该因子按取值大小分成3组

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"-215:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"to_node_id":"-215:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-222:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-222:input_data","from_node_id":"-215:data"}],"nodes":[{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2021-10-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2021-12-01","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"get_ret(close_0, 15)\ncut(get_ret(close_0, 15), 
3)\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"-215","module_id":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-215"},{"name":"features","node_id":"-215"}],"output_ports":[{"name":"data","node_id":"-215"}],"cacheable":true,"seq_num":15,"comment":"","comment_collapsed":true},{"node_id":"-222","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"False","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"def _ret(df, factor , N):\n # 单支票求收益率\n df['ret'] = factor / factor.shift(N) -1 # 这里的factor 是一个pandas的Series对象\n return df \n \ndef get_ret(df, factor, N):\n # 全市场求收益率, 按标的进行分组\n ret_df = df.groupby('instrument').apply(_ret, factor=factor, N=N).reset_index()\n return ret_df['ret'] # 求一个表达式的数值,应该返回一个Series对象\n\ndef _cut(df, factor, Bins):\n # 单个交易日横截面进行cut\n df['label'] = pd.cut(factor.rank(pct=True), bins=Bins, labels=False) / (Bins-1)\n return df\n\ndef cut(df, factor, Bins=3):\n # 全部交易日进行cut, 按date进行分组\n label_df = df.groupby('date').apply(_cut, factor=factor, Bins=Bins).reset_index() \n return label_df['label'] # 
求一个表达式的数值,应该返回一个Series对象\n\n\n# 因为这俩表达式是我们新自定义的表达式,因此需要声明,以便在输入特征列表可使用\nbigquant_run = {\n 'cut':cut, \n 'get_ret':get_ret,\n}","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-222"},{"name":"features","node_id":"-222"}],"output_ports":[{"name":"data","node_id":"-222"}],"cacheable":true,"seq_num":16,"comment":"","comment_collapsed":true}],"node_layout":"<node_postions><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='9.754318237304688,-176.0166630744934,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-24' Position='320.9460754394531,-181.9390823841095,200,200'/><node_position Node='-215' Position='107.2994384765625,-51.52054786682129,200,200'/><node_position Node='-222' Position='80.18855285644531,36.300506591796875,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [17]:
    # 本代码由可视化策略环境自动生成 2021年12月23日 22:16
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    def _ret(df, factor, N):
        """Attach an N-row return column ``ret`` to one instrument's rows.

        Args:
            df: DataFrame holding the rows of a single instrument.
            factor: pandas Series of the price-like factor. It may cover more
                rows than ``df`` (for example the whole market); only the
                entries whose index labels appear in ``df.index`` are used.
            N: Number of rows to look back.

        Returns:
            A copy of ``df`` with an extra ``ret`` column equal to
            ``factor / factor.shift(N) - 1`` computed on this instrument's
            rows only. The input frame is not modified.
        """
        # Restrict the factor to this instrument before shifting: shifting the
        # whole-market Series would pair a row with another instrument's value.
        own = factor.reindex(df.index)
        return df.assign(ret=own / own.shift(N) - 1)

    def get_ret(df, factor, N):
        """Compute the N-row return of ``factor`` separately for every instrument.

        Args:
            df: DataFrame with an ``instrument`` column, sharing its index with
                ``factor``.
            factor: pandas Series of the price-like factor (e.g. a close price).
            N: Number of rows to look back within each instrument.

        Returns:
            A Series named ``ret`` with the same index and row order as
            ``factor``. The first ``N`` rows of each instrument are NaN because
            they have no earlier value to compare against.

        Note:
            The look-back is counted in rows, so this assumes each instrument's
            rows are already in chronological order -- not enforced here.
        """
        # Shift inside each instrument group so a row is only ever compared
        # with an earlier row of the same instrument.
        lagged = factor.groupby(df['instrument']).shift(N)
        return (factor / lagged - 1).rename('ret')
    
    def _cut_labels(day_factor, Bins):
        """Bucket one cross-section of factor values into ``Bins`` groups.

        Values are converted to percentile ranks, split into ``Bins``
        equal-width rank intervals with ``pd.cut``, and the resulting bucket
        number (0 .. Bins-1) is divided by ``Bins - 1``.

        Returns:
            A float Series indexed like ``day_factor``; missing factor values
            stay NaN.
        """
        pct_rank = day_factor.rank(pct=True)
        if not pct_rank.notna().any():
            # No valid value in this cross-section (or it is empty): pd.cut has
            # no value range to build bins from, so keep the labels as NaN.
            return pct_rank
        return pd.cut(pct_rank, bins=Bins, labels=False) / (Bins - 1)

    def _cut(df, factor, Bins):
        """Attach a ``label`` bucket column to the rows of one trading day.

        Args:
            df: DataFrame holding the rows of a single date.
            factor: pandas Series of factor values. It may cover more rows than
                ``df``; only entries whose index labels appear in ``df.index``
                are ranked.
            Bins: Number of buckets.

        Returns:
            A copy of ``df`` with an extra ``label`` column. The input frame is
            not modified.
        """
        # Rank only this day's values; ranking the full Series would mix dates.
        own = factor.reindex(df.index)
        return df.assign(label=_cut_labels(own, Bins))

    def cut(df, factor, Bins=3):
        """Bucket ``factor`` into ``Bins`` groups within every date.

        Args:
            df: DataFrame with a ``date`` column, sharing its index with
                ``factor``.
            factor: pandas Series of factor values.
            Bins: Number of buckets per date (default 3).

        Returns:
            A Series named ``label`` with the same index and row order as
            ``factor``. Dates whose factor values are all missing get NaN.
        """
        # transform keeps the original row order and index, so the result lines
        # up with the input rows regardless of how the frame is sorted.
        labels = factor.groupby(df['date']).transform(
            lambda day_factor: _cut_labels(day_factor, Bins)
        )
        return labels.rename('label')
    
    
    # These two expressions are newly defined custom expressions, so they must be
    # declared here to be usable in the input feature list.
    # The dict maps the name written in a feature expression to the function
    # defined above; it is handed to the derived feature extractor below as
    # its `user_functions` argument.
    m16_user_functions_bigquant_run = {
        'cut':cut, 
        'get_ret':get_ret,
    }
    
    # Instrument selection: market CN_STOCK_A between 2021-10-01 and 2021-12-01.
    # An empty instrument_list and max_count=0 are passed as-is; presumably they
    # mean "no explicit list / no cap" -- confirm against the module docs.
    m1 = M.instruments.v2(
        start_date='2021-10-01',
        end_date='2021-12-01',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # Feature list, one expression per line: the 15-row return of close_0, and
    # that same return bucketed into 3 groups with the custom `cut` expression.
    m3 = M.input_features.v1(
        features="""get_ret(close_0, 15)
    cut(get_ret(close_0, 15), 3)
    """
    )
    
    # Feeds the instruments and the feature list to the general feature
    # extractor; presumably this loads the base fields the expressions refer to
    # (here close_0) -- confirm against the module docs.
    # NOTE(review): before_start_days=0 together with a 15-row look-back leaves
    # the earliest rows of each instrument without a return value -- confirm
    # this is intended.
    m15 = M.general_feature_extractor.v7(
        instruments=m1.data,
        features=m3.data,
        start_date='',
        end_date='',
        before_start_days=0
    )
    
    # Derived feature step: receives the extracted data, the same feature list
    # and the custom expression functions registered above. drop_na=False and
    # remove_extra_columns=False are passed, so presumably rows with missing
    # values and the extra columns are kept -- confirm against the module docs.
    m16 = M.derived_feature_extractor.v3(
        input_data=m15.data,
        features=m3.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions=m16_user_functions_bigquant_run
    )