复制链接
克隆策略
In [1]:
#如何在建模选取对股票池区分度比较大的因子?

#因子分析(factor analysis)-因子溢价
In [2]:
#因子溢价:
#通俗的讲就是一对多空组合的收益率
#计算过程:
#1.从时间序列的角度,将股票按因子值的大小进行排序
#2.将排序后的股票按因子值大小划分为10、5分位
#3.计算每个分位的收益变化
#4.第一分位和第五分位的收益差值即为因子溢价
#因子溢价越大,因子对股票池的区分度越好

    {"description":"实验创建于2020/12/25","graph":{"edges":[{"to_node_id":"-50:instruments","from_node_id":"-20:data"},{"to_node_id":"-50:features","from_node_id":"-38:data"},{"to_node_id":"-66:features","from_node_id":"-38:data"},{"to_node_id":"-6138:input_data","from_node_id":"-50:data"},{"to_node_id":"-1569:input_1","from_node_id":"-66:data"},{"to_node_id":"-66:input_data","from_node_id":"-6138:data"}],"nodes":[{"node_id":"-20","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2020-01-01","type":"Literal","bound_global_parameter":"交易日期"},{"name":"end_date","value":"2022-10-13","type":"Literal","bound_global_parameter":"交易日期"},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"-20"}],"output_ports":[{"name":"data","node_id":"-20"}],"cacheable":true,"seq_num":2,"comment":"股票代码","comment_collapsed":false},{"node_id":"-38","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"#条件一\nbuy_cond_1 = where((close_0>mean(close_0, 20))&(close_0>mean(close_0, 10))&(close_0>mean(close_0, 5)),1,0)\n#条件二\nbuy_cond_2 = where((low_0<mean(close_0, 5))&(low_0<mean(close_0, 10))&(low_0<mean(close_0, 20)), 1, 0)\n#条件三\nbuy_cond_3 = where((close_0>open_0), 1, 0)\n\n#目标变量\ny = shift(close_0,-2)/shift(open_0,-1)\n\n# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)\nyiziban = where(shift(high_0, -1) == shift(low_0, -1), NaN, y)\n\na0 = return_0\na1 = return_5\na2 = return_10\na3 = return_20\na4 = return_50\na5 = return_120\n\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"-38"}],"output_ports":[{"name":"data","node_id":"-38"}],"cacheable":true,"seq_num":4,"comment":"条件因子\n模型因子\n目标变量y","comment_collapsed":false},{"node_id":"-50","module_id":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":"100","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-50"},{"name":"features","node_id":"-50"}],"output_ports":[{"name":"data","node_id":"-50"}],"cacheable":true,"seq_num":6,"comment":"基础数据集抽取","comment_collapsed":false},{"node_id":"-66","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"False","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"True","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-66"},{"name":"features","node_id":"-66"}],"output_ports":[{"name":"data","node_id":"-66"}],"cacheable":true,"seq_num":8,"comment":"","comment_collapsed":true},{"node_id":"-6138","module_id":"BigQuantSpace.chinaa_stock_filter.chinaa_stock_filter-v1","parameters":[{"name":"index_constituent_cond","value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22displayValue%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%5D%7D","type":"Literal","bound_global_parameter":null},{"name":"board_cond","value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22displayValue%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%A7%91%E5%88%9B%E6%9D%BF%22%2C%22displayValue%22%3A%22%E7%A7%91%E5%88%9B%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%5D%7D","type":"Literal","bound_global_parameter":null},{"name":"industry_cond","value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22displayValue%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22displayValue%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22displayValue%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22displayValue%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22displayValue%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22displayValue%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22displayValue%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22displayValue%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22displayValue%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22displayValue%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22displayValue%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22displayValue%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22displayValue%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22displayValue%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%87%87%E6%8E%98%22%2C%22displayValue%22%3A%22%E9%87%87%E6%8E%98%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%92%A2%E9%93%81%22%2C%22displayValue%22%3A%22%E9%92%A2%E9%93%81%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22displayValue%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22displayValue%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22selected%22%3Afalse%7D%5D%7D","type":"Literal","bound_global_parameter":null},{"name":"st_cond","value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22displayValue%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22ST%22%2C%22displayValue%22%3A%22ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22*ST%22%2C%22displayValue%22%3A%22*ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22displayValue%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22selected%22%3Afalse%7D%5D%7D","type":"Literal","bound_global_parameter":null},{"name":"delist_cond","value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%80%80%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22selected%22%3Atrue%7D%5D%7D","type":"Literal","bound_global_parameter":null},{"name":"output_left_data","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-6138"}],"output_ports":[{"name":"data","node_id":"-6138"},{"name":"left_data","node_id":"-6138"}],"cacheable":true,"seq_num":1,"comment":"股票池过滤","comment_collapsed":false},{"node_id":"-1569","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n #获取数据\n df_all = m8.data.read_df()\n df_all = df_all[df_all['date']>'2020-01-01']\n df_all = df_all[df_all['yiziban'].notnull()==True] \n \n #新建dataframe,过滤股票池\n df_all = df_all[(df_all['buy_cond_1']==1)&(df_all['buy_cond_2']==1)&(df_all['buy_cond_2']==1)]\n\n \n data_1 = DataSource.write_df(df_all)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-1569"},{"name":"input_2","node_id":"-1569"},{"name":"input_3","node_id":"-1569"}],"output_ports":[{"name":"data_1","node_id":"-1569"},{"name":"data_2","node_id":"-1569"},{"name":"data_3","node_id":"-1569"}],"cacheable":true,"seq_num":3,"comment":"过滤条件","comment_collapsed":false}],"node_layout":"<node_postions><node_position Node='-20' Position='572.9605102539062,176.20181274414062,200,200'/><node_position Node='-38' Position='991,190,200,200'/><node_position Node='-50' Position='595,339,200,200'/><node_position Node='-66' Position='954,530,200,200'/><node_position Node='-6138' Position='555,460,200,200'/><node_position Node='-1569' Position='746,700,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [3]:
    # 本代码由可视化策略环境自动生成 2022年11月4日 01:09
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m3_run_bigquant_run(input_1, input_2, input_3):
        #获取数据
        df_all = m8.data.read_df()
        df_all = df_all[df_all['date']>'2020-01-01']
        df_all = df_all[df_all['yiziban'].notnull()==True]   
        
        #新建dataframe,过滤股票池
        df_all = df_all[(df_all['buy_cond_1']==1)&(df_all['buy_cond_2']==1)&(df_all['buy_cond_2']==1)]
    
        
        data_1 = DataSource.write_df(df_all)
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m3_post_run_bigquant_run(outputs):
        return outputs
    
    m2 = M.instruments.v2(
        start_date=T.live_run_param('trading_date', '2020-01-01'),
        end_date=T.live_run_param('trading_date', '2022-10-13'),
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    m4 = M.input_features.v1(
        features="""#条件一
    buy_cond_1 = where((close_0>mean(close_0, 20))&(close_0>mean(close_0, 10))&(close_0>mean(close_0, 5)),1,0)
    #条件二
    buy_cond_2 = where((low_0<mean(close_0, 5))&(low_0<mean(close_0, 10))&(low_0<mean(close_0, 20)), 1, 0)
    #条件三
    buy_cond_3 = where((close_0>open_0), 1, 0)
    
    #目标变量
    y = shift(close_0,-2)/shift(open_0,-1)
    
    # 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
    yiziban = where(shift(high_0, -1) == shift(low_0, -1), NaN, y)
    
    a0 = return_0
    a1 = return_5
    a2 = return_10
    a3 = return_20
    a4 = return_50
    a5 = return_120
    
    """
    )
    
    m6 = M.general_feature_extractor.v7(
        instruments=m2.data,
        features=m4.data,
        start_date='',
        end_date='',
        before_start_days=100
    )
    
    m1 = M.chinaa_stock_filter.v1(
        input_data=m6.data,
        index_constituent_cond=['全部'],
        board_cond=['深证主板'],
        industry_cond=['全部'],
        st_cond=['正常'],
        delist_cond=['非退市'],
        output_left_data=False
    )
    
    m8 = M.derived_feature_extractor.v3(
        input_data=m1.data,
        features=m4.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=True
    )
    
    m3 = M.cached.v3(
        input_1=m8.data,
        run=m3_run_bigquant_run,
        post_run=m3_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports=''
    )
    
    In [4]:
    import matplotlib.pyplot as plt
    
    In [5]:
    alpha_list = ['a0','a1','a2','a3','a4','a5']
    
    In [6]:
    #-----------------------------------------因子溢价模块--------------------------------------------
    def factor_yijia(factor):
        #过滤缺失值
        df_all = m3.data_1.read_df()
        df_all = df_all[['date','instrument','y',factor]]
        df_all = df_all.dropna() 
        
        #时间,分位表,按时间序列分析
        time_fen_return = pd.DataFrame(columns = ['date','yi','er','san','si','wu'])    
    
        index = 0
        #按天groupby
        for date,part_df in df_all.groupby(['date']):
            #过滤当天数据量少于10的情况
            if len(part_df)>10:
                time_fen_return.loc[index,'date'] = date
    
                #按因子排序
                part_df = part_df.sort_values([factor],ascending = False).reset_index(drop = True)
    
                #第一分位的平均收益
                df_yi = part_df[:len(part_df)//5]
                time_fen_return.loc[index,'yi'] = df_yi['y'].mean()
    
                #第二分位的平均收益
                df_er = part_df[len(part_df)//5:len(part_df)//5*2]
                time_fen_return.loc[index,'er'] = df_er['y'].mean()    
    
                #第三分位的平均收益
                df_san = part_df[len(part_df)//5*2:len(part_df)//5*3]
                time_fen_return.loc[index,'san'] = df_san['y'].mean()   
    
                #第四分位的平均收益
                df_si = part_df[len(part_df)//5*3:len(part_df)//5*4]
                time_fen_return.loc[index,'si'] = df_si['y'].mean()       
    
                #第五分位的平均收益
                df_wu = part_df[len(part_df)//5*4:]
                time_fen_return.loc[index,'wu'] = df_wu['y'].mean() 
    
                index+=1
        #print('因子{}的第一分位平均收益为{}'.format(factor,time_fen_return['yi'].mean()))
        #print('因子{}的第二分位平均收益为{}'.format(factor,time_fen_return['er'].mean()))
        #print('因子{}的第三分位平均收益为{}'.format(factor,time_fen_return['san'].mean()))
        #print('因子{}的第四分位平均收益为{}'.format(factor,time_fen_return['si'].mean()))
        #print('因子{}的第五分位平均收益为{}'.format(factor,time_fen_return['wu'].mean()))    
        return time_fen_return
    #-----------------------------------------因子溢价模块--------------------------------------------
    
    In [7]:
    #------------------------------------因子溢价累计收益计算模块-----------------------------
    def leiji(df,columns):
        for i in range(len(columns)):
            column = columns[i]
            for i in range(len(df)):
                if i == 0:
                    df.loc[i,str(column)+'_leiji'] = df.loc[i,column]
                else:
                    df.loc[i,str(column)+'_leiji'] = df.loc[i-1,str(column)+'_leiji']*(((df.loc[i,column]-1)/2)+1)
        return df
    #------------------------------------因子溢价累计收益计算模块-----------------------------
    
    In [8]:
    #----------------------------------因子溢价-分位点收益画图-----------------------
    def factor_yijia_draw(df):
        #图片大小
        plt.figure(figsize=(15, 10))
    
        #画图数据
        plt.plot(df['yi_leiji'], label='one_quantile')
        plt.plot(df['er_leiji'],  label='two_quantile')
        plt.plot(df['san_leiji'], label='three_quantile')
        plt.plot(df['si_leiji'],  label='four_quantile')
        plt.plot(df['wu_leiji'],  label='five_quantile')
    
        #坐标轴单位
        plt.legend(loc="best")
        plt.xlabel('days', fontsize=14)
        plt.ylabel('multi_return', fontsize=14)
        
        plt.show()
    
    #----------------------------------因子溢价-分位点收益画图-----------------------
    
    In [9]:
    df = factor_yijia('a0')
    df
    
    Out[9]:
    date yi er san si wu
    0 2020-01-02 00:00:00 1.011113 1.030144 1.019439 1.017565 1.011547
    1 2020-01-03 00:00:00 1.019047 1.019224 1.034929 1.01979 1.035426
    2 2020-01-06 00:00:00 1.005552 0.99286 1.002439 0.991327 0.99283
    3 2020-01-07 00:00:00 0.992964 1.007735 1.012154 0.99721 1.004803
    4 2020-01-09 00:00:00 1.016706 1.025026 1.03138 1.014947 1.017215
    ... ... ... ... ... ... ...
    643 2022-09-28 00:00:00 0.975193 1.025272 0.898041 0.981002 0.978734
    644 2022-09-29 00:00:00 0.97575 0.888277 1.0033 1.011483 0.998352
    645 2022-09-30 00:00:00 0.985998 1.005844 0.957408 1.01196 0.99445
    646 2022-10-10 00:00:00 1.050612 1.039995 1.031628 1.025689 0.990091
    647 2022-10-11 00:00:00 1.056453 1.034522 1.040476 1.0264 1.020437

    648 rows × 6 columns

    In [10]:
    leiji(factor_yijia('a0'),columns = ['yi','er','san','si','wu'])
    
    Out[10]:
    date yi er san si wu yi_leiji er_leiji san_leiji si_leiji wu_leiji
    0 2020-01-02 00:00:00 1.011113 1.030144 1.019439 1.017565 1.011547 1.011113 1.030144 1.019439 1.017565 1.011547
    1 2020-01-03 00:00:00 1.019047 1.019224 1.034929 1.01979 1.035426 1.020742 1.040046 1.037242 1.027634 1.029464
    2 2020-01-06 00:00:00 1.005552 0.99286 1.002439 0.991327 0.99283 1.023576 1.036334 1.038507 1.023177 1.025773
    3 2020-01-07 00:00:00 0.992964 1.007735 1.012154 0.99721 1.004803 1.019975 1.040341 1.044819 1.021750 1.028237
    4 2020-01-09 00:00:00 1.016706 1.025026 1.03138 1.014947 1.017215 1.028495 1.053359 1.061212 1.029386 1.037087
    ... ... ... ... ... ... ... ... ... ... ... ...
    643 2022-09-28 00:00:00 0.975193 1.025272 0.898041 0.981002 0.978734 1.383248 2.572206 1.633900 1.864543 2.206029
    644 2022-09-29 00:00:00 0.97575 0.888277 1.0033 1.011483 0.998352 1.366476 2.428518 1.636595 1.875248 2.204212
    645 2022-09-30 00:00:00 0.985998 1.005844 0.957408 1.01196 0.99445 1.356909 2.435615 1.601743 1.886462 2.198095
    646 2022-10-10 00:00:00 1.050612 1.039995 1.031628 1.025689 0.990091 1.391247 2.484321 1.627073 1.910693 2.187205
    647 2022-10-11 00:00:00 1.056453 1.034522 1.040476 1.0264 1.020437 1.430517 2.527203 1.660002 1.935914 2.209555

    648 rows × 11 columns

    In [11]:
    factor_yijia_draw(leiji(factor_yijia('a0'),columns = ['yi','er','san','si','wu']))
    
    In [12]:
    factor_yijia_draw(leiji(factor_yijia('a1'),columns = ['yi','er','san','si','wu']))
    
    In [13]:
    factor_yijia_draw(leiji(factor_yijia('a2'),columns = ['yi','er','san','si','wu']))
    
    In [14]:
    factor_yijia_draw(leiji(factor_yijia('a3'),columns = ['yi','er','san','si','wu']))
    
    In [15]:
    factor_yijia_draw(leiji(factor_yijia('a4'),columns = ['yi','er','san','si','wu']))
    
    In [16]:
    factor_yijia_draw(leiji(factor_yijia('a5'),columns = ['yi','er','san','si','wu']))
    
    In [17]:
    #得到结论
    #a0,a1,a2,a3的区分度要好于a4,a5
    
    In [18]:
    df = D.history_data(instruments=['000001.HIX'], start_date='2020-01-01', end_date='2022-11-02', fields=['close'])
    df.dtypes
    
    Out[18]:
    instrument            object
    close                float32
    date          datetime64[ns]
    dtype: object
    In [19]:
    #--------------------------------------------因子溢价修正-------------------------------
    def factor_yijia_cor(factor):
        #过滤缺失值
        df_all = m3.data_1.read_df()
        df_all = df_all[['date','instrument','y',factor]]
        df_all = df_all.dropna()
        df_all = df_all.sort_values([factor],ascending = False).reset_index(drop = True)
    
        fen_return = pd.DataFrame(columns = ['date','yi','er','san','si','wu'])    
        
        #按因子大小划分数据集
        df_yi = df_all[:len(df_all)//5]
        df_er = df_all[len(df_all)//5:len(df_all)//5*2]
        df_san = df_all[len(df_all)//5*2:len(df_all)//5*3]
        df_si = df_all[len(df_all)//5*3:len(df_all)//5*4]
        df_wu = df_all[len(df_all)//5*4:]    
        
        #输入指数表(时间)
        index_df = D.history_data(instruments=['000001.HIX'], start_date='2020-01-01', end_date='2022-10-11', fields=['close'])    
        
        #如果当天选中的分位没有数据,设为1
        def len_df_return(df):
            if len(df)==0:
                return 1
            else:
                return df['y'].mean()
        
        #输入每个分数每天的平均收益
        for i in range(len(index_df)):
            date = index_df.loc[i,'date']
            fen_return.loc[i,'date'] = date
    
            fen_return.loc[i,'yi'] = len_df_return(df_yi[df_yi['date']==date])
            fen_return.loc[i,'er'] = len_df_return(df_er[df_er['date']==date])
            fen_return.loc[i,'san'] = len_df_return(df_san[df_san['date']==date])
            fen_return.loc[i,'si'] = len_df_return(df_si[df_si['date']==date])
            fen_return.loc[i,'wu'] = len_df_return(df_wu[df_wu['date']==date])
    
        return fen_return    
    #--------------------------------------------因子溢价修正-------------------------------    
    
    In [20]:
    factor_yijia_draw(leiji(factor_yijia_cor('a0'),columns = ['yi','er','san','si','wu']))
    
    In [21]:
    factor_yijia_draw(leiji(factor_yijia_cor('a1'),columns = ['yi','er','san','si','wu']))
    
    In [22]:
    factor_yijia_draw(leiji(factor_yijia_cor('a2'),columns = ['yi','er','san','si','wu']))
    
    In [23]:
    factor_yijia_draw(leiji(factor_yijia_cor('a3'),columns = ['yi','er','san','si','wu']))
    
    In [24]:
    factor_yijia_draw(leiji(factor_yijia_cor('a4'),columns = ['yi','er','san','si','wu']))
    
    In [25]:
    factor_yijia_draw(leiji(factor_yijia_cor('a5'),columns = ['yi','er','san','si','wu']))
    
    In [ ]: