早上起床发现列表式输入特征不能用了

策略分享
标签: #<Tag:0x00007fc4c0da45a8>

(a1641181638) #1
克隆策略
In [148]:
a = [1,2,3]
print((a[0]))
1
In [149]:
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from sklearn import linear_model
from matplotlib import rc
rc('mathtext', default='regular')
import seaborn as sns
sns.set_style('white')
from matplotlib import dates
import numpy as np
import pandas as pd
import statsmodels.api as sm
import time
import time
import gc
import datetime as DT
import scipy.stats as st
from math import sqrt
time_start = time.time()

    {"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-2432:input_1","SourceOutputPortId":"-6126:feature_list"}],"ModuleNodes":[{"Id":"-3911","ModuleId":"BigQuantSpace.csv_read.csv_read-v3","ModuleParameters":[{"Name":"csv_path","Value":"因子批量测试结果.csv","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"encoding","Value":"utf-8","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[],"OutputPortsInternal":[{"Name":"data","NodeId":"-3911","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"Comment":"","CommentCollapsed":true},{"Id":"-6126","ModuleId":"BigQuantSpace.list_input_feature.list_input_feature-v7","ModuleParameters":[{"Name":"feature_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_features","NodeId":"-6126"}],"OutputPortsInternal":[{"Name":"feature_list","NodeId":"-6126","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"Comment":"","CommentCollapsed":true},{"Id":"-2422","ModuleId":"BigQuantSpace.hyper_run.hyper_run-v1","ModuleParameters":[{"Name":"run","Value":"def bigquant_run(bq_graph, inputs):\n features =[\n\"pb_lf_0\", \n ]\n#\"pe_lyr_0\",\n#\"pe_ttm_0\",\n#\"ps_ttm_0\", \n parameters_list = []\n for feature in features:\n feature = [feature]\n feature.append('return_5')\n parameters = {'m3.feature_list':feature}\n parameters_list.append({'parameters': parameters})\n \n def run(parameters):\n try:\n print(parameters)\n return g.run(parameters)\n except Exception as e:\n print('ERROR --------', e)\n return None\n \n results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)\n\n return results\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"run_now","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"bq_graph","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"bq_graph_port","NodeId":"-2422"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-2422"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-2422"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-2422"}],"OutputPortsInternal":[{"Name":"result","NodeId":"-2422","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":4,"Comment":"","CommentCollapsed":true},{"Id":"-2432","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\n#--------------------------------------------------------------------------------------------------\n# 去极值函数\n\ndef winsorize_series(se):\n q = se.quantile([0.025, 0.975])\n if isinstance(q, pd.Series) and len(q) == 2:\n se[se < q.iloc[0]] = q.iloc[0]\n se[se > q.iloc[1]] = q.iloc[1]\n return se\n\n# 皮尔逊相关系数函数(IC)\ndef multiply(a,b):\n sum_ab=0.0\n for i in range(len(a)):\n temp=a[i]*b[i]\n sum_ab+=temp\n return sum_ab\n\ndef cal_pearson(x,y):\n try:\n n=len(x)\n sum_x=sum(x)\n sum_y=sum(y)\n sum_xy=multiply(x,y)\n sum_x2 = sum([pow(i,2) for i in x])\n sum_y2 = sum([pow(j,2) for j in y])\n molecular=sum_xy-(float(sum_x)*float(sum_y)/n)\n denominator=sqrt((sum_x2-float(sum_x**2)/n)*(sum_y2-float(sum_y**2)/n)) \n return molecular/denominator\n except ZeroDivisionError:\n print (\"Error:ZeroDivisionError\") \n#----------------------------------------------------------------------------------------\n\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n start_date = '2017-04-30'\n end_date = '2018-04-30'\n rebalance_period = 5 \n \n m1 = M.instruments.v2(\n start_date=start_date,\n end_date=end_date,\n market='CN_STOCK_A',\n instrument_list='',\n max_count=0\n )\n\n data_1 = input_1\n fac =data_1.read_pickle() \n print(fac)\n \n \n\n m2 = M.use_datasource.v1(\n features= data_1,\n instruments = m1.data,\n datasource_id='instruments_CN_STOCK_A',\n start_date='',\n end_date=''\n )\n\n m5 = M.dropnan.v1(\n input_data=m2.data\n )\n\n\n m6 = M.chinaa_stock_filter.v1(\n input_data=m5.data,\n index_constituent_cond=['中证800'],\n board_cond=['全部'],\n industry_cond=['全部'],\n st_cond=['正常'],\n delist_cond=['非退市'],\n output_left_data=False\n )\n\n df = m6.data.read_df()\n instruments = list(set(df.instrument))\n\n features_data = D.features(instruments=instruments, start_date=start_date, end_date=end_date, fields=fac)\n\n mydata = features_data.set_index(['date','instrument'])[fac].unstack() #unstack函数使得表格变成花括号\n \n #--------------------------------------------------------------------------------------------------------------------\n print(\"开始计算各因子IC\")\n\n for col in mydata['return_5'].columns:\n mydata['return_5'][col] = mydata['return_5'][col].shift(-rebalance_period)\n data_ret = mydata['return_5']\n\n group_fac = []\n\n data_tmp = mydata[str(fac[0])]\n\n for col in data_ret.T.columns[:-rebalance_period]:\n tmp_df = pd.DataFrame(index=data_ret.T.index)\n tmp_df['fac'] = data_tmp.T[col]\n tmp_df['ret'] = data_ret.T[col]\n tmp_df = tmp_df.dropna()\n coff = cal_pearson(tmp_df['fac'],tmp_df['ret'])\n dict_temp = {'feature':fac,'date':col,'coff':coff}\n group_fac.append(dict_temp)\n\n _ic = pd.DataFrame(group_fac,columns=['fac[0]','date','coff'])\n print('_ic')\n print(_ic)\n\n ic_temp = _ic.set_index(['date','fac[0]']).unstack()\n print('ic_temp')\n print(ic_temp)\n ic_df = ic_temp['coff'] \n print('ic_df')\n print(ic_df)\n ic_df.head()\n ic_df.plot(figsize=(12,6), title='Factor weight using sample covariance')\n #---------------------------------------------以上是借鉴的前半部分,获取了相关性data-----------------------------------------------------\n ic_df = ic_df.mean()\n ic_df.to_csv('第5期IC计算结果.csv',header=True,mode='w')\n #-------------------------------------------------------------------------------------------------------------------------------------\n return Outputs(data_1=None, data_2=None, data_3=None) \n #-----------------------------------------------------------------------------------------------------------------------------------------\n\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-2432"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-2432"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-2432"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-2432","OutputType":null},{"Name":"data_2","NodeId":"-2432","OutputType":null},{"Name":"data_3","NodeId":"-2432","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-3911' Position='864,-207,200,200'/><NodePosition Node='-6126' Position='378,-185,200,200'/><NodePosition Node='-2422' Position='-25.294898986816406,-275.0531005859375,200,200'/><NodePosition Node='-2432' Position='413.6998291015625,16.76612091064453,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [150]:
    # 本代码由可视化策略环境自动生成 2020年3月15日 23:18
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    #--------------------------------------------------------------------------------------------------
    # 去极值函数
    
    def winsorize_series(se):
        q = se.quantile([0.025, 0.975])
        if isinstance(q, pd.Series) and len(q) == 2:
            se[se < q.iloc[0]] = q.iloc[0]
            se[se > q.iloc[1]] = q.iloc[1]
        return se
    
    # 皮尔逊相关系数函数(IC)
    def multiply(a,b):
        sum_ab=0.0
        for i in range(len(a)):
            temp=a[i]*b[i]
            sum_ab+=temp
        return sum_ab
    
    def cal_pearson(x,y):
        try:
            n=len(x)
            sum_x=sum(x)
            sum_y=sum(y)
            sum_xy=multiply(x,y)
            sum_x2 = sum([pow(i,2) for i in x])
            sum_y2 = sum([pow(j,2) for j in y])
            molecular=sum_xy-(float(sum_x)*float(sum_y)/n)
            denominator=sqrt((sum_x2-float(sum_x**2)/n)*(sum_y2-float(sum_y**2)/n)) 
            return molecular/denominator
        except ZeroDivisionError:
            print ("Error:ZeroDivisionError")     
    #----------------------------------------------------------------------------------------
    
    def m2_run_bigquant_run(input_1, input_2, input_3):
        # 示例代码如下。在这里编写您的代码
        start_date = '2017-04-30'
        end_date = '2018-04-30'
        rebalance_period = 5  
     
        m1 = M.instruments.v2(
            start_date=start_date,
            end_date=end_date,
            market='CN_STOCK_A',
            instrument_list='',
            max_count=0
        )
    
        data_1 = input_1
        fac =data_1.read_pickle()    
        print(fac)
         
        
    
        m2 = M.use_datasource.v1(
            features= data_1,
            instruments = m1.data,
            datasource_id='instruments_CN_STOCK_A',
            start_date='',
            end_date=''
        )
    
        m5 = M.dropnan.v1(
            input_data=m2.data
        )
    
    
        m6 = M.chinaa_stock_filter.v1(
            input_data=m5.data,
            index_constituent_cond=['中证800'],
            board_cond=['全部'],
            industry_cond=['全部'],
            st_cond=['正常'],
            delist_cond=['非退市'],
            output_left_data=False
        )
    
        df = m6.data.read_df()
        instruments = list(set(df.instrument))
    
        features_data = D.features(instruments=instruments, start_date=start_date, end_date=end_date, fields=fac)
    
        mydata = features_data.set_index(['date','instrument'])[fac].unstack()    #unstack函数使得表格变成花括号
     
        #--------------------------------------------------------------------------------------------------------------------
        print("开始计算各因子IC")
    
        for col in mydata['return_5'].columns:
            mydata['return_5'][col] = mydata['return_5'][col].shift(-rebalance_period)
        data_ret = mydata['return_5']
    
        group_fac = []
    
        data_tmp = mydata[str(fac[0])]
    
        for col in data_ret.T.columns[:-rebalance_period]:
            tmp_df = pd.DataFrame(index=data_ret.T.index)
            tmp_df['fac'] = data_tmp.T[col]
            tmp_df['ret'] = data_ret.T[col]
            tmp_df = tmp_df.dropna()
            coff = cal_pearson(tmp_df['fac'],tmp_df['ret'])
            dict_temp = {'feature':fac,'date':col,'coff':coff}
            group_fac.append(dict_temp)
    
        _ic = pd.DataFrame(group_fac,columns=['fac[0]','date','coff'])
        print('_ic')
        print(_ic)
    
        ic_temp = _ic.set_index(['date','fac[0]']).unstack()
        print('ic_temp')
        print(ic_temp)
        ic_df = ic_temp['coff']  
        print('ic_df')
        print(ic_df)
        ic_df.head()
        ic_df.plot(figsize=(12,6), title='Factor weight using sample covariance')
        #---------------------------------------------以上是借鉴的前半部分,获取了相关性data-----------------------------------------------------
        ic_df = ic_df.mean()
        ic_df.to_csv('第5期IC计算结果.csv',header=True,mode='w')
        #-------------------------------------------------------------------------------------------------------------------------------------
        return Outputs(data_1=None, data_2=None, data_3=None) 
        #-----------------------------------------------------------------------------------------------------------------------------------------
    
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m2_post_run_bigquant_run(outputs):
        return outputs
    
    
    g = T.Graph({
    
        'm1': 'M.csv_read.v3',
        'm1.csv_path': '因子批量测试结果.csv',
        'm1.encoding': 'utf-8',
    
        'm3': 'M.list_input_feature.v7',
    
        'm2': 'M.cached.v3',
        'm2.input_1': T.Graph.OutputPort('m3.feature_list'),
        'm2.run': m2_run_bigquant_run,
        'm2.post_run': m2_post_run_bigquant_run,
        'm2.input_ports': '',
        'm2.params': '{}',
        'm2.output_ports': '',
    })
    
    # g.run({})
    
    
    def m4_run_bigquant_run(bq_graph, inputs):
        features =[
    "pb_lf_0",   
        ]
    #"pe_lyr_0",
    #"pe_ttm_0",
    #"ps_ttm_0", 
        parameters_list = []
        for feature in features:
            feature = [feature]
            feature.append('return_5')
            parameters = {'m3.feature_list':feature}
            parameters_list.append({'parameters': parameters})
        
        def run(parameters):
            try:
                print(parameters)
                return g.run(parameters)
            except Exception as e:
                print('ERROR --------', e)
                return None
     
        results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)
    
        return results
    
    
    m4 = M.hyper_run.v1(
        run=m4_run_bigquant_run,
        run_now=True,
        bq_graph=g
    )
    
    [2020-03-15 23:15:11.742820] INFO: bigquant: T.parallel_map  开始并行运算..
    {'m3.feature_list': ['pb_lf_0', 'return_5']}
    
    并行运算进度:   0%|          | 0/1 [00:00<?, ?it/s]
    ['pb_lf_0', 'return_5']
    
    开始计算各因子IC
    _ic
         fac[0]       date      coff
    0       NaN 2017-05-02  0.024558
    1       NaN 2017-05-03  0.009843
    2       NaN 2017-05-04  0.022725
    3       NaN 2017-05-05  0.004705
    4       NaN 2017-05-08  0.019671
    5       NaN 2017-05-09  0.104503
    6       NaN 2017-05-10  0.079691
    7       NaN 2017-05-11  0.048184
    8       NaN 2017-05-12  0.006039
    9       NaN 2017-05-15  0.045294
    10      NaN 2017-05-16  0.028977
    11      NaN 2017-05-17 -0.022414
    12      NaN 2017-05-18 -0.130841
    13      NaN 2017-05-19 -0.129144
    14      NaN 2017-05-22 -0.143752
    15      NaN 2017-05-23 -0.122669
    16      NaN 2017-05-24 -0.139408
    17      NaN 2017-05-25 -0.140194
    18      NaN 2017-05-26  0.041672
    19      NaN 2017-05-31  0.098105
    20      NaN 2017-06-01  0.200046
    21      NaN 2017-06-02  0.193505
    22      NaN 2017-06-05  0.185196
    23      NaN 2017-06-06  0.153365
    24      NaN 2017-06-07  0.119923
    25      NaN 2017-06-08  0.075213
    26      NaN 2017-06-09  0.075527
    27      NaN 2017-06-12  0.020675
    28      NaN 2017-06-13  0.037539
    29      NaN 2017-06-14  0.120395
    ..      ...        ...       ...
    209     NaN 2018-03-08  0.122260
    210     NaN 2018-03-09  0.049291
    211     NaN 2018-03-12  0.092691
    212     NaN 2018-03-13  0.087926
    213     NaN 2018-03-14  0.067228
    214     NaN 2018-03-15  0.012313
    215     NaN 2018-03-16 -0.014187
    216     NaN 2018-03-19  0.161023
    217     NaN 2018-03-20  0.157883
    218     NaN 2018-03-21  0.098924
    219     NaN 2018-03-22  0.050728
    220     NaN 2018-03-23  0.193220
    221     NaN 2018-03-26  0.147805
    222     NaN 2018-03-27  0.056463
    223     NaN 2018-03-28 -0.028389
    224     NaN 2018-03-29  0.012037
    225     NaN 2018-03-30  0.086225
    226     NaN 2018-04-02 -0.021085
    227     NaN 2018-04-03 -0.002664
    228     NaN 2018-04-04 -0.041129
    229     NaN 2018-04-09  0.147616
    230     NaN 2018-04-10 -0.020813
    231     NaN 2018-04-11  0.053728
    232     NaN 2018-04-12 -0.028370
    233     NaN 2018-04-13  0.067777
    234     NaN 2018-04-16  0.025527
    235     NaN 2018-04-17 -0.034035
    236     NaN 2018-04-18  0.172682
    237     NaN 2018-04-19  0.108538
    238     NaN 2018-04-20  0.105967
    
    [239 rows x 3 columns]
    ic_temp
                    coff
    fac[0]           NaN
    date                
    2017-05-02  0.024558
    2017-05-03  0.009843
    2017-05-04  0.022725
    2017-05-05  0.004705
    2017-05-08  0.019671
    2017-05-09  0.104503
    2017-05-10  0.079691
    2017-05-11  0.048184
    2017-05-12  0.006039
    2017-05-15  0.045294
    2017-05-16  0.028977
    2017-05-17 -0.022414
    2017-05-18 -0.130841
    2017-05-19 -0.129144
    2017-05-22 -0.143752
    2017-05-23 -0.122669
    2017-05-24 -0.139408
    2017-05-25 -0.140194
    2017-05-26  0.041672
    2017-05-31  0.098105
    2017-06-01  0.200046
    2017-06-02  0.193505
    2017-06-05  0.185196
    2017-06-06  0.153365
    2017-06-07  0.119923
    2017-06-08  0.075213
    2017-06-09  0.075527
    2017-06-12  0.020675
    2017-06-13  0.037539
    2017-06-14  0.120395
    ...              ...
    2018-03-08  0.122260
    2018-03-09  0.049291
    2018-03-12  0.092691
    2018-03-13  0.087926
    2018-03-14  0.067228
    2018-03-15  0.012313
    2018-03-16 -0.014187
    2018-03-19  0.161023
    2018-03-20  0.157883
    2018-03-21  0.098924
    2018-03-22  0.050728
    2018-03-23  0.193220
    2018-03-26  0.147805
    2018-03-27  0.056463
    2018-03-28 -0.028389
    2018-03-29  0.012037
    2018-03-30  0.086225
    2018-04-02 -0.021085
    2018-04-03 -0.002664
    2018-04-04 -0.041129
    2018-04-09  0.147616
    2018-04-10 -0.020813
    2018-04-11  0.053728
    2018-04-12 -0.028370
    2018-04-13  0.067777
    2018-04-16  0.025527
    2018-04-17 -0.034035
    2018-04-18  0.172682
    2018-04-19  0.108538
    2018-04-20  0.105967
    
    [239 rows x 1 columns]
    ic_df
                    coff
    fac[0]           NaN
    date                
    2017-05-02  0.024558
    2017-05-03  0.009843
    2017-05-04  0.022725
    2017-05-05  0.004705
    2017-05-08  0.019671
    2017-05-09  0.104503
    2017-05-10  0.079691
    2017-05-11  0.048184
    2017-05-12  0.006039
    2017-05-15  0.045294
    2017-05-16  0.028977
    2017-05-17 -0.022414
    2017-05-18 -0.130841
    2017-05-19 -0.129144
    2017-05-22 -0.143752
    2017-05-23 -0.122669
    2017-05-24 -0.139408
    2017-05-25 -0.140194
    2017-05-26  0.041672
    2017-05-31  0.098105
    2017-06-01  0.200046
    2017-06-02  0.193505
    2017-06-05  0.185196
    2017-06-06  0.153365
    2017-06-07  0.119923
    2017-06-08  0.075213
    2017-06-09  0.075527
    2017-06-12  0.020675
    2017-06-13  0.037539
    2017-06-14  0.120395
    ...              ...
    2018-03-08  0.122260
    2018-03-09  0.049291
    2018-03-12  0.092691
    2018-03-13  0.087926
    2018-03-14  0.067228
    2018-03-15  0.012313
    2018-03-16 -0.014187
    2018-03-19  0.161023
    2018-03-20  0.157883
    2018-03-21  0.098924
    2018-03-22  0.050728
    2018-03-23  0.193220
    2018-03-26  0.147805
    2018-03-27  0.056463
    2018-03-28 -0.028389
    2018-03-29  0.012037
    2018-03-30  0.086225
    2018-04-02 -0.021085
    2018-04-03 -0.002664
    2018-04-04 -0.041129
    2018-04-09  0.147616
    2018-04-10 -0.020813
    2018-04-11  0.053728
    2018-04-12 -0.028370
    2018-04-13  0.067777
    2018-04-16  0.025527
    2018-04-17 -0.034035
    2018-04-18  0.172682
    2018-04-19  0.108538
    2018-04-20  0.105967
    
    [239 rows x 1 columns]
    
    并行运算进度: 100%|██████████| 1/1 [01:53<00:00, 113.57s/it]

    (达达) #2

    这个模块是用户编写的,缺少输出,您可以自行编写模块重新封装。