克隆策略

    {"Description":"实验创建于14/11/2018","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-2161:instruments","SourceOutputPortId":"-5:data"},{"DestinationInputPortId":"-3722:instruments","SourceOutputPortId":"-5:data"},{"DestinationInputPortId":"-3740:instruments","SourceOutputPortId":"-5:data"},{"DestinationInputPortId":"-979:input_1","SourceOutputPortId":"-5:data"},{"DestinationInputPortId":"-2161:features","SourceOutputPortId":"-717:data"},{"DestinationInputPortId":"-2893:input_1","SourceOutputPortId":"-2161:data"},{"DestinationInputPortId":"-3701:input_2","SourceOutputPortId":"-2893:data_1"},{"DestinationInputPortId":"-3722:features","SourceOutputPortId":"-3299:data"},{"DestinationInputPortId":"-3729:features","SourceOutputPortId":"-3299:data"},{"DestinationInputPortId":"-3701:input_3","SourceOutputPortId":"-3299:data"},{"DestinationInputPortId":"-872:input_2","SourceOutputPortId":"-3299:data"},{"DestinationInputPortId":"-4193:input_1","SourceOutputPortId":"-3701:data_1"},{"DestinationInputPortId":"-3729:input_data","SourceOutputPortId":"-3722:data"},{"DestinationInputPortId":"-872:input_1","SourceOutputPortId":"-3729:data"},{"DestinationInputPortId":"-3740:features","SourceOutputPortId":"-3735:data"},{"DestinationInputPortId":"-3747:features","SourceOutputPortId":"-3735:data"},{"DestinationInputPortId":"-3747:input_data","SourceOutputPortId":"-3740:data"},{"DestinationInputPortId":"-4193:input_2","SourceOutputPortId":"-3747:data"},{"DestinationInputPortId":"-126:input_1","SourceOutputPortId":"-4193:data_1"},{"DestinationInputPortId":"-988:data2","SourceOutputPortId":"-872:data_1"},{"DestinationInputPortId":"-988:data1","SourceOutputPortId":"-979:data_1"},{"DestinationInputPortId":"-3701:input_1","SourceOutputPortId":"-988:data"}],"ModuleNodes":[{"Id":"-5","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2010-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"
2018-09-30","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-5"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-5","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-717","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"mean(amount_0,66)","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-717"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-717","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2161","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-2161"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-2161"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2161","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2893","ModuleId":"BigQuantSpace.cached.cached-v3","Modul
eParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3, topN):\n # 示例代码如下。在这里编写您的代码\n amount_df = input_1.read_df()\n universe_dic = amount_df.groupby('date').apply(lambda df: df.sort_values('amount_0', ascending=False)[:topN].instrument.tolist()).to_dict()\n \n return Outputs(data_1=DataSource().write_pickle(universe_dic))\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{'topN':2000}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-2893"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-2893"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-2893"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-2893","OutputType":null},{"Name":"data_2","NodeId":"-2893","OutputType":null},{"Name":"data_3","NodeId":"-2893","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":4,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-3299","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"-1*market_cap_0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-3299"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-3299","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":6,"IsPartOfP
artialRun":null,"Comment":"alpha因子","CommentCollapsed":false},{"Id":"-3701","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3, max_stock_weight, neuralized_type):\n # 示例代码如下。在这里编写您的代码\n\n df = input_1.read_df()\n factor = list(set(input_3.read_pickle()).difference(['end_date', 'instruments', 'start_date']))[0]\n pvt = df.pivot(index='date', columns='instrument', values=factor)\n universe_dic = input_2.read_pickle()\n all_dates = sorted(list(universe_dic.keys()))\n weights = {}\n for date in all_dates:\n alpha = pvt.loc[date, universe_dic[date]]\n if neuralized_type == 'market':\n # 市场中性化\n alpha = alpha - alpha.mean()\n elif neuralized_type == 'industry':\n # 行业中性化\n group_mean = df[df.date == date].groupby('industry_code', as_index=False).mean().rename(columns={factor:'group_mean'})\n tmp = df[df.date == date].merge(group_mean, how='left', on='industry_code')\n tmp[factor] = tmp[factor]- tmp['group_mean']\n alpha = tmp.set_index('instrument')[factor].loc[universe_dic[date]]\n\n alpha_weight = alpha / alpha.abs().sum()\n alpha_weight = alpha_weight.clip(-max_stock_weight, max_stock_weight) # 权重截断处理\n alpha_weight = alpha_weight / alpha_weight.abs().sum()\n weights[date] = alpha_weight\n \n \n \n ds = DataSource().write_pickle(weights)\n return Outputs(data_1=ds, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{'max_stock_weight': 0.1,\n'neuralized_type': 
'industry'}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-3701"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-3701"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-3701"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-3701","OutputType":null},{"Name":"data_2","NodeId":"-3701","OutputType":null},{"Name":"data_3","NodeId":"-3701","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":8,"IsPartOfPartialRun":null,"Comment":"AlphaWeight市场中性化","CommentCollapsed":false},{"Id":"-3722","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-3722"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-3722"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-3722","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-3729","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_col
umns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-3729"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-3729"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-3729","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-3735","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"close_0/open_0-1\nclose_0/close_1-1\n","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-3735"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-3735","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-3740","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-3740"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-3740"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-3740","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":13,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-3747","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor
-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-3747"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-3747"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-3747","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":14,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-4193","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n alpha_weights = input_1.read_pickle()\n ret_df = input_2.read_df()\n ret0_df = ret_df.pivot(index='date', columns='instrument', values='close_0/close_1-1')\n ret1_df = ret_df.pivot(index='date', columns='instrument', values='close_0/open_0-1')\n alpha0, alpha1, alpha2 = {}, {}, {}\n all_dates = sorted(alpha_weights.keys())\n last_date = None\n w_prev = None\n for date in all_dates:\n #Alpha0: 权重是当天因子值,收益:Close/Open -1 \n #Alpha1: 权重是前一天因子值,收益:Close/shift(Close, 1) -1 \n #Alpha2:权重是前一天因子值,收益:Close/Open -1 \n #根据统计,市场平均情况下次日低开概率较大,这个导致了alpha1的收益会更低\n w = alpha_weights[date]\n alpha0[date] = (ret1_df.loc[date, w.index]*w).sum() \n alpha1[date] = (ret0_df.loc[date, w_prev.index]*w_prev).sum() if w_prev is not None else 0.0\n alpha2[date] = (ret1_df.loc[date, w_prev.index]*w_prev).sum() if w_prev is not None else 0.0\n w_prev = w\n alpha0 = pd.Series(alpha0)\n 
alpha1 = pd.Series(alpha1)\n alpha2 = pd.Series(alpha2)\n alpha = pd.DataFrame({'alpha0':alpha0, \n 'alpha1':alpha1, \n 'alpha2':alpha2})\n ds = DataSource().write_df(alpha)\n \n return Outputs(data_1=ds, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-4193"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-4193"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-4193"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-4193","OutputType":null},{"Name":"data_2","NodeId":"-4193","OutputType":null},{"Name":"data_3","NodeId":"-4193","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":15,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-126","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3, booksize):\n # 示例代码如下。在这里编写您的代码\n def calc_daily_turnover(alpha_weights):\n all_dates = sorted(alpha_weights.keys())\n last_date = None\n turnover = {}\n\n for date in all_dates:\n w = alpha_weights[date]\n w.name = 'w'\n w_prev = alpha_weights[last_date] if last_date is not None else pd.Series(0,index=w.index)\n w_prev.name = 'w_prev'\n tmp = pd.concat([w,w_prev], axis=1).fillna(0)\n\n turnover[date] = 
(tmp['w']-tmp['w_prev']).abs().sum()\n last_date = date\n turnover = pd.Series(turnover)\n turnover /= 2\n return turnover\n\n import empyrical\n alpha_df = m15.data_1.read_df()\n alpha_weights = m8.data_1.read_pickle()\n dailyPnL = alpha_df*booksize\n PnL = dailyPnL.groupby(dailyPnL.index.year).sum()\n IR = dailyPnL.groupby(dailyPnL.index.year).mean()/dailyPnL.groupby(dailyPnL.index.year).std()\n sharpe = IR * np.sqrt(252)\n returns = dailyPnL.groupby(dailyPnL.index.year).sum()/booksize\n daily_turnover = calc_daily_turnover(alpha_weights)\n turnover = daily_turnover.groupby(daily_turnover.index.year).mean()\n fitness = sharpe * np.sqrt(returns.abs().apply(lambda x: x/turnover))\n margin = PnL.apply(lambda x: x/(daily_turnover.groupby(daily_turnover.index.year).sum()*booksize)*10000)\n long_short_count = pd.DataFrame({date:((w>0).sum(), (w<0).sum()) for date, w in alpha_weights.items()}).T\n long_short_count = long_short_count.rename(columns={0: 'long', 1: 'short'})\n long_short_count = long_short_count.groupby(long_short_count.index.year).sum()\n max_drawdown = dailyPnL.apply(lambda x: empyrical.max_drawdown(x/booksize))\n\n dataset_ds = DataSource()\n output_store = dataset_ds.open_df_store()\n dailyPnL.to_hdf(output_store, key='dailyPnL')\n PnL.to_hdf(output_store, key='PnL')\n turnover.to_hdf(output_store, key='turnover')\n fitness.to_hdf(output_store, key='fitness')\n margin.to_hdf(output_store, key='margin')\n max_drawdown.to_hdf(output_store, key='max_drawdown')\n long_short_count.to_hdf(output_store, key='long_short_count')\n sharpe.to_hdf(output_store, key='sharpe')\n returns.to_hdf(output_store, key='returns')\n dataset_ds.close_df_store()\n return Outputs(data_1=dataset_ds)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return 
outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{'booksize': 20000000}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-126"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-126"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-126"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-126","OutputType":null},{"Name":"data_2","NodeId":"-126","OutputType":null},{"Name":"data_3","NodeId":"-126","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":16,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-872","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3, decay):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read_df()\n factor = list(set(input_2.read_pickle()).difference(['end_date', 'instruments', 'start_date']))[0]\n pvt = df.pivot(index='date', columns='instrument', values=factor)\n pvt = pvt.rolling(decay).apply(lambda x: sum([(i+1)*xx for i,xx in enumerate(x)])/sum(range(decay+1)))\n result = pvt.unstack().reset_index().rename(columns={0:factor})\n ds = DataSource().write_df(result)\n return Outputs(data_1=ds, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return 
outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{'decay': 4}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-872"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-872"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-872"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-872","OutputType":null},{"Name":"data_2","NodeId":"-872","OutputType":null},{"Name":"data_3","NodeId":"-872","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":5,"IsPartOfPartialRun":null,"Comment":"decay","CommentCollapsed":false},{"Id":"-979","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n \n \n ins = input_1.read_pickle()['instruments']\n start_date = input_1.read_pickle()['start_date']\n end_date = input_1.read_pickle()['end_date']\n industry_df = D.history_data(ins,start_date=start_date,end_date=end_date,fields=['industry_sw_level1'])\n processed_industry_df = industry_df.pivot(index='date',columns='instrument',values='industry_sw_level1')\\\n .dropna(how='all')\\\n .stack()\\\n .apply(lambda x: 'SW'+str(int(x))+'.SHA')\\\n .reset_index()\\\n .rename(columns={0:'industry_code'})\n ## 过滤为0的数据异常,不过不应该被简单过滤\n processed_industry_df = processed_industry_df[processed_industry_df['industry_code'].apply(lambda x:len(x)==12)]\n \n data_1 = DataSource.write_df(processed_industry_df)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 
后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-979"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-979"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-979"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-979","OutputType":null},{"Name":"data_2","NodeId":"-979","OutputType":null},{"Name":"data_3","NodeId":"-979","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-988","ModuleId":"BigQuantSpace.join.join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data1","NodeId":"-988"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data2","NodeId":"-988"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-988","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":9,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-5' 
Position='177,45,200,200'/><NodePosition Node='-717' Position='-144,76,200,200'/><NodePosition Node='-2161' Position='2,225,200,200'/><NodePosition Node='-2893' Position='163,612,200,200'/><NodePosition Node='-3299' Position='-546,77,200,200'/><NodePosition Node='-3701' Position='49,950,200,200'/><NodePosition Node='-3722' Position='-124,368,200,200'/><NodePosition Node='-3729' Position='-160,539,200,200'/><NodePosition Node='-3735' Position='588,52,200,200'/><NodePosition Node='-3740' Position='572,226,200,200'/><NodePosition Node='-3747' Position='626,456,200,200'/><NodePosition Node='-4193' Position='340,1122,200,200'/><NodePosition Node='-126' Position='354,1265,200,200'/><NodePosition Node='-872' Position='-340,693,200,200'/><NodePosition Node='-979' Position='-634,478,200,200'/><NodePosition Node='-988' Position='-455,856,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [1]:
    # 本代码由可视化策略环境自动生成 2021年1月7日 18:07
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m7_run_bigquant_run(input_1, input_2, input_3):
        """Build a long (date, instrument, industry_code) table from level-1 industry data.

        Args:
            input_1: data source whose pickle payload is a mapping with the keys
                'instruments', 'start_date' and 'end_date'.
            input_2: unused.
            input_3: unused.

        Returns:
            Outputs whose data_1 is the result of DataSource.write_df on the
            processed frame; data_2 and data_3 are None.
        """
        # Deserialize the payload once instead of once per key.
        conf = input_1.read_pickle()
        ins = conf['instruments']
        start_date = conf['start_date']
        end_date = conf['end_date']

        industry_df = D.history_data(ins, start_date=start_date, end_date=end_date, fields=['industry_sw_level1'])
        # Pivot to a date x instrument grid, drop dates without any value, stack back
        # to long form and format every value as 'SW<int>.SHA'.
        processed_industry_df = industry_df.pivot(index='date', columns='instrument', values='industry_sw_level1')\
                                .dropna(how='all')\
                                .stack()\
                                .apply(lambda x: 'SW'+str(int(x))+'.SHA')\
                                .reset_index()\
                                .rename(columns={0:'industry_code'})
        # Keep only codes that are exactly 12 characters long ('SW' + six characters +
        # '.SHA'); this removes the abnormal rows whose raw value is 0. The original
        # author notes that such rows should not simply be filtered out.
        processed_industry_df = processed_industry_df[processed_industry_df['industry_code'].apply(lambda x:len(x)==12)]

        data_1 = DataSource.write_df(processed_industry_df)
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m7_post_run_bigquant_run(outputs):
        """Post-processing hook for module m7: hand the outputs back unchanged."""
        return outputs
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m4_run_bigquant_run(input_1, input_2, input_3, topN):
        """Pick, for every date, the topN instruments with the largest amount_0.

        Args:
            input_1: data source whose frame has the columns 'date', 'instrument'
                and 'amount_0'.
            input_2: unused.
            input_3: unused.
            topN: number of instruments to keep per date.

        Returns:
            Outputs whose data_1 is a pickled dict mapping each date to the list of
            selected instruments, ordered by descending amount_0.
        """
        amount_df = input_1.read_df()

        # Iterate over the groups directly instead of groupby.apply(...).to_dict():
        # this always yields a date-keyed dict (an empty frame gives {}) and does not
        # depend on how apply treats the grouping column.
        universe_dic = {}
        for date, day_df in amount_df.groupby('date'):
            ranked = day_df.sort_values('amount_0', ascending=False)
            universe_dic[date] = ranked[:topN].instrument.tolist()

        return Outputs(data_1=DataSource().write_pickle(universe_dic))
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m4_post_run_bigquant_run(outputs):
        """Post-processing hook for module m4: hand the outputs back unchanged."""
        return outputs
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m5_run_bigquant_run(input_1, input_2, input_3, decay):
        """Smooth the factor with a linearly weighted rolling average of length `decay`.

        Within each window the oldest value gets weight 1 and the newest weight
        `decay`; the weighted sum is divided by 1 + 2 + ... + decay.

        Args:
            input_1: data source whose frame has 'date', 'instrument' and the factor column.
            input_2: data source whose pickle payload, once the entries 'end_date',
                'instruments' and 'start_date' are removed, names the factor column.
            input_3: unused.
            decay: rolling window length.

        Returns:
            Outputs whose data_1 is the smoothed factor in long form (one row per
            instrument and date); data_2 and data_3 are None.
        """
        raw_df = input_1.read_df()
        reserved_keys = ['end_date', 'instruments', 'start_date']
        factor = list(set(input_2.read_pickle()).difference(reserved_keys))[0]
        wide = raw_df.pivot(index='date', columns='instrument', values=factor)

        weight_total = sum(range(decay+1))

        def linear_decay(window):
            # Accumulate in window order so the arithmetic matches a plain sum().
            weighted = 0
            for position, value in enumerate(window):
                weighted = weighted + (position+1)*value
            return weighted/weight_total

        smoothed = wide.rolling(decay).apply(linear_decay)
        result = smoothed.unstack().reset_index().rename(columns={0:factor})
        ds = DataSource().write_df(result)
        return Outputs(data_1=ds, data_2=None, data_3=None)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m5_post_run_bigquant_run(outputs):
        """Post-processing hook for module m5: hand the outputs back unchanged."""
        return outputs
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m8_run_bigquant_run(input_1, input_2, input_3, max_stock_weight, neuralized_type):
        """Convert factor values into per-date portfolio weights over each date's universe.

        Args:
            input_1: data source whose frame has 'date', 'instrument' and the factor
                column (plus 'industry_code' when neuralized_type is 'industry').
            input_2: data source whose pickle payload maps each date to the list of
                instruments in that date's universe.
            input_3: data source whose pickle payload, once the entries 'end_date',
                'instruments' and 'start_date' are removed, names the factor column.
            max_stock_weight: absolute cap applied to every weight before the final
                rescaling.
            neuralized_type: 'market' subtracts the universe mean, 'industry' subtracts
                the same-date mean of the instrument's industry_code group (taken over
                every instrument in the frame, not only the universe); any other value
                leaves the factor as is.

        Returns:
            Outputs whose data_1 is a pickled dict mapping each date to a Series of
            weights indexed by instrument; data_2 and data_3 are None. Universe
            members without a factor value get a NaN weight.
        """
        df = input_1.read_df()
        factor = list(set(input_3.read_pickle()).difference(['end_date', 'instruments', 'start_date']))[0]
        universe_dic = input_2.read_pickle()

        if neuralized_type == 'industry':
            # Industry neutralisation, done once for the whole frame and only on the
            # factor column (no per-date filtering/merging, and no averaging of
            # unrelated, possibly non-numeric columns).
            group_mean = df.groupby(['date', 'industry_code'])[factor].transform('mean')
            df = df.assign(**{factor: df[factor] - group_mean})
        pvt = df.pivot(index='date', columns='instrument', values=factor)

        weights = {}
        for date in sorted(universe_dic):
            # reindex gives NaN for universe members that have no factor column,
            # whereas .loc with a list of labels raises KeyError on missing labels.
            alpha = pvt.loc[date].reindex(universe_dic[date])
            if neuralized_type == 'market':
                # Market neutralisation
                alpha = alpha - alpha.mean()

            alpha_weight = alpha / alpha.abs().sum()
            # Truncate extreme weights, then rescale so absolute weights sum to 1 again.
            alpha_weight = alpha_weight.clip(-max_stock_weight, max_stock_weight)
            alpha_weight = alpha_weight / alpha_weight.abs().sum()
            weights[date] = alpha_weight

        ds = DataSource().write_pickle(weights)
        return Outputs(data_1=ds, data_2=None, data_3=None)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m8_post_run_bigquant_run(outputs):
        """Post-processing hook for module m8: hand the outputs back unchanged."""
        return outputs
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m15_run_bigquant_run(input_1, input_2, input_3):
        """Compute three daily return series for the weight portfolios.

        alpha0: same-day weights applied to the close/open return.
        alpha1: previous-day weights applied to the close/previous-close return.
        alpha2: previous-day weights applied to the close/open return.
        Author's note: on average the market tends to open lower on the next day,
        which is why alpha1 comes out lower.

        Args:
            input_1: data source whose pickle payload maps each date to a Series of
                weights indexed by instrument.
            input_2: data source whose frame has 'date', 'instrument' and the columns
                'close_0/close_1-1' and 'close_0/open_0-1'.
            input_3: unused.

        Returns:
            Outputs whose data_1 is a frame indexed by date with the columns 'alpha0',
            'alpha1' and 'alpha2'; data_2 and data_3 are None. On the first date
            alpha1 and alpha2 are 0.0 because there are no previous weights.
        """
        alpha_weights = input_1.read_pickle()
        ret_df = input_2.read_df()
        ret0_df = ret_df.pivot(index='date', columns='instrument', values='close_0/close_1-1')
        ret1_df = ret_df.pivot(index='date', columns='instrument', values='close_0/open_0-1')

        def weighted_return(ret_pvt, date, weight):
            # reindex tolerates instruments that have a weight but no return column;
            # they contribute NaN, which sum() skips.
            return (ret_pvt.loc[date].reindex(weight.index)*weight).sum()

        alpha0, alpha1, alpha2 = {}, {}, {}
        w_prev = None
        for date in sorted(alpha_weights.keys()):
            w = alpha_weights[date]
            alpha0[date] = weighted_return(ret1_df, date, w)
            alpha1[date] = weighted_return(ret0_df, date, w_prev) if w_prev is not None else 0.0
            alpha2[date] = weighted_return(ret1_df, date, w_prev) if w_prev is not None else 0.0
            w_prev = w

        alpha = pd.DataFrame({'alpha0': pd.Series(alpha0),
                              'alpha1': pd.Series(alpha1),
                              'alpha2': pd.Series(alpha2)})
        ds = DataSource().write_df(alpha)

        return Outputs(data_1=ds, data_2=None, data_3=None)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m15_post_run_bigquant_run(outputs):
        """Post-processing hook for module m15: hand the outputs back unchanged."""
        return outputs
    
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m16_run_bigquant_run(input_1, input_2, input_3, booksize):
        """Summarise the daily alpha returns into yearly performance statistics.

        Args:
            input_1: data source holding the daily return frame (one column per alpha,
                indexed by a datetime-like index since `.index.year` is used). When it
                is None the notebook-level result `m15.data_1` is read instead.
            input_2: optional data source whose pickle payload maps each date to a
                Series of weights. When it is None the notebook-level result
                `m8.data_1` is read instead.
            input_3: unused.
            booksize: notional that daily returns are multiplied by to obtain daily PnL.

        Returns:
            Outputs whose data_1 is a DataSource whose df store holds the keys
            'dailyPnL', 'PnL', 'turnover', 'fitness', 'margin', 'max_drawdown',
            'long_short_count', 'sharpe' and 'returns'.
        """
        def calc_daily_turnover(alpha_weights):
            """Return half the summed absolute weight change per date (first date vs. zero weights)."""
            turnover = {}
            w_prev = None
            for date in sorted(alpha_weights.keys()):
                # rename() returns a relabelled copy, so the Series stored in
                # alpha_weights are left untouched.
                w = alpha_weights[date].rename('w')
                if w_prev is None:
                    w_prev = pd.Series(0, index=w.index)
                tmp = pd.concat([w, w_prev.rename('w_prev')], axis=1).fillna(0)
                turnover[date] = (tmp['w']-tmp['w_prev']).abs().sum()
                w_prev = w
            return pd.Series(turnover) / 2

        import empyrical

        # Prefer the connected input ports; fall back to the notebook globals only when
        # a port is not wired, so the module still runs on the existing graph.
        alpha_df = input_1.read_df() if input_1 is not None else m15.data_1.read_df()
        alpha_weights = input_2.read_pickle() if input_2 is not None else m8.data_1.read_pickle()

        dailyPnL = alpha_df*booksize
        pnl_by_year = dailyPnL.groupby(dailyPnL.index.year)
        PnL = pnl_by_year.sum()
        IR = pnl_by_year.mean()/pnl_by_year.std()
        sharpe = IR * np.sqrt(252)
        returns = PnL/booksize

        daily_turnover = calc_daily_turnover(alpha_weights)
        turnover_by_year = daily_turnover.groupby(daily_turnover.index.year)
        turnover = turnover_by_year.mean()
        traded_by_year = turnover_by_year.sum()*booksize

        fitness = sharpe * np.sqrt(returns.abs().apply(lambda x: x/turnover))
        # Yearly PnL per unit of traded notional, scaled by 10000.
        margin = PnL.apply(lambda x: x/traded_by_year*10000)

        # Number of positive / negative weights per date, then summed within each year.
        long_short_count = pd.DataFrame({date:((w>0).sum(), (w<0).sum()) for date, w in alpha_weights.items()}).T
        long_short_count = long_short_count.rename(columns={0: 'long', 1: 'short'})
        long_short_count = long_short_count.groupby(long_short_count.index.year).sum()
        max_drawdown = dailyPnL.apply(lambda x: empyrical.max_drawdown(x/booksize))

        dataset_ds = DataSource()
        output_store = dataset_ds.open_df_store()
        try:
            dailyPnL.to_hdf(output_store, key='dailyPnL')
            PnL.to_hdf(output_store, key='PnL')
            turnover.to_hdf(output_store, key='turnover')
            fitness.to_hdf(output_store, key='fitness')
            margin.to_hdf(output_store, key='margin')
            max_drawdown.to_hdf(output_store, key='max_drawdown')
            long_short_count.to_hdf(output_store, key='long_short_count')
            sharpe.to_hdf(output_store, key='sharpe')
            returns.to_hdf(output_store, key='returns')
        finally:
            # Close the store even if one of the writes fails.
            dataset_ds.close_df_store()
        return Outputs(data_1=dataset_ds)
    
    # Optional post-processing hook. It receives the main function's outputs and may
    # transform them or return a friendlier outputs format. Its result is not cached.
    def m16_post_run_bigquant_run(outputs):
        """Return the m16 outputs unchanged (no post-processing is applied)."""
        return outputs
    
    
    # m1: instrument list for market CN_STOCK_A between 2010-01-01 and 2018-09-30.
    # Its output feeds m7 and the feature extractors m3, m10 and m13.
    m1 = M.instruments.v2(
        start_date='2010-01-01',
        end_date='2018-09-30',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # m7: custom Python module fed with the m1 instrument list; its logic lives in
    # m7_run_bigquant_run. Its data_1 output is joined with m5's output in m9.
    m7 = M.cached.v3(
        input_1=m1.data,
        run=m7_run_bigquant_run,
        post_run=m7_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports=''
    )
    
    # m2: feature list holding the single expression mean(amount_0,66); consumed by m3.
    m2 = M.input_features.v1(
        features='mean(amount_0,66)'
    )
    
    # m3: extract the m2 features for the m1 instruments.
    # NOTE(review): empty start/end dates presumably fall back to the instrument
    # list's own date range — confirm against the platform documentation.
    m3 = M.general_feature_extractor.v7(
        instruments=m1.data,
        features=m2.data,
        start_date='',
        end_date='',
        before_start_days=0
    )
    
    # m4: custom Python module over the m3 data, parameterised with topN=2000 and
    # run with caching disabled. Its data_1 output is passed to m8 as input_2.
    m4 = M.cached.v3(
        input_1=m3.data,
        run=m4_run_bigquant_run,
        post_run=m4_post_run_bigquant_run,
        input_ports='',
        params='{\'topN\':2000}',
        output_ports='',
        m_cached=False
    )
    
    # m6: feature list holding the alpha expression -1*market_cap_0 (caching disabled).
    # It is consumed by m10 and m11, and also handed to m5 and m8 as an input.
    m6 = M.input_features.v1(
        features='-1*market_cap_0',
        m_cached=False
    )
    
    # m10: extract the base data needed by the m6 features for the m1 instruments.
    m10 = M.general_feature_extractor.v7(
        instruments=m1.data,
        features=m6.data,
        start_date='',
        end_date=''
    )
    
    # m11: evaluate the m6 feature expressions on m10's data, keyed by the
    # 'date' and 'instrument' columns. drop_na and remove_extra_columns are both
    # off, so rows with missing values and the extra columns are presumably kept.
    m11 = M.derived_feature_extractor.v3(
        input_data=m10.data,
        features=m6.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # m5: custom Python module taking the derived features (m11) and the feature
    # list (m6), parameterised with decay=4 and run with caching disabled.
    # Its data_1 output is joined with m7's output in m9.
    m5 = M.cached.v3(
        input_1=m11.data,
        input_2=m6.data,
        run=m5_run_bigquant_run,
        post_run=m5_post_run_bigquant_run,
        input_ports='',
        params='{\'decay\': 4}',
        output_ports='',
        m_cached=False
    )
    
    # m9: inner join of the m7 and m5 outputs on the (date, instrument) columns.
    m9 = M.join.v3(
        data1=m7.data_1,
        data2=m5.data_1,
        on='date,instrument',
        how='inner',
        sort=False
    )
    
    # m8: custom Python module combining the joined data (m9), the m4 output and
    # the feature list (m6), with max_stock_weight=0.1 and neuralized_type='industry'.
    # Its data_1 output is read with read_pickle() by the m16 run function as the
    # per-date alpha weights, and is also the first input of m15.
    m8 = M.cached.v3(
        input_1=m9.data,
        input_2=m4.data_1,
        input_3=m6.data,
        run=m8_run_bigquant_run,
        post_run=m8_post_run_bigquant_run,
        input_ports='',
        params="""{'max_stock_weight': 0.1,
    'neuralized_type': 'industry'}""",
        output_ports='',
        m_cached=False
    )
    
    # m12: feature list with two return expressions, one per line:
    # close_0/open_0-1 and close_0/close_1-1. Consumed by m13 and m14.
    m12 = M.input_features.v1(
        features="""close_0/open_0-1
    close_0/close_1-1
    """
    )
    
    # m13: extract the base data needed by the m12 return expressions for the m1 instruments.
    m13 = M.general_feature_extractor.v7(
        instruments=m1.data,
        features=m12.data,
        start_date='',
        end_date='',
        before_start_days=0
    )
    
    # m14: evaluate the m12 return expressions on m13's data, keyed by the
    # 'date' and 'instrument' columns. drop_na and remove_extra_columns are both
    # off, so rows with missing values and the extra columns are presumably kept.
    m14 = M.derived_feature_extractor.v3(
        input_data=m13.data,
        features=m12.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    # m15: custom Python module taking the m8 weights and the m14 return data
    # (caching disabled). Its data_1 output is a frame with the columns
    # alpha0, alpha1 and alpha2, which m16 and the plotting cell read.
    m15 = M.cached.v3(
        input_1=m8.data_1,
        input_2=m14.data,
        run=m15_run_bigquant_run,
        post_run=m15_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports='',
        m_cached=False
    )
    
    # m16: metrics module over the m15 alpha returns with booksize=20000000
    # (caching disabled). The run function also needs the m8 alpha weights, so
    # they are declared as input_2 instead of being an undeclared dependency on
    # a notebook global.
    m16 = M.cached.v3(
        input_1=m15.data_1,
        input_2=m8.data_1,
        run=m16_run_bigquant_run,
        post_run=m16_post_run_bigquant_run,
        input_ports='',
        params='{\'booksize\': 20000000}',
        output_ports='',
        m_cached=False
    )
    

    Alpha收益曲线

    In [2]:
    # Read the alpha return frame once instead of re-reading it for every curve,
    # then plot the compounded growth of each alpha variant.
    alpha_returns = m15.data_1.read_df()
    T.plot((1+alpha_returns['alpha0']).cumprod())
    T.plot((1+alpha_returns['alpha1']).cumprod())
    T.plot((1+alpha_returns['alpha2']).cumprod())
    

    指标结果

    指标结果都在m16模块里,大家可以直接通过data source的方式来读取数据,每一个不同的指标我们用key来表示:

    • dailyPnL
    • PnL
    • turnover
    • fitness
    • margin
    • max_drawdown
    • long_short_count
    • sharpe
    • returns
    In [3]:
    # Display the per-year returns table stored by m16 under the 'returns' key.
    m16.data_1.read_df(key='returns')
    
    Out[3]:
    alpha0 alpha1 alpha2
    2010 0.167371 0.103722 0.185635
    2011 0.055811 -0.020725 0.063959
    2012 0.133811 0.029555 0.114793
    2013 0.191005 0.068348 0.159550
    2014 0.094444 -0.002701 0.070999
    2015 0.331920 0.239870 0.284830
    2016 0.144605 0.009719 0.089157
    2017 -0.066217 -0.216536 -0.107662
    2018 0.055328 -0.061609 0.000606
    In [ ]: