{"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"-585:features_ds","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"DestinationInputPortId":"-22014:input_3","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"DestinationInputPortId":"-760:features","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"DestinationInputPortId":"-222:input_data","SourceOutputPortId":"-215:data"},{"DestinationInputPortId":"-760:train_ds","SourceOutputPortId":"-222:data"},{"DestinationInputPortId":"-760:test_ds","SourceOutputPortId":"-222:data"},{"DestinationInputPortId":"-215:features","SourceOutputPortId":"-585:data"},{"DestinationInputPortId":"-222:features","SourceOutputPortId":"-585:data"},{"DestinationInputPortId":"-215:instruments","SourceOutputPortId":"-8309:data"},{"DestinationInputPortId":"-22014:input_1","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:data"},{"DestinationInputPortId":"-22014:input_2","SourceOutputPortId":"-760:train_data"}],"ModuleNodes":[{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2020-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2021-05-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"OutputPortsInternal":[{"Name":"data"
,"NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"训练集标签","CommentCollapsed":false},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"correlation(open_0,high_0,10)\ncorrelation(open_0,low_0,10)\ncorrelation(open_0,close_0,10)\ncorrelation(open_0,amount_0/volume_0,10)\ncorrelation(open_0,volume_0,10)\ncorrelation(open_0,return_1,10)\ncorrelation(open_0,turn_0,10)\ncorrelation(open_0,open_0/turn_0,10)\ncorrelation(open_0,volume_0/low_0,10)\ncorrelation(open_0,amount_0/volume_0/high_0,10)\ncorrelation(open_0,low_0/high_0,10)\ncorrelation(open_0,amount_0/volume_0/close_0,10)\ncorrelation(high_0,low_0,10)\ncorrelation(high_0,close_0,10)\ncorrelation(high_0,amount_0/volume_0,10)\ncorrelation(high_0,volume_0,10)\ncorrelation(high_0,return_1,10)\ncorrelation(high_0,turn_0,10)\ncorrelation(high_0,open_0/turn_0,10)\ncorrelation(high_0,volume_0/low_0,10)\ncorrelation(high_0,amount_0/volume_0/high_0,10)\ncorrelation(high_0,low_0/high_0,10)\ncorrelation(high_0,amount_0/volume_0/close_0,10)\ncorrelation(low_0,close_0,10)\ncorrelation(low_0,amount_0/volume_0,10)\ncorrelation(low_0,volume_0,10)\ncorrelation(low_0,return_1,10)\ncorrelation(low_0,turn_0,10)\ncorrelation(low_0,open_0/turn_0,10)\ncorrelation(low_0,volume_0/low_0,10)\ncorrelation(low_0,amount_0/volume_0/high_0,10)\ncorrelation(low_0,low_0/high_0,10)\ncorrelation(low_0,amount_0/volume_0/close_0,10)\ncorrelation(close_0,amount_0/volume_0,10)\ncorrelation(close_0,volume_0,10)\ncorrelation(close_0,return_1,10)\ncorrelation(close_0,turn_0,10)\ncorrelation(close_0,open_0/turn_0,10)\ncorrelation(close_0,volume_0/low_0,10)\ncorrelation(close_0,amount_0/volume_0/high_0,10)\ncorrelation(close_0,low_0/high_0,10)\ncorrelation(close_0,amount_0/volume_0/close_0,10)\ncorrelation(amount_0/volume_0,volume_0,10)\ncorrelation(amount
_0/volume_0,return_1,10)\ncorrelation(amount_0/volume_0,turn_0,10)\ncorrelation(amount_0/volume_0,open_0/turn_0,10)\ncorrelation(amount_0/volume_0,volume_0/low_0,10)\ncorrelation(amount_0/volume_0,amount_0/volume_0/high_0,10)\ncorrelation(amount_0/volume_0,low_0/high_0,10)\ncorrelation(amount_0/volume_0,amount_0/volume_0/close_0,10)\ncorrelation(volume_0,return_1,10)\ncorrelation(volume_0,turn_0,10)\ncorrelation(volume_0,open_0/turn_0,10)\ncorrelation(volume_0,volume_0/low_0,10)\ncorrelation(volume_0,amount_0/volume_0/high_0,10)\ncorrelation(volume_0,low_0/high_0,10)\ncorrelation(volume_0,amount_0/volume_0/close_0,10)\ncorrelation(return_1,turn_0,10)\ncorrelation(return_1,open_0/turn_0,10)\ncorrelation(return_1,volume_0/low_0,10)\ncorrelation(return_1,amount_0/volume_0/high_0,10)\ncorrelation(return_1,low_0/high_0,10)\ncorrelation(return_1,amount_0/volume_0/close_0,10)\ncorrelation(turn_0,open_0/turn_0,10)\ncorrelation(turn_0,volume_0/low_0,10)\ncorrelation(turn_0,amount_0/volume_0/high_0,10)\ncorrelation(turn_0,low_0/high_0,10)\ncorrelation(turn_0,amount_0/volume_0/close_0,10)\ncorrelation(open_0/turn_0,volume_0/low_0,10)\ncorrelation(open_0/turn_0,amount_0/volume_0/high_0,10)\ncorrelation(open_0/turn_0,low_0/high_0,10)\ncorrelation(open_0/turn_0,amount_0/volume_0/close_0,10)\ncorrelation(volume_0/low_0,amount_0/volume_0/high_0,10)\ncorrelation(volume_0/low_0,low_0/high_0,10)\ncorrelation(volume_0/low_0,amount_0/volume_0/close_0,10)\ncorrelation(amount_0/volume_0/high_0,low_0/high_0,10)\ncorrelation(amount_0/volume_0/high_0,amount_0/volume_0/close_0,10)\ncorrelation(low_0/high_0,amount_0/volume_0/close_0,10)\ncovariance(open_0,high_0,10)\ncovariance(open_0,low_0,10)\ncovariance(open_0,close_0,10)\ncovariance(open_0,amount_0/volume_0,10)\ncovariance(open_0,volume_0,10)\ncovariance(open_0,return_1,10)\ncovariance(open_0,turn_0,10)\ncovariance(open_0,open_0/turn_0,10)\ncovariance(open_0,volume_0/low_0,10)\ncovariance(open_0,amount_0/volume_0/high_0,10)\ncovariance(ope
n_0,low_0/high_0,10)\ncovariance(open_0,amount_0/volume_0/close_0,10)\ncovariance(high_0,low_0,10)\ncovariance(high_0,close_0,10)\ncovariance(high_0,amount_0/volume_0,10)\ncovariance(high_0,volume_0,10)\ncovariance(high_0,return_1,10)\ncovariance(high_0,turn_0,10)\ncovariance(high_0,open_0/turn_0,10)\ncovariance(high_0,volume_0/low_0,10)\ncovariance(high_0,amount_0/volume_0/high_0,10)\ncovariance(high_0,low_0/high_0,10)\ncovariance(high_0,amount_0/volume_0/close_0,10)\ncovariance(low_0,close_0,10)\ncovariance(low_0,amount_0/volume_0,10)\ncovariance(low_0,volume_0,10)\ncovariance(low_0,return_1,10)\ncovariance(low_0,turn_0,10)\ncovariance(low_0,open_0/turn_0,10)\ncovariance(low_0,volume_0/low_0,10)\ncovariance(low_0,amount_0/volume_0/high_0,10)\ncovariance(low_0,low_0/high_0,10)\ncovariance(low_0,amount_0/volume_0/close_0,10)\ncovariance(close_0,amount_0/volume_0,10)\ncovariance(close_0,volume_0,10)\ncovariance(close_0,return_1,10)\ncovariance(close_0,turn_0,10)\ncovariance(close_0,open_0/turn_0,10)\ncovariance(close_0,volume_0/low_0,10)\ncovariance(close_0,amount_0/volume_0/high_0,10)\ncovariance(close_0,low_0/high_0,10)\ncovariance(close_0,amount_0/volume_0/close_0,10)\ncovariance(amount_0/volume_0,volume_0,10)\ncovariance(amount_0/volume_0,return_1,10)\ncovariance(amount_0/volume_0,turn_0,10)\ncovariance(amount_0/volume_0,open_0/turn_0,10)\ncovariance(amount_0/volume_0,volume_0/low_0,10)\ncovariance(amount_0/volume_0,amount_0/volume_0/high_0,10)\ncovariance(amount_0/volume_0,low_0/high_0,10)\ncovariance(amount_0/volume_0,amount_0/volume_0/close_0,10)\ncovariance(volume_0,return_1,10)\ncovariance(volume_0,turn_0,10)\ncovariance(volume_0,open_0/turn_0,10)\ncovariance(volume_0,volume_0/low_0,10)\ncovariance(volume_0,amount_0/volume_0/high_0,10)\ncovariance(volume_0,low_0/high_0,10)\ncovariance(volume_0,amount_0/volume_0/close_0,10)\ncovariance(return_1,turn_0,10)\ncovariance(return_1,open_0/turn_0,10)\ncovariance(return_1,volume_0/low_0,10)\ncovariance(return_1,amoun
t_0/volume_0/high_0,10)\ncovariance(return_1,low_0/high_0,10)\ncovariance(return_1,amount_0/volume_0/close_0,10)\ncovariance(turn_0,open_0/turn_0,10)\ncovariance(turn_0,volume_0/low_0,10)\ncovariance(turn_0,amount_0/volume_0/high_0,10)\ncovariance(turn_0,low_0/high_0,10)\ncovariance(turn_0,amount_0/volume_0/close_0,10)\ncovariance(open_0/turn_0,volume_0/low_0,10)\ncovariance(open_0/turn_0,amount_0/volume_0/high_0,10)\ncovariance(open_0/turn_0,low_0/high_0,10)\ncovariance(open_0/turn_0,amount_0/volume_0/close_0,10)\ncovariance(volume_0/low_0,amount_0/volume_0/high_0,10)\ncovariance(volume_0/low_0,low_0/high_0,10)\ncovariance(volume_0/low_0,amount_0/volume_0/close_0,10)\ncovariance(amount_0/volume_0/high_0,low_0/high_0,10)\ncovariance(amount_0/volume_0/high_0,amount_0/volume_0/close_0,10)\ncovariance(low_0/high_0,amount_0/volume_0/close_0,10)\nstd(open_0,10)\nstd(high_0,10)\nstd(low_0,10)\nstd(close_0,10)\nstd(amount_0/volume_0,10)\nstd(volume_0,10)\nstd(return_1,10)\nstd(turn_0,10)\nstd(open_0/turn_0,10)\nstd(volume_0/low_0,10)\nstd(amount_0/volume_0/high_0,10)\nstd(low_0/high_0,10)\nstd(amount_0/volume_0/close_0,10)\nmean(open_0,10)/std(open_0,10)\nmean(high_0,10)/std(high_0,10)\nmean(low_0,10)/std(low_0,10)\nmean(close_0,10)/std(close_0,10)\nmean(amount_0/volume_0,10)/std(amount_0/volume_0,10)\nmean(volume_0,10)/std(volume_0,10)\nmean(return_1,10)/std(return_1,10)\nmean(turn_0,10)/std(turn_0,10)\nmean(open_0/turn_0,10)/std(open_0/turn_0,10)\nmean(volume_0/low_0,10)/std(volume_0/low_0,10)\nmean(amount_0/volume_0/high_0,10)/std(amount_0/volume_0/high_0,10)\nmean(low_0/high_0,10)/std(low_0/high_0,10)\nmean(amount_0/volume_0/close_0,10)/std(amount_0/volume_0/close_0,10)\n(open_0/turn_0-shift(open_0/turn_0,10))/shift(open_0/turn_0,10)-1\n(high_0-shift(high_0,10))/shift(high_0,10)-1\n(low_0/high_0-shift(low_0/high_0,10))/shift(low_0/high_0,10)-1\n(close_0-shift(close_0,10))/shift(close_0,10)-1\n(amount_0/volume_0/close_0-shift(amount_0/volume_0/close_0,10))/shift(amount_
0/volume_0/close_0,10)-1\n(volume_0/low_0-shift(volume_0/low_0,10))/shift(volume_0/low_0,10)-1\n(return_1-shift(return_1,10))/shift(return_1,10)-1\n(turn_0-shift(turn_0,10))/shift(turn_0,10)-1\n(open_0-shift(open_0,10))/shift(open_0,10)-1\n(volume_0-shift(volume_0,10))/shift(volume_0,10)-1\n(amount_0/volume_0-shift(amount_0/volume_0,10))/shift(amount_0/volume_0,10)-1\n(low_0-shift(low_0,10))/shift(low_0,10)-1\n(amount_0/volume_0/high_0-shift(amount_0/volume_0/high_0,10))/shift(amount_0/volume_0/high_0,10)-1\ndecay_linear(open_0,10)\ndecay_linear(high_0,10)\ndecay_linear(low_0,10)\ndecay_linear(close_0,10)\ndecay_linear(amount_0/volume_0,10)\ndecay_linear(volume_0,10)\ndecay_linear(return_1,10)\ndecay_linear(turn_0,10)\ndecay_linear(open_0/turn_0,10)\ndecay_linear(volume_0/low_0,10)\ndecay_linear(amount_0/volume_0/high_0,10)\ndecay_linear(low_0/high_0,10)\ndecay_linear(amount_0/volume_0/close_0,10)","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-24"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-24","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"特征","CommentCollapsed":false},{"Id":"-215","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":"300","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-215"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-215"}],"OutputPortsInternal":[{"Name":"data","No
deId":"-215","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-222","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-222"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-222"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-222","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-585","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"# #号开始的表示注释\n# 多个特征,每行一个,可以包含基础特征和衍生特征\n\nm_amount_x = mean(amount_0, 5)\nmarket_cap_float_x = market_cap_float_0\nmarket_cap_x = market_cap_0\n\nin_csi800_x = in_csi800_0\nin_csi500_x = in_csi500_0\nin_csi300_x = in_csi300_0\n\nlist_days_x = list_days_0\nindustry_sw_level1_x = industry_sw_level1_0\nst_flag_x = 
st_CN_STOCK_A__st_type","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-585"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-585","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"IsPartOfPartialRun":null,"Comment":"辅助特征","CommentCollapsed":true},{"Id":"-8309","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2020-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2021-05-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-8309"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-8309","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"IsPartOfPartialRun":null,"Comment":"公共数据集","CommentCollapsed":false},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","ModuleId":"BigQuantSpace.advanced_auto_labeler.advanced_auto_labeler-v2","ModuleParameters":[{"Name":"label_expr","Value":"# #号开始的表示注释\n# 0. 每行一个,顺序执行,从第二个开始,可以使用label字段\n# 1. 可用数据字段见 https://bigquant.com/docs/develop/datasource/deprecated/history_data.html\n# 添加benchmark_前缀,可使用对应的benchmark数据\n# 2. 
可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/develop/bigexpr/usage.html>`_\n\n# 计算收益:20日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)\n\n( shift(close, -20) / shift(open, -1) -1)\n\n# 极值处理:用1%和99%分位的值做clip\nclip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))\n\n# where( (label > all_quantile(label,0.8))|(label < all_quantile(label,0.2)) , label, NaN)\n\n# 将分数映射到分类,这里使用20个分类\n# all_wbins(label, 20)\n\n# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)\nwhere( abs(shift(high, -1)-shift(low, -1)) < 1e-3, NaN, label)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"000905.SHA","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na_label","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"cast_label_int","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"def cal_max_ret(df, close, open, benchmark_close, benchmark_open, M, N ): \n df['ret'] = df.groupby('instrument', as_index=False, sort=False, group_keys=False).apply( \\\n lambda x: (pd.concat([x['close'].shift(-i) / x['open'].shift(-1) - x['benchmark_close'].shift(-i) / x['benchmark_open'].shift(-1) for i in range(M,N+1)], axis=1)).max(axis=1,skipna=False))\n last_date = df.date.sort_values().unique()[-N]\n return df.query('date < @last_date')['ret']\n\ndef cal_max_ret_v2(df, close, open, benchmark_close, benchmark_open, M, N ):\n df['ret'] = df.groupby('instrument', as_index=False, sort=False, group_keys=False).apply( \\\n lambda x: (pd.concat([x['close'].shift(-i) / x['open'].shift(-1) for i in range(M,N+1)], axis=1)).max(axis=1,skipna=False))\n last_date = df.date.sort_values().unique()[-N]\n return df.query('date < @last_date')['ret']\n\n\ndef last_max_ret(df, close, open, benchmark_close, benchmark_open, M=1, N=20):\n return 
df.groupby('instrument', as_index=False, sort=False, group_keys=False).apply(lambda x:(pd.concat([ x['close'].shift(-N) / x['open'].shift(-i) for i in range(M,N-1) ], axis=1) ).mean(axis=1,skipna=False) )\n\n\nbigquant_run={'cal_max_ret':cal_max_ret, 'cal_max_ret_v2': cal_max_ret_v2, 'last_max_ret': last_max_ret}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-22014","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"def bigquant_run(input_1, input_2, input_3, cap_n,vol_n,ret_n):\n import time\n import multiprocessing as mp\n train_label = input_1.read() # 标签数据\n data_set = input_2.read() # 全数据集\n feature_list = input_3.read() # 特征列表 \n start_t, end_t = train_label.date.min(), train_label.date.max()\n args_to_test = None\n\n # 训练集处理\n ########################################################################################################################################### \n # 获取数据,按日期、股票代码排序\n data_set = data_set.query('date>=@start_t and date<=@end_t').sort_values(['date','instrument']).reset_index(drop=True) \n # 缺失值检测\n data_set_checkNaN = data_set[feature_list].isna().sum(axis=0) / data_set.shape[0]\n print('-'*100,'\\n训练集:', data_set.shape, '开始日期:', data_set.date.min(), '结束日期:', data_set.date.max(),\"\\n 列缺失值检测(超过5%):\\n\",data_set_checkNaN[data_set_checkNaN > 0.05].sort_values(ascending=False))\n \n # 标记股票池\n data_set['select_pool'] = 1\n #data_set['select_pool'][data_set.eval('in_csi800_x !=1')] = 0 # 股票池\n data_set['select_pool'][data_set.eval('list_days_x <= 100')] = 0 # 上市天数 \n 
####data_set['select_pool'][data_set.eval('st_flag_x != 0')] = 0 # ST状态:0:正常股票,1:ST,2:*ST,11:暂停上市\n data_set['m_amount_x_rank'] = data_set.groupby(['date'])['m_amount_x'].rank(pct=True,ascending=False) \n # data_set['select_pool'][data_set.eval('m_amount_x_rank >= 0.70')] = 0 # 流动性控制 \n data_set['select_pool'][data_set[feature_list].isna().sum(axis=1) > 5] = 0 # 缺失严重 \n data_set = data_set.query('industry_sw_level1_x > 1.0 ') # 去除异常行业\n \n \n data_set = data_set.query('select_pool == 1')\n data_set[feature_list] = data_set[feature_list].replace([np.inf, -np.inf, np.nan], 0) # 数据异常值、缺失值处理\n\n # 标签处理 \n train_label['label'] = train_label[['date','label']].groupby(['date'])['label'].rank(pct=True,ascending=True) # rank 归一化\n \n ###########################################################################################################################################\n # 训练集标签合并\n label_data = pd.merge( data_set, train_label, on=['date','instrument'], how='inner') \n data_set = label_data[['date','instrument','label','select_pool','market_cap_x','industry_sw_level1_x']+feature_list].dropna(subset=['label']) \n factor_train_data = data_set.reset_index(drop=True)\n return Outputs(data_1= None, data_2= args_to_test, data_3= DataSource.write_df(factor_train_data) )\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{'cap_n':4,'vol_n':4, 
'ret_n':50}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-22014"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-22014"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-22014"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-22014","OutputType":null},{"Name":"data_2","NodeId":"-22014","OutputType":null},{"Name":"data_3","NodeId":"-22014","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":9,"IsPartOfPartialRun":null,"Comment":"数据集","CommentCollapsed":false},{"Id":"-760","ModuleId":"BigQuantSpace.RobustScaler.RobustScaler-v13","ModuleParameters":[{"Name":"scale_type","Value":"standard","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"quantile_range_min","Value":"0.01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"quantile_range_max","Value":"0.99","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"global_scale","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"train_ds","NodeId":"-760"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-760"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"test_ds","NodeId":"-760"}],"OutputPortsInternal":[{"Name":"train_data","NodeId":"-760","OutputType":null},{"Name":"test_data","NodeId":"-760","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta 
/><NodePositions><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='93,-460,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-24' Position='721,-597,200,200'/><NodePosition Node='-215' Position='688.3325805664062,-385.71685791015625,200,200'/><NodePosition Node='-222' Position='662.8134765625,-312.9056396484375,200,200'/><NodePosition Node='-585' Position='713,-475,200,200'/><NodePosition Node='-8309' Position='366.880615234375,-567,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-15' Position='90,-362,200,200'/><NodePosition Node='-22014' Position='370,-118,200,200'/><NodePosition Node='-760' Position='462,-215,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
[2021-06-23 17:38:40.914622] INFO: moduleinvoker: instruments.v2 开始运行..
[2021-06-23 17:38:41.252068] INFO: moduleinvoker: instruments.v2 运行完成[0.337427s].
[2021-06-23 17:38:41.256800] INFO: moduleinvoker: advanced_auto_labeler.v2 开始运行..
[2021-06-23 17:38:43.664853] INFO: 自动标注(股票): 加载历史数据: 1276561 行
[2021-06-23 17:38:43.666436] INFO: 自动标注(股票): 开始标注 ..
[2021-06-23 17:38:47.028700] INFO: moduleinvoker: advanced_auto_labeler.v2 运行完成[5.771885s].
[2021-06-23 17:38:47.032576] INFO: moduleinvoker: input_features.v1 开始运行..
[2021-06-23 17:38:47.040633] INFO: moduleinvoker: 命中缓存
[2021-06-23 17:38:47.042929] INFO: moduleinvoker: input_features.v1 运行完成[0.010363s].
[2021-06-23 17:38:47.046931] INFO: moduleinvoker: input_features.v1 开始运行..
[2021-06-23 17:38:47.053398] INFO: moduleinvoker: 命中缓存
[2021-06-23 17:38:47.055514] INFO: moduleinvoker: input_features.v1 运行完成[0.008591s].
[2021-06-23 17:38:47.058597] INFO: moduleinvoker: instruments.v2 开始运行..
[2021-06-23 17:38:47.065557] INFO: moduleinvoker: 命中缓存
[2021-06-23 17:38:47.067488] INFO: moduleinvoker: instruments.v2 运行完成[0.008891s].
[2021-06-23 17:38:47.140790] INFO: moduleinvoker: general_feature_extractor.v7 开始运行..
[2021-06-23 17:38:47.147490] INFO: moduleinvoker: 命中缓存
[2021-06-23 17:38:47.149394] INFO: moduleinvoker: general_feature_extractor.v7 运行完成[0.008638s].
[2021-06-23 17:38:47.155090] INFO: moduleinvoker: derived_feature_extractor.v3 开始运行..
[2021-06-23 17:38:47.162972] INFO: moduleinvoker: 命中缓存
[2021-06-23 17:38:47.165368] INFO: moduleinvoker: derived_feature_extractor.v3 运行完成[0.010307s].
[2021-06-23 17:38:47.179077] INFO: moduleinvoker: RobustScaler.v13 开始运行..
[2021-06-23 17:38:47.188617] INFO: moduleinvoker: 命中缓存
[2021-06-23 17:38:47.190222] INFO: moduleinvoker: RobustScaler.v13 运行完成[0.011163s].
[2021-06-23 17:38:47.194676] INFO: moduleinvoker: cached.v3 开始运行..
[2021-06-23 17:40:08.017076] INFO: moduleinvoker: cached.v3 运行完成[80.822382s].
----------------------------------------------------------------------------------------------------
训练集: (1191717, 218) 开始日期: 2020-01-02 00:00:00 结束日期: 2021-04-01 00:00:00
列缺失值检测(超过5%):
mean(low_0/high_0,10)/std(low_0/high_0,10) 0.623716
correlation(open_0/turn_0,amount_0/volume_0/close_0,10) 0.472042
correlation(open_0/turn_0,amount_0/volume_0/high_0,10) 0.472042
correlation(open_0,amount_0/volume_0/high_0,10) 0.472038
correlation(volume_0,amount_0/volume_0/close_0,10) 0.472038
mean(amount_0/volume_0/high_0,10)/std(amount_0/volume_0/high_0,10) 0.472038
correlation(volume_0/low_0,amount_0/volume_0/close_0,10) 0.472038
correlation(volume_0/low_0,amount_0/volume_0/high_0,10) 0.472038
correlation(turn_0,amount_0/volume_0/close_0,10) 0.472038
correlation(turn_0,amount_0/volume_0/high_0,10) 0.472038
correlation(open_0,amount_0/volume_0/close_0,10) 0.472038
mean(amount_0/volume_0/close_0,10)/std(amount_0/volume_0/close_0,10) 0.472038
correlation(volume_0,amount_0/volume_0/high_0,10) 0.472038
correlation(amount_0/volume_0,amount_0/volume_0/close_0,10) 0.472038
correlation(amount_0/volume_0,amount_0/volume_0/high_0,10) 0.472038
correlation(close_0,amount_0/volume_0/close_0,10) 0.472038
correlation(close_0,amount_0/volume_0/high_0,10) 0.472038
correlation(low_0,amount_0/volume_0/close_0,10) 0.472038
correlation(low_0,amount_0/volume_0/high_0,10) 0.472038
correlation(high_0,amount_0/volume_0/close_0,10) 0.472038
correlation(high_0,amount_0/volume_0/high_0,10) 0.472038
correlation(return_1,amount_0/volume_0/close_0,10) 0.469194
correlation(return_1,amount_0/volume_0/high_0,10) 0.469194
correlation(amount_0/volume_0/high_0,amount_0/volume_0/close_0,10) 0.468626
correlation(low_0/high_0,amount_0/volume_0/close_0,10) 0.269440
correlation(amount_0/volume_0/high_0,low_0/high_0,10) 0.213077
correlation(amount_0/volume_0,low_0/high_0,10) 0.204724
dtype: float64