自定义运行怎么并行计算多个因子

策略分享
标签: #<Tag:0x00007fc4d466d488>

(a1641181638) #1

原来的【列表式输入特征】可以很方便地解决这个问题,后来这个模块直接坏掉了,只好重新换成【输入特征列表】
如果不并行计算的话,输入特征列表的格式是"""\n特征1\n特征2\n"""
但是发现这个格式无法和并行计算兼容

克隆策略
In [56]:
#初始化创建/清空一个csv
import pandas as pd
print("初始化csv开始")
result =pd.DataFrame(columns=('date','algorithm_period_return','alpha','beta','max_drawdown','sharpe','feature'))
result.to_csv('因子批量测试结果.csv',header=True,mode='w')     
print("初始化csv完成")
初始化csv开始
初始化csv完成

    {"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"-300:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data1","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:data"},{"DestinationInputPortId":"-2048:input_data","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data"},{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data2","SourceOutputPortId":"-346:data"},{"DestinationInputPortId":"-2051:input_data","SourceOutputPortId":"-360:data"},{"DestinationInputPortId":"-805:training_ds","SourceOutputPortId":"-4385:data"},{"DestinationInputPortId":"-805:predict_ds","SourceOutputPortId":"-149:data"},{"DestinationInputPortId":"-360:input_data","SourceOutputPortId":"-7016:data"},{"DestinationInputPortId":"-346:input_data","SourceOutputPortId":"-308:data"},{"DestinationInputPortId":"-4385:input_data","SourceOutputPortId":"-565:data_1"},{"DestinationInputPortId":"-149:input_data","SourceOutputPortId":"-2040:data_1"},{"DestinationInputPortId":"-565:input_1","SourceOutputPortId":"-2048:data"},{"DestinationInputPortId":"-2040:input_1","SourceOutputPortId":"-2051:data"},{"DestinationInputPortId":"-1874:training_ds","SourceOutputPortId":"-805:transform_trainds"},{"DestinationInputPortId":"-1874:predict_ds","SourceOutputPortId":"-805:transform_predictds"},{"DestinationInputPortId":"-409:instruments","SourceOutputPortId":"-764:data"},{"DestinationInputPortId":"-307:instruments","SourceOutputPortId":"-764:data"},{"DestinationInputPortId":"-557:sort_by_ds","SourceOutputPortId":"-421:data"},{"DestinationInputPortId":"-409:options_data","SourceOutputPortId":"-557:sorted_data"},{"DestinationInputPortId":"-557:input_ds","SourceOutputPortId":"-1874:predictions"},{"DestinationInputPortId":"-308:input_data","SourceOutputPortId":"-300:data"},{"DestinationInputPortId":"-7016:input_data","SourceOutputPortId":"-307:data"},{"DestinationInputPortId":"-300:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-307:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-346:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-360:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-1874:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-421:features_ds","SourceOutputPortId":"-313:data"}],"ModuleNodes":[{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2014-10-31","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2016-10-31","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","ModuleId":"BigQuantSpace.advanced_auto_labeler.advanced_auto_labeler-v2","ModuleParameters":[{"Name":"label_expr","Value":"# 计算收益:5日经波动率调整后收益率\nshift(close, -5) / shift(open, -1)/ std(shift(close, -7) / shift(open, -6),5)\n\n# 极值处理:用1%和99%分位的值做clip\nclip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))\n\n# 将分数映射到分类,这里使用50个分类\nall_wbins(label, 20)\n\n# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)\nwhere(shift(high, -1) == shift(low, -1), NaN, label)","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"399102.ZIX","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na_label","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"cast_label_int","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53","ModuleId":"BigQuantSpace.join.join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data1","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data2","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-346","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-346"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-346"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-346","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":16,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-360","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-360"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-360"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-360","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":18,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-4385","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-4385"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-4385","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-149","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-149"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-149","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":23,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-7016","ModuleId":"BigQuantSpace.chinaa_stock_filter.chinaa_stock_filter-v1","ModuleParameters":[{"Name":"index_constituent_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22displayValue%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"board_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22displayValue%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"industry_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22displayValue%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22displayValue%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22displayValue%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22displayValue%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22displayValue%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22displayValue%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22displayValue%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22displayValue%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22displayValue%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22displayValue%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22displayValue%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22displayValue%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22displayValue%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22displayValue%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%87%87%E6%8E%98%22%2C%22displayValue%22%3A%22%E9%87%87%E6%8E%98%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%92%A2%E9%93%81%22%2C%22displayValue%22%3A%22%E9%92%A2%E9%93%81%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22displayValue%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22displayValue%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"st_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22*ST%22%2C%22displayValue%22%3A%22*ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22displayValue%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22ST%22%2C%22displayValue%22%3A%22ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22displayValue%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"delist_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%80%80%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-7016"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-7016","OutputType":null},{"Name":"left_data","NodeId":"-7016","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":14,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-308","ModuleId":"BigQuantSpace.chinaa_stock_filter.chinaa_stock_filter-v1","ModuleParameters":[{"Name":"index_constituent_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22displayValue%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"board_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22displayValue%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"industry_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22displayValue%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22displayValue%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22displayValue%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22displayValue%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22displayValue%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22displayValue%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22displayValue%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22displayValue%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22displayValue%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22displayValue%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22displayValue%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22displayValue%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22displayValue%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22displayValue%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%87%87%E6%8E%98%22%2C%22displayValue%22%3A%22%E9%87%87%E6%8E%98%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%92%A2%E9%93%81%22%2C%22displayValue%22%3A%22%E9%92%A2%E9%93%81%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22displayValue%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22displayValue%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"st_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22*ST%22%2C%22displayValue%22%3A%22*ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22displayValue%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22ST%22%2C%22displayValue%22%3A%22ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22displayValue%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"delist_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%80%80%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-308"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-308","OutputType":null},{"Name":"left_data","NodeId":"-308","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-565","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"from sklearn.linear_model import LinearRegression\n# 行业、市值中性模块\ndef bigquant_run(input_1, input_2, input_3): \n \n # 1、获取特征数值\n df = input_1.read_df() \n df = df[df['industry_sw_level1_0']>0] # 去除没有查出行业的股票\n industry_List = df['industry_sw_level1_0'].unique() # 所有行业代码\n factors_all=df.columns #获取因子列表\n\n # 2、获取用来中性化的因子列表 通常是行业和市值\n factor0 = input_2.read_pickle() \n\n #3、需要做清洗的因子列表 \n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n #4、缺失值处理 按中信一级行业相同个股的平均值填充\n for fac in factors_need_cal:\n df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)\n df[fac]=df[fac].fillna(df['fac_mean'])\n del df['fac_mean']\n\n #5、因子异常值处理 \n # 固定比例法\n #for fac in factors_need_cal:\n # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)\n #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)\n # 均值标准差法\n #print(df[factors_need_cal].head())\n #for fac in factors_need_cal:\n #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()\n #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()\n # MAD法\n #print(df[factors_need_cal].head())\n for fac in factors_need_cal:\n\n median = np.median(list(df[fac]))\n MAD = np.mean(abs(df[fac]) - median)\n df[fac][df[fac]>median+6*MAD] = median+6*MAD # 剔除偏离中位数6倍以上的数据\n df[fac][df[fac]<median-6*MAD] = median-6*MAD\n \n #计算行业哑变量\n dfTmp = df.copy() #copy一份用于计算行业哑变量\n for n in range(len(industry_List)): # 行业哑变量赋值\n dfTmp['industry_%d' % n] = 0\n dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1\n \n # 准备线性回归参数\n model0 = LinearRegression()\n X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵\n del X['industry_sw_level1_0'] #删去中性化因子中的行业列\n \n #需要计算中性化的因子列表\n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n # 逐个特征进行行业市值中性化\n from sklearn.preprocessing import scale\n for fac in factors_need_cal:\n\n y = df[fac] #获取需要中性化的因子暴露值\n model0.fit(X, y)\n df[fac] = y-model0.predict(X) # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差\n #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理\n df[fac] = scale(df[fac])\n #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果\n #多重共线性分析\n from sklearn.decomposition import PCA\n import matplotlib.pyplot as plt\n pca = PCA(n_components=len(factors_need_cal))\n pca.fit(df[factors_need_cal])\n var= pca.explained_variance_ratio_ #计算每个因子解释程度\n var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)#累计解释程度\n plt.plot(var1)\n print(var)\n data_1 = DataSource.write_df(df)\n print(data_1)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-565"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-565"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-565"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-565","OutputType":null},{"Name":"data_2","NodeId":"-565","OutputType":null},{"Name":"data_3","NodeId":"-565","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":20,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2040","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"from sklearn.linear_model import LinearRegression\n# 行业、市值中性模块\ndef bigquant_run(input_1, input_2, input_3): \n \n # 1、获取特征数值\n df = input_1.read_df() \n df = df[df['industry_sw_level1_0']>0] # 去除没有查出行业的股票\n industry_List = df['industry_sw_level1_0'].unique() # 所有行业代码\n factors_all=df.columns #获取因子列表\n\n # 2、获取用来中性化的因子列表 通常是行业和市值\n factor0 = input_2.read_pickle() \n\n #3、需要做清洗的因子列表 \n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n #4、缺失值处理 按中信一级行业相同个股的平均值填充\n for fac in factors_need_cal:\n df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)\n df[fac]=df[fac].fillna(df['fac_mean'])\n del df['fac_mean']\n\n #5、因子异常值处理 \n # 固定比例法\n #for fac in factors_need_cal:\n # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)\n #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)\n # 均值标准差法\n #print(df[factors_need_cal].head())\n #for fac in factors_need_cal:\n #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()\n #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()\n # MAD法\n #print(df[factors_need_cal].head())\n for fac in factors_need_cal:\n\n median = np.median(list(df[fac]))\n MAD = np.mean(abs(df[fac]) - median)\n df[fac][df[fac]>median+6*MAD] = median+6*MAD # 剔除偏离中位数6倍以上的数据\n df[fac][df[fac]<median-6*MAD] = median-6*MAD\n \n #计算行业哑变量\n dfTmp = df.copy() #copy一份用于计算行业哑变量\n for n in range(len(industry_List)): # 行业哑变量赋值\n dfTmp['industry_%d' % n] = 0\n dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1\n \n # 准备线性回归参数\n model0 = LinearRegression()\n X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵\n del X['industry_sw_level1_0'] #删去中性化因子中的行业列\n \n #需要计算中性化的因子列表\n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n # 逐个特征进行行业市值中性化\n from sklearn.preprocessing import scale\n for fac in factors_need_cal:\n\n y = df[fac] #获取需要中性化的因子暴露值\n model0.fit(X, y)\n df[fac] = y-model0.predict(X) # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差\n #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理\n df[fac] = scale(df[fac])\n #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果\n data_1 = DataSource.write_df(df)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n ","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-2040"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-2040"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-2040"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-2040","OutputType":null},{"Name":"data_2","NodeId":"-2040","OutputType":null},{"Name":"data_3","NodeId":"-2040","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2048","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-2048"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2048","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2051","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-2051"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2051","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":13,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-805","ModuleId":"BigQuantSpace.preprocessing_standard_scaler.preprocessing_standard_scaler-v1","ModuleParameters":[{"Name":"with_mean","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"with_std","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"predict_ds","NodeId":"-805"}],"OutputPortsInternal":[{"Name":"output_model","NodeId":"-805","OutputType":null},{"Name":"transform_trainds","NodeId":"-805","OutputType":null},{"Name":"transform_predictds","NodeId":"-805","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-764","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2016-11-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2017-04-30","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-764"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-764","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-421","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"date\npred_label","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-421"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-421","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":26,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-557","ModuleId":"BigQuantSpace.sort.sort-v4","ModuleParameters":[{"Name":"sort_by","Value":"pred_label","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"group_by","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"keep_columns","Value":"--","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"ascending","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_ds","NodeId":"-557"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"sort_by_ds","NodeId":"-557"}],"OutputPortsInternal":[{"Name":"sorted_data","NodeId":"-557","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":17,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-1874","ModuleId":"BigQuantSpace.random_forest_classifier.random_forest_classifier-v1","ModuleParameters":[{"Name":"iterations","Value":10,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"feature_fraction","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_depth","Value":30,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"min_samples_per_leaf","Value":200,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"key_cols","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"workers","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"other_train_parameters","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"predict_ds","NodeId":"-1874"}],"OutputPortsInternal":[{"Name":"output_model","NodeId":"-1874","OutputType":null},{"Name":"predictions","NodeId":"-1874","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-409","ModuleId":"BigQuantSpace.trade.trade-v4","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"initialize","Value":"# 回测引擎:初始化函数,只执行一次\ndef bigquant_run(context):\n # 加载预测数据\n context.ranker_prediction = context.options['data'].read_df()\n\n # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数\n context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))\n # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)\n # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只\n stock_count = 30\n # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]\n context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])\n # 设置每只股票占用的最大资金比例\n context.max_cash_per_instrument = 0.2\n context.options['hold_days'] = 5\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"handle_data","Value":"# 回测引擎:每日数据处理函数,每天执行一次\ndef bigquant_run(context, data):\n #------------------------START:加入下面if的两行代码到之前到主函数的最前部分-------------------\n # 相隔几天(以5天举例)运行一下handle_data函数\n if context.trading_day_index % 5 != 0:\n return \n #------------------------END:加上这两句代码在主函数就能实现隔几天运行---------------------\n # 按日期过滤得到今日的预测数据\n ranker_prediction = context.ranker_prediction[\n context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]\n\n # 1. 资金分配\n # 平均持仓时间是hold_days,每日都将买入股票,每日预期使用 1/hold_days 的资金\n # 实际操作中,会存在一定的买入误差,所以在前hold_days天,等量使用资金;之后,尽量使用剩余资金(这里设置最多用等量的1.5倍)\n is_staging = context.trading_day_index < context.options['hold_days'] # 是否在建仓期间(前 hold_days 天)\n cash_avg = context.portfolio.portfolio_value / context.options['hold_days']\n cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)\n cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)\n positions = {e.symbol: p.amount * p.last_sale_price\n for e, p in context.perf_tracker.position_tracker.positions.items()}\n\n # 2. 生成卖出订单:hold_days天之后才开始卖出;对持仓的股票,按机器学习算法预测的排序末位淘汰\n if not is_staging and cash_for_sell > 0:\n equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}\n instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(\n lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))\n # print('rank order for sell %s' % instruments)\n for instrument in instruments:\n context.order_target(context.symbol(instrument), 0)\n cash_for_sell -= positions[instrument]\n if cash_for_sell <= 0:\n break\n\n # 3. 生成买入订单:按机器学习算法预测的排序,买入前面的stock_count只股票\n buy_cash_weights = context.stock_weights\n buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])\n max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument\n for i, instrument in enumerate(buy_instruments):\n cash = cash_for_buy * buy_cash_weights[i]\n if cash > max_cash_per_instrument - positions.get(instrument, 0):\n # 确保股票持仓量不会超过每次股票最大的占用资金量\n cash = max_cash_per_instrument - positions.get(instrument, 0)\n if cash > 0:\n price = data.current(context.symbol(instrument), 'price') # 最新价格\n stock_num = np.floor(cash/price/100)*100 # 向下取整\n context.order(context.symbol(instrument), stock_num) # 整百下单\n\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"prepare","Value":"# 回测引擎:准备数据,只执行一次\ndef bigquant_run(context):\n pass\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_trading_start","Value":"# 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。\ndef bigquant_run(context, data):\n pass\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"volume_limit","Value":0.025,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_buy","Value":"open","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_sell","Value":"close","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"capital_base","Value":1000000,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"auto_cancel_non_tradable_orders","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"data_frequency","Value":"daily","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"price_type","Value":"真实价格","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"product_type","Value":"股票","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"plot_charts","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"backtest_only","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"options_data","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"history_ds","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"benchmark_ds","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"trading_calendar","NodeId":"-409"}],"OutputPortsInternal":[{"Name":"raw_perf","NodeId":"-409","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":27,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-300","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":90,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-300"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-300"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-300","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":22,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-307","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":90,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-307"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-307"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-307","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-313","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"return_5\nreturn_10","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-313"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-313","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":24,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-5194","ModuleId":"BigQuantSpace.hyper_run.hyper_run-v1","ModuleParameters":[{"Name":"run","Value":"def bigquant_run(bq_graph, inputs):\n features =\"\"\"\nreturn_5\nreturn_10\n\"\"\"\n\n features=features.split()\n\n parameters_list = []\n for feature in features:\n feature = [feature]\n feature.append('industry_sw_level1_0')\n feature.append('market_cap_float_0')\n feature ='\\n'.join(feature)\n feature = ('\\n' + feature + \"\\n\")\n parameters = {'m24.feature':feature}\n parameters_list.append({'parameters': parameters})\n \n def run(parameters):\n try:\n print(parameters)\n return g.run(parameters)\n except Exception as e:\n print('ERROR --------', e)\n return None\n \n results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)\n\n return results","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"run_now","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"bq_graph","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"bq_graph_port","NodeId":"-5194"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-5194"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-5194"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-5194"}],"OutputPortsInternal":[{"Name":"result","NodeId":"-5194","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":9,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='299,-232,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-15' Position='53,-131,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-53' Position='61,45,200,200'/><NodePosition Node='-346' Position='420,99,200,200'/><NodePosition Node='-360' Position='1255,12,200,200'/><NodePosition Node='-4385' Position='147,378,200,200'/><NodePosition Node='-149' Position='1205,272,200,200'/><NodePosition Node='-7016' Position='1400,-121,200,200'/><NodePosition Node='-308' Position='419,-1,200,200'/><NodePosition Node='-565' Position='120,290,200,200'/><NodePosition Node='-2040' Position='1239,190,200,200'/><NodePosition Node='-2048' Position='34,184,200,200'/><NodePosition Node='-2051' Position='1417,104,200,200'/><NodePosition Node='-805' Position='634,198,200,200'/><NodePosition Node='-764' Position='1067,-258,200,200'/><NodePosition Node='-421' Position='958,359,200,200'/><NodePosition Node='-557' Position='575,510,200,200'/><NodePosition Node='-1874' Position='574,339,200,200'/><NodePosition Node='-409' Position='951,502,200,200'/><NodePosition Node='-300' Position='406,-127,200,200'/><NodePosition Node='-307' Position='1408,-220,200,200'/><NodePosition Node='-313' Position='744,-182,200,200'/><NodePosition Node='-5194' Position='-51.071861267089844,-307.35687255859375,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [57]:
    # 本代码由可视化策略环境自动生成 2020年3月25日 10:08
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    from sklearn.linear_model import LinearRegression
    # 行业、市值中性模块
    def m20_run_bigquant_run(input_1, input_2, input_3):    
        
        # 1、获取特征数值
        df = input_1.read_df()   
        df = df[df['industry_sw_level1_0']>0]                # 去除没有查出行业的股票
        industry_List = df['industry_sw_level1_0'].unique()          # 所有行业代码
        factors_all=df.columns    #获取因子列表
    
        # 2、获取用来中性化的因子列表 通常是行业和市值
        factor0 = input_2.read_pickle()                         
    
        #3、需要做清洗的因子列表 
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        #4、缺失值处理 按中信一级行业相同个股的平均值填充
        for fac in factors_need_cal:
            df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)
            df[fac]=df[fac].fillna(df['fac_mean'])
            del df['fac_mean']
    
        #5、因子异常值处理  
        #  固定比例法
        #for fac in factors_need_cal:
           # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)
            #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)
        #  均值标准差法
        #print(df[factors_need_cal].head())
        #for fac in factors_need_cal:
            #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()
            #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()
        #  MAD法
        #print(df[factors_need_cal].head())
        for fac in factors_need_cal:
    
            median = np.median(list(df[fac]))
            MAD = np.mean(abs(df[fac]) - median)
            df[fac][df[fac]>median+6*MAD] = median+6*MAD  # 剔除偏离中位数6倍以上的数据
            df[fac][df[fac]<median-6*MAD] = median-6*MAD
           
        #计算行业哑变量
        dfTmp = df.copy()  #copy一份用于计算行业哑变量
        for n in range(len(industry_List)):                         # 行业哑变量赋值
            dfTmp['industry_%d' % n] = 0
            dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1
                              
        # 准备线性回归参数
        model0 = LinearRegression()
        X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵
        del X['industry_sw_level1_0'] #删去中性化因子中的行业列
        
        #需要计算中性化的因子列表
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        # 逐个特征进行行业市值中性化
        from sklearn.preprocessing import scale
        for fac in factors_need_cal:
    
                y = df[fac] #获取需要中性化的因子暴露值
                model0.fit(X, y)
                df[fac] = y-model0.predict(X)  # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差
                #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理
                df[fac] = scale(df[fac])
                 #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果
        #多重共线性分析
        from sklearn.decomposition import PCA
        import matplotlib.pyplot as plt
        pca = PCA(n_components=len(factors_need_cal))
        pca.fit(df[factors_need_cal])
        var= pca.explained_variance_ratio_ #计算每个因子解释程度
        var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)#累计解释程度
        plt.plot(var1)
        print(var)
        data_1 = DataSource.write_df(df)
        print(data_1)
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m20_post_run_bigquant_run(outputs):
        return outputs
    
    from sklearn.linear_model import LinearRegression
    # 行业、市值中性模块
    def m5_run_bigquant_run(input_1, input_2, input_3):    
        
        # 1、获取特征数值
        df = input_1.read_df()   
        df = df[df['industry_sw_level1_0']>0]                # 去除没有查出行业的股票
        industry_List = df['industry_sw_level1_0'].unique()          # 所有行业代码
        factors_all=df.columns    #获取因子列表
    
        # 2、获取用来中性化的因子列表 通常是行业和市值
        factor0 = input_2.read_pickle()                         
    
        #3、需要做清洗的因子列表 
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        #4、缺失值处理 按中信一级行业相同个股的平均值填充
        for fac in factors_need_cal:
            df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)
            df[fac]=df[fac].fillna(df['fac_mean'])
            del df['fac_mean']
    
        #5、因子异常值处理  
        #  固定比例法
        #for fac in factors_need_cal:
           # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)
            #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)
        #  均值标准差法
        #print(df[factors_need_cal].head())
        #for fac in factors_need_cal:
            #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()
            #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()
        #  MAD法
        #print(df[factors_need_cal].head())
        for fac in factors_need_cal:
    
            median = np.median(list(df[fac]))
            MAD = np.mean(abs(df[fac]) - median)
            df[fac][df[fac]>median+6*MAD] = median+6*MAD  # 剔除偏离中位数6倍以上的数据
            df[fac][df[fac]<median-6*MAD] = median-6*MAD
                 
        #计算行业哑变量
        dfTmp = df.copy()  #copy一份用于计算行业哑变量
        for n in range(len(industry_List)):                         # 行业哑变量赋值
            dfTmp['industry_%d' % n] = 0
            dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1
                              
        # 准备线性回归参数
        model0 = LinearRegression()
        X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵
        del X['industry_sw_level1_0'] #删去中性化因子中的行业列
        
        #需要计算中性化的因子列表
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        # 逐个特征进行行业市值中性化
        from sklearn.preprocessing import scale
        for fac in factors_need_cal:
    
                y = df[fac] #获取需要中性化的因子暴露值
                model0.fit(X, y)
                df[fac] = y-model0.predict(X)  # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差
                #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理
                df[fac] = scale(df[fac])
                 #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果
        data_1 = DataSource.write_df(df)
        return Outputs(data_1=data_1, data_2=None, data_3=None)
        
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m5_post_run_bigquant_run(outputs):
        return outputs
    
    # 回测引擎:初始化函数,只执行一次
    def m27_initialize_bigquant_run(context):
        # 加载预测数据
        context.ranker_prediction = context.options['data'].read_df()
    
        # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数
        context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
        # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)
        # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只
        stock_count = 30
        # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]
        context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])
        # 设置每只股票占用的最大资金比例
        context.max_cash_per_instrument = 0.2
        context.options['hold_days'] = 5
    
    # 回测引擎:每日数据处理函数,每天执行一次
    def m27_handle_data_bigquant_run(context, data):
        #------------------------START:加入下面if的两行代码到之前到主函数的最前部分-------------------
        # 相隔几天(以5天举例)运行一下handle_data函数
        if context.trading_day_index % 5 != 0:
            return 
        #------------------------END:加上这两句代码在主函数就能实现隔几天运行---------------------
        # 按日期过滤得到今日的预测数据
        ranker_prediction = context.ranker_prediction[
            context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]
    
        # 1. 资金分配
        # 平均持仓时间是hold_days,每日都将买入股票,每日预期使用 1/hold_days 的资金
        # 实际操作中,会存在一定的买入误差,所以在前hold_days天,等量使用资金;之后,尽量使用剩余资金(这里设置最多用等量的1.5倍)
        is_staging = context.trading_day_index < context.options['hold_days'] # 是否在建仓期间(前 hold_days 天)
        cash_avg = context.portfolio.portfolio_value / context.options['hold_days']
        cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)
        cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)
        positions = {e.symbol: p.amount * p.last_sale_price
                     for e, p in context.perf_tracker.position_tracker.positions.items()}
    
        # 2. 生成卖出订单:hold_days天之后才开始卖出;对持仓的股票,按机器学习算法预测的排序末位淘汰
        if not is_staging and cash_for_sell > 0:
            equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}
            instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(
                    lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))
            # print('rank order for sell %s' % instruments)
            for instrument in instruments:
                context.order_target(context.symbol(instrument), 0)
                cash_for_sell -= positions[instrument]
                if cash_for_sell <= 0:
                    break
    
        # 3. 生成买入订单:按机器学习算法预测的排序,买入前面的stock_count只股票
        buy_cash_weights = context.stock_weights
        buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])
        max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument
        for i, instrument in enumerate(buy_instruments):
            cash = cash_for_buy * buy_cash_weights[i]
            if cash > max_cash_per_instrument - positions.get(instrument, 0):
                # 确保股票持仓量不会超过每次股票最大的占用资金量
                cash = max_cash_per_instrument - positions.get(instrument, 0)
            if cash > 0:
                price = data.current(context.symbol(instrument), 'price')  # 最新价格
                stock_num = np.floor(cash/price/100)*100  # 向下取整
                context.order(context.symbol(instrument), stock_num) # 整百下单
    
    
    # 回测引擎:准备数据,只执行一次
    def m27_prepare_bigquant_run(context):
        pass
    
    # 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。
    def m27_before_trading_start_bigquant_run(context, data):
        pass
    
    
    g = T.Graph({
    
        'm1': 'M.instruments.v2',
        'm1.start_date': '2014-10-31',
        'm1.end_date': '2016-10-31',
        'm1.market': 'CN_STOCK_A',
        'm1.instrument_list': '',
        'm1.max_count': 0,
    
        'm2': 'M.advanced_auto_labeler.v2',
        'm2.instruments': T.Graph.OutputPort('m1.data'),
        'm2.label_expr': """# 计算收益:5日经波动率调整后收益率
    shift(close, -5) / shift(open, -1)/ std(shift(close, -7) / shift(open, -6),5)
    
    # 极值处理:用1%和99%分位的值做clip
    clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
    
    # 将分数映射到分类,这里使用50个分类
    all_wbins(label, 20)
    
    # 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
    where(shift(high, -1) == shift(low, -1), NaN, label)""",
        'm2.start_date': '',
        'm2.end_date': '',
        'm2.benchmark': '399102.ZIX',
        'm2.drop_na_label': True,
        'm2.cast_label_int': True,
    
        'm11': 'M.instruments.v2',
        'm11.start_date': '2016-11-01',
        'm11.end_date': '2017-04-30',
        'm11.market': 'CN_STOCK_A',
        'm11.instrument_list': '',
        'm11.max_count': 0,
    
        'm24': 'M.input_features.v1',
        'm24.features': """return_5
    return_10""",
    
        'm22': 'M.general_feature_extractor.v7',
        'm22.instruments': T.Graph.OutputPort('m1.data'),
        'm22.features': T.Graph.OutputPort('m24.data'),
        'm22.start_date': '',
        'm22.end_date': '',
        'm22.before_start_days': 90,
    
        'm8': 'M.chinaa_stock_filter.v1',
        'm8.input_data': T.Graph.OutputPort('m22.data'),
        'm8.index_constituent_cond': ['中证800'],
        'm8.board_cond': ['全部'],
        'm8.industry_cond': ['全部'],
        'm8.st_cond': ['正常'],
        'm8.delist_cond': ['非退市'],
        'm8.output_left_data': False,
    
        'm10': 'M.general_feature_extractor.v7',
        'm10.instruments': T.Graph.OutputPort('m11.data'),
        'm10.features': T.Graph.OutputPort('m24.data'),
        'm10.start_date': '',
        'm10.end_date': '',
        'm10.before_start_days': 90,
    
        'm14': 'M.chinaa_stock_filter.v1',
        'm14.input_data': T.Graph.OutputPort('m10.data'),
        'm14.index_constituent_cond': ['中证800'],
        'm14.board_cond': ['全部'],
        'm14.industry_cond': ['全部'],
        'm14.st_cond': ['正常'],
        'm14.delist_cond': ['非退市'],
        'm14.output_left_data': False,
    
        'm16': 'M.derived_feature_extractor.v3',
        'm16.input_data': T.Graph.OutputPort('m8.data'),
        'm16.features': T.Graph.OutputPort('m24.data'),
        'm16.date_col': 'date',
        'm16.instrument_col': 'instrument',
        'm16.drop_na': False,
        'm16.remove_extra_columns': True,
    
        'm7': 'M.join.v3',
        'm7.data1': T.Graph.OutputPort('m2.data'),
        'm7.data2': T.Graph.OutputPort('m16.data'),
        'm7.on': 'date,instrument',
        'm7.how': 'inner',
        'm7.sort': False,
    
        'm6': 'M.dropnan.v1',
        'm6.input_data': T.Graph.OutputPort('m7.data'),
    
        'm20': 'M.cached.v3',
        'm20.input_1': T.Graph.OutputPort('m6.data'),
        'm20.run': m20_run_bigquant_run,
        'm20.post_run': m20_post_run_bigquant_run,
        'm20.input_ports': '',
        'm20.params': '{}',
        'm20.output_ports': '',
    
        'm12': 'M.dropnan.v1',
        'm12.input_data': T.Graph.OutputPort('m20.data_1'),
    
        'm18': 'M.derived_feature_extractor.v3',
        'm18.input_data': T.Graph.OutputPort('m14.data'),
        'm18.features': T.Graph.OutputPort('m24.data'),
        'm18.date_col': 'date',
        'm18.instrument_col': 'instrument',
        'm18.drop_na': False,
        'm18.remove_extra_columns': True,
    
        'm13': 'M.dropnan.v1',
        'm13.input_data': T.Graph.OutputPort('m18.data'),
    
        'm5': 'M.cached.v3',
        'm5.input_1': T.Graph.OutputPort('m13.data'),
        'm5.run': m5_run_bigquant_run,
        'm5.post_run': m5_post_run_bigquant_run,
        'm5.input_ports': '',
        'm5.params': '{}',
        'm5.output_ports': '',
    
        'm23': 'M.dropnan.v1',
        'm23.input_data': T.Graph.OutputPort('m5.data_1'),
    
        'm4': 'M.preprocessing_standard_scaler.v1',
        'm4.training_ds': T.Graph.OutputPort('m12.data'),
        'm4.predict_ds': T.Graph.OutputPort('m23.data'),
        'm4.with_mean': True,
        'm4.with_std': True,
    
        'm3': 'M.random_forest_classifier.v1',
        'm3.training_ds': T.Graph.OutputPort('m4.transform_trainds'),
        'm3.features': T.Graph.OutputPort('m24.data'),
        'm3.predict_ds': T.Graph.OutputPort('m4.transform_predictds'),
        'm3.iterations': 10,
        'm3.feature_fraction': 1,
        'm3.max_depth': 30,
        'm3.min_samples_per_leaf': 200,
        'm3.key_cols': 'date,instrument',
        'm3.workers': 1,
        'm3.other_train_parameters': {},
    
        'm26': 'M.input_features.v1',
        'm26.features_ds': T.Graph.OutputPort('m24.data'),
        'm26.features': """date
    pred_label""",
    
        'm17': 'M.sort.v4',
        'm17.input_ds': T.Graph.OutputPort('m3.predictions'),
        'm17.sort_by_ds': T.Graph.OutputPort('m26.data'),
        'm17.sort_by': 'pred_label',
        'm17.group_by': 'date',
        'm17.keep_columns': '--',
        'm17.ascending': False,
    
        'm27': 'M.trade.v4',
        'm27.instruments': T.Graph.OutputPort('m11.data'),
        'm27.options_data': T.Graph.OutputPort('m17.sorted_data'),
        'm27.start_date': '',
        'm27.end_date': '',
        'm27.initialize': m27_initialize_bigquant_run,
        'm27.handle_data': m27_handle_data_bigquant_run,
        'm27.prepare': m27_prepare_bigquant_run,
        'm27.before_trading_start': m27_before_trading_start_bigquant_run,
        'm27.volume_limit': 0.025,
        'm27.order_price_field_buy': 'open',
        'm27.order_price_field_sell': 'close',
        'm27.capital_base': 1000000,
        'm27.auto_cancel_non_tradable_orders': True,
        'm27.data_frequency': 'daily',
        'm27.price_type': '真实价格',
        'm27.product_type': '股票',
        'm27.plot_charts': True,
        'm27.backtest_only': False,
        'm27.benchmark': '',
    })
    
    # g.run({})
    
    
    def m9_run_bigquant_run(bq_graph, inputs):
        features ="""
    return_5
    return_10
    """
    
        features=features.split()
    
        parameters_list = []
        for feature in features:
            feature = [feature]
            feature.append('industry_sw_level1_0')
            feature.append('market_cap_float_0')
            feature ='\n'.join(feature)
            feature = ('\n' + feature  + "\n")
            parameters = {'m24.feature':feature}
            parameters_list.append({'parameters': parameters})
        
        def run(parameters):
            try:
                print(parameters)
                return g.run(parameters)
            except Exception as e:
                print('ERROR --------', e)
                return None
     
        results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)
    
        return results
    
    m9 = M.hyper_run.v1(
        run=m9_run_bigquant_run,
        run_now=True,
        bq_graph=g
    )
    

    自定义Python模块(cached)使用错误,你可以:

    1.一键查看文档

    2.一键搜索答案

    ---------------------------------------------------------------------------
    KeyError                                  Traceback (most recent call last)
    KeyError: 'industry_sw_level1_0'
    
    During handling of the above exception, another exception occurred:
    
    KeyError                                  Traceback (most recent call last)
    <ipython-input-57-3c196a4f27a4> in <module>()
        334     input_ports='',
        335     params='{}',
    --> 336     output_ports=''
        337 )
        338 
    
    <ipython-input-57-3c196a4f27a4> in m20_run_bigquant_run(input_1, input_2, input_3)
          9     # 1、获取特征数值
         10     df = input_1.read_df()
    ---> 11     df = df[df['industry_sw_level1_0']>0]                # 去除没有查出行业的股票
         12     industry_List = df['industry_sw_level1_0'].unique()          # 所有行业代码
         13     factors_all=df.columns    #获取因子列表
    
    KeyError: 'industry_sw_level1_0'
    In [ ]:
    #读取回测结果和特征名,然后追加写入到创建好的文档里 
    for k in range(len(m9.result)):
        res = m9.result[k]['m27'].read_raw_perf()[['algorithm_period_return','alpha','beta','max_drawdown','sharpe']]
        res_tmp = pd.DataFrame(res.iloc[-1]).T
        feature = m9.result[k]['m24'].feature_list      
        feature = feature.read_pickle()
        feature = feature[0]       
        res_tmp['feature'] = str(feature)
        
        res_tmp = res_tmp.reset_index(drop=False)
        res_tmp.to_csv('因子批量测试结果.csv',header=False,mode='a')   
        
    print('csv追加写入结束')
    
    In [ ]:
    #超参搜索代码
    #print(m17.result.best_params_)
    #print(m17.result.best_score_)
    #自定义运行-并行计算代码
    
    # 查看所有并行任务的运算结果
    #m9.result
    # 查看第一个并行任务的运算结果
    #m9.result[0]
    # 查看第一个并行任务的预测结果前5条记录
    #m9.result[0].predictions.read_df().head()
    # 查看第一个并行任务的运算结果中m19回测模块的回测曲线
    #m9.result[0]['m19'].display()
    

    (a1641181638) #2

    parameters[‘m3.features’]=’\n’.join([你的新因子组合])
    这个也是不行的


    (达达) #3

    并行能跑起来,但是你先调通不带自定义运行的策略

    克隆策略
    In [56]:
    #初始化创建/清空一个csv
    import pandas as pd
    print("初始化csv开始")
    result =pd.DataFrame(columns=('date','algorithm_period_return','alpha','beta','max_drawdown','sharpe','feature'))
    result.to_csv('因子批量测试结果.csv',header=True,mode='w')     
    print("初始化csv完成")
    
    初始化csv开始
    初始化csv完成
    

      {"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"-300:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data1","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:data"},{"DestinationInputPortId":"-2048:input_data","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data"},{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data2","SourceOutputPortId":"-346:data"},{"DestinationInputPortId":"-2051:input_data","SourceOutputPortId":"-360:data"},{"DestinationInputPortId":"-805:training_ds","SourceOutputPortId":"-4385:data"},{"DestinationInputPortId":"-805:predict_ds","SourceOutputPortId":"-149:data"},{"DestinationInputPortId":"-360:input_data","SourceOutputPortId":"-7016:data"},{"DestinationInputPortId":"-346:input_data","SourceOutputPortId":"-308:data"},{"DestinationInputPortId":"-4385:input_data","SourceOutputPortId":"-565:data_1"},{"DestinationInputPortId":"-149:input_data","SourceOutputPortId":"-2040:data_1"},{"DestinationInputPortId":"-565:input_1","SourceOutputPortId":"-2048:data"},{"DestinationInputPortId":"-2040:input_1","SourceOutputPortId":"-2051:data"},{"DestinationInputPortId":"-1874:training_ds","SourceOutputPortId":"-805:transform_trainds"},{"DestinationInputPortId":"-1874:predict_ds","SourceOutputPortId":"-805:transform_predictds"},{"DestinationInputPortId":"-409:instruments","SourceOutputPortId":"-764:data"},{"DestinationInputPortId":"-307:instruments","SourceOutputPortId":"-764:data"},{"DestinationInputPortId":"-557:sort_by_ds","SourceOutputPortId":"-421:data"},{"DestinationInputPortId":"-409:options_data","SourceOutputPortId":"-557:sorted_data"},{"DestinationInputPortId":"-557:input_ds","SourceOutputPortId":"-1874:predictions"},{"DestinationInputPortId":"-308:input_data","SourceOutputPortId":"-300:data"},{"DestinationInputPortId":"-7016:input_data","SourceOutputPortId":"-307:data"},{"DestinationInputPortId":"-300:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-307:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-346:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-360:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-1874:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-421:features_ds","SourceOutputPortId":"-313:data"}],"ModuleNodes":[{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2014-10-31","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2016-10-31","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","ModuleId":"BigQuantSpace.advanced_auto_labeler.advanced_auto_labeler-v2","ModuleParameters":[{"Name":"label_expr","Value":"# 计算收益:5日经波动率调整后收益率\nshift(close, -5) / shift(open, -1)/ std(shift(close, -7) / shift(open, -6),5)\n\n# 极值处理:用1%和99%分位的值做clip\nclip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))\n\n# 将分数映射到分类,这里使用50个分类\nall_wbins(label, 20)\n\n# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)\nwhere(shift(high, -1) == shift(low, -1), NaN, label)","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"399102.ZIX","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na_label","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"cast_label_int","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53","ModuleId":"BigQuantSpace.join.join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data1","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data2","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-346","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-346"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-346"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-346","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":16,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-360","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-360"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-360"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-360","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":18,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-4385","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-4385"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-4385","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-149","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-149"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-149","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":23,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-7016","ModuleId":"BigQuantSpace.chinaa_stock_filter.chinaa_stock_filter-v1","ModuleParameters":[{"Name":"index_constituent_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22displayValue%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"board_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22displayValue%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"industry_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22displayValue%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22displayValue%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22displayValue%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22displayValue%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22displayValue%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22displayValue%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22displayValue%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22displayValue%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22displayValue%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22displayValue%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22displayValue%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22displayValue%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22displayValue%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22displayValue%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%87%87%E6%8E%98%22%2C%22displayValue%22%3A%22%E9%87%87%E6%8E%98%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%92%A2%E9%93%81%22%2C%22displayValue%22%3A%22%E9%92%A2%E9%93%81%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22displayValue%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22displayValue%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"st_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22*ST%22%2C%22displayValue%22%3A%22*ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22displayValue%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22ST%22%2C%22displayValue%22%3A%22ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22displayValue%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"delist_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%80%80%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-7016"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-7016","OutputType":null},{"Name":"left_data","NodeId":"-7016","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":14,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-308","ModuleId":"BigQuantSpace.chinaa_stock_filter.chinaa_stock_filter-v1","ModuleParameters":[{"Name":"index_constituent_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22displayValue%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"board_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22displayValue%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"industry_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22displayValue%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22displayValue%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22displayValue%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22displayValue%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22displayValue%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22displayValue%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22displayValue%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22displayValue%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22displayValue%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22displayValue%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22displayValue%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22displayValue%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22displayValue%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22displayValue%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%87%87%E6%8E%98%22%2C%22displayValue%22%3A%22%E9%87%87%E6%8E%98%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%92%A2%E9%93%81%22%2C%22displayValue%22%3A%22%E9%92%A2%E9%93%81%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22displayValue%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22displayValue%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"st_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22*ST%22%2C%22displayValue%22%3A%22*ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22displayValue%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22ST%22%2C%22displayValue%22%3A%22ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22displayValue%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"delist_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%80%80%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-308"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-308","OutputType":null},{"Name":"left_data","NodeId":"-308","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-565","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"from sklearn.linear_model import LinearRegression\n# 行业、市值中性模块\ndef bigquant_run(input_1, input_2, input_3): \n \n # 1、获取特征数值\n df = input_1.read_df() \n df = df[df['industry_sw_level1_0']>0] # 去除没有查出行业的股票\n industry_List = df['industry_sw_level1_0'].unique() # 所有行业代码\n factors_all=df.columns #获取因子列表\n\n # 2、获取用来中性化的因子列表 通常是行业和市值\n factor0 = input_2.read_pickle() \n\n #3、需要做清洗的因子列表 \n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n #4、缺失值处理 按中信一级行业相同个股的平均值填充\n for fac in factors_need_cal:\n df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)\n df[fac]=df[fac].fillna(df['fac_mean'])\n del df['fac_mean']\n\n #5、因子异常值处理 \n # 固定比例法\n #for fac in factors_need_cal:\n # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)\n #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)\n # 均值标准差法\n #print(df[factors_need_cal].head())\n #for fac in factors_need_cal:\n #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()\n #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()\n # MAD法\n #print(df[factors_need_cal].head())\n for fac in factors_need_cal:\n\n median = np.median(list(df[fac]))\n MAD = np.mean(abs(df[fac]) - median)\n df[fac][df[fac]>median+6*MAD] = median+6*MAD # 剔除偏离中位数6倍以上的数据\n df[fac][df[fac]<median-6*MAD] = median-6*MAD\n \n #计算行业哑变量\n dfTmp = df.copy() #copy一份用于计算行业哑变量\n for n in range(len(industry_List)): # 行业哑变量赋值\n dfTmp['industry_%d' % n] = 0\n dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1\n \n # 准备线性回归参数\n model0 = LinearRegression()\n X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵\n del X['industry_sw_level1_0'] #删去中性化因子中的行业列\n \n #需要计算中性化的因子列表\n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n # 逐个特征进行行业市值中性化\n from sklearn.preprocessing import scale\n for fac in factors_need_cal:\n\n y = df[fac] #获取需要中性化的因子暴露值\n model0.fit(X, y)\n df[fac] = y-model0.predict(X) # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差\n #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理\n df[fac] = scale(df[fac])\n #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果\n #多重共线性分析\n from sklearn.decomposition import PCA\n import matplotlib.pyplot as plt\n pca = PCA(n_components=len(factors_need_cal))\n pca.fit(df[factors_need_cal])\n var= pca.explained_variance_ratio_ #计算每个因子解释程度\n var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)#累计解释程度\n plt.plot(var1)\n print(var)\n data_1 = DataSource.write_df(df)\n print(data_1)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-565"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-565"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-565"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-565","OutputType":null},{"Name":"data_2","NodeId":"-565","OutputType":null},{"Name":"data_3","NodeId":"-565","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":20,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2040","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"from sklearn.linear_model import LinearRegression\n# 行业、市值中性模块\ndef bigquant_run(input_1, input_2, input_3): \n \n # 1、获取特征数值\n df = input_1.read_df() \n df = df[df['industry_sw_level1_0']>0] # 去除没有查出行业的股票\n industry_List = df['industry_sw_level1_0'].unique() # 所有行业代码\n factors_all=df.columns #获取因子列表\n\n # 2、获取用来中性化的因子列表 通常是行业和市值\n factor0 = input_2.read_pickle() \n\n #3、需要做清洗的因子列表 \n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n #4、缺失值处理 按中信一级行业相同个股的平均值填充\n for fac in factors_need_cal:\n df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)\n df[fac]=df[fac].fillna(df['fac_mean'])\n del df['fac_mean']\n\n #5、因子异常值处理 \n # 固定比例法\n #for fac in factors_need_cal:\n # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)\n #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)\n # 均值标准差法\n #print(df[factors_need_cal].head())\n #for fac in factors_need_cal:\n #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()\n #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()\n # MAD法\n #print(df[factors_need_cal].head())\n for fac in factors_need_cal:\n\n median = np.median(list(df[fac]))\n MAD = np.mean(abs(df[fac]) - median)\n df[fac][df[fac]>median+6*MAD] = median+6*MAD # 剔除偏离中位数6倍以上的数据\n df[fac][df[fac]<median-6*MAD] = median-6*MAD\n \n #计算行业哑变量\n dfTmp = df.copy() #copy一份用于计算行业哑变量\n for n in range(len(industry_List)): # 行业哑变量赋值\n dfTmp['industry_%d' % n] = 0\n dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1\n \n # 准备线性回归参数\n model0 = LinearRegression()\n X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵\n del X['industry_sw_level1_0'] #删去中性化因子中的行业列\n \n #需要计算中性化的因子列表\n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n # 逐个特征进行行业市值中性化\n from sklearn.preprocessing import scale\n for fac in factors_need_cal:\n\n y = df[fac] #获取需要中性化的因子暴露值\n model0.fit(X, y)\n df[fac] = y-model0.predict(X) # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差\n #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理\n df[fac] = scale(df[fac])\n #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果\n data_1 = DataSource.write_df(df)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n ","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-2040"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-2040"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-2040"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-2040","OutputType":null},{"Name":"data_2","NodeId":"-2040","OutputType":null},{"Name":"data_3","NodeId":"-2040","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2048","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-2048"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2048","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2051","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-2051"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2051","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":13,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-805","ModuleId":"BigQuantSpace.preprocessing_standard_scaler.preprocessing_standard_scaler-v1","ModuleParameters":[{"Name":"with_mean","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"with_std","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"predict_ds","NodeId":"-805"}],"OutputPortsInternal":[{"Name":"output_model","NodeId":"-805","OutputType":null},{"Name":"transform_trainds","NodeId":"-805","OutputType":null},{"Name":"transform_predictds","NodeId":"-805","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-764","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2016-11-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2017-04-30","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-764"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-764","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-421","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"date\npred_label","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-421"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-421","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":26,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-557","ModuleId":"BigQuantSpace.sort.sort-v4","ModuleParameters":[{"Name":"sort_by","Value":"pred_label","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"group_by","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"keep_columns","Value":"--","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"ascending","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_ds","NodeId":"-557"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"sort_by_ds","NodeId":"-557"}],"OutputPortsInternal":[{"Name":"sorted_data","NodeId":"-557","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":17,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-1874","ModuleId":"BigQuantSpace.random_forest_classifier.random_forest_classifier-v1","ModuleParameters":[{"Name":"iterations","Value":10,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"feature_fraction","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_depth","Value":30,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"min_samples_per_leaf","Value":200,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"key_cols","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"workers","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"other_train_parameters","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"predict_ds","NodeId":"-1874"}],"OutputPortsInternal":[{"Name":"output_model","NodeId":"-1874","OutputType":null},{"Name":"predictions","NodeId":"-1874","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-409","ModuleId":"BigQuantSpace.trade.trade-v4","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"initialize","Value":"# 回测引擎:初始化函数,只执行一次\ndef bigquant_run(context):\n # 加载预测数据\n context.ranker_prediction = context.options['data'].read_df()\n\n # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数\n context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))\n # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)\n # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只\n stock_count = 30\n # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]\n context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])\n # 设置每只股票占用的最大资金比例\n context.max_cash_per_instrument = 0.2\n context.options['hold_days'] = 5\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"handle_data","Value":"# 回测引擎:每日数据处理函数,每天执行一次\ndef bigquant_run(context, data):\n #------------------------START:加入下面if的两行代码到之前到主函数的最前部分-------------------\n # 相隔几天(以5天举例)运行一下handle_data函数\n if context.trading_day_index % 5 != 0:\n return \n #------------------------END:加上这两句代码在主函数就能实现隔几天运行---------------------\n # 按日期过滤得到今日的预测数据\n ranker_prediction = context.ranker_prediction[\n context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]\n\n # 1. 资金分配\n # 平均持仓时间是hold_days,每日都将买入股票,每日预期使用 1/hold_days 的资金\n # 实际操作中,会存在一定的买入误差,所以在前hold_days天,等量使用资金;之后,尽量使用剩余资金(这里设置最多用等量的1.5倍)\n is_staging = context.trading_day_index < context.options['hold_days'] # 是否在建仓期间(前 hold_days 天)\n cash_avg = context.portfolio.portfolio_value / context.options['hold_days']\n cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)\n cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)\n positions = {e.symbol: p.amount * p.last_sale_price\n for e, p in context.perf_tracker.position_tracker.positions.items()}\n\n # 2. 生成卖出订单:hold_days天之后才开始卖出;对持仓的股票,按机器学习算法预测的排序末位淘汰\n if not is_staging and cash_for_sell > 0:\n equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}\n instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(\n lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))\n # print('rank order for sell %s' % instruments)\n for instrument in instruments:\n context.order_target(context.symbol(instrument), 0)\n cash_for_sell -= positions[instrument]\n if cash_for_sell <= 0:\n break\n\n # 3. 生成买入订单:按机器学习算法预测的排序,买入前面的stock_count只股票\n buy_cash_weights = context.stock_weights\n buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])\n max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument\n for i, instrument in enumerate(buy_instruments):\n cash = cash_for_buy * buy_cash_weights[i]\n if cash > max_cash_per_instrument - positions.get(instrument, 0):\n # 确保股票持仓量不会超过每次股票最大的占用资金量\n cash = max_cash_per_instrument - positions.get(instrument, 0)\n if cash > 0:\n price = data.current(context.symbol(instrument), 'price') # 最新价格\n stock_num = np.floor(cash/price/100)*100 # 向下取整\n context.order(context.symbol(instrument), stock_num) # 整百下单\n\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"prepare","Value":"# 回测引擎:准备数据,只执行一次\ndef bigquant_run(context):\n pass\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_trading_start","Value":"# 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。\ndef bigquant_run(context, data):\n pass\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"volume_limit","Value":0.025,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_buy","Value":"open","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_sell","Value":"close","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"capital_base","Value":1000000,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"auto_cancel_non_tradable_orders","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"data_frequency","Value":"daily","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"price_type","Value":"真实价格","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"product_type","Value":"股票","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"plot_charts","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"backtest_only","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"options_data","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"history_ds","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"benchmark_ds","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"trading_calendar","NodeId":"-409"}],"OutputPortsInternal":[{"Name":"raw_perf","NodeId":"-409","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":27,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-300","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":90,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-300"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-300"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-300","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":22,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-307","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":90,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-307"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-307"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-307","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-313","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"return_5\nreturn_10","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-313"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-313","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":24,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-5194","ModuleId":"BigQuantSpace.hyper_run.hyper_run-v1","ModuleParameters":[{"Name":"run","Value":"def bigquant_run(bq_graph, inputs):\n features =\"\"\"\nreturn_5\nreturn_10\n\"\"\"\n\n features=features.split()\n\n parameters_list = []\n for feature in features:\n tmp = []\n tmp.append(feature)\n tmp.append('industry_sw_level1_0')\n tmp.append('market_cap_float_0')\n parameters = {'m24.features':\"\\n\".join(tmp)}\n parameters_list.append({'parameters': parameters})\n \n def run(parameters):\n try:\n print(parameters)\n return g.run(parameters)\n except Exception as e:\n print('ERROR --------', e)\n return None\n \n results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)\n\n return results","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"run_now","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"bq_graph","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"bq_graph_port","NodeId":"-5194"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-5194"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-5194"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-5194"}],"OutputPortsInternal":[{"Name":"result","NodeId":"-5194","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":9,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='299,-232,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-15' Position='53,-131,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-53' Position='61,45,200,200'/><NodePosition Node='-346' Position='420,99,200,200'/><NodePosition Node='-360' Position='1255,12,200,200'/><NodePosition Node='-4385' Position='147,378,200,200'/><NodePosition Node='-149' Position='1205,272,200,200'/><NodePosition Node='-7016' Position='1400,-121,200,200'/><NodePosition Node='-308' Position='419,-1,200,200'/><NodePosition Node='-565' Position='120,290,200,200'/><NodePosition Node='-2040' Position='1239,190,200,200'/><NodePosition Node='-2048' Position='34,184,200,200'/><NodePosition Node='-2051' Position='1417,104,200,200'/><NodePosition Node='-805' Position='634,198,200,200'/><NodePosition Node='-764' Position='1067,-258,200,200'/><NodePosition Node='-421' Position='958,359,200,200'/><NodePosition Node='-557' Position='575,510,200,200'/><NodePosition Node='-1874' Position='574,339,200,200'/><NodePosition Node='-409' Position='951,502,200,200'/><NodePosition Node='-300' Position='406,-127,200,200'/><NodePosition Node='-307' Position='1408,-220,200,200'/><NodePosition Node='-313' Position='744,-182,200,200'/><NodePosition Node='-5194' Position='-51,-307,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
      In [8]:
      # 本代码由可视化策略环境自动生成 2020年3月25日 19:02
      # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
      
      
      from sklearn.linear_model import LinearRegression
      # 行业、市值中性模块
      def m20_run_bigquant_run(input_1, input_2, input_3):    
          
          # 1、获取特征数值
          df = input_1.read_df()   
          df = df[df['industry_sw_level1_0']>0]                # 去除没有查出行业的股票
          industry_List = df['industry_sw_level1_0'].unique()          # 所有行业代码
          factors_all=df.columns    #获取因子列表
      
          # 2、获取用来中性化的因子列表 通常是行业和市值
          factor0 = input_2.read_pickle()                         
      
          #3、需要做清洗的因子列表 
          factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
      
          #4、缺失值处理 按中信一级行业相同个股的平均值填充
          for fac in factors_need_cal:
              df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)
              df[fac]=df[fac].fillna(df['fac_mean'])
              del df['fac_mean']
      
          #5、因子异常值处理  
          #  固定比例法
          #for fac in factors_need_cal:
             # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)
              #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)
          #  均值标准差法
          #print(df[factors_need_cal].head())
          #for fac in factors_need_cal:
              #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()
              #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()
          #  MAD法
          #print(df[factors_need_cal].head())
          for fac in factors_need_cal:
      
              median = np.median(list(df[fac]))
              MAD = np.mean(abs(df[fac]) - median)
              df[fac][df[fac]>median+6*MAD] = median+6*MAD  # 剔除偏离中位数6倍以上的数据
              df[fac][df[fac]<median-6*MAD] = median-6*MAD
             
          #计算行业哑变量
          dfTmp = df.copy()  #copy一份用于计算行业哑变量
          for n in range(len(industry_List)):                         # 行业哑变量赋值
              dfTmp['industry_%d' % n] = 0
              dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1
                                
          # 准备线性回归参数
          model0 = LinearRegression()
          X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵
          del X['industry_sw_level1_0'] #删去中性化因子中的行业列
          
          #需要计算中性化的因子列表
          factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
      
          # 逐个特征进行行业市值中性化
          from sklearn.preprocessing import scale
          for fac in factors_need_cal:
      
                  y = df[fac] #获取需要中性化的因子暴露值
                  model0.fit(X, y)
                  df[fac] = y-model0.predict(X)  # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差
                  #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理
                  df[fac] = scale(df[fac])
                   #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果
          #多重共线性分析
          from sklearn.decomposition import PCA
          import matplotlib.pyplot as plt
          pca = PCA(n_components=len(factors_need_cal))
          pca.fit(df[factors_need_cal])
          var= pca.explained_variance_ratio_ #计算每个因子解释程度
          var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)#累计解释程度
          plt.plot(var1)
          print(var)
          data_1 = DataSource.write_df(df)
          print(data_1)
          return Outputs(data_1=data_1, data_2=None, data_3=None)
      
      # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
      def m20_post_run_bigquant_run(outputs):
          return outputs
      
      from sklearn.linear_model import LinearRegression
      # 行业、市值中性模块
      def m5_run_bigquant_run(input_1, input_2, input_3):    
          
          # 1、获取特征数值
          df = input_1.read_df()   
          df = df[df['industry_sw_level1_0']>0]                # 去除没有查出行业的股票
          industry_List = df['industry_sw_level1_0'].unique()          # 所有行业代码
          factors_all=df.columns    #获取因子列表
      
          # 2、获取用来中性化的因子列表 通常是行业和市值
          factor0 = input_2.read_pickle()                         
      
          #3、需要做清洗的因子列表 
          factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
      
          #4、缺失值处理 按中信一级行业相同个股的平均值填充
          for fac in factors_need_cal:
              df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)
              df[fac]=df[fac].fillna(df['fac_mean'])
              del df['fac_mean']
      
          #5、因子异常值处理  
          #  固定比例法
          #for fac in factors_need_cal:
             # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)
              #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)
          #  均值标准差法
          #print(df[factors_need_cal].head())
          #for fac in factors_need_cal:
              #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()
              #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()
          #  MAD法
          #print(df[factors_need_cal].head())
          for fac in factors_need_cal:
      
              median = np.median(list(df[fac]))
              MAD = np.mean(abs(df[fac]) - median)
              df[fac][df[fac]>median+6*MAD] = median+6*MAD  # 剔除偏离中位数6倍以上的数据
              df[fac][df[fac]<median-6*MAD] = median-6*MAD
                   
          #计算行业哑变量
          dfTmp = df.copy()  #copy一份用于计算行业哑变量
          for n in range(len(industry_List)):                         # 行业哑变量赋值
              dfTmp['industry_%d' % n] = 0
              dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1
                                
          # 准备线性回归参数
          model0 = LinearRegression()
          X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵
          del X['industry_sw_level1_0'] #删去中性化因子中的行业列
          
          #需要计算中性化的因子列表
          factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
      
          # 逐个特征进行行业市值中性化
          from sklearn.preprocessing import scale
          for fac in factors_need_cal:
      
                  y = df[fac] #获取需要中性化的因子暴露值
                  model0.fit(X, y)
                  df[fac] = y-model0.predict(X)  # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差
                  #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理
                  df[fac] = scale(df[fac])
                   #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果
          data_1 = DataSource.write_df(df)
          return Outputs(data_1=data_1, data_2=None, data_3=None)
          
      # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
      def m5_post_run_bigquant_run(outputs):
          return outputs
      
      # 回测引擎:初始化函数,只执行一次
      def m27_initialize_bigquant_run(context):
          # 加载预测数据
          context.ranker_prediction = context.options['data'].read_df()
      
          # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数
          context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
          # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)
          # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只
          stock_count = 30
          # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]
          context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])
          # 设置每只股票占用的最大资金比例
          context.max_cash_per_instrument = 0.2
          context.options['hold_days'] = 5
      
      # 回测引擎:每日数据处理函数,每天执行一次
      def m27_handle_data_bigquant_run(context, data):
          #------------------------START:加入下面if的两行代码到之前到主函数的最前部分-------------------
          # 相隔几天(以5天举例)运行一下handle_data函数
          if context.trading_day_index % 5 != 0:
              return 
          #------------------------END:加上这两句代码在主函数就能实现隔几天运行---------------------
          # 按日期过滤得到今日的预测数据
          ranker_prediction = context.ranker_prediction[
              context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]
      
          # 1. 资金分配
          # 平均持仓时间是hold_days,每日都将买入股票,每日预期使用 1/hold_days 的资金
          # 实际操作中,会存在一定的买入误差,所以在前hold_days天,等量使用资金;之后,尽量使用剩余资金(这里设置最多用等量的1.5倍)
          is_staging = context.trading_day_index < context.options['hold_days'] # 是否在建仓期间(前 hold_days 天)
          cash_avg = context.portfolio.portfolio_value / context.options['hold_days']
          cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)
          cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)
          positions = {e.symbol: p.amount * p.last_sale_price
                       for e, p in context.perf_tracker.position_tracker.positions.items()}
      
          # 2. 生成卖出订单:hold_days天之后才开始卖出;对持仓的股票,按机器学习算法预测的排序末位淘汰
          if not is_staging and cash_for_sell > 0:
              equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}
              instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(
                      lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))
              # print('rank order for sell %s' % instruments)
              for instrument in instruments:
                  context.order_target(context.symbol(instrument), 0)
                  cash_for_sell -= positions[instrument]
                  if cash_for_sell <= 0:
                      break
      
          # 3. 生成买入订单:按机器学习算法预测的排序,买入前面的stock_count只股票
          buy_cash_weights = context.stock_weights
          buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])
          max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument
          for i, instrument in enumerate(buy_instruments):
              cash = cash_for_buy * buy_cash_weights[i]
              if cash > max_cash_per_instrument - positions.get(instrument, 0):
                  # 确保股票持仓量不会超过每次股票最大的占用资金量
                  cash = max_cash_per_instrument - positions.get(instrument, 0)
              if cash > 0:
                  price = data.current(context.symbol(instrument), 'price')  # 最新价格
                  stock_num = np.floor(cash/price/100)*100  # 向下取整
                  context.order(context.symbol(instrument), stock_num) # 整百下单
      
      
      # 回测引擎:准备数据,只执行一次
      def m27_prepare_bigquant_run(context):
          pass
      
      # 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。
      def m27_before_trading_start_bigquant_run(context, data):
          pass
      
      
      g = T.Graph({
      
          'm1': 'M.instruments.v2',
          'm1.start_date': '2014-10-31',
          'm1.end_date': '2016-10-31',
          'm1.market': 'CN_STOCK_A',
          'm1.instrument_list': '',
          'm1.max_count': 0,
      
          'm2': 'M.advanced_auto_labeler.v2',
          'm2.instruments': T.Graph.OutputPort('m1.data'),
          'm2.label_expr': """# 计算收益:5日经波动率调整后收益率
      shift(close, -5) / shift(open, -1)/ std(shift(close, -7) / shift(open, -6),5)
      
      # 极值处理:用1%和99%分位的值做clip
      clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
      
      # 将分数映射到分类,这里使用50个分类
      all_wbins(label, 20)
      
      # 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
      where(shift(high, -1) == shift(low, -1), NaN, label)""",
          'm2.start_date': '',
          'm2.end_date': '',
          'm2.benchmark': '399102.ZIX',
          'm2.drop_na_label': True,
          'm2.cast_label_int': True,
      
          'm11': 'M.instruments.v2',
          'm11.start_date': '2016-11-01',
          'm11.end_date': '2017-04-30',
          'm11.market': 'CN_STOCK_A',
          'm11.instrument_list': '',
          'm11.max_count': 0,
      
          'm24': 'M.input_features.v1',
          'm24.features': """return_5
      return_10""",
      
          'm22': 'M.general_feature_extractor.v7',
          'm22.instruments': T.Graph.OutputPort('m1.data'),
          'm22.features': T.Graph.OutputPort('m24.data'),
          'm22.start_date': '',
          'm22.end_date': '',
          'm22.before_start_days': 90,
      
          'm8': 'M.chinaa_stock_filter.v1',
          'm8.input_data': T.Graph.OutputPort('m22.data'),
          'm8.index_constituent_cond': ['中证800'],
          'm8.board_cond': ['全部'],
          'm8.industry_cond': ['全部'],
          'm8.st_cond': ['正常'],
          'm8.delist_cond': ['非退市'],
          'm8.output_left_data': False,
      
          'm10': 'M.general_feature_extractor.v7',
          'm10.instruments': T.Graph.OutputPort('m11.data'),
          'm10.features': T.Graph.OutputPort('m24.data'),
          'm10.start_date': '',
          'm10.end_date': '',
          'm10.before_start_days': 90,
      
          'm14': 'M.chinaa_stock_filter.v1',
          'm14.input_data': T.Graph.OutputPort('m10.data'),
          'm14.index_constituent_cond': ['中证800'],
          'm14.board_cond': ['全部'],
          'm14.industry_cond': ['全部'],
          'm14.st_cond': ['正常'],
          'm14.delist_cond': ['非退市'],
          'm14.output_left_data': False,
      
          'm16': 'M.derived_feature_extractor.v3',
          'm16.input_data': T.Graph.OutputPort('m8.data'),
          'm16.features': T.Graph.OutputPort('m24.data'),
          'm16.date_col': 'date',
          'm16.instrument_col': 'instrument',
          'm16.drop_na': False,
          'm16.remove_extra_columns': True,
      
          'm7': 'M.join.v3',
          'm7.data1': T.Graph.OutputPort('m2.data'),
          'm7.data2': T.Graph.OutputPort('m16.data'),
          'm7.on': 'date,instrument',
          'm7.how': 'inner',
          'm7.sort': False,
      
          'm6': 'M.dropnan.v1',
          'm6.input_data': T.Graph.OutputPort('m7.data'),
      
          'm20': 'M.cached.v3',
          'm20.input_1': T.Graph.OutputPort('m6.data'),
          'm20.run': m20_run_bigquant_run,
          'm20.post_run': m20_post_run_bigquant_run,
          'm20.input_ports': '',
          'm20.params': '{}',
          'm20.output_ports': '',
      
          'm12': 'M.dropnan.v1',
          'm12.input_data': T.Graph.OutputPort('m20.data_1'),
      
          'm18': 'M.derived_feature_extractor.v3',
          'm18.input_data': T.Graph.OutputPort('m14.data'),
          'm18.features': T.Graph.OutputPort('m24.data'),
          'm18.date_col': 'date',
          'm18.instrument_col': 'instrument',
          'm18.drop_na': False,
          'm18.remove_extra_columns': True,
      
          'm13': 'M.dropnan.v1',
          'm13.input_data': T.Graph.OutputPort('m18.data'),
      
          'm5': 'M.cached.v3',
          'm5.input_1': T.Graph.OutputPort('m13.data'),
          'm5.run': m5_run_bigquant_run,
          'm5.post_run': m5_post_run_bigquant_run,
          'm5.input_ports': '',
          'm5.params': '{}',
          'm5.output_ports': '',
      
          'm23': 'M.dropnan.v1',
          'm23.input_data': T.Graph.OutputPort('m5.data_1'),
      
          'm4': 'M.preprocessing_standard_scaler.v1',
          'm4.training_ds': T.Graph.OutputPort('m12.data'),
          'm4.predict_ds': T.Graph.OutputPort('m23.data'),
          'm4.with_mean': True,
          'm4.with_std': True,
      
          'm3': 'M.random_forest_classifier.v1',
          'm3.training_ds': T.Graph.OutputPort('m4.transform_trainds'),
          'm3.features': T.Graph.OutputPort('m24.data'),
          'm3.predict_ds': T.Graph.OutputPort('m4.transform_predictds'),
          'm3.iterations': 10,
          'm3.feature_fraction': 1,
          'm3.max_depth': 30,
          'm3.min_samples_per_leaf': 200,
          'm3.key_cols': 'date,instrument',
          'm3.workers': 1,
          'm3.other_train_parameters': {},
      
          'm26': 'M.input_features.v1',
          'm26.features_ds': T.Graph.OutputPort('m24.data'),
          'm26.features': """date
      pred_label""",
      
          'm17': 'M.sort.v4',
          'm17.input_ds': T.Graph.OutputPort('m3.predictions'),
          'm17.sort_by_ds': T.Graph.OutputPort('m26.data'),
          'm17.sort_by': 'pred_label',
          'm17.group_by': 'date',
          'm17.keep_columns': '--',
          'm17.ascending': False,
      
          'm27': 'M.trade.v4',
          'm27.instruments': T.Graph.OutputPort('m11.data'),
          'm27.options_data': T.Graph.OutputPort('m17.sorted_data'),
          'm27.start_date': '',
          'm27.end_date': '',
          'm27.initialize': m27_initialize_bigquant_run,
          'm27.handle_data': m27_handle_data_bigquant_run,
          'm27.prepare': m27_prepare_bigquant_run,
          'm27.before_trading_start': m27_before_trading_start_bigquant_run,
          'm27.volume_limit': 0.025,
          'm27.order_price_field_buy': 'open',
          'm27.order_price_field_sell': 'close',
          'm27.capital_base': 1000000,
          'm27.auto_cancel_non_tradable_orders': True,
          'm27.data_frequency': 'daily',
          'm27.price_type': '真实价格',
          'm27.product_type': '股票',
          'm27.plot_charts': True,
          'm27.backtest_only': False,
          'm27.benchmark': '',
      })
      
      # g.run({})
      
      
      def m9_run_bigquant_run(bq_graph, inputs):
          features ="""
      return_5
      return_10
      """
      
          features=features.split()
      
          parameters_list = []
          for feature in features:
              tmp = []
              tmp.append(feature)
              tmp.append('industry_sw_level1_0')
              tmp.append('market_cap_float_0')
              parameters = {'m24.features':"\n".join(tmp)}
              parameters_list.append({'parameters': parameters})
          
          def run(parameters):
              try:
                  print(parameters)
                  return g.run(parameters)
              except Exception as e:
                  print('ERROR --------', e)
                  return None
       
          results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)
      
          return results
      
      m9 = M.hyper_run.v1(
          run=m9_run_bigquant_run,
          run_now=True,
          bq_graph=g
      )
      
      [2020-03-25 19:01:31.306113] INFO: bigquant: T.parallel_map  开始并行运算..
      [Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
      {'m24.features': 'return_5\nindustry_sw_level1_0\nmarket_cap_float_0'}
      

      自定义Python模块(cached)使用错误,你可以:

      1.一键查看文档

      2.一键搜索答案

      ERROR -------- 'NoneType' object has no attribute 'read_pickle'
      [Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   31.0s remaining:    0.0s
      {'m24.features': 'return_10\nindustry_sw_level1_0\nmarket_cap_float_0'}
      
      ---------------------------------------------------------------------------
      KeyboardInterrupt                         Traceback (most recent call last)
      <ipython-input-8-a301e7563d94> in <module>()
          440     run=m9_run_bigquant_run,
          441     run_now=True,
      --> 442     bq_graph=g
          443 )
      
      <ipython-input-8-a301e7563d94> in m9_run_bigquant_run(bq_graph, inputs)
          433             return None
          434 
      --> 435     results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)
          436 
          437     return results
      
      <ipython-input-8-a301e7563d94> in run(parameters)
          428         try:
          429             print(parameters)
      --> 430             return g.run(parameters)
          431         except Exception as e:
          432             print('ERROR --------', e)
      
      KeyboardInterrupt: 
      In [ ]:
      #读取回测结果和特征名,然后追加写入到创建好的文档里 
      for k in range(len(m9.result)):
          res = m9.result[k]['m27'].read_raw_perf()[['algorithm_period_return','alpha','beta','max_drawdown','sharpe']]
          res_tmp = pd.DataFrame(res.iloc[-1]).T
          feature = m9.result[k]['m24'].feature_list      
          feature = feature.read_pickle()
          feature = feature[0]       
          res_tmp['feature'] = str(feature)
          
          res_tmp = res_tmp.reset_index(drop=False)
          res_tmp.to_csv('因子批量测试结果.csv',header=False,mode='a')   
          
      print('csv追加写入结束')
      
      In [ ]:
      #超参搜索代码
      #print(m17.result.best_params_)
      #print(m17.result.best_score_)
      #自定义运行-并行计算代码
      
      # 查看所有并行任务的运算结果
      #m9.result
      # 查看第一个并行任务的运算结果
      #m9.result[0]
      # 查看第一个并行任务的预测结果前5条记录
      #m9.result[0].predictions.read_df().head()
      # 查看第一个并行任务的运算结果中m19回测模块的回测曲线
      #m9.result[0]['m19'].display()
      

      (a1641181638) #5

      def bigquant_run(bq_graph, inputs):
      features ="""
      return_5
      return_10
      “”"

      features=features.split()
      
      parameters_list = []
      for feature in features:
          tmp = []
          tmp.append(feature)
          tmp.append('industry_sw_level1_0')
          tmp.append('market_cap_float_0')
          parameters = {'m24.features':"\n".join(tmp)}
          parameters_list.append({'parameters': parameters})
      
      def run(parameters):
          try:
              print(parameters)
              return g.run(parameters)
          except Exception as e:
              print('ERROR --------', e)
              return None
      
      results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)
      
      return results

      (达达) #6

      有什么问题么?


      (a1641181638) #7

      哈哈哈没,改了后可以了,想把这个设置成解决方案作备忘录。忘记点“设置解决方案”按钮了