Test KenmenRider Zero-One

策略分享
标签: #<Tag:0x00007f73db816c20>

(a1641181638) #1

STEP1单因子测试(并行次数 = 滚动次数 * 因子数 )

克隆策略
In [5]:
import time

#每次需要初始化用完后就直接框起来,免得不小心点了全部运行白跑一趟

#初始化创建/清空一个csv
import pandas as pd
print("初始化csv开始")
result =pd.DataFrame(columns=('date','algorithm_period_return','alpha','beta','max_drawdown','sharpe','feature'))
result.to_csv('因子批量测试结果.csv',header=True,mode='w')     
"""格式化成2016-03-20 11:45:39形式"""
print (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
print("初始化csv完成")
初始化csv开始
2020-03-30 08:44:36
初始化csv完成

    {"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"-300:instruments","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data1","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:data"},{"DestinationInputPortId":"-2048:input_data","SourceOutputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data"},{"DestinationInputPortId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data2","SourceOutputPortId":"-346:data"},{"DestinationInputPortId":"-2051:input_data","SourceOutputPortId":"-360:data"},{"DestinationInputPortId":"-805:training_ds","SourceOutputPortId":"-4385:data"},{"DestinationInputPortId":"-805:predict_ds","SourceOutputPortId":"-149:data"},{"DestinationInputPortId":"-360:input_data","SourceOutputPortId":"-7016:data"},{"DestinationInputPortId":"-346:input_data","SourceOutputPortId":"-308:data"},{"DestinationInputPortId":"-4385:input_data","SourceOutputPortId":"-565:data_1"},{"DestinationInputPortId":"-149:input_data","SourceOutputPortId":"-2040:data_1"},{"DestinationInputPortId":"-565:input_1","SourceOutputPortId":"-2048:data"},{"DestinationInputPortId":"-2040:input_1","SourceOutputPortId":"-2051:data"},{"DestinationInputPortId":"-1874:training_ds","SourceOutputPortId":"-805:transform_trainds"},{"DestinationInputPortId":"-1874:predict_ds","SourceOutputPortId":"-805:transform_predictds"},{"DestinationInputPortId":"-409:instruments","SourceOutputPortId":"-764:data"},{"DestinationInputPortId":"-307:instruments","SourceOutputPortId":"-764:data"},{"DestinationInputPortId":"-557:sort_by_ds","SourceOutputPortId":"-421:data"},{"DestinationInputPortId":"-409:options_data","SourceOutputPortId":"-557:sorted_data"},{"DestinationInputPortId":"-557:input_ds","SourceOutputPortId":"-1874:predictions"},{"DestinationInputPortId":"-308:input_data","SourceOutputPortId":"-300:data"},{"DestinationInputPortId":"-7016:input_data","SourceOutputPortId":"-307:data"},{"DestinationInputPortId":"-300:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-307:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-346:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-360:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-1874:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-805:features","SourceOutputPortId":"-313:data"},{"DestinationInputPortId":"-565:input_2","SourceOutputPortId":"-292:data"},{"DestinationInputPortId":"-2040:input_2","SourceOutputPortId":"-292:data"}],"ModuleNodes":[{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2018-10-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2019-10-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","ModuleId":"BigQuantSpace.advanced_auto_labeler.advanced_auto_labeler-v2","ModuleParameters":[{"Name":"label_expr","Value":"# 计算收益:5日经波动率调整后收益率\nshift(close, -5) / shift(open, -1)/ std(shift(close, -7) / shift(open, -6),5)\n\n# 极值处理:用1%和99%分位的值做clip\nclip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))\n\n# 将分数映射到分类,这里使用50个分类\nall_wbins(label, 20)\n\n# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)\nwhere(shift(high, -1) == shift(low, -1), NaN, label)","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"399102.ZIX","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na_label","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"cast_label_int","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53","ModuleId":"BigQuantSpace.join.join-v3","ModuleParameters":[{"Name":"on","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"how","Value":"inner","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data1","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data2","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"}],"OutputPortsInternal":[{"Name":"data","NodeId":"287d2cb0-f53c-4101-bdf8-104b137c8601-53","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":7,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-346","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-346"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-346"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-346","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":16,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-360","ModuleId":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","ModuleParameters":[{"Name":"date_col","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"drop_na","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"remove_extra_columns","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"user_functions","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-360"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-360"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-360","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":18,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-4385","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-4385"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-4385","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":12,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-149","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-149"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-149","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":23,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-7016","ModuleId":"BigQuantSpace.chinaa_stock_filter.chinaa_stock_filter-v1","ModuleParameters":[{"Name":"index_constituent_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22displayValue%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"board_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22displayValue%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"industry_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22displayValue%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22displayValue%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22displayValue%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22displayValue%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22displayValue%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22displayValue%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22displayValue%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22displayValue%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22displayValue%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22displayValue%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22displayValue%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22displayValue%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22displayValue%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22displayValue%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%87%87%E6%8E%98%22%2C%22displayValue%22%3A%22%E9%87%87%E6%8E%98%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%92%A2%E9%93%81%22%2C%22displayValue%22%3A%22%E9%92%A2%E9%93%81%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22displayValue%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22displayValue%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"st_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22*ST%22%2C%22displayValue%22%3A%22*ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22displayValue%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22ST%22%2C%22displayValue%22%3A%22ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22displayValue%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"delist_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%80%80%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-7016"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-7016","OutputType":null},{"Name":"left_data","NodeId":"-7016","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":14,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-308","ModuleId":"BigQuantSpace.chinaa_stock_filter.chinaa_stock_filter-v1","ModuleParameters":[{"Name":"index_constituent_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%8150%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22displayValue%22%3A%22%E6%B2%AA%E6%B7%B1300%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81500%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81800%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81180%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22displayValue%22%3A%22%E4%B8%AD%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81100%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"board_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E4%B8%8A%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22displayValue%22%3A%22%E6%B7%B1%E8%AF%81%E4%B8%BB%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22displayValue%22%3A%22%E5%88%9B%E4%B8%9A%E6%9D%BF%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"industry_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Atrue%7D%2C%7B%22value%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22displayValue%22%3A%22%E4%BA%A4%E9%80%9A%E8%BF%90%E8%BE%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%91%E9%97%B2%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22displayValue%22%3A%22%E4%BC%A0%E5%AA%92%2F%E4%BF%A1%E6%81%AF%E6%9C%8D%E5%8A%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22displayValue%22%3A%22%E5%85%AC%E7%94%A8%E4%BA%8B%E4%B8%9A%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22displayValue%22%3A%22%E5%86%9C%E6%9E%97%E7%89%A7%E6%B8%94%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%8C%96%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22displayValue%22%3A%22%E5%8C%BB%E8%8D%AF%E7%94%9F%E7%89%A9%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22displayValue%22%3A%22%E5%95%86%E4%B8%9A%E8%B4%B8%E6%98%93%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22displayValue%22%3A%22%E5%9B%BD%E9%98%B2%E5%86%9B%E5%B7%A5%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22displayValue%22%3A%22%E5%AE%B6%E7%94%A8%E7%94%B5%E5%99%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E6%9D%90%E6%96%99%2F%E5%BB%BA%E7%AD%91%E5%BB%BA%E6%9D%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22displayValue%22%3A%22%E5%BB%BA%E7%AD%91%E8%A3%85%E9%A5%B0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22displayValue%22%3A%22%E6%88%BF%E5%9C%B0%E4%BA%A7%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22displayValue%22%3A%22%E6%9C%89%E8%89%B2%E9%87%91%E5%B1%9E%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%9C%BA%E6%A2%B0%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E6%B1%BD%E8%BD%A6%2F%E4%BA%A4%E8%BF%90%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22displayValue%22%3A%22%E7%94%B5%E5%AD%90%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22displayValue%22%3A%22%E7%94%B5%E6%B0%94%E8%AE%BE%E5%A4%87%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22displayValue%22%3A%22%E7%BA%BA%E7%BB%87%E6%9C%8D%E8%A3%85%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22displayValue%22%3A%22%E7%BB%BC%E5%90%88%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22displayValue%22%3A%22%E8%AE%A1%E7%AE%97%E6%9C%BA%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22displayValue%22%3A%22%E8%BD%BB%E5%B7%A5%E5%88%B6%E9%80%A0%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22displayValue%22%3A%22%E9%80%9A%E4%BF%A1%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%87%87%E6%8E%98%22%2C%22displayValue%22%3A%22%E9%87%87%E6%8E%98%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%92%A2%E9%93%81%22%2C%22displayValue%22%3A%22%E9%92%A2%E9%93%81%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22displayValue%22%3A%22%E9%93%B6%E8%A1%8C%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%93%B6%E9%87%91%E8%9E%8D%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22displayValue%22%3A%22%E9%A3%9F%E5%93%81%E9%A5%AE%E6%96%99%22%2C%22selected%22%3Afalse%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"st_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22*ST%22%2C%22displayValue%22%3A%22*ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22displayValue%22%3A%22%E6%9A%82%E5%81%9C%E4%B8%8A%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22ST%22%2C%22displayValue%22%3A%22ST%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22displayValue%22%3A%22%E6%AD%A3%E5%B8%B8%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"delist_cond","Value":"%7B%22enumItems%22%3A%5B%7B%22value%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22displayValue%22%3A%22%E5%85%A8%E9%83%A8%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%80%80%E5%B8%82%22%2C%22selected%22%3Afalse%7D%2C%7B%22value%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22displayValue%22%3A%22%E9%9D%9E%E9%80%80%E5%B8%82%22%2C%22selected%22%3Atrue%7D%5D%7D","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_left_data","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-308"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-308","OutputType":null},{"Name":"left_data","NodeId":"-308","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":8,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-565","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"from sklearn.linear_model import LinearRegression\n# 行业、市值中性模块\ndef bigquant_run(input_1, input_2, input_3): \n \n # 1、获取特征数值\n df = input_1.read_df() \n df = df[df['industry_sw_level1_0']>0] # 去除没有查出行业的股票\n industry_List = df['industry_sw_level1_0'].unique() # 所有行业代码\n factors_all=df.columns #获取因子列表\n\n # 2、获取用来中性化的因子列表 通常是行业和市值\n factor0 = input_2.read_pickle() \n\n #3、需要做清洗的因子列表 \n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n #4、缺失值处理 按中信一级行业相同个股的平均值填充\n for fac in factors_need_cal:\n df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)\n df[fac]=df[fac].fillna(df['fac_mean'])\n del df['fac_mean']\n\n #5、因子异常值处理 \n # 固定比例法\n #for fac in factors_need_cal:\n # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)\n #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)\n # 均值标准差法\n #print(df[factors_need_cal].head())\n #for fac in factors_need_cal:\n #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()\n #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()\n # MAD法\n #print(df[factors_need_cal].head())\n for fac in factors_need_cal:\n\n median = np.median(list(df[fac]))\n MAD = np.mean(abs(df[fac]) - median)\n df[fac][df[fac]>median+6*MAD] = median+6*MAD # 剔除偏离中位数6倍以上的数据\n df[fac][df[fac]<median-6*MAD] = median-6*MAD\n \n #计算行业哑变量\n dfTmp = df.copy() #copy一份用于计算行业哑变量\n for n in range(len(industry_List)): # 行业哑变量赋值\n dfTmp['industry_%d' % n] = 0\n dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1\n \n # 准备线性回归参数\n model0 = LinearRegression()\n X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵\n del X['industry_sw_level1_0'] #删去中性化因子中的行业列\n \n #需要计算中性化的因子列表\n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n # 逐个特征进行行业市值中性化\n from sklearn.preprocessing import scale\n for fac in factors_need_cal:\n\n y = df[fac] #获取需要中性化的因子暴露值\n model0.fit(X, y)\n df[fac] = y-model0.predict(X) # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差\n #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理\n df[fac] = scale(df[fac])\n #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果\n #多重共线性分析\n from sklearn.decomposition import PCA\n import matplotlib.pyplot as plt\n pca = PCA(n_components=len(factors_need_cal))\n pca.fit(df[factors_need_cal])\n var= pca.explained_variance_ratio_ #计算每个因子解释程度\n var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)#累计解释程度\n plt.plot(var1)\n data_1 = DataSource.write_df(df)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-565"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-565"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-565"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-565","OutputType":null},{"Name":"data_2","NodeId":"-565","OutputType":null},{"Name":"data_3","NodeId":"-565","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":20,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2040","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"from sklearn.linear_model import LinearRegression\n# 行业、市值中性模块\ndef bigquant_run(input_1, input_2, input_3): \n \n # 1、获取特征数值\n df = input_1.read_df() \n df = df[df['industry_sw_level1_0']>0] # 去除没有查出行业的股票\n industry_List = df['industry_sw_level1_0'].unique() # 所有行业代码\n factors_all=df.columns #获取因子列表\n\n # 2、获取用来中性化的因子列表 通常是行业和市值\n factor0 = input_2.read_pickle() \n\n #3、需要做清洗的因子列表 \n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n #4、缺失值处理 按中信一级行业相同个股的平均值填充\n for fac in factors_need_cal:\n df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)\n df[fac]=df[fac].fillna(df['fac_mean'])\n del df['fac_mean']\n\n #5、因子异常值处理 \n # 固定比例法\n #for fac in factors_need_cal:\n # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)\n #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)\n # 均值标准差法\n #print(df[factors_need_cal].head())\n #for fac in factors_need_cal:\n #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()\n #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()\n # MAD法\n #print(df[factors_need_cal].head())\n for fac in factors_need_cal:\n\n median = np.median(list(df[fac]))\n MAD = np.mean(abs(df[fac]) - median)\n df[fac][df[fac]>median+6*MAD] = median+6*MAD # 剔除偏离中位数6倍以上的数据\n df[fac][df[fac]<median-6*MAD] = median-6*MAD\n \n #计算行业哑变量\n dfTmp = df.copy() #copy一份用于计算行业哑变量\n for n in range(len(industry_List)): # 行业哑变量赋值\n dfTmp['industry_%d' % n] = 0\n dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1\n \n # 准备线性回归参数\n model0 = LinearRegression()\n X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵\n del X['industry_sw_level1_0'] #删去中性化因子中的行业列\n \n #需要计算中性化的因子列表\n factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']\n\n # 逐个特征进行行业市值中性化\n from sklearn.preprocessing import scale\n for fac in factors_need_cal:\n\n y = df[fac] #获取需要中性化的因子暴露值\n model0.fit(X, y)\n df[fac] = y-model0.predict(X) # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差\n #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理\n df[fac] = scale(df[fac])\n #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果\n data_1 = DataSource.write_df(df)\n return Outputs(data_1=data_1, data_2=None, data_3=None)\n ","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-2040"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-2040"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-2040"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-2040","OutputType":null},{"Name":"data_2","NodeId":"-2040","OutputType":null},{"Name":"data_3","NodeId":"-2040","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2048","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-2048"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2048","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":6,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2051","ModuleId":"BigQuantSpace.dropnan.dropnan-v1","ModuleParameters":[],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_data","NodeId":"-2051"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-2051","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":13,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-805","ModuleId":"BigQuantSpace.preprocessing_standard_scaler.preprocessing_standard_scaler-v1","ModuleParameters":[{"Name":"with_mean","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"with_std","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-805"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"predict_ds","NodeId":"-805"}],"OutputPortsInternal":[{"Name":"output_model","NodeId":"-805","OutputType":null},{"Name":"transform_trainds","NodeId":"-805","OutputType":null},{"Name":"transform_predictds","NodeId":"-805","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-764","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2019-10-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2019-11-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":"0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-764"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-764","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":11,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-421","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"date\npred_label","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-421"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-421","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":26,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-557","ModuleId":"BigQuantSpace.sort.sort-v4","ModuleParameters":[{"Name":"sort_by","Value":"pred_label","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"group_by","Value":"date","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"keep_columns","Value":"--","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"ascending","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_ds","NodeId":"-557"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"sort_by_ds","NodeId":"-557"}],"OutputPortsInternal":[{"Name":"sorted_data","NodeId":"-557","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":17,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-1874","ModuleId":"BigQuantSpace.random_forest_classifier.random_forest_classifier-v1","ModuleParameters":[{"Name":"iterations","Value":10,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"feature_fraction","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_depth","Value":30,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"min_samples_per_leaf","Value":200,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"key_cols","Value":"date,instrument","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"workers","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"other_train_parameters","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-1874"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"predict_ds","NodeId":"-1874"}],"OutputPortsInternal":[{"Name":"output_model","NodeId":"-1874","OutputType":null},{"Name":"predictions","NodeId":"-1874","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-409","ModuleId":"BigQuantSpace.trade.trade-v4","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"initialize","Value":"# 回测引擎:初始化函数,只执行一次\ndef bigquant_run(context):\n # 加载预测数据\n context.ranker_prediction = context.options['data'].read_df()\n\n # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数\n context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))\n # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)\n # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只\n stock_count = 30\n # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]\n context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])\n # 设置每只股票占用的最大资金比例\n context.max_cash_per_instrument = 0.2\n context.options['hold_days'] = 5\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"handle_data","Value":"# 回测引擎:每日数据处理函数,每天执行一次\ndef bigquant_run(context, data):\n #------------------------START:加入下面if的两行代码到之前到主函数的最前部分-------------------\n # 相隔几天(以5天举例)运行一下handle_data函数\n if context.trading_day_index % 5 != 0:\n return \n #------------------------END:加上这两句代码在主函数就能实现隔几天运行---------------------\n # 按日期过滤得到今日的预测数据\n ranker_prediction = context.ranker_prediction[\n context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]\n\n # 1. 资金分配\n # 平均持仓时间是hold_days,每日都将买入股票,每日预期使用 1/hold_days 的资金\n # 实际操作中,会存在一定的买入误差,所以在前hold_days天,等量使用资金;之后,尽量使用剩余资金(这里设置最多用等量的1.5倍)\n is_staging = context.trading_day_index < context.options['hold_days'] # 是否在建仓期间(前 hold_days 天)\n cash_avg = context.portfolio.portfolio_value / context.options['hold_days']\n cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)\n cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)\n positions = {e.symbol: p.amount * p.last_sale_price\n for e, p in context.perf_tracker.position_tracker.positions.items()}\n\n # 2. 生成卖出订单:hold_days天之后才开始卖出;对持仓的股票,按机器学习算法预测的排序末位淘汰\n if not is_staging and cash_for_sell > 0:\n equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}\n instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(\n lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))\n # print('rank order for sell %s' % instruments)\n for instrument in instruments:\n context.order_target(context.symbol(instrument), 0)\n cash_for_sell -= positions[instrument]\n if cash_for_sell <= 0:\n break\n\n # 3. 生成买入订单:按机器学习算法预测的排序,买入前面的stock_count只股票\n buy_cash_weights = context.stock_weights\n buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])\n max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument\n for i, instrument in enumerate(buy_instruments):\n cash = cash_for_buy * buy_cash_weights[i]\n if cash > max_cash_per_instrument - positions.get(instrument, 0):\n # 确保股票持仓量不会超过每次股票最大的占用资金量\n cash = max_cash_per_instrument - positions.get(instrument, 0)\n if cash > 0:\n price = data.current(context.symbol(instrument), 'price') # 最新价格\n stock_num = np.floor(cash/price/100)*100 # 向下取整\n context.order(context.symbol(instrument), stock_num) # 整百下单\n\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"prepare","Value":"# 回测引擎:准备数据,只执行一次\ndef bigquant_run(context):\n pass\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_trading_start","Value":"# 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。\ndef bigquant_run(context, data):\n pass\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"volume_limit","Value":0.025,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_buy","Value":"open","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"order_price_field_sell","Value":"close","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"capital_base","Value":1000000,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"auto_cancel_non_tradable_orders","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"data_frequency","Value":"daily","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"price_type","Value":"真实价格","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"product_type","Value":"股票","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"plot_charts","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"backtest_only","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"benchmark","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"options_data","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"history_ds","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"benchmark_ds","NodeId":"-409"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"trading_calendar","NodeId":"-409"}],"OutputPortsInternal":[{"Name":"raw_perf","NodeId":"-409","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":27,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-300","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":90,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-300"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-300"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-300","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":22,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-307","ModuleId":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","ModuleParameters":[{"Name":"start_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"before_start_days","Value":90,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"instruments","NodeId":"-307"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-307"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-307","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-313","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"return_15\nindustry_sw_level1_0\nmarket_cap_float_0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-313"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-313","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":24,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-292","ModuleId":"BigQuantSpace.input_features.input_features-v1","ModuleParameters":[{"Name":"features","Value":"industry_sw_level1_0\nmarket_cap_float_0","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features_ds","NodeId":"-292"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-292","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":9,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-546","ModuleId":"BigQuantSpace.hyper_run.hyper_run-v1","ModuleParameters":[{"Name":"run","Value":"#step1单因子测试自定义运行(没加日期自动滚动前)\ndef bigquant_run(bq_graph, inputs):\n date = '2019-11-01' #最后一期单因子测试训练集的end_date,即完整策略测试中训练集的end_date减去一个月\n goal_date = datetime.datetime.strptime(date,'%Y-%m-%d')\n times = 12 #指滚动的次数\n train_period = 360 #训练集时间\n test_period = 30 #测试集时间\n parameters_list = []\n #单因子测试的特征\n features =\"\"\"\n market_cap_0\n pb_lf_0\n pe_lyr_0\n pe_ttm_0\n ps_ttm_0\n rank_market_cap_0\n rank_market_cap_float_0\n rank_pb_lf_0\n rank_pe_lyr_0\n rank_pe_ttm_0\n rank_ps_ttm_0\n west_avgcps_ftm_0\n west_eps_ftm_0\n west_netprofit_ftm_0\n avg_turn_5\n rank_avg_turn_5\n rank_turn_5\n turn_5\n company_found_date_0\n in_csi100_0\n in_csi300_0\n in_csi500_0\n in_csi800_0\n in_sse180_0\n in_sse50_0\n in_szse100_0\n industry_sw_level2_0\n industry_sw_level3_0\n list_board_0\n list_days_0\n fs_account_payable_0\n fs_account_receivable_0\n fs_bps_0\n fs_capital_reserves_0\n fs_cash_equivalents_0\n fs_cash_ratio_0\n fs_common_equity_0\n fs_construction_in_process_0\n fs_current_assets_0\n fs_current_liabilities_0\n fs_deducted_profit_0\n fs_deducted_profit_ttm_0\n fs_eps_0\n fs_eps_yoy_0\n fs_eqy_belongto_parcomsh_0\n fs_financial_expenses_0\n fs_fixed_assets_0\n fs_fixed_assets_disp_0\n fs_free_cash_flow_0\n fs_general_expenses_0\n fs_gross_profit_margin_0\n fs_gross_profit_margin_ttm_0\n fs_gross_revenues_0\n fs_income_tax_0\n fs_net_cash_flow_0\n fs_net_cash_flow_ttm_0\n fs_net_income_0\n fs_net_profit_0\n fs_net_profit_margin_0\n fs_net_profit_margin_ttm_0\n fs_net_profit_qoq_0\n fs_net_profit_ttm_0\n fs_net_profit_yoy_0\n fs_non_current_assets_0\n fs_non_current_liabilities_0\n fs_operating_profit_0\n fs_operating_revenue_0\n fs_operating_revenue_qoq_0\n fs_operating_revenue_ttm_0\n fs_operating_revenue_yoy_0\n fs_paicl_up_capital_0\n fs_publish_date_0\n fs_quarter_index_0\n fs_quarter_year_0\n fs_roa_0\n fs_roa_ttm_0\n fs_roe_0\n fs_roe_ttm_0\n fs_selling_expenses_0\n fs_surplus_reserves_0\n fs_total_equity_0\n fs_total_liability_0\n fs_total_operating_costs_0\n fs_total_profit_0\n fs_undistributed_profit_0\n rank_fs_bps_0\n rank_fs_cash_ratio_0\n rank_fs_eps_0\n rank_fs_eps_yoy_0\n rank_fs_net_profit_qoq_0\n rank_fs_net_profit_yoy_0\n rank_fs_operating_revenue_qoq_0\n rank_fs_operating_revenue_yoy_0\n rank_fs_roa_0\n rank_fs_roa_ttm_0\n rank_fs_roe_0\n rank_fs_roe_ttm_0\n ta_ad_0\n ta_adx_14_0\n ta_adx_28_0\n ta_aroon_down_14_0\n ta_aroon_down_28_0\n ta_aroon_up_14_0\n ta_aroon_up_28_0\n ta_aroonosc_14_0\n ta_aroonosc_28_0\n ta_atr_14_0\n ta_atr_28_0\n ta_bbands_lowerband_14_0\n ta_bbands_lowerband_28_0\n ta_bbands_middleband_14_0\n ta_bbands_middleband_28_0\n ta_bbands_upperband_14_0\n ta_bbands_upperband_28_0\n ta_cci_14_0\n ta_cci_28_0\n ta_ema_5_0\n ta_ema_10_0\n ta_ema_20_0\n ta_ema_30_0\n ta_ema_60_0\n ta_macd_macd_12_26_9_0\n ta_macd_macdhist_12_26_9_0\n ta_macd_macdsignal_12_26_9_0\n ta_mfi_14_0\n ta_mfi_28_0\n ta_mom_10_0\n ta_mom_20_0\n ta_mom_30_0\n ta_mom_60_0\n ta_obv_0\n ta_rsi_14_0\n ta_rsi_28_0\n ta_sar_0\n ta_sma_5_0\n ta_sma_10_0\n ta_sma_20_0\n ta_sma_30_0\n ta_sma_60_0\n ta_stoch_slowd_5_3_0_3_0_0\n ta_stoch_slowk_5_3_0_3_0_0\n ta_trix_14_0\n ta_trix_28_0\n ta_willr_14_0\n ta_willr_28_0\n ta_wma_5_0\n ta_wma_10_0\n ta_wma_20_0\n ta_wma_30_0\n ta_wma_60_0\n adjust_factor_1\n amount_1\n avg_amount_1\n close_1\n daily_return_1\n deal_number_1\n high_1\n low_1\n open_1\n price_limit_status_1\n rank_amount_1\n rank_avg_amount_1\n rank_return_1\n return_1\n volume_1\n rank_sh_holder_avg_pct_0\n rank_sh_holder_avg_pct_3m_chng_0\n rank_sh_holder_avg_pct_6m_chng_0\n rank_sh_holder_num_0\n sh_holder_avg_pct_0\n sh_holder_avg_pct_3m_chng_0\n sh_holder_avg_pct_6m_chng_0\n sh_holder_num_0\n rank_swing_volatility_5_0\n rank_volatility_5_0\n swing_volatility_5_0\n volatility_5_0\n beta_csi100_5_0\n beta_csi300_5_0\n beta_csi500_5_0\n beta_csi800_5_0\n beta_gem_5_0\n beta_industry_5_0\n beta_sse180_5_0\n beta_sse50_5_0\n beta_szzs_5_0\n rank_beta_csi100_5_0\n rank_beta_csi300_5_0\n rank_beta_csi500_5_0\n rank_beta_csi800_5_0\n rank_beta_gem_5_0\n rank_beta_industry_5_0\n rank_beta_sse180_5_0\n rank_beta_sse50_5_0\n rank_beta_szzs_5_0\n avg_mf_net_amount_1\n mf_net_amount_1\n mf_net_amount_l_0\n mf_net_amount_m_0\n mf_net_amount_main_0\n mf_net_amount_s_0\n mf_net_amount_xl_0\n mf_net_pct_l_0\n mf_net_pct_m_0\n mf_net_pct_main_0\n mf_net_pct_s_0\n mf_net_pct_xl_0\n rank_avg_mf_net_amount_1\n \"\"\"\n features=features.split() \n #===================================================================\n for i in range(times): \n #parameters = {}\n #训练集起点\n train_start_date = goal_date - datetime.timedelta(days=train_period) \n train_start_date = train_start_date.strftime(\"%Y-%m-%d\")\n #训练集终点,测试集起点\n train_end_date = goal_date.strftime(\"%Y-%m-%d\")\n test_start_date = train_end_date\n #测试集终点\n test_end_date = goal_date + datetime.timedelta(days = test_period)\n test_end_date = test_end_date.strftime(\"%Y-%m-%d\")\n #往前一个周期\n goal_date = goal_date - datetime.timedelta(days=test_period)\n #开始加入不同特征,并行次数即len(parameters_list)应等于[不同因子数 * 滚动次数]\n for feature in features:\n parameters = {}\n tmp = []\n tmp.append(feature)\n tmp.append('industry_sw_level1_0')\n tmp.append('market_cap_float_0') \n parameters['m24.features'] = \"\\n\".join(tmp)\n #将每期日期加入两个模块的参数\n parameters['m1.start_date'] = train_start_date\n parameters['m1.end_date'] = train_end_date\n parameters['m11.start_date'] = test_start_date\n parameters['m11.end_date'] = test_end_date\n parameters_list.append({'parameters': parameters})\n #=================================================================== \n def run(parameters):\n try:\n #print(parameters)\n return g.run(parameters)\n except Exception as e:\n print('ERROR --------', e)\n return None\n \n results = T.parallel_map(run, parameters_list, max_workers=2, remote_run=True, silent=True)\n\n return results","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"run_now","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"bq_graph","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"bq_graph_port","NodeId":"-546"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-546"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-546"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-546"}],"OutputPortsInternal":[{"Name":"result","NodeId":"-546","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":15,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='299,-232,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-15' Position='53,-131,200,200'/><NodePosition Node='287d2cb0-f53c-4101-bdf8-104b137c8601-53' Position='61,45,200,200'/><NodePosition Node='-346' Position='420,99,200,200'/><NodePosition Node='-360' Position='1255,12,200,200'/><NodePosition Node='-4385' Position='147,378,200,200'/><NodePosition Node='-149' Position='1205,272,200,200'/><NodePosition Node='-7016' Position='1400,-121,200,200'/><NodePosition Node='-308' Position='419,-1,200,200'/><NodePosition Node='-565' Position='116,288,200,200'/><NodePosition Node='-2040' Position='1239,190,200,200'/><NodePosition Node='-2048' Position='34,184,200,200'/><NodePosition Node='-2051' Position='1417,104,200,200'/><NodePosition Node='-805' Position='634,198,200,200'/><NodePosition Node='-764' Position='1067,-259,200,200'/><NodePosition Node='-421' Position='933,348,200,200'/><NodePosition Node='-557' Position='578,485,200,200'/><NodePosition Node='-1874' Position='574,339,200,200'/><NodePosition Node='-409' Position='951,502,200,200'/><NodePosition Node='-300' Position='406,-127,200,200'/><NodePosition Node='-307' Position='1408,-220,200,200'/><NodePosition Node='-313' Position='747,-184,200,200'/><NodePosition Node='-292' Position='752,9,200,200'/><NodePosition Node='-546' Position='-166.54771423339844,-273.80987548828125,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
    In [4]:
    # 本代码由可视化策略环境自动生成 2020年3月30日 08:56
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
    
    
    from sklearn.linear_model import LinearRegression
    # 行业、市值中性模块
    def m20_run_bigquant_run(input_1, input_2, input_3):    
        
        # 1、获取特征数值
        df = input_1.read_df()   
        df = df[df['industry_sw_level1_0']>0]                # 去除没有查出行业的股票
        industry_List = df['industry_sw_level1_0'].unique()          # 所有行业代码
        factors_all=df.columns    #获取因子列表
    
        # 2、获取用来中性化的因子列表 通常是行业和市值
        factor0 = input_2.read_pickle()                         
    
        #3、需要做清洗的因子列表 
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        #4、缺失值处理 按中信一级行业相同个股的平均值填充
        for fac in factors_need_cal:
            df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)
            df[fac]=df[fac].fillna(df['fac_mean'])
            del df['fac_mean']
    
        #5、因子异常值处理  
        #  固定比例法
        #for fac in factors_need_cal:
           # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)
            #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)
        #  均值标准差法
        #print(df[factors_need_cal].head())
        #for fac in factors_need_cal:
            #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()
            #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()
        #  MAD法
        #print(df[factors_need_cal].head())
        for fac in factors_need_cal:
    
            median = np.median(list(df[fac]))
            MAD = np.mean(abs(df[fac]) - median)
            df[fac][df[fac]>median+6*MAD] = median+6*MAD  # 剔除偏离中位数6倍以上的数据
            df[fac][df[fac]<median-6*MAD] = median-6*MAD
           
        #计算行业哑变量
        dfTmp = df.copy()  #copy一份用于计算行业哑变量
        for n in range(len(industry_List)):                         # 行业哑变量赋值
            dfTmp['industry_%d' % n] = 0
            dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1
                              
        # 准备线性回归参数
        model0 = LinearRegression()
        X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵
        del X['industry_sw_level1_0'] #删去中性化因子中的行业列
        
        #需要计算中性化的因子列表
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        # 逐个特征进行行业市值中性化
        from sklearn.preprocessing import scale
        for fac in factors_need_cal:
    
                y = df[fac] #获取需要中性化的因子暴露值
                model0.fit(X, y)
                df[fac] = y-model0.predict(X)  # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差
                #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理
                df[fac] = scale(df[fac])
                 #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果
        #多重共线性分析
        from sklearn.decomposition import PCA
        import matplotlib.pyplot as plt
        pca = PCA(n_components=len(factors_need_cal))
        pca.fit(df[factors_need_cal])
        var= pca.explained_variance_ratio_ #计算每个因子解释程度
        var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)#累计解释程度
        plt.plot(var1)
        data_1 = DataSource.write_df(df)
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m20_post_run_bigquant_run(outputs):
        return outputs
    
    from sklearn.linear_model import LinearRegression
    # 行业、市值中性模块
    def m5_run_bigquant_run(input_1, input_2, input_3):    
        
        # 1、获取特征数值
        df = input_1.read_df()   
        df = df[df['industry_sw_level1_0']>0]                # 去除没有查出行业的股票
        industry_List = df['industry_sw_level1_0'].unique()          # 所有行业代码
        factors_all=df.columns    #获取因子列表
    
        # 2、获取用来中性化的因子列表 通常是行业和市值
        factor0 = input_2.read_pickle()                         
    
        #3、需要做清洗的因子列表 
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        #4、缺失值处理 按中信一级行业相同个股的平均值填充
        for fac in factors_need_cal:
            df['fac_mean'] = df[['date']+['industry_sw_level1_0']+[fac]].groupby(['date','industry_sw_level1_0']).transform(np.mean)
            df[fac]=df[fac].fillna(df['fac_mean'])
            del df['fac_mean']
    
        #5、因子异常值处理  
        #  固定比例法
        #for fac in factors_need_cal:
           # df[fac][df[fac]>df[fac].quantile(0.99)]=df[fac].quantile(0.99)
            #df[fac][df[fac]<df[fac].quantile(0.01)]=df[fac].quantile(0.01)
        #  均值标准差法
        #print(df[factors_need_cal].head())
        #for fac in factors_need_cal:
            #df[fac][df[fac]>df[fac].mean()+3*df[fac].std()]=df[fac].mean()+3*df[fac].std()
            #df[fac][df[fac]<=df[fac].mean()-3*df[fac].std()]=df[fac].mean()-3*df[fac].std()
        #  MAD法
        #print(df[factors_need_cal].head())
        for fac in factors_need_cal:
    
            median = np.median(list(df[fac]))
            MAD = np.mean(abs(df[fac]) - median)
            df[fac][df[fac]>median+6*MAD] = median+6*MAD  # 剔除偏离中位数6倍以上的数据
            df[fac][df[fac]<median-6*MAD] = median-6*MAD
                 
        #计算行业哑变量
        dfTmp = df.copy()  #copy一份用于计算行业哑变量
        for n in range(len(industry_List)):                         # 行业哑变量赋值
            dfTmp['industry_%d' % n] = 0
            dfTmp['industry_%d' % n][df['industry_sw_level1_0']==industry_List[n]]=1
                              
        # 准备线性回归参数
        model0 = LinearRegression()
        X = dfTmp[list('industry_%d' % n for n in range(len(industry_List)))+factor0] #组装行业哑变量列和中性化因子列矩阵
        del X['industry_sw_level1_0'] #删去中性化因子中的行业列
        
        #需要计算中性化的因子列表
        factors_need_cal=[k for k in set(factors_all)-set(factor0) if k!='date' and k!='instrument' and k[:2]!='m:' and k!='label']
    
        # 逐个特征进行行业市值中性化
        from sklearn.preprocessing import scale
        for fac in factors_need_cal:
    
                y = df[fac] #获取需要中性化的因子暴露值
                model0.fit(X, y)
                df[fac] = y-model0.predict(X)  # 计算因子暴露相对于行业哑变量和中性化因子回归后的残差
                #df[fac]=(df[fac]-np.mean(df[fac]))/np.std(df[fac])#一种与scale基本等效的处理
                df[fac] = scale(df[fac])
                 #对残差取Z-Score标准化将计算后的结果返回给df中的各列,即完成中性化后的结果
        data_1 = DataSource.write_df(df)
        return Outputs(data_1=data_1, data_2=None, data_3=None)
        
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m5_post_run_bigquant_run(outputs):
        return outputs
    
    # 回测引擎:初始化函数,只执行一次
    def m27_initialize_bigquant_run(context):
        # 加载预测数据
        context.ranker_prediction = context.options['data'].read_df()
    
        # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数
        context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
        # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)
        # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只
        stock_count = 30
        # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]
        context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])
        # 设置每只股票占用的最大资金比例
        context.max_cash_per_instrument = 0.2
        context.options['hold_days'] = 5
    
    # 回测引擎:每日数据处理函数,每天执行一次
    def m27_handle_data_bigquant_run(context, data):
        #------------------------START:加入下面if的两行代码到之前到主函数的最前部分-------------------
        # 相隔几天(以5天举例)运行一下handle_data函数
        if context.trading_day_index % 5 != 0:
            return 
        #------------------------END:加上这两句代码在主函数就能实现隔几天运行---------------------
        # 按日期过滤得到今日的预测数据
        ranker_prediction = context.ranker_prediction[
            context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]
    
        # 1. 资金分配
        # 平均持仓时间是hold_days,每日都将买入股票,每日预期使用 1/hold_days 的资金
        # 实际操作中,会存在一定的买入误差,所以在前hold_days天,等量使用资金;之后,尽量使用剩余资金(这里设置最多用等量的1.5倍)
        is_staging = context.trading_day_index < context.options['hold_days'] # 是否在建仓期间(前 hold_days 天)
        cash_avg = context.portfolio.portfolio_value / context.options['hold_days']
        cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)
        cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)
        positions = {e.symbol: p.amount * p.last_sale_price
                     for e, p in context.perf_tracker.position_tracker.positions.items()}
    
        # 2. 生成卖出订单:hold_days天之后才开始卖出;对持仓的股票,按机器学习算法预测的排序末位淘汰
        if not is_staging and cash_for_sell > 0:
            equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}
            instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(
                    lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))
            # print('rank order for sell %s' % instruments)
            for instrument in instruments:
                context.order_target(context.symbol(instrument), 0)
                cash_for_sell -= positions[instrument]
                if cash_for_sell <= 0:
                    break
    
        # 3. 生成买入订单:按机器学习算法预测的排序,买入前面的stock_count只股票
        buy_cash_weights = context.stock_weights
        buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])
        max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument
        for i, instrument in enumerate(buy_instruments):
            cash = cash_for_buy * buy_cash_weights[i]
            if cash > max_cash_per_instrument - positions.get(instrument, 0):
                # 确保股票持仓量不会超过每次股票最大的占用资金量
                cash = max_cash_per_instrument - positions.get(instrument, 0)
            if cash > 0:
                price = data.current(context.symbol(instrument), 'price')  # 最新价格
                stock_num = np.floor(cash/price/100)*100  # 向下取整
                context.order(context.symbol(instrument), stock_num) # 整百下单
    
    
    # 回测引擎:准备数据,只执行一次
    def m27_prepare_bigquant_run(context):
        pass
    
    # 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。
    def m27_before_trading_start_bigquant_run(context, data):
        pass
    
    
    g = T.Graph({
    
        'm1': 'M.instruments.v2',
        'm1.start_date': '2018-10-01',
        'm1.end_date': '2019-10-01',
        'm1.market': 'CN_STOCK_A',
        'm1.instrument_list': '',
        'm1.max_count': 0,
    
        'm2': 'M.advanced_auto_labeler.v2',
        'm2.instruments': T.Graph.OutputPort('m1.data'),
        'm2.label_expr': """# 计算收益:5日经波动率调整后收益率
    shift(close, -5) / shift(open, -1)/ std(shift(close, -7) / shift(open, -6),5)
    
    # 极值处理:用1%和99%分位的值做clip
    clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
    
    # 将分数映射到分类,这里使用50个分类
    all_wbins(label, 20)
    
    # 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
    where(shift(high, -1) == shift(low, -1), NaN, label)""",
        'm2.start_date': '',
        'm2.end_date': '',
        'm2.benchmark': '399102.ZIX',
        'm2.drop_na_label': True,
        'm2.cast_label_int': True,
    
        'm11': 'M.instruments.v2',
        'm11.start_date': '2019-10-01',
        'm11.end_date': '2019-11-01',
        'm11.market': 'CN_STOCK_A',
        'm11.instrument_list': '',
        'm11.max_count': 0,
    
        'm26': 'M.input_features.v1',
        'm26.features': """date
    pred_label""",
    
        'm24': 'M.input_features.v1',
        'm24.features': """return_15
    industry_sw_level1_0
    market_cap_float_0""",
    
        'm22': 'M.general_feature_extractor.v7',
        'm22.instruments': T.Graph.OutputPort('m1.data'),
        'm22.features': T.Graph.OutputPort('m24.data'),
        'm22.start_date': '',
        'm22.end_date': '',
        'm22.before_start_days': 90,
    
        'm8': 'M.chinaa_stock_filter.v1',
        'm8.input_data': T.Graph.OutputPort('m22.data'),
        'm8.index_constituent_cond': ['中证800'],
        'm8.board_cond': ['全部'],
        'm8.industry_cond': ['全部'],
        'm8.st_cond': ['正常'],
        'm8.delist_cond': ['非退市'],
        'm8.output_left_data': False,
    
        'm10': 'M.general_feature_extractor.v7',
        'm10.instruments': T.Graph.OutputPort('m11.data'),
        'm10.features': T.Graph.OutputPort('m24.data'),
        'm10.start_date': '',
        'm10.end_date': '',
        'm10.before_start_days': 90,
    
        'm14': 'M.chinaa_stock_filter.v1',
        'm14.input_data': T.Graph.OutputPort('m10.data'),
        'm14.index_constituent_cond': ['中证800'],
        'm14.board_cond': ['全部'],
        'm14.industry_cond': ['全部'],
        'm14.st_cond': ['正常'],
        'm14.delist_cond': ['非退市'],
        'm14.output_left_data': False,
    
        'm16': 'M.derived_feature_extractor.v3',
        'm16.input_data': T.Graph.OutputPort('m8.data'),
        'm16.features': T.Graph.OutputPort('m24.data'),
        'm16.date_col': 'date',
        'm16.instrument_col': 'instrument',
        'm16.drop_na': False,
        'm16.remove_extra_columns': True,
    
        'm7': 'M.join.v3',
        'm7.data1': T.Graph.OutputPort('m2.data'),
        'm7.data2': T.Graph.OutputPort('m16.data'),
        'm7.on': 'date,instrument',
        'm7.how': 'inner',
        'm7.sort': False,
    
        'm6': 'M.dropnan.v1',
        'm6.input_data': T.Graph.OutputPort('m7.data'),
    
        'm18': 'M.derived_feature_extractor.v3',
        'm18.input_data': T.Graph.OutputPort('m14.data'),
        'm18.features': T.Graph.OutputPort('m24.data'),
        'm18.date_col': 'date',
        'm18.instrument_col': 'instrument',
        'm18.drop_na': False,
        'm18.remove_extra_columns': True,
    
        'm13': 'M.dropnan.v1',
        'm13.input_data': T.Graph.OutputPort('m18.data'),
    
        'm9': 'M.input_features.v1',
        'm9.features': """industry_sw_level1_0
    market_cap_float_0""",
    
        'm20': 'M.cached.v3',
        'm20.input_1': T.Graph.OutputPort('m6.data'),
        'm20.input_2': T.Graph.OutputPort('m9.data'),
        'm20.run': m20_run_bigquant_run,
        'm20.post_run': m20_post_run_bigquant_run,
        'm20.input_ports': '',
        'm20.params': '{}',
        'm20.output_ports': '',
    
        'm12': 'M.dropnan.v1',
        'm12.input_data': T.Graph.OutputPort('m20.data_1'),
    
        'm5': 'M.cached.v3',
        'm5.input_1': T.Graph.OutputPort('m13.data'),
        'm5.input_2': T.Graph.OutputPort('m9.data'),
        'm5.run': m5_run_bigquant_run,
        'm5.post_run': m5_post_run_bigquant_run,
        'm5.input_ports': '',
        'm5.params': '{}',
        'm5.output_ports': '',
    
        'm23': 'M.dropnan.v1',
        'm23.input_data': T.Graph.OutputPort('m5.data_1'),
    
        'm4': 'M.preprocessing_standard_scaler.v1',
        'm4.training_ds': T.Graph.OutputPort('m12.data'),
        'm4.features': T.Graph.OutputPort('m24.data'),
        'm4.predict_ds': T.Graph.OutputPort('m23.data'),
        'm4.with_mean': True,
        'm4.with_std': True,
    
        'm3': 'M.random_forest_classifier.v1',
        'm3.training_ds': T.Graph.OutputPort('m4.transform_trainds'),
        'm3.features': T.Graph.OutputPort('m24.data'),
        'm3.predict_ds': T.Graph.OutputPort('m4.transform_predictds'),
        'm3.iterations': 10,
        'm3.feature_fraction': 1,
        'm3.max_depth': 30,
        'm3.min_samples_per_leaf': 200,
        'm3.key_cols': 'date,instrument',
        'm3.workers': 1,
        'm3.other_train_parameters': {},
    
        'm17': 'M.sort.v4',
        'm17.input_ds': T.Graph.OutputPort('m3.predictions'),
        'm17.sort_by_ds': T.Graph.OutputPort('m26.data'),
        'm17.sort_by': 'pred_label',
        'm17.group_by': 'date',
        'm17.keep_columns': '--',
        'm17.ascending': False,
    
        'm27': 'M.trade.v4',
        'm27.instruments': T.Graph.OutputPort('m11.data'),
        'm27.options_data': T.Graph.OutputPort('m17.sorted_data'),
        'm27.start_date': '',
        'm27.end_date': '',
        'm27.initialize': m27_initialize_bigquant_run,
        'm27.handle_data': m27_handle_data_bigquant_run,
        'm27.prepare': m27_prepare_bigquant_run,
        'm27.before_trading_start': m27_before_trading_start_bigquant_run,
        'm27.volume_limit': 0.025,
        'm27.order_price_field_buy': 'open',
        'm27.order_price_field_sell': 'close',
        'm27.capital_base': 1000000,
        'm27.auto_cancel_non_tradable_orders': True,
        'm27.data_frequency': 'daily',
        'm27.price_type': '真实价格',
        'm27.product_type': '股票',
        'm27.plot_charts': False,
        'm27.backtest_only': False,
        'm27.benchmark': '',
    })
    
    # g.run({})
    
    
    #step1单因子测试自定义运行(没加日期自动滚动前)
    def m15_run_bigquant_run(bq_graph, inputs):
        date = '2019-11-01'  #最后一期单因子测试训练集的end_date,即完整策略测试中训练集的end_date减去一个月
        goal_date = datetime.datetime.strptime(date,'%Y-%m-%d')
        times = 12   #指滚动的次数
        train_period = 360   #训练集时间
        test_period = 30   #测试集时间
        parameters_list = []
        #单因子测试的特征
        features ="""
        market_cap_0
        pb_lf_0
        pe_lyr_0
        pe_ttm_0
        ps_ttm_0
        rank_market_cap_0
        rank_market_cap_float_0
        rank_pb_lf_0
        rank_pe_lyr_0
        rank_pe_ttm_0
        rank_ps_ttm_0
        west_avgcps_ftm_0
        west_eps_ftm_0
        west_netprofit_ftm_0
        avg_turn_5
        rank_avg_turn_5
        rank_turn_5
        turn_5
        company_found_date_0
        in_csi100_0
        in_csi300_0
        in_csi500_0
        in_csi800_0
        in_sse180_0
        in_sse50_0
        in_szse100_0
        industry_sw_level2_0
        industry_sw_level3_0
        list_board_0
        list_days_0
        fs_account_payable_0
        fs_account_receivable_0
        fs_bps_0
        fs_capital_reserves_0
        fs_cash_equivalents_0
        fs_cash_ratio_0
        fs_common_equity_0
        fs_construction_in_process_0
        fs_current_assets_0
        fs_current_liabilities_0
        fs_deducted_profit_0
        fs_deducted_profit_ttm_0
        fs_eps_0
        fs_eps_yoy_0
        fs_eqy_belongto_parcomsh_0
        fs_financial_expenses_0
        fs_fixed_assets_0
        fs_fixed_assets_disp_0
        fs_free_cash_flow_0
        fs_general_expenses_0
        fs_gross_profit_margin_0
        fs_gross_profit_margin_ttm_0
        fs_gross_revenues_0
        fs_income_tax_0
        fs_net_cash_flow_0
        fs_net_cash_flow_ttm_0
        fs_net_income_0
        fs_net_profit_0
        fs_net_profit_margin_0
        fs_net_profit_margin_ttm_0
        fs_net_profit_qoq_0
        fs_net_profit_ttm_0
        fs_net_profit_yoy_0
        fs_non_current_assets_0
        fs_non_current_liabilities_0
        fs_operating_profit_0
        fs_operating_revenue_0
        fs_operating_revenue_qoq_0
        fs_operating_revenue_ttm_0
        fs_operating_revenue_yoy_0
        fs_paicl_up_capital_0
        fs_publish_date_0
        fs_quarter_index_0
        fs_quarter_year_0
        fs_roa_0
        fs_roa_ttm_0
        fs_roe_0
        fs_roe_ttm_0
        fs_selling_expenses_0
        fs_surplus_reserves_0
        fs_total_equity_0
        fs_total_liability_0
        fs_total_operating_costs_0
        fs_total_profit_0
        fs_undistributed_profit_0
        rank_fs_bps_0
        rank_fs_cash_ratio_0
        rank_fs_eps_0
        rank_fs_eps_yoy_0
        rank_fs_net_profit_qoq_0
        rank_fs_net_profit_yoy_0
        rank_fs_operating_revenue_qoq_0
        rank_fs_operating_revenue_yoy_0
        rank_fs_roa_0
        rank_fs_roa_ttm_0
        rank_fs_roe_0
        rank_fs_roe_ttm_0
        ta_ad_0
        ta_adx_14_0
        ta_adx_28_0
        ta_aroon_down_14_0
        ta_aroon_down_28_0
        ta_aroon_up_14_0
        ta_aroon_up_28_0
        ta_aroonosc_14_0
        ta_aroonosc_28_0
        ta_atr_14_0
        ta_atr_28_0
        ta_bbands_lowerband_14_0
        ta_bbands_lowerband_28_0
        ta_bbands_middleband_14_0
        ta_bbands_middleband_28_0
        ta_bbands_upperband_14_0
        ta_bbands_upperband_28_0
        ta_cci_14_0
        ta_cci_28_0
        ta_ema_5_0
        ta_ema_10_0
        ta_ema_20_0
        ta_ema_30_0
        ta_ema_60_0
        ta_macd_macd_12_26_9_0
        ta_macd_macdhist_12_26_9_0
        ta_macd_macdsignal_12_26_9_0
        ta_mfi_14_0
        ta_mfi_28_0
        ta_mom_10_0
        ta_mom_20_0
        ta_mom_30_0
        ta_mom_60_0
        ta_obv_0
        ta_rsi_14_0
        ta_rsi_28_0
        ta_sar_0
        ta_sma_5_0
        ta_sma_10_0
        ta_sma_20_0
        ta_sma_30_0
        ta_sma_60_0
        ta_stoch_slowd_5_3_0_3_0_0
        ta_stoch_slowk_5_3_0_3_0_0
        ta_trix_14_0
        ta_trix_28_0
        ta_willr_14_0
        ta_willr_28_0
        ta_wma_5_0
        ta_wma_10_0
        ta_wma_20_0
        ta_wma_30_0
        ta_wma_60_0
        adjust_factor_1
        amount_1
        avg_amount_1
        close_1
        daily_return_1
        deal_number_1
        high_1
        low_1
        open_1
        price_limit_status_1
        rank_amount_1
        rank_avg_amount_1
        rank_return_1
        return_1
        volume_1
        rank_sh_holder_avg_pct_0
        rank_sh_holder_avg_pct_3m_chng_0
        rank_sh_holder_avg_pct_6m_chng_0
        rank_sh_holder_num_0
        sh_holder_avg_pct_0
        sh_holder_avg_pct_3m_chng_0
        sh_holder_avg_pct_6m_chng_0
        sh_holder_num_0
        rank_swing_volatility_5_0
        rank_volatility_5_0
        swing_volatility_5_0
        volatility_5_0
        beta_csi100_5_0
        beta_csi300_5_0
        beta_csi500_5_0
        beta_csi800_5_0
        beta_gem_5_0
        beta_industry_5_0
        beta_sse180_5_0
        beta_sse50_5_0
        beta_szzs_5_0
        rank_beta_csi100_5_0
        rank_beta_csi300_5_0
        rank_beta_csi500_5_0
        rank_beta_csi800_5_0
        rank_beta_gem_5_0
        rank_beta_industry_5_0
        rank_beta_sse180_5_0
        rank_beta_sse50_5_0
        rank_beta_szzs_5_0
        avg_mf_net_amount_1
        mf_net_amount_1
        mf_net_amount_l_0
        mf_net_amount_m_0
        mf_net_amount_main_0
        mf_net_amount_s_0
        mf_net_amount_xl_0
        mf_net_pct_l_0
        mf_net_pct_m_0
        mf_net_pct_main_0
        mf_net_pct_s_0
        mf_net_pct_xl_0
        rank_avg_mf_net_amount_1
        """
        features=features.split()    
        #===================================================================
        for i in range(times): 
            #parameters = {}
            #训练集起点
            train_start_date = goal_date - datetime.timedelta(days=train_period)    
            train_start_date = train_start_date.strftime("%Y-%m-%d")
            #训练集终点,测试集起点
            train_end_date = goal_date.strftime("%Y-%m-%d")
            test_start_date = train_end_date
            #测试集终点
            test_end_date = goal_date + datetime.timedelta(days = test_period)
            test_end_date = test_end_date.strftime("%Y-%m-%d")
            #往前一个周期
            goal_date = goal_date - datetime.timedelta(days=test_period)
            #开始加入不同特征,并行次数即len(parameters_list)应等于[不同因子数 * 滚动次数]
            for feature in features:
                parameters = {}
                tmp = []
                tmp.append(feature)
                tmp.append('industry_sw_level1_0')
                tmp.append('market_cap_float_0')      
                parameters['m24.features'] = "\n".join(tmp)
                #将每期日期加入两个模块的参数
                parameters['m1.start_date'] = train_start_date
                parameters['m1.end_date'] = train_end_date
                parameters['m11.start_date'] = test_start_date
                parameters['m11.end_date'] = test_end_date
                parameters_list.append({'parameters': parameters})
            #===================================================================  
        def run(parameters):
            try:
                #print(parameters)
                return g.run(parameters)
            except Exception as e:
                print('ERROR --------', e)
                return None
     
        results = T.parallel_map(run, parameters_list, max_workers=2, remote_run=True, silent=True)
    
        return results
    
    m15 = M.hyper_run.v1(
        run=m15_run_bigquant_run,
        run_now=True,
        bq_graph=g
    )
    
    [2020-03-30 08:38:13.978267] INFO: bigquant: T.parallel_map  开始并行运算..
    [Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
    [Parallel(n_jobs=2)]: Done   1 tasks      | elapsed:  1.0min
    [Parallel(n_jobs=2)]: Done   2 tasks      | elapsed:  1.0min
    [Parallel(n_jobs=2)]: Done   3 tasks      | elapsed:  1.7min
    [Parallel(n_jobs=2)]: Done   4 tasks      | elapsed:  1.7min
    [Parallel(n_jobs=2)]: Done   5 tasks      | elapsed:  2.4min
    [Parallel(n_jobs=2)]: Done   6 tasks      | elapsed:  2.4min
    [Parallel(n_jobs=2)]: Done   7 tasks      | elapsed:  3.2min
    [Parallel(n_jobs=2)]: Done   8 tasks      | elapsed:  3.2min
    [Parallel(n_jobs=2)]: Done   9 tasks      | elapsed:  3.9min
    [Parallel(n_jobs=2)]: Done  10 tasks      | elapsed:  4.0min
    [Parallel(n_jobs=2)]: Done  11 tasks      | elapsed:  4.6min
    [Parallel(n_jobs=2)]: Done  12 tasks      | elapsed:  4.6min
    [Parallel(n_jobs=2)]: Done  13 tasks      | elapsed:  5.2min
    [Parallel(n_jobs=2)]: Done  14 tasks      | elapsed:  5.2min
    [Parallel(n_jobs=2)]: Done  15 tasks      | elapsed:  5.9min
    [Parallel(n_jobs=2)]: Done  16 tasks      | elapsed:  6.0min
    [Parallel(n_jobs=2)]: Done  17 tasks      | elapsed:  6.1min
    [Parallel(n_jobs=2)]: Done  18 tasks      | elapsed:  6.1min
    [Parallel(n_jobs=2)]: Done  19 tasks      | elapsed:  6.1min
    [Parallel(n_jobs=2)]: Done  20 tasks      | elapsed:  6.1min
    
    ---------------------------------------------------------------------------
    KeyboardInterrupt                         Traceback (most recent call last)
    <ipython-input-4-a09cceb27e06> in <module>()
        677     run=m15_run_bigquant_run,
        678     run_now=True,
    --> 679     bq_graph=g
        680 )
    
    <ipython-input-4-a09cceb27e06> in m15_run_bigquant_run(bq_graph, inputs)
        670             return None
        671 
    --> 672     results = T.parallel_map(run, parameters_list, max_workers=2, remote_run=True, silent=True)
        673 
        674     return results
    
    KeyboardInterrupt: 
    In [ ]:
    #读取回测结果和特征名,然后追加写入到创建好的文档里 
    
    
    print("开始写入回测结果")
    for k in range(len(m15.result)):
        try:
            res = m15.result[k]['m27'].read_raw_perf()[['algorithm_period_return','alpha','beta','max_drawdown','sharpe']]
            res_tmp = pd.DataFrame(res.iloc[-1]).T
            feature = m15.result[k]['m24'].data     
            feature = feature.read_pickle()
            feature = feature[0]       
            res_tmp['feature'] = str(feature)    
            res_tmp = res_tmp.reset_index(drop=False)
            res_tmp.to_csv('因子批量测试结果.csv',header=False,mode='a')       
        except:
            print("error")
    print('csv追加写入结束')
    print (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    

    STEP2优劣因子筛选(不涉及并行计算)

    克隆策略
    In [6]:
    #初始化创建/清空一个csv;
    import pandas as pd
    import time
    print("初始化csv开始")
    good_result =pd.DataFrame(columns=('date','great_feature'))
    bad_result =pd.DataFrame(columns=('date','bad_feature'))
    good_result.to_csv('好因子筛选结果.csv',header=True,mode='w')   
    bad_result.to_csv('坏因子筛选结果.csv',header=True,mode='w')     
    """格式化成2016-03-20 11:45:39形式"""
    print (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    print("初始化csv完成")
    
    初始化csv开始
    2020-03-28 19:58:34
    初始化csv完成
    
    In [7]:
    import numpy as np
    import pandas as pd
    
    m8 =M.csv_read.v3(
        csv_path='因子批量测试结果.csv',
        encoding='utf-8',
        m_cached=False   #不读取缓存,否则可能读取不到最新数据
    )
    
    
    
    df = m8.data.read_all_df()
    #设置2个拥有“日期”、“筛选特征”的空df,用作记录结果.date实际上是测试集的最后一天
    good_factor_result =pd.DataFrame(columns=('date','feature'))
    bad_factor_result =pd.DataFrame(columns=('date','feature'))
    #将每个时期分成一组单独处理
    
    date_list = [] 
    for date in df['date']:
        if date not in date_list:
            date_list.append(date)
        
    #在每组内进行筛选
    for date in date_list:
        grouped = df.groupby('date')
        grouped_df = grouped.get_group(date)
        #设置排序规则
        sorted_df = grouped_df.sort_values(by=['sharpe','alpha','max_drawdown','algorithm_period_return'],ascending =[False,False,True,False] )    #ascending=False为降序,反之为升序
        #重新设置索引
        re_index_df = sorted_df.reset_index(drop=True)    #reset_index进行重新索引,好按索引排名筛选
        #筛选排名前5的好特征组合,并将其加入结果df
        filter = re_index_df.iloc[:5]
        filter = filter.loc[:,['date','feature']]
        good_factor_result = good_factor_result.append(filter)
        #筛选排名倒5的坏特征组合,并将其加入结果df
        filter = re_index_df.iloc[-5:]
        filter = filter.loc[:,['date','feature']]
        bad_factor_result = bad_factor_result.append(filter)
        
    #重新设置列标签名
    good_factor_result = good_factor_result.rename(index=str, columns={ "feature": "great_feature"})
    bad_factor_result = bad_factor_result.rename(index=str, columns={ "feature": "bad_feature"})
    print(good_factor_result)
    print("*"*50)
    print(bad_factor_result)
    good_factor_result.to_csv('好因子筛选结果.csv',header=False,mode='a') 
    bad_factor_result.to_csv('坏因子筛选结果.csv',header=False,mode='a')   
    
                            date            great_feature
    0  2019-11-29 15:00:00+00:00  rank_market_cap_float_0
    1  2019-11-29 15:00:00+00:00                 pe_ttm_0
    2  2019-11-29 15:00:00+00:00            rank_pe_lyr_0
    3  2019-11-29 15:00:00+00:00        rank_market_cap_0
    4  2019-11-29 15:00:00+00:00                 pe_lyr_0
    0  2019-11-01 15:00:00+00:00             rank_pb_lf_0
    1  2019-11-01 15:00:00+00:00                 pe_ttm_0
    2  2019-11-01 15:00:00+00:00            rank_pe_ttm_0
    3  2019-11-01 15:00:00+00:00            rank_pe_lyr_0
    4  2019-11-01 15:00:00+00:00                 pe_lyr_0
    0  2019-09-30 15:00:00+00:00            rank_ps_ttm_0
    1  2019-09-30 15:00:00+00:00            rank_pe_ttm_0
    2  2019-09-30 15:00:00+00:00                 ps_ttm_0
    3  2019-09-30 15:00:00+00:00                  pb_lf_0
    4  2019-09-30 15:00:00+00:00        rank_market_cap_0
    0  2019-09-02 15:00:00+00:00                 pe_ttm_0
    1  2019-09-02 15:00:00+00:00  rank_market_cap_float_0
    2  2019-09-02 15:00:00+00:00            rank_pe_ttm_0
    3  2019-09-02 15:00:00+00:00            rank_pe_lyr_0
    4  2019-09-02 15:00:00+00:00        rank_market_cap_0
    0  2019-08-02 15:00:00+00:00            rank_ps_ttm_0
    1  2019-08-02 15:00:00+00:00  rank_market_cap_float_0
    2  2019-08-02 15:00:00+00:00                 pe_ttm_0
    3  2019-08-02 15:00:00+00:00        rank_market_cap_0
    4  2019-08-02 15:00:00+00:00                 ps_ttm_0
    **************************************************
                            date              bad_feature
    5  2019-11-29 15:00:00+00:00                 ps_ttm_0
    6  2019-11-29 15:00:00+00:00             rank_pb_lf_0
    7  2019-11-29 15:00:00+00:00            rank_pe_ttm_0
    8  2019-11-29 15:00:00+00:00                  pb_lf_0
    9  2019-11-29 15:00:00+00:00            rank_ps_ttm_0
    5  2019-11-01 15:00:00+00:00            rank_ps_ttm_0
    6  2019-11-01 15:00:00+00:00        rank_market_cap_0
    7  2019-11-01 15:00:00+00:00  rank_market_cap_float_0
    8  2019-11-01 15:00:00+00:00                  pb_lf_0
    9  2019-11-01 15:00:00+00:00                 ps_ttm_0
    5  2019-09-30 15:00:00+00:00  rank_market_cap_float_0
    6  2019-09-30 15:00:00+00:00             rank_pb_lf_0
    7  2019-09-30 15:00:00+00:00                 pe_ttm_0
    8  2019-09-30 15:00:00+00:00                 pe_lyr_0
    9  2019-09-30 15:00:00+00:00            rank_pe_lyr_0
    5  2019-09-02 15:00:00+00:00             rank_pb_lf_0
    6  2019-09-02 15:00:00+00:00            rank_ps_ttm_0
    7  2019-09-02 15:00:00+00:00                 ps_ttm_0
    8  2019-09-02 15:00:00+00:00                  pb_lf_0
    9  2019-09-02 15:00:00+00:00                 pe_lyr_0
    5  2019-08-02 15:00:00+00:00                 pe_lyr_0
    6  2019-08-02 15:00:00+00:00            rank_pe_lyr_0
    7  2019-08-02 15:00:00+00:00            rank_pe_ttm_0
    8  2019-08-02 15:00:00+00:00                  pb_lf_0
    9  2019-08-02 15:00:00+00:00             rank_pb_lf_0
    

    STEP3IC相关性筛选-优(并行次数 = 滚动次数)

    克隆策略
    In [4]:
    #初始化工作,用完就注销掉
    cor_result =pd.DataFrame(columns=('date','great_feature'))
    cor_result.to_csv('相关性筛选结果-优.csv',header=True,mode='w') 
    print("初始化完毕")
    
    初始化完毕
    
    In [5]:
    import matplotlib.pyplot as plt
    from sklearn import linear_model
    from matplotlib import rc
    rc('mathtext', default='regular')
    import seaborn as sns
    sns.set_style('white')
    from matplotlib import dates
    import numpy as np
    import pandas as pd
    import statsmodels.api as sm
    import time
    import datetime 
    import scipy.stats as st
    from math import sqrt
    time_start = time.time()
    
    #两个列表求差
    def diff(first, second):
            second = set(second)
            return [item for item in first if item not in second]
    
    # 皮尔逊相关系数函数(IC)
    def multiply(a,b):
        sum_ab=0.0
        for i in range(len(a)):
            temp=a[i]*b[i]
            sum_ab+=temp
        return sum_ab
    
    def cal_pearson(x,y):
        try:
            n=len(x)
            sum_x=sum(x)
            sum_y=sum(y)
            sum_xy=multiply(x,y)
            sum_x2 = sum([pow(i,2) for i in x])
            sum_y2 = sum([pow(j,2) for j in y])
            molecular=sum_xy-(float(sum_x)*float(sum_y)/n)
            denominator=sqrt((sum_x2-float(sum_x**2)/n)*(sum_y2-float(sum_y**2)/n))
            return molecular/denominator
        except ZeroDivisionError:    
             print ("ZeroDivisionError")
                
    # 去极值函数
    def winsorize_series(se):
        q = se.quantile([0.025, 0.975])
        if isinstance(q, pd.Series) and len(q) == 2:
            se[se < q.iloc[0]] = q.iloc[0]
            se[se > q.iloc[1]] = q.iloc[1]
        return se
    

      {"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-507:input_1","SourceOutputPortId":"-36:data"}],"ModuleNodes":[{"Id":"-36","ModuleId":"BigQuantSpace.instruments.instruments-v2","ModuleParameters":[{"Name":"start_date","Value":"2018-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"end_date","Value":"2019-01-01","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"market","Value":"CN_STOCK_A","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_list","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_count","Value":0,"ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"rolling_conf","NodeId":"-36"}],"OutputPortsInternal":[{"Name":"data","NodeId":"-36","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":1,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-507","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# 备份自定义python\ndef compute_cor(factors,input_1,start_date,end_date,rebalance_period):\n\n m3 = M.input_features.v1(\n features= factors\n \n )\n\n m2 = M.use_datasource.v1(\n features=m3.data,\n instruments = input_1,\n datasource_id='instruments_CN_STOCK_A',\n start_date='',\n end_date=''\n )\n\n m5 = M.dropnan.v1(\n input_data=m2.data\n )\n\n\n m6 = M.chinaa_stock_filter.v1(\n input_data=m5.data,\n index_constituent_cond=['中证800'],\n board_cond=['全部'],\n industry_cond=['全部'],\n st_cond=['正常'],\n delist_cond=['非退市'],\n output_left_data=False\n )\n\n df = m6.data.read_df()\n instruments = list(set(df.instrument))\n\n\n features_data = D.features(instruments=instruments, start_date=start_date, end_date=end_date, fields=factors)\n\n mydata = features_data.set_index(['date','instrument'])[factors].unstack() #unstack函数使得表格变成花括号\n\n #--------------------------------------------------------------------------------------------------\n #print(\"开始相关性测试\")\n\n for col in mydata['return_5'].columns:\n mydata['return_5'][col] = mydata['return_5'][col].shift(-rebalance_period)\n data_ret = mydata['return_5']\n\n\n group_fac = []\n for fac in factors[1:]:\n #print(str(fac))\n data_tmp = mydata[fac]\n for col in data_ret.T.columns[:-rebalance_period]:\n tmp_df = pd.DataFrame(index=data_ret.T.index)\n tmp_df['fac'] = data_tmp.T[col]\n tmp_df['ret'] = data_ret.T[col]\n tmp_df = tmp_df.dropna()\n coff = cal_pearson(tmp_df['fac'],tmp_df['ret'])\n dict_temp = {'feature':fac,'date':col,'coff':coff}\n group_fac.append(dict_temp)\n\n _ic = pd.DataFrame(group_fac,columns=['feature','date','coff'])\n _ic['coff'] = _ic['coff'].abs() #将IC绝对值化 \n ic_temp = _ic.set_index(['date','feature']).unstack()\n ic_df = ic_temp['coff'] \n ic_df.head()\n #ic_df.plot(figsize=(12,6), title='Factor weight using sample covariance')\n #---------------------------------------------以上是借鉴的前半部分,获取了相关性data-----------------------------------------------------\n result = ic_df.corr()\n return result\n\ndef bigquant_run(input_1, input_2, input_3):\n # 起始结束日期,目前用预测集的前三年\n rebalance_period = 5\n m8 =M.csv_read.v3(\n csv_path='好因子筛选结果.csv',\n encoding='utf-8',\n m_cached=False #不读取缓存,否则可能读取不到最新数据\n )\n #转换一下date格式,以方便坏看和遍历时自动选取日期\n df = m8.data.read_all_df()\n time_list = []\n for date in df['date']:\n timeStruct = time.strptime(date,\"%Y-%m-%d %H:%M:%S+00:00\") \n strTime = time.strftime(\"%Y-%m-%d\", timeStruct) \n time_list.append(strTime)\n time_dict = {'date':time_list}\n tmp_date = pd.DataFrame(time_dict)\n df['date'] = tmp_date['date']\n \n\n #将每个时期分成一组单独处理\n date_list = [] \n for date in df['date']:\n if date not in date_list:\n date_list.append(date)\n \n start_date = input_1.read_pickle()['start_date'] #相关性测试起点\n end_date = input_1.read_pickle()['end_date'] #相关性测试终点\n tmp_date = datetime.datetime.strptime(end_date,'%Y-%m-%d')\n goal_date = tmp_date \n goal_date = goal_date.strftime(\"%Y-%m-%d\")\n goal_date = str(goal_date)\n date = goal_date\n\n \n \n \n #print('所有日期:',date_list)\n #print(\"计算次数:\",len(date_list))\n #在每组内进行筛选,每组有不同的起始日期/\n\n #按日期分组\n grouped = df.groupby('date')\n #print(date)\n grouped_df = grouped.get_group(date)\n grouped_df = grouped_df.rename(index=str, columns={ \"Unnamed: 0\": \"sort_index\"})\n #建立一个空表,然后读取每期因子\n factors = []\n factors.append('return_5')\n for fac in grouped_df['great_feature']:\n if fac not in factors:\n factors.append(fac)\n #-------------------------------------------------------------\n result = compute_cor(factors,input_1,start_date,end_date,rebalance_period) \n #===============================================================\n #如果没有feature这个index/columns,那么我就自己造一个\n result['great_feature'] = list(result) #!!注意这里要区别优质和优质特征。这句代码只能用一次,用来加一列特征列坏做vlookup()\n result = pd.merge(result,grouped_df.loc[:,['great_feature','sort_index']],how='left',on = 'great_feature') #区分优质,这里也要改\n\n #————————————————————————————————————————————————————————————分类规则————————————————————————————————————————————————————————————\n sorted_result = result.sort_values(by=['sort_index'],ascending =[True] ) #优质因子用True,优质因子用False\n #reset_index进行重新索引,坏按索引排名筛选;然后删掉sort_index这个临时用的column\n re_index_df = sorted_result.reset_index(drop=True) \n re_index_df = re_index_df.drop(['sort_index'],axis = 1)\n data_list = list(re_index_df)\n data_list.remove('great_feature') #优质优质要区分,修改这里\n\n #按索引自上而下按行遍历\n flag = 0 #删除行数(不会遍历所有的行,只会遍历(原来所有行数-删除行数))\n garbage_list = []\n try:\n for i in range(len(list(data_list))):\n row = re_index_df.loc[i,list(data_list)]\n #删掉各列超标\n for feature in list(data_list):\n data = row[feature]\n if (data < -0.3) or (data < 1 and data > 0.3): #这个阈值根据需要要改 \n garbage_list.append(feature)\n #先删列\n re_index_df = re_index_df.drop(columns = feature,axis = 1) \n #再删行(问题应该在这里,为什么会多删除rank_pe_ttm_0一行)\n re_index_df = re_index_df.astype(str) #将所有元素转换为str\n y = re_index_df[re_index_df['great_feature'].str.match(feature)] #问题出在这里str.contains为模糊匹配,尝试使用match修复\n test1 = y['great_feature'].tolist() \n test2 = re_index_df['great_feature'].tolist()\n ret = diff(test2,test1) #如何计算列表里的差https://cloud.tencent.com/developer/ask/34629\n re_index_df = re_index_df[re_index_df.great_feature.isin(ret)] \n except:\n print(\"finished\")\n #为每次日期的结果建立一个临时表\n tmp_df = pd.DataFrame()\n tmp_df['great_feature'] = re_index_df['great_feature']\n tmp_df['date'] = date \n tmp_df.rename(columns = {\"date\": \"great_feature\", \"great_feature\":\"date\"}, inplace=True) #不知道什么鬼原因让两列倒置过来了,姑且这么修复吧\n tmp_df.to_csv('相关性筛选结果-优.csv',header=False,mode='a')\n \n \n return Outputs(data_1=None, data_2=None, data_3=None)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-507"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-507"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-507"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-507","OutputType":null},{"Name":"data_2","NodeId":"-507","OutputType":null},{"Name":"data_3","NodeId":"-507","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true},{"Id":"-2274","ModuleId":"BigQuantSpace.hyper_run.hyper_run-v1","ModuleParameters":[{"Name":"run","Value":"def bigquant_run(bq_graph, inputs):\n m8 =M.csv_read.v3(\n csv_path='好因子筛选结果.csv',\n encoding='utf-8',\n m_cached=False #不读取缓存,否则可能读取不到最新数据\n )\n #转换一下date格式,以方便好看和遍历时自动选取日期\n df = m8.data.read_all_df()\n time_list = []\n for date in df['date']:\n timeStruct = time.strptime(date,\"%Y-%m-%d %H:%M:%S+00:00\") \n strTime = time.strftime(\"%Y-%m-%d\", timeStruct) \n time_list.append(strTime)\n time_dict = {'date':time_list}\n tmp_date = pd.DataFrame(time_dict)\n df['date'] = tmp_date['date']\n\n #将每个时期分成一组单独处理\n date_list = [] \n for date in df['date']:\n if date not in date_list:\n date_list.append(date)\n\n #设置\n parameters_list = []\n #在每组内进行筛选,每组有不同的起始日期/\n\n for date in date_list: \n parameters = {}\n \n #设置起始日期,测试区间为:策略的测试集前1年到前1天(比单因子读取的晚一个月)\n goal_date = datetime.datetime.strptime(date,'%Y-%m-%d')\n #goal_date = goal_date.replace(day=1) #这行代码是把日期替换为当月第一天\n start_date = goal_date - datetime.timedelta(days=365)\n start_date = start_date.strftime(\"%Y-%m-%d\")\n end_date = goal_date\n end_date = end_date.strftime(\"%Y-%m-%d\")\n parameters['m1.start_date'] = start_date\n parameters['m1.end_date'] = end_date\n parameters_list.append({'parameters': parameters})\n #print(parameters_list) \n \n def run(parameters):\n try:\n #print(parameters)\n return g.run(parameters)\n except Exception as e:\n print('ERROR --------', e)\n return None\n \n results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)\n\n return results\n ","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"run_now","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"bq_graph","Value":"True","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"bq_graph_port","NodeId":"-2274"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-2274"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-2274"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-2274"}],"OutputPortsInternal":[{"Name":"result","NodeId":"-2274","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":3,"IsPartOfPartialRun":null,"Comment":"","CommentCollapsed":true}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-36' Position='170,-201,200,200'/><NodePosition Node='-507' Position='197,-4,200,200'/><NodePosition Node='-2274' Position='153,144,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
      In [6]:
      # 本代码由可视化策略环境自动生成 2020年3月29日 12:30
      # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
      
      
      # 备份自定义python
      def compute_cor(factors,input_1,start_date,end_date,rebalance_period):
      
          m3 = M.input_features.v1(
              features= factors
              
          )
      
          m2 = M.use_datasource.v1(
              features=m3.data,
              instruments = input_1,
              datasource_id='instruments_CN_STOCK_A',
              start_date='',
              end_date=''
          )
      
          m5 = M.dropnan.v1(
              input_data=m2.data
          )
      
      
          m6 = M.chinaa_stock_filter.v1(
              input_data=m5.data,
              index_constituent_cond=['中证800'],
              board_cond=['全部'],
              industry_cond=['全部'],
              st_cond=['正常'],
              delist_cond=['非退市'],
              output_left_data=False
          )
      
          df = m6.data.read_df()
          instruments = list(set(df.instrument))
      
      
          features_data = D.features(instruments=instruments, start_date=start_date, end_date=end_date, fields=factors)
      
          mydata = features_data.set_index(['date','instrument'])[factors].unstack()    #unstack函数使得表格变成花括号
      
          #--------------------------------------------------------------------------------------------------
          #print("开始相关性测试")
      
          for col in mydata['return_5'].columns:
              mydata['return_5'][col] = mydata['return_5'][col].shift(-rebalance_period)
          data_ret = mydata['return_5']
      
      
          group_fac = []
          for fac in factors[1:]:
              #print(str(fac))
              data_tmp = mydata[fac]
              for col in data_ret.T.columns[:-rebalance_period]:
                  tmp_df = pd.DataFrame(index=data_ret.T.index)
                  tmp_df['fac'] = data_tmp.T[col]
                  tmp_df['ret'] = data_ret.T[col]
                  tmp_df = tmp_df.dropna()
                  coff = cal_pearson(tmp_df['fac'],tmp_df['ret'])
                  dict_temp = {'feature':fac,'date':col,'coff':coff}
                  group_fac.append(dict_temp)
      
          _ic = pd.DataFrame(group_fac,columns=['feature','date','coff'])
          _ic['coff'] = _ic['coff'].abs()    #将IC绝对值化  
          ic_temp = _ic.set_index(['date','feature']).unstack()
          ic_df = ic_temp['coff']  
          ic_df.head()
          #ic_df.plot(figsize=(12,6), title='Factor weight using sample covariance')
          #---------------------------------------------以上是借鉴的前半部分,获取了相关性data-----------------------------------------------------
          result = ic_df.corr()
          return result
      
      def m2_run_bigquant_run(input_1, input_2, input_3):
          # 起始结束日期,目前用预测集的前三年
          rebalance_period = 5
          m8 =M.csv_read.v3(
              csv_path='好因子筛选结果.csv',
              encoding='utf-8',
              m_cached=False   #不读取缓存,否则可能读取不到最新数据
          )
          #转换一下date格式,以方便坏看和遍历时自动选取日期
          df = m8.data.read_all_df()
          time_list = []
          for date in df['date']:
              timeStruct = time.strptime(date,"%Y-%m-%d %H:%M:%S+00:00") 
              strTime = time.strftime("%Y-%m-%d", timeStruct) 
              time_list.append(strTime)
          time_dict = {'date':time_list}
          tmp_date = pd.DataFrame(time_dict)
          df['date'] = tmp_date['date']
          
      
          #将每个时期分成一组单独处理
          date_list = [] 
          for date in df['date']:
              if date not in date_list:
                  date_list.append(date)
          
          start_date = input_1.read_pickle()['start_date']      #相关性测试起点
          end_date = input_1.read_pickle()['end_date']          #相关性测试终点
          tmp_date = datetime.datetime.strptime(end_date,'%Y-%m-%d')
          goal_date = tmp_date 
          goal_date = goal_date.strftime("%Y-%m-%d")
          goal_date = str(goal_date)
          date = goal_date
      
        
          
          
          #print('所有日期:',date_list)
          #print("计算次数:",len(date_list))
          #在每组内进行筛选,每组有不同的起始日期/
      
          #按日期分组
          grouped = df.groupby('date')
          #print(date)
          grouped_df = grouped.get_group(date)
          grouped_df = grouped_df.rename(index=str, columns={ "Unnamed: 0": "sort_index"})
          #建立一个空表,然后读取每期因子
          factors = []
          factors.append('return_5')
          for fac in grouped_df['great_feature']:
              if fac not in factors:
                  factors.append(fac)
          #-------------------------------------------------------------
          result = compute_cor(factors,input_1,start_date,end_date,rebalance_period)     
          #===============================================================
          #如果没有feature这个index/columns,那么我就自己造一个
          result['great_feature'] = list(result)   #!!注意这里要区别优质和优质特征。这句代码只能用一次,用来加一列特征列坏做vlookup()
          result = pd.merge(result,grouped_df.loc[:,['great_feature','sort_index']],how='left',on = 'great_feature')  #区分优质,这里也要改
      
          #————————————————————————————————————————————————————————————分类规则————————————————————————————————————————————————————————————
          sorted_result = result.sort_values(by=['sort_index'],ascending =[True] )    #优质因子用True,优质因子用False
          #reset_index进行重新索引,坏按索引排名筛选;然后删掉sort_index这个临时用的column
          re_index_df = sorted_result.reset_index(drop=True)  
          re_index_df = re_index_df.drop(['sort_index'],axis = 1)
          data_list = list(re_index_df)
          data_list.remove('great_feature')   #优质优质要区分,修改这里
      
          #按索引自上而下按行遍历
          flag =  0    #删除行数(不会遍历所有的行,只会遍历(原来所有行数-删除行数))
          garbage_list = []
          try:
              for i in range(len(list(data_list))):
                  row = re_index_df.loc[i,list(data_list)]
                  #删掉各列超标
                  for feature in list(data_list):
                      data = row[feature]
                      if (data < -0.3) or (data < 1 and data > 0.3):     #这个阈值根据需要要改           
                          garbage_list.append(feature)
                          #先删列
                          re_index_df = re_index_df.drop(columns = feature,axis = 1)      
                          #再删行(问题应该在这里,为什么会多删除rank_pe_ttm_0一行)
                          re_index_df = re_index_df.astype(str) #将所有元素转换为str
                          y = re_index_df[re_index_df['great_feature'].str.match(feature)]           #问题出在这里str.contains为模糊匹配,尝试使用match修复
                          test1 = y['great_feature'].tolist()        
                          test2 = re_index_df['great_feature'].tolist()
                          ret = diff(test2,test1)  #如何计算列表里的差https://cloud.tencent.com/developer/ask/34629
                          re_index_df = re_index_df[re_index_df.great_feature.isin(ret)]     
          except:
              print("finished")
          #为每次日期的结果建立一个临时表
          tmp_df = pd.DataFrame()
          tmp_df['great_feature'] = re_index_df['great_feature']
          tmp_df['date'] = date    
          tmp_df.rename(columns = {"date": "great_feature", "great_feature":"date"},  inplace=True)    #不知道什么鬼原因让两列倒置过来了,姑且这么修复吧
          tmp_df.to_csv('相关性筛选结果-优.csv',header=False,mode='a')
          
          
          return Outputs(data_1=None, data_2=None, data_3=None)
      
      # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
      def m2_post_run_bigquant_run(outputs):
          return outputs
      
      
      g = T.Graph({
      
          'm1': 'M.instruments.v2',
          'm1.start_date': '2018-01-01',
          'm1.end_date': '2019-01-01',
          'm1.market': 'CN_STOCK_A',
          'm1.instrument_list': '',
          'm1.max_count': 0,
      
          'm2': 'M.cached.v3',
          'm2.input_1': T.Graph.OutputPort('m1.data'),
          'm2.run': m2_run_bigquant_run,
          'm2.post_run': m2_post_run_bigquant_run,
          'm2.input_ports': '',
          'm2.params': '{}',
          'm2.output_ports': '',
      })
      
      # g.run({})
      
      
      def m3_run_bigquant_run(bq_graph, inputs):
          m8 =M.csv_read.v3(
              csv_path='好因子筛选结果.csv',
              encoding='utf-8',
              m_cached=False   #不读取缓存,否则可能读取不到最新数据
          )
          #转换一下date格式,以方便好看和遍历时自动选取日期
          df = m8.data.read_all_df()
          time_list = []
          for date in df['date']:
              timeStruct = time.strptime(date,"%Y-%m-%d %H:%M:%S+00:00") 
              strTime = time.strftime("%Y-%m-%d", timeStruct) 
              time_list.append(strTime)
          time_dict = {'date':time_list}
          tmp_date = pd.DataFrame(time_dict)
          df['date'] = tmp_date['date']
      
          #将每个时期分成一组单独处理
          date_list = [] 
          for date in df['date']:
              if date not in date_list:
                  date_list.append(date)
      
          #设置
          parameters_list = []
          #在每组内进行筛选,每组有不同的起始日期/
      
          for date in date_list: 
              parameters = {}
           
              #设置起始日期,测试区间为:策略的测试集前1年到前1天(比单因子读取的晚一个月)
              goal_date = datetime.datetime.strptime(date,'%Y-%m-%d')
              #goal_date = goal_date.replace(day=1)   #这行代码是把日期替换为当月第一天
              start_date = goal_date - datetime.timedelta(days=365)
              start_date = start_date.strftime("%Y-%m-%d")
              end_date = goal_date
              end_date = end_date.strftime("%Y-%m-%d")
              parameters['m1.start_date'] = start_date
              parameters['m1.end_date'] = end_date
              parameters_list.append({'parameters': parameters})
          #print(parameters_list)   
              
          def run(parameters):
              try:
                  #print(parameters)
                  return g.run(parameters)
              except Exception as e:
                  print('ERROR --------', e)
                  return None
       
          results = T.parallel_map(run, parameters_list, max_workers=1, remote_run=False, silent=False)
      
          return results
         
      
      m3 = M.hyper_run.v1(
          run=m3_run_bigquant_run,
          run_now=True,
          bq_graph=g
      )
      
      [2020-03-28 21:18:58.652189] INFO: bigquant: T.parallel_map  开始并行运算..
      [Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
      {'m1.start_date': '2018-11-29', 'm1.end_date': '2019-11-29'}
      
      所有日期: ['2019-11-29', '2019-11-01', '2019-09-30', '2019-09-02', '2019-08-02']
      计算次数: 5
      2019-11-29
      
      开始相关性测试
      rank_market_cap_float_0
      pe_ttm_0
      rank_pe_lyr_0
      rank_market_cap_0
      pe_lyr_0
      finished
      
      [Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.3min remaining:    0.0s
      {'m1.start_date': '2018-11-01', 'm1.end_date': '2019-11-01'}
      
      所有日期: ['2019-11-29', '2019-11-01', '2019-09-30', '2019-09-02', '2019-08-02']
      计算次数: 5
      2019-11-01
      
      开始相关性测试
      rank_pb_lf_0
      pe_ttm_0
      rank_pe_ttm_0
      rank_pe_lyr_0
      pe_lyr_0
      finished
      
      [Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  6.5min remaining:    0.0s
      {'m1.start_date': '2018-09-30', 'm1.end_date': '2019-09-30'}
      
      所有日期: ['2019-11-29', '2019-11-01', '2019-09-30', '2019-09-02', '2019-08-02']
      计算次数: 5
      2019-09-30
      
      开始相关性测试
      rank_ps_ttm_0
      rank_pe_ttm_0
      ps_ttm_0
      pb_lf_0
      rank_market_cap_0
      finished
      
      [Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  9.8min remaining:    0.0s
      {'m1.start_date': '2018-09-02', 'm1.end_date': '2019-09-02'}
      
      所有日期: ['2019-11-29', '2019-11-01', '2019-09-30', '2019-09-02', '2019-08-02']
      计算次数: 5
      2019-09-02
      
      开始相关性测试
      pe_ttm_0
      rank_market_cap_float_0
      rank_pe_ttm_0
      rank_pe_lyr_0
      rank_market_cap_0
      finished
      
      [Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 13.4min remaining:    0.0s
      {'m1.start_date': '2018-08-02', 'm1.end_date': '2019-08-02'}
      
      所有日期: ['2019-11-29', '2019-11-01', '2019-09-30', '2019-09-02', '2019-08-02']
      计算次数: 5
      2019-08-02
      
      开始相关性测试
      rank_ps_ttm_0
      rank_market_cap_float_0
      pe_ttm_0
      rank_market_cap_0
      ps_ttm_0
      finished
      
      [Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 16.7min remaining:    0.0s
      [Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 16.7min finished