{"Description":"实验创建于2017/8/26","Summary":"","Graph":{"EdgesInternal":[{"DestinationInputPortId":"-532:input_1","SourceOutputPortId":"-509:data"},{"DestinationInputPortId":"-544:input_1","SourceOutputPortId":"-532:data_1"},{"DestinationInputPortId":"-655:input_1","SourceOutputPortId":"-544:data_1"},{"DestinationInputPortId":"-591:features","SourceOutputPortId":"-544:data_2"},{"DestinationInputPortId":"-601:model","SourceOutputPortId":"-591:model"},{"DestinationInputPortId":"-793:input_1","SourceOutputPortId":"-601:predictions"},{"DestinationInputPortId":"-591:training_ds","SourceOutputPortId":"-655:data_1"},{"DestinationInputPortId":"-601:data","SourceOutputPortId":"-655:data_2"},{"DestinationInputPortId":"-793:input_2","SourceOutputPortId":"-655:data_2"}],"ModuleNodes":[{"Id":"-509","ModuleId":"BigQuantSpace.csv_read.csv_read-v3","ModuleParameters":[{"Name":"csv_path","Value":"AAPL.csv","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"encoding","Value":"utf-8","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[],"OutputPortsInternal":[{"Name":"data","NodeId":"-509","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":4,"Comment":"","CommentCollapsed":true},{"Id":"-532","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, alpha=0.9):\n # 示例代码如下。在这里编写您的代码\n import numpy as np\n import pandas as pd\n df = input_1.read_df()\n data = df.ewm(alpha=alpha).mean()\n data_1 = DataSource.write_df(data)\n return Outputs(data_1=data_1)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{'alpha':0.9}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-532"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-532"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-532"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-532","OutputType":null},{"Name":"data_2","NodeId":"-532","OutputType":null},{"Name":"data_3","NodeId":"-532","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":5,"Comment":"指数平滑","CommentCollapsed":false},{"Id":"-544","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, window):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read_df()\n del(df['Date'])\n del(df['Adj Close'])\n \n # 获取预测指标\n def compute_prediction_int(df, n):\n pred = np.sign(df.shift(-n)['Close'] - df['Close'])\n pred = pred.iloc[:-n]\n return pred.astype(int)\n\n # 获取技术指标\n ## 计算Stochastic Oscillator\n def stochastic_oscillator_d(df, n):\n SOK = [0]\n for i in range(n, len(df)):\n high = df.loc[(i-n):i, 'High']\n low = df.loc[(i-n):i, 'Low']\n SOK.append((df.loc[i, 'Close'] - min(low)) / (max(high) - min(low)))\n SOK = pd.Series(SOK, name='SOK')\n df = df.join(SOK)\n return df\n\n ## 计算Williams %R\n def williams_R(df, n):\n R = [0]\n for i in range(n, len(df)):\n high = df.loc[(i-n):i, 'High']\n low = df.loc[(i-n):i, 'Low']\n R.append((max(high) - df.loc[i, 'Close']) / (max(high) - min(low))*(-100))\n williams_R = pd.Series(R, name='williams_R')\n df = df.join(williams_R)\n return df\n\n\n ## 计算变化率\n def rate_of_change(df, n):\n M = df['Close'].diff(n - 1)\n N = df['Close'].shift(n - 1)\n ROC = pd.Series(M / N, name='ROC_' + str(n))\n df = df.join(ROC)\n return df\n\n ## 计算RSI\n def relative_strength_index(df, n):\n i = 0\n UpI = [0]\n DoI = [0]\n while i + 1 <= df.index[-1]:\n UpMove = df.loc[i + 1, 'High'] - df.loc[i, 'High']\n DoMove = df.loc[i, 'Low'] - df.loc[i + 1, 'Low']\n if UpMove > DoMove and UpMove > 0:\n UpD = UpMove\n else:\n UpD = 0\n UpI.append(UpD)\n if DoMove > UpMove and DoMove > 0:\n DoD = DoMove\n else:\n DoD = 0\n DoI.append(DoD)\n i = i + 1\n UpI = pd.Series(UpI)\n DoI = pd.Series(DoI)\n PosDI = pd.Series(UpI.ewm(span=n, min_periods=n).mean())\n NegDI = pd.Series(DoI.ewm(span=n, min_periods=n).mean())\n RSI = pd.Series(PosDI / (PosDI + NegDI), name='RSI_' + str(n))\n df = df.join(RSI)\n return df\n\n ## 计算On Balance Volume\n def on_balance_volume(df, n):\n i = 0\n OBV = [0]\n while i < df.index[-1]:\n if df.loc[i + 1, 'Close'] - df.loc[i, 'Close'] > 0:\n OBV.append(df.loc[i + 1, 'Volume'])\n if df.loc[i + 1, 'Close'] - df.loc[i, 'Close'] == 0:\n OBV.append(0)\n if df.loc[i + 1, 'Close'] - df.loc[i, 'Close'] < 0:\n OBV.append(-df.loc[i + 1, 'Volume'])\n i = i + 1\n OBV = pd.Series(OBV)\n OBV_ma = pd.Series(OBV.rolling(n, min_periods=n).mean(), name='OBV_' + str(n))\n df = df.join(OBV_ma)\n return df\n\n ## 计算MACD\n def macd(df, n_fast, n_slow):\n EMAfast = pd.Series(df['Close'].ewm(span=n_fast, min_periods=n_slow).mean())\n EMAslow = pd.Series(df['Close'].ewm(span=n_slow, min_periods=n_slow).mean())\n MACD = pd.Series(EMAfast - EMAslow, name='MACD_' + str(n_fast) + '_' + str(n_slow))\n df = df.join(MACD)\n return df\n\n # 数据集准备\n def feature_extraction(data):\n data = relative_strength_index(data, n=14)\n data = stochastic_oscillator_d(data, n=14)\n data = rate_of_change(data, n=14)\n data = on_balance_volume(data, n=14)\n data = macd(data, 12, 26)\n data = williams_R(data, n = 14)\n\n del(data['Open'])\n del(data['High'])\n del(data['Low'])\n del(data['Volume'])\n\n return data\n\n def prepare_data(df, horizon):\n data = feature_extraction(df).dropna().iloc[:-horizon]\n data['label'] = compute_prediction_int(data, n=horizon)\n del(data['Close'])\n return data.dropna()\n \n data = prepare_data(df, horizon=window) \n features = [x for x in data.columns if x not in ['gain', 'label']]\n data_1 = DataSource.write_df(data)\n data_2 = DataSource.write_pickle(features)\n return Outputs(data_1=data_1, data_2=data_2)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{'window':10}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-544"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-544"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-544"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-544","OutputType":null},{"Name":"data_2","NodeId":"-544","OutputType":null},{"Name":"data_3","NodeId":"-544","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":10,"Comment":"特征提取","CommentCollapsed":false},{"Id":"-591","ModuleId":"BigQuantSpace.random_forest_train.random_forest_train-v2","ModuleParameters":[{"Name":"n_estimators","Value":"65","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_features","Value":"auto","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"max_depth","Value":30,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"min_samples_leaf","Value":200,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"n_jobs","Value":1,"ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"algo","Value":"classifier","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"training_ds","NodeId":"-591"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"features","NodeId":"-591"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"test_ds","NodeId":"-591"}],"OutputPortsInternal":[{"Name":"model","NodeId":"-591","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":2,"Comment":"","CommentCollapsed":true},{"Id":"-601","ModuleId":"BigQuantSpace.random_forest_predict.random_forest_predict-v2","ModuleParameters":[{"Name":"date_col","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"instrument_col","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"sort","Value":"False","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"model","NodeId":"-601"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"data","NodeId":"-601"}],"OutputPortsInternal":[{"Name":"predictions","NodeId":"-601","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":3,"Comment":"","CommentCollapsed":true},{"Id":"-655","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1):\n # 示例代码如下。在这里编写您的代码\n data = input_1.read_df()\n # 训练集划分\n train_size = 2*len(data) // 3\n train_df = data[:train_size]\n test_df = data[train_size:]\n\n print('len train', len(train_df))\n print('len test', len(test_df))\n columns = [k for k in test_df.columns if k!='label']\n data_1 = DataSource.write_df(train_df)\n data_2 = DataSource.write_df(test_df)\n \n return Outputs(data_1=data_1, data_2=data_2)\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-655"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-655"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-655"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-655","OutputType":null},{"Name":"data_2","NodeId":"-655","OutputType":null},{"Name":"data_3","NodeId":"-655","OutputType":null}],"UsePreviousResults":true,"moduleIdForCode":9,"Comment":"分离训练集与测试集","CommentCollapsed":false},{"Id":"-793","ModuleId":"BigQuantSpace.cached.cached-v3","ModuleParameters":[{"Name":"run","Value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2):\n # 示例代码如下。在这里编写您的代码\n from sklearn.ensemble import RandomForestClassifier\n from sklearn.metrics import f1_score, precision_score, confusion_matrix, recall_score, accuracy_score\n \n data = input_1.read()\n y_pred = data['pred_label']\n y_test = input_2.read_df()\n y_test = y_test['label']\n precision = precision_score(y_pred=y_pred, y_true=y_test)\n recall = recall_score(y_pred=y_pred, y_true=y_test)\n f1 = f1_score(y_pred=y_pred, y_true=y_test)\n accuracy = accuracy_score(y_pred=y_pred, y_true=y_test)\n print('precision: {0:1.2f}, recall: {1:1.2f}, f1: {2:1.2f}, accuracy: {3:1.2f}'.format(precision, recall, f1, accuracy))\n\n return Outputs()\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"post_run","Value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"input_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"params","Value":"{}","ValueType":"Literal","LinkedGlobalParameter":null},{"Name":"output_ports","Value":"","ValueType":"Literal","LinkedGlobalParameter":null}],"InputPortsInternal":[{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_1","NodeId":"-793"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_2","NodeId":"-793"},{"DataSourceId":null,"TrainedModelId":null,"TransformModuleId":null,"Name":"input_3","NodeId":"-793"}],"OutputPortsInternal":[{"Name":"data_1","NodeId":"-793","OutputType":null},{"Name":"data_2","NodeId":"-793","OutputType":null},{"Name":"data_3","NodeId":"-793","OutputType":null}],"UsePreviousResults":false,"moduleIdForCode":11,"Comment":"准确度","CommentCollapsed":false}],"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions><NodePosition Node='-509' Position='759.736328125,-62.3466796875,200,200'/><NodePosition Node='-532' Position='761.0400390625,55.521484375,200,200'/><NodePosition Node='-544' Position='756.171875,172.8310546875,200,200'/><NodePosition Node='-591' Position='584.833984375,480.8740234375,200,200'/><NodePosition Node='-601' Position='748.8115234375,577.3095703125,200,200'/><NodePosition Node='-655' Position='765.619140625,311.091796875,200,200'/><NodePosition Node='-793' Position='965.215576171875,680.3133544921875,200,200'/></NodePositions><NodeGroups /></DataV1>"},"IsDraft":true,"ParentExperimentId":null,"WebService":{"IsWebServiceExperiment":false,"Inputs":[],"Outputs":[],"Parameters":[{"Name":"交易日期","Value":"","ParameterDefinition":{"Name":"交易日期","FriendlyName":"交易日期","DefaultValue":"","ParameterType":"String","HasDefaultValue":true,"IsOptional":true,"ParameterRules":[],"HasRules":false,"MarkupType":0,"CredentialDescriptor":null}}],"WebServiceGroupId":null,"SerializedClientData":"<?xml version='1.0' encoding='utf-16'?><DataV1 xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><Meta /><NodePositions></NodePositions><NodeGroups /></DataV1>"},"DisableNodesUpdate":false,"Category":"user","Tags":[],"IsPartialRun":true}
[2019-03-30 16:00:18.854293] INFO: bigquant: csv_read.v3 开始运行..
[2019-03-30 16:00:18.865572] INFO: bigquant: 命中缓存
[2019-03-30 16:00:18.868511] INFO: bigquant: csv_read.v3 运行完成[0.014228s].
[2019-03-30 16:00:18.872226] INFO: bigquant: cached.v3 开始运行..
[2019-03-30 16:00:18.876757] INFO: bigquant: 命中缓存
[2019-03-30 16:00:18.878055] INFO: bigquant: cached.v3 运行完成[0.005824s].
[2019-03-30 16:00:18.888480] INFO: bigquant: cached.v3 开始运行..
[2019-03-30 16:00:18.894727] INFO: bigquant: 命中缓存
[2019-03-30 16:00:18.896114] INFO: bigquant: cached.v3 运行完成[0.007633s].
[2019-03-30 16:00:18.900465] INFO: bigquant: cached.v3 开始运行..
[2019-03-30 16:00:18.911851] INFO: bigquant: 命中缓存
[2019-03-30 16:00:18.913304] INFO: bigquant: cached.v3 运行完成[0.012836s].
[2019-03-30 16:00:18.915801] INFO: bigquant: random_forest_train.v2 开始运行..
[2019-03-30 16:00:18.921733] INFO: bigquant: 命中缓存
[2019-03-30 16:00:18.923045] INFO: bigquant: random_forest_train.v2 运行完成[0.007238s].
[2019-03-30 16:00:18.925282] INFO: bigquant: random_forest_predict.v2 开始运行..
[2019-03-30 16:00:18.929138] INFO: bigquant: 命中缓存
[2019-03-30 16:00:18.930312] INFO: bigquant: random_forest_predict.v2 运行完成[0.00503s].
[2019-03-30 16:00:18.932886] INFO: bigquant: cached.v3 开始运行..
[2019-03-30 16:00:18.954776] INFO: bigquant: cached.v3 运行完成[0.021873s].
precision: 0.75, recall: 0.75, f1: 0.75, accuracy: 0.70