# 本代码为《监督式机器学习算法的应用:择时》一文的策略源代码,欢迎克隆
# 本代码由可视化策略环境自动生成 2018年1月30日 22:06
# 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
# Instrument/date configuration consumed by the training-side steps (m2, m3):
# one A-share code, 2015-01-01 through 2017-01-01.
m1 = M.instruments.v2(
    start_date='2015-01-01',
    end_date='2017-01-01',
    market='CN_STOCK_A',
    instrument_list="""600548.SHA
""",
    max_count=0,
)
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m2_run_bigquant_run(input_1, input_2, input_3):
    """Build the 0/1 ``bear_bull`` label column from the close-price history.

    Args:
        input_1: object whose ``read_pickle()`` returns a mapping with the
            keys ``start_date``, ``end_date`` and ``instruments``.
        input_2: unused.
        input_3: unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the DataSource written from the
        labelled frame (``close`` dropped); ``data_2`` and ``data_3`` are None.
    """
    # Deserialize the payload once rather than once per key.
    params = input_1.read_pickle()
    start_date = params['start_date']
    end_date = params['end_date']
    ins = params['instruments']

    raw_data = D.history_data(ins, start_date, end_date, ['close'])

    # Label is 1 when close is above the close 30 rows earlier, otherwise 0.
    # For the first 30 rows the shifted value is NaN, the comparison is False,
    # and the label is therefore 0.
    price_change = raw_data['close'] - raw_data['close'].shift(30)
    raw_data['bear_bull'] = np.where(price_change > 0, 1, 0)

    raw_data.fillna(0, inplace=True)
    raw_data = raw_data.drop('close', axis=1)
    ds = DataSource.write_df(raw_data)
    return Outputs(data_1=ds, data_2=None, data_3=None)
# Labelling step: runs m2_run_bigquant_run on the m1 instrument configuration.
m2 = M.cached.v3(
    input_1=m1.data,
    run=m2_run_bigquant_run,
)
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m3_run_bigquant_run(input_1, input_2, input_3):
    """Compute the training-set feature table.

    Features: ``close``, ``volume``, the 10- and 50-row rolling mean and
    standard deviation of ``close``, the row-over-row percentage return, and
    the 10- and 50-row rolling beta of that return against the return of the
    ``000300.SHA`` benchmark.

    Args:
        input_1: object whose ``read_pickle()`` returns a mapping with the
            keys ``start_date``, ``end_date`` and ``instruments``.
        input_2: unused.
        input_3: unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the DataSource written from the
        feature frame (feature columns plus ``date`` and ``instrument``);
        ``data_2`` and ``data_3`` are None.
    """
    # Deserialize the payload once rather than once per key.
    params = input_1.read_pickle()
    start_date = params['start_date']
    end_date = params['end_date']
    ins = params['instruments']
    print('训练集上:,开始时间:{0},结束时间:{1},证券代码:{2}'.format(start_date, end_date, ins))

    def rolling_beta(asset_ret, bench_ret, window):
        # Slope of the windowed regression asset_ret ~ a + b * bench_ret,
        # which equals cov(asset, bench) / var(bench). This replaces
        # pd.ols(..., window_type='rolling'), removed in pandas 0.20.
        covariance = asset_ret.rolling(window).cov(bench_ret)
        return covariance / bench_ret.rolling(window).var()

    # Feature computation.
    fields = ['close', 'volume']
    features_data = D.history_data(ins, start_date, end_date, fields)
    close = features_data['close']
    features_data['ma_10'] = close.rolling(10).mean()
    features_data['std_10'] = close.rolling(10).std()
    features_data['ma_50'] = close.rolling(50).mean()
    features_data['std_50'] = close.rolling(50).std()
    features_data['return'] = close.pct_change()

    # Beta is measured against the CSI 300 index (000300.SHA).
    benchmark = ['000300.SHA']
    benchmark_df = D.history_data(benchmark, fields=['close'],
                                  start_date=start_date, end_date=end_date)
    bench_ret = benchmark_df['close'].pct_change()
    # NOTE(review): instrument and benchmark rows are paired by frame index,
    # not by date -- confirm both frames cover the same trading days.
    features_data['beta_10'] = rolling_beta(features_data['return'], bench_ret, 10)
    features_data['beta_50'] = rolling_beta(features_data['return'], bench_ret, 50)

    # Warm-up rows of the rolling windows are NaN; they are zero-filled here.
    features_data.fillna(0, inplace=True)
    factor_list = ['close', 'volume', 'return', 'std_10', 'std_50',
                   'ma_10', 'ma_50', 'beta_10', 'beta_50']
    features_data = features_data[factor_list + ['date', 'instrument']]
    ds = DataSource.write_df(features_data)
    return Outputs(data_1=ds, data_2=None, data_3=None)
# Training feature step: runs m3_run_bigquant_run on the m1 configuration.
m3 = M.cached.v3(
    input_1=m1.data,
    run=m3_run_bigquant_run,
)
# Inner-join the training features (m3) with the labels (m2) on the
# 'date' and 'instrument' columns.
m4 = M.join.v3(
    data1=m3.data_1,
    data2=m2.data_1,
    on='date,instrument',
    how='inner',
    sort=False,
)
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m5_run_bigquant_run(input_1, input_2, input_3):
    """Turn the joined feature/label frame into standardized ``X`` and ``y`` arrays.

    Args:
        input_1: object whose ``read_df()`` returns a frame containing the
            nine factor columns and the ``bear_bull`` label column.
        input_2: unused.
        input_3: unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the DataSource written from the dict
        ``{'X': X, 'y': y}``; ``data_2`` and ``data_3`` are None.
    """
    from sklearn import preprocessing

    factor_list = ['close', 'volume', 'return', 'std_10', 'std_50',
                   'ma_10', 'ma_50', 'beta_10', 'beta_50']
    data = input_1.read_df()
    # NOTE(review): any remaining NaN becomes the sentinel -9999999 *before*
    # standardization, which would dominate that column's mean and variance.
    # Confirm the upstream frame is already NaN-free.
    data.fillna(-9999999, inplace=True)

    features_array = np.array(data[factor_list])
    # Standardize each feature column (sklearn.preprocessing.scale).
    X = preprocessing.scale(features_array)
    print('训练集特征的维度:', 'X shape', X.shape)
    print('训练集标注0-1分布统计: ', data['bear_bull'].value_counts())

    y = np.array(data['bear_bull'])
    print('训练集标注的维度', 'y shape', y.shape)

    ds = DataSource.write_pickle({'X': X, 'y': y})
    return Outputs(data_1=ds, data_2=None, data_3=None)
# Array-preparation step: runs m5_run_bigquant_run on the joined frame (m4).
m5 = M.cached.v3(
    input_1=m4.data,
    run=m5_run_bigquant_run,
    m_cached=False,
)
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m6_run_bigquant_run(input_1, input_2, input_3):
    """Fit a logistic-regression classifier and report its hold-out accuracy.

    Args:
        input_1: object whose ``read_pickle()`` returns a mapping with the
            feature matrix under ``'X'`` and the labels under ``'y'``.
        input_2: unused.
        input_3: unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the DataSource written from
        ``{'model': <fitted LogisticRegression>, 'score': <accuracy in %>}``;
        ``data_2`` and ``data_3`` are None.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20; prefer the
    # current module and fall back only on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.linear_model import LogisticRegression

    df = input_1.read_pickle()
    X = df['X']
    y = df['y']

    # 10% of the rows are held out. train_test_split shuffles by default and
    # no random_state is passed, so the split (and score) varies between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

    # Model training.
    logreg = LogisticRegression()
    logreg.fit(X_train, y_train)

    # Accuracy as a percentage, rounded to three decimals.
    logreg_score = float("{0:.3f}".format(logreg.score(X_test, y_test) * 100))
    print('验证集上准确率: ', logreg_score)

    model_result = {'model': logreg, 'score': logreg_score}
    ds = DataSource.write_pickle(model_result)
    return Outputs(data_1=ds, data_2=None, data_3=None)
# Training step: runs m6_run_bigquant_run on the arrays produced by m5.
m6 = M.cached.v3(
    input_1=m5.data_1,
    run=m6_run_bigquant_run,
    m_cached=False,
)
# Instrument/date configuration consumed by the test-side feature step (m8)
# and the backtest (m10): same code, 2017-01-02 through 2018-01-29.
m7 = M.instruments.v2(
    start_date='2017-01-02',
    end_date='2018-01-29',
    market='CN_STOCK_A',
    instrument_list="""600548.SHA
""",
    max_count=0,
)
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m8_run_bigquant_run(input_1, input_2, input_3):
    """Compute the test-set feature table.

    Produces the same feature columns as the training-side feature step:
    ``close``, ``volume``, 10- and 50-row rolling mean and standard deviation
    of ``close``, the row-over-row percentage return, and the 10- and 50-row
    rolling beta of that return against the ``000300.SHA`` benchmark return.

    Args:
        input_1: object whose ``read_pickle()`` returns a mapping with the
            keys ``start_date``, ``end_date`` and ``instruments``.
        input_2: unused.
        input_3: unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the DataSource written from the
        feature frame (feature columns plus ``date`` and ``instrument``);
        ``data_2`` and ``data_3`` are None.
    """
    # Deserialize the payload once rather than once per key.
    params = input_1.read_pickle()
    start_date = params['start_date']
    end_date = params['end_date']
    ins = params['instruments']
    print('测试集上:,开始时间:{0},结束时间:{1},证券代码:{2}'.format(start_date, end_date, ins))

    def rolling_beta(asset_ret, bench_ret, window):
        # Slope of the windowed regression asset_ret ~ a + b * bench_ret,
        # which equals cov(asset, bench) / var(bench). This replaces
        # pd.ols(..., window_type='rolling'), removed in pandas 0.20.
        covariance = asset_ret.rolling(window).cov(bench_ret)
        return covariance / bench_ret.rolling(window).var()

    # Test-set feature extraction.
    fields = ['close', 'volume']
    features_data = D.history_data(ins, start_date, end_date, fields)
    close = features_data['close']
    features_data['ma_10'] = close.rolling(10).mean()
    features_data['std_10'] = close.rolling(10).std()
    features_data['ma_50'] = close.rolling(50).mean()
    features_data['std_50'] = close.rolling(50).std()
    features_data['return'] = close.pct_change()

    benchmark = ['000300.SHA']
    benchmark_df = D.history_data(benchmark, fields=['close'],
                                  start_date=start_date, end_date=end_date)
    bench_ret = benchmark_df['close'].pct_change()
    # NOTE(review): instrument and benchmark rows are paired by frame index,
    # not by date -- confirm both frames cover the same trading days.
    features_data['beta_10'] = rolling_beta(features_data['return'], bench_ret, 10)
    features_data['beta_50'] = rolling_beta(features_data['return'], bench_ret, 50)

    # Warm-up rows of the rolling windows are NaN; they are zero-filled here.
    features_data.fillna(0, inplace=True)
    factor_list = ['close', 'volume', 'return', 'std_10', 'std_50',
                   'ma_10', 'ma_50', 'beta_10', 'beta_50']
    features_data = features_data[factor_list + ['date', 'instrument']]
    ds = DataSource.write_df(features_data)
    return Outputs(data_1=ds, data_2=None, data_3=None)
# Test feature step: runs m8_run_bigquant_run on the m7 configuration.
m8 = M.cached.v3(
    input_1=m7.data,
    run=m8_run_bigquant_run,
)
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m9_run_bigquant_run(input_1, input_2, input_3):
    """Predict the 0/1 signal for every row of the test feature table.

    Args:
        input_1: object whose ``read_pickle()`` returns a mapping holding the
            fitted classifier under ``'model'``.
        input_2: object whose ``read_df()`` returns a frame containing the
            nine factor columns.
        input_3: unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the DataSource written from the
        prediction array; ``data_2`` and ``data_3`` are None.
    """
    from sklearn import preprocessing

    factor_list = ['close', 'volume', 'return', 'std_10', 'std_50',
                   'ma_10', 'ma_50', 'beta_10', 'beta_50']
    df = input_2.read_df()
    # The features are standardized with this frame's own statistics.
    # NOTE(review): the training features are standardized separately (m5),
    # so the two sets do not share one scale -- confirm this is intended.
    X = preprocessing.scale(np.array(df[factor_list]))

    # Model prediction.
    model = input_1.read_pickle()['model']
    result = model.predict(X)
    print('预测完成!', '预测结果为:', result)
    return Outputs(data_1=DataSource.write_pickle(result), data_2=None, data_3=None)
# Prediction step: the trained model (m6) applied to the test features (m8).
m9 = M.cached.v3(
    input_1=m6.data_1,
    input_2=m8.data_1,
    run=m9_run_bigquant_run,
    m_cached=False,
)
# 回测引擎:每日数据处理函数,每天执行一次
def m10_handle_data_bigquant_run(context, data):
    """Trade the first instrument according to the signal for the current bar.

    ``context.count`` indexes into ``context.signal`` and is advanced by one
    on every call. When flat and the signal is 1 the position is set to 100%
    of the portfolio; when holding and the signal is 0 it is set to 0%.
    Orders are only placed if ``data.can_trade`` allows it.

    Args:
        context: backtest context carrying ``signal``, ``count``,
            ``instruments``, ``portfolio``, ``symbol`` and
            ``order_target_percent``.
        data: per-bar data object providing ``can_trade`` and ``current_dt``.
    """
    # Guard against running past the end of the prediction array (more bars
    # than predictions): without a signal, leave the position untouched.
    if context.count >= len(context.signal):
        context.count += 1
        return

    current_signal = context.signal[context.count]
    k = context.instruments[0]
    sid = context.symbol(k)

    # Current holding of the instrument.
    cur_position = context.portfolio.positions[sid].amount

    # The two branches are mutually exclusive (flat vs. holding).
    if cur_position == 0 and current_signal == 1 and data.can_trade(sid):
        context.order_target_percent(sid, 1)
        print(data.current_dt, '买入')
    elif cur_position > 0 and current_signal == 0 and data.can_trade(sid):
        context.order_target_percent(sid, 0)
        print(data.current_dt, '卖出')

    context.count += 1
# 回测引擎:准备数据,只执行一次
def m10_prepare_bigquant_run(context):
    """Data-preparation hook for the backtest; intentionally does nothing.

    Args:
        context: backtest context (unused).
    """
    return None
# 回测引擎:初始化函数,只执行一次
def m10_initialize_bigquant_run(context):
    """Load the prediction array into the context and reset the bar counter.

    Args:
        context: backtest context; ``context.options['data']`` must expose
            ``read_pickle()``. Its result is stored as ``context.signal`` and
            ``context.count`` is set to 0.
    """
    signal_source = context.options['data']
    context.signal = signal_source.read_pickle()
    # Index of the next entry of context.signal to consume.
    context.count = 0
# 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。
def m10_before_trading_start_bigquant_run(context, data):
    """Pre-open hook for the backtest; intentionally does nothing.

    Args:
        context: backtest context (unused).
        data: per-bar data object (unused).
    """
    return None
# Backtest: trades the m7 instrument on the predictions from m9, using the
# four m10_* callbacks defined in this file.
m10 = M.trade.v3(
    instruments=m7.data,
    options_data=m9.data_1,
    start_date='',
    end_date='',
    handle_data=m10_handle_data_bigquant_run,
    prepare=m10_prepare_bigquant_run,
    initialize=m10_initialize_bigquant_run,
    before_trading_start=m10_before_trading_start_bigquant_run,
    volume_limit=0.025,
    order_price_field_buy='open',
    order_price_field_sell='open',
    capital_base=100000,
    benchmark='000300.SHA',
    auto_cancel_non_tradable_orders=True,
    data_frequency='daily',
    price_type='后复权',
    plot_charts=True,
    backtest_only=False,
    amount_integer=False,
)