风险作业


(lagerstroemia) #1
克隆策略
In [16]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from keras.layers import Input,Dense,LSTM,merge
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras import regularizers
from keras.layers import Dropout
import numpy as np
from keras.layers import Masking,Embedding
import talib
In [17]:
class conf:
    """Settings for the single-instrument LSTM experiment."""

    # Instrument that is modelled and traded.
    instrument = '000300.SHA'

    # Sample period; rows dated before split_date are used for training,
    # rows on or after it for out-of-sample prediction and backtesting.
    start_date = '2005-01-01'
    split_date = '2014-07-20'
    end_date = '2017-07-20'

    # Columns requested from the data source and fed to the network.
    fields = ['close', 'open', 'high', 'low', 'amount', 'volume']

    # Rows per input window.
    seq_len = 30

    # Mini-batch size passed to model.fit.
    batch = 100
In [18]:
# Load the configured fields for the instrument over the whole sample period.
data = D.history_data(conf.instrument, conf.start_date, conf.end_date, conf.fields)

# Label: close three rows ahead divided by the open one row ahead, minus 1.
data['return'] = data['close'].shift(-3) / data['open'].shift(-1) - 1

# Keep rows with a positive traded amount and drop rows containing NaN
# (this includes the trailing rows whose forward-looking label is undefined).
# Chaining the two steps yields a new frame instead of running an in-place
# dropna on a filtered slice of the original.
data = data[data.amount > 0].dropna()

# Dates of the out-of-sample rows; taken before the index is reset, so the
# Series keeps the pre-reset index labels.
datatime = data['date'][data.date >= conf.split_date]

# Clip the label to [-0.2, 0.2] and rescale it by 10. Series.clip is the
# vectorised equivalent of the former per-row nested np.where.
data['return'] = data['return'].clip(-0.2, 0.2) * 10
data.reset_index(drop=True, inplace=True)

# Plain float lists of the raw price/volume columns.
opening = data['open'].astype(float).tolist()
close = data['close'].astype(float).tolist()
low = data['low'].astype(float).tolist()
high = data['high'].astype(float).tolist()
volume = data['volume'].astype(float).tolist()

# Feature columns used to build the model inputs, and the training rows.
scaledata = data[conf.fields]
traindata = data[data.date < conf.split_date]

print("done")
done
In [19]:
# Feature columns: everything except the identifier, label and date columns.
cols = [c for c in traindata.columns if c not in ('instrument', 'return', 'date')]

# Work on an explicit copy so the z-scores are never written into a slice of
# `traindata` (avoids pandas' chained-assignment hazard).
df = traindata[cols].copy()

# Column-wise z-score using the population standard deviation (ddof=0).
df = (df - df.mean()) / df.std(ddof=0)

print(df.shape)

# Drop every column that contains a NaN (e.g. a zero-variance column).
# Only `how` is passed: the former call also passed thresh=None, and newer
# pandas releases reject `how` and `thresh` being supplied together.
df_new = df.dropna(axis=1, how='any')
print(df_new.shape)
                    
(2315, 6)
(2315, 6)
In [20]:
# Sign labels for the training rows: 1 where the return is strictly
# positive, -1 otherwise. The result keeps the dtype of the return column.
train_returns = np.array(traindata['return'])
y = np.where(train_returns > 0, 1, -1).astype(train_returns.dtype)

print(y.shape)
print(df_new.shape)
print(y)
(2315,)
(2315, 6)
[ 1.  1.  1. ..., -1.  1.  1.]
In [21]:
# Data preparation: each input is a window of conf.seq_len rows over the
# columns of `scaledata`, standardised per window with sklearn's `scale`.
def _build_windows(first_end, stop_end):
    """Return (inputs, targets) for window end rows first_end .. stop_end - 1.

    The window ending at row `end` covers rows end + 1 - conf.seq_len .. end
    of `scaledata`; its target is data['return'] at row `end`.
    """
    inputs, targets = [], []
    for end in range(first_end, stop_end):
        window = scaledata[end + 1 - conf.seq_len:end + 1]
        inputs.append(scale(window))
        targets.append(data['return'][end])
    return inputs, targets


n_train_rows = len(traindata)
train_input, train_output = _build_windows(conf.seq_len - 1, n_train_rows)
test_input, test_output = _build_windows(n_train_rows, len(data))

# The LSTM expects array inputs.
train_x = np.array(train_input)
train_y = np.array(train_output)
test_x = np.array(test_input)
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
(2286, 30, 6)
(2286,)
(730, 30, 6)
In [22]:
# Custom activation function
import tensorflow as tf


def atan(x):
    """Element-wise arctangent (tf.atan), used as a layer activation."""
    return tf.atan(x)


# Network: one LSTM layer followed by three Dense layers.
window_shape = (conf.seq_len, len(conf.fields))
lstm_input = Input(shape=window_shape, name='lstm_input')
lstm_layer = LSTM(128, activation=atan, dropout_W=0.2, dropout_U=0.1)
lstm_output = lstm_layer(lstm_input)
Dense_output_1 = Dense(64, activation='linear')(lstm_output)
Dense_output_2 = Dense(16, activation='linear')(Dense_output_1)
predictions = Dense(1, activation=atan)(Dense_output_2)

model = Model(input=lstm_input, output=predictions)

# Mean squared error is both the training loss and the reported metric.
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

model.fit(train_x, train_y, batch_size=conf.batch, nb_epoch=10, verbose=2)
Epoch 1/10
7s - loss: 0.1138 - mean_squared_error: 0.1138
Epoch 2/10
6s - loss: 0.1000 - mean_squared_error: 0.1000
Epoch 3/10
6s - loss: 0.1001 - mean_squared_error: 0.1001
Epoch 4/10
6s - loss: 0.0979 - mean_squared_error: 0.0979
Epoch 5/10
6s - loss: 0.0984 - mean_squared_error: 0.0984
Epoch 6/10
6s - loss: 0.0991 - mean_squared_error: 0.0991
Epoch 7/10
6s - loss: 0.0981 - mean_squared_error: 0.0981
Epoch 8/10
6s - loss: 0.0997 - mean_squared_error: 0.0997
Epoch 9/10
6s - loss: 0.0983 - mean_squared_error: 0.0983
Epoch 10/10
6s - loss: 0.0970 - mean_squared_error: 0.0970
Out[22]:
<keras.callbacks.History at 0x7fe7b747da20>
In [23]:
# Predict on the out-of-sample windows.
raw_predictions = model.predict(test_x)

# Reduce each prediction to a direction: 1 if strictly positive, else -1.
predictions = np.where(raw_predictions > 0, 1, -1).astype(raw_predictions.dtype)

# Flatten to one value per row. The former call
# np.reshape(predictions, len(predictions), 1) passed `1` as the `order`
# argument, which is not a valid memory-order specifier.
cc = predictions.reshape(-1)

# Signal table consumed by the backtests: one direction per out-of-sample date.
databacktest = pd.DataFrame()
databacktest['date'] = datatime
databacktest['direction'] = cc
In [24]:
# Backtest on the CSI 300 (000300.SHA)
def initialize(context):
    """Backtest initialisation hook for the LSTM-only strategy.

    Sets the commission model and attaches the prediction table and the
    split date to `context`.
    """
    # The engine ships default commission and slippage; override commission here.
    commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission)

    # LSTM direction signals (columns: date, direction).
    context.predictions = databacktest

    context.hold = conf.split_date

# Backtest engine: per-bar handler, invoked once per trading day.
def handle_data(context, data):
    """Trade conf.instrument on the LSTM direction signal.

    When flat and the signal for the current date is 1, target a 90%
    position and remember the date. When holding and the signal is -1,
    close the position once `session_distance` between the remembered buy
    date and the current date is at least 5.

    Dates without a prediction are skipped. The former code compared an
    empty selection against 1 inside `if`, which is an ambiguous truth value
    for an empty NumPy array.
    """
    min_hold_sessions = 5

    current_dt = data.current_dt.strftime('%Y-%m-%d')
    sid = context.symbol(conf.instrument)
    cur_position = context.portfolio.positions[sid].amount    # current holding

    # Look up today's signal in the table attached by initialize().
    signals = context.predictions
    on_date = (signals['date'] == current_dt).values
    todays = signals['direction'].values[on_date]
    if len(todays) == 0:
        # No prediction for this date: do nothing.
        return
    direction = todays[0]

    if cur_position == 0:
        if direction == 1:
            context.order_target_percent(sid, 0.9)
            context.date = current_dt
    elif direction == -1:
        held_sessions = context.trading_calendar.session_distance(
            pd.Timestamp(context.date), pd.Timestamp(current_dt))
        if held_sessions >= min_hold_sessions:
            context.order_target(sid, 0)

# Run the backtest engine for the LSTM-only strategy on conf.instrument,
# from the split date to the end date.
m8 = M.backtest.v5(
    instruments=conf.instrument,
    start_date=conf.split_date,
    end_date=conf.end_date,
    initialize=initialize,
    handle_data=handle_data,
    order_price_field_buy='open',       # buy orders use the open price
    order_price_field_sell='close',     # sell orders use the close price
    capital_base=10000, 
    benchmark='000300.SHA', 
    m_cached=False
)
[2018-05-27 15:36:42.399269] WARNING: bigquant: 此模块版本 M.backtest.v5 已不再维护。你仍然可以使用,但建议升级到最新版本:请更新到 backtest 最新版本
[2018-05-27 15:36:42.401092] INFO: bigquant: backtest.v5 开始运行..
[2018-05-27 15:36:42.623457] INFO: algo: set price type:backward_adjusted
[2018-05-27 15:36:45.225150] INFO: Performance: Simulated 733 trading days out of 733.
[2018-05-27 15:36:45.226284] INFO: Performance: first open: 2014-07-21 01:30:00+00:00
[2018-05-27 15:36:45.227219] INFO: Performance: last close: 2017-07-20 07:00:00+00:00
  • 收益率47.24%
  • 年化收益率14.22%
  • 基准收益率73.18%
  • 阿尔法-0.02
  • 贝塔0.71
  • 夏普比率0.44
  • 胜率0.586
  • 盈亏比1.131
  • 收益波动率22.03%
  • 信息比率-0.52
  • 最大回撤40.7%
[2018-05-27 15:36:48.362288] INFO: bigquant: backtest.v5 运行完成[5.961159s].
In [25]:
# Backtest combining the LSTM signal with StockRanker

# Basic configuration
class conf:
    """Settings for the StockRanker stock-selection pipeline."""

    start_date = '2005-01-01'
    split_date = '2014-07-20'
    end_date = '2017-07-20'
    # D.instruments: https://bigquant.com/docs/data_instruments.html
    instruments = D.instruments(start_date, end_date)

    # Labelling expressions for the machine-learning target.
    # Equivalent to min(max(holding-period return * 100, -20), 20) + 20
    # (M.fast_auto_labeler rounds the result to an integer afterwards).
    # The min/max clips the score to [-20, 20]; adding 20 makes it
    # non-negative, as StockRanker requires non-negative integer labels.
    label_expr = [
        'return * 100',
        'where(label > {0}, {0}, where(label < -{0}, -{0}, label)) + {0}'.format(20),
    ]
    # Holding period in days, used to compute `return` in label_expr.
    hold_days = 5

    # Features: https://bigquant.com/docs/data_features.html
    # Any feature can be built from an expression.
    features = [
        'close_5/close_0',  # 5-day return
        'close_10/close_0',  # 10-day return
        'close_20/close_0',  # 20-day return
        'avg_amount_0/avg_amount_5',  # today's / 5-day average amount
        'avg_amount_5/avg_amount_20',  # 5-day / 20-day average amount
        'rank_avg_amount_0/rank_avg_amount_5',  # rank of today's / 5-day average amount
        'rank_avg_amount_5/rank_avg_amount_10',  # rank of 5-day / 10-day average amount
        'rank_return_0',  # rank of today's return
        'rank_return_5',  # rank of 5-day return
        'rank_return_10',  # rank of 10-day return
        'rank_return_0/rank_return_5',  # today's / 5-day return rank
        'rank_return_5/rank_return_10',  # 5-day / 10-day return rank
        'pe_ttm_0',  # price/earnings ratio, TTM
    ]

# Label the data: score every row (sample); a higher score generally means better.
m1 = M.fast_auto_labeler.v5(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.end_date,
    label_expr=conf.label_expr, hold_days=conf.hold_days,
    benchmark='000300.SHA', sell_at='open', buy_at='open')
# Compute the feature data.
m2 = M.general_feature_extractor.v5(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.end_date,
    features=conf.features)
# Preprocessing: missing-value handling and normalisation;
# T.get_stock_ranker_default_transforms supplies the transforms for StockRanker.
m3 = M.transform.v2(
    data=m2.data, transforms=T.get_stock_ranker_default_transforms(),
    drop_null=True, astype='int32', except_columns=['date', 'instrument'],
    clip_lower=0, clip_upper=200000000)
# Join labels and features on (date, instrument).
m4 = M.join.v2(data1=m1.data, data2=m3.data, on=['date', 'instrument'], sort=True)

# Training set: rows dated before the split date.
m5_training = M.filter.v2(data=m4.data, expr='date < "%s"' % conf.split_date)
# Evaluation set: rows dated on or after the split date.
m5_evaluation = M.filter.v2(data=m4.data, expr='"%s" <= date' % conf.split_date)
# Train the StockRanker model.
m6 = M.stock_ranker_train.v2(training_ds=m5_training.data, features=conf.features)
# Predict on the evaluation set.
m7 = M.stock_ranker_predict.v2(model_id=m6.model_id, data=m5_evaluation.data)


## Backtesting: https://bigquant.com/docs/strategy_backtest.html
# Backtest engine: initialisation hook, executed once.
def initialize(context):
    """Set up commission, ranker predictions, position weights and buy dates."""
    # The engine ships default commission and slippage; override commission here.
    commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission)

    # Predictions arrive through `options`; read_df loads them as a DataFrame.
    context.ranker_prediction = context.options['ranker_prediction'].read_df()

    # Number of stocks to buy: the top 5 of the ranked prediction list.
    stock_count = 5
    # Per-stock weights that favour higher-ranked stocks,
    # e.g. [0.339160, 0.213986, 0.169580, ..]
    raw_weights = [1 / math.log(rank + 2) for rank in range(stock_count)]
    context.stock_weights = T.norm(raw_weights)

    # Maximum fraction of portfolio value a single stock may occupy.
    context.max_cash_per_instrument = 0.2

    # Buy date per held instrument, maintained by handle_data.
    context.date = {}

# Backtest engine: per-bar handler, invoked once per trading day.
def _lstm_direction(current_dt):
    """Return the LSTM direction stored in `databacktest` for current_dt, or None."""
    on_date = (databacktest.date == current_dt).values
    todays = databacktest['direction'].values[on_date]
    return todays[0] if len(todays) else None


def _sell_candidates(context, ranker_prediction, equities):
    """Held instruments without a pending sell order, in reversed prediction order."""
    sellable = ranker_prediction.instrument.apply(
        lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))
    return list(reversed(list(ranker_prediction.instrument[sellable])))


def handle_data(context, data):
    """Daily StockRanker rotation, timed by the LSTM direction signal.

    Steps:
      1. Work out the cash budgets for buying and selling.
      2. If the LSTM signal is -1, sell held instruments whose
         `session_distance` since their recorded buy date is at least
         hold_days. Independently, outside the staging period, sell held
         instruments in reversed prediction order until the sell budget
         is met.
      3. If the LSTM signal is 1, buy the leading instruments of today's
         prediction, weighted by context.stock_weights and capped per
         instrument.

    `context.date` maps instrument -> buy date and is rewritten at the end
    of every call. Dates without an LSTM prediction trigger neither LSTM rule.
    """
    current_dt = data.current_dt.strftime('%Y-%m-%d')
    hold_days = context.options['hold_days']

    # Today's ranker predictions.
    ranker_prediction = context.ranker_prediction[context.ranker_prediction.date == current_dt]

    # Explicit lookup instead of `if array == x`, which is ambiguous (and an
    # error in recent NumPy) when no row matches the date.
    lstm_direction = _lstm_direction(current_dt)

    # 1. Cash allocation.
    # The average holding time is hold_days and stocks are bought every day,
    # so each day is expected to use 1/hold_days of the capital. During the
    # first hold_days days an equal share is used; afterwards up to 1.5x the
    # equal share of the remaining cash.
    is_staging = context.trading_day_index < hold_days
    cash_avg = context.portfolio.portfolio_value / hold_days
    cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)
    cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)

    held_items = context.perf_tracker.position_tracker.positions.items()
    positions = {e.symbol: p.amount * p.last_sale_price for e, p in held_items}
    equities = {e.symbol: e for e, p in held_items}

    # Carry over the buy dates of instruments that are still held.
    buy_dates = {symbol: context.date[symbol] for symbol in equities if symbol in context.date}

    # 2a. LSTM-timed selling.
    if lstm_direction == -1:
        for instrument in _sell_candidates(context, ranker_prediction, equities):
            bought_on = buy_dates.get(instrument)
            if bought_on is None:
                # No recorded buy date: leave the position to the rank-based rule.
                continue
            held_sessions = context.trading_calendar.session_distance(
                pd.Timestamp(bought_on), pd.Timestamp(current_dt))
            if held_sessions >= hold_days:
                context.order_target(context.symbol(instrument), 0)

    # 2b. Rank-based selling. Candidates are recomputed so that instruments
    # already given a sell order by step 2a are excluded.
    if not is_staging and cash_for_sell > 0:
        for instrument in _sell_candidates(context, ranker_prediction, equities):
            context.order_target(context.symbol(instrument), 0)
            cash_for_sell -= positions[instrument]
            if cash_for_sell <= 0:
                break

    # 3. LSTM-timed buying of the leading predicted instruments.
    if lstm_direction == 1:
        buy_cash_weights = context.stock_weights
        buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])
        max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument
        for i, instrument in enumerate(buy_instruments):
            # Never let one instrument exceed its maximum share of the portfolio.
            headroom = max_cash_per_instrument - positions.get(instrument, 0)
            cash = min(cash_for_buy * buy_cash_weights[i], headroom)
            if cash > 0:
                context.order_value(context.symbol(instrument), cash)
                buy_dates[instrument] = current_dt

    context.date = buy_dates

# Run the backtest engine for the combined StockRanker + LSTM strategy.
m8 = M.backtest.v5(
    instruments=m7.instruments,
    start_date=m7.start_date,
    end_date=m7.end_date,
    initialize=initialize,
    handle_data=handle_data,
    order_price_field_buy='open',       # buy orders use the open price
    order_price_field_sell='close',     # sell orders use the close price
    capital_base=100000,               # initial capital
    benchmark='000300.SHA',             # benchmark for comparison; does not affect the backtest result
    # Pass the prediction data and parameters to the engine through `options`.
    options={'ranker_prediction': m7.predictions, 'hold_days': conf.hold_days},
    m_cached=False
)
[2018-05-27 15:36:49.166874] WARNING: bigquant: 此模块版本 M.fast_auto_labeler.v5 已不再维护。你仍然可以使用,但建议升级到最新版本:请更新到 fast_auto_labeler 最新版本
[2018-05-27 15:36:49.168974] INFO: bigquant: fast_auto_labeler.v5 开始运行..
[2018-05-27 15:36:49.172223] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.178161] INFO: bigquant: fast_auto_labeler.v5 运行完成[0.009151s].
[2018-05-27 15:36:49.188496] INFO: bigquant: general_feature_extractor.v5 开始运行..
[2018-05-27 15:36:49.194950] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.196074] INFO: bigquant: general_feature_extractor.v5 运行完成[0.007627s].
[2018-05-27 15:36:49.213097] INFO: bigquant: transform.v2 开始运行..
[2018-05-27 15:36:49.215932] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.216839] INFO: bigquant: transform.v2 运行完成[0.003796s].
[2018-05-27 15:36:49.224320] INFO: bigquant: join.v2 开始运行..
[2018-05-27 15:36:49.226682] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.227573] INFO: bigquant: join.v2 运行完成[0.003249s].
[2018-05-27 15:36:49.235156] INFO: bigquant: filter.v2 开始运行..
[2018-05-27 15:36:49.239841] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.243341] INFO: bigquant: filter.v2 运行完成[0.008186s].
[2018-05-27 15:36:49.252021] INFO: bigquant: filter.v2 开始运行..
[2018-05-27 15:36:49.255962] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.257244] INFO: bigquant: filter.v2 运行完成[0.00522s].
[2018-05-27 15:36:49.268080] WARNING: bigquant: 此模块版本 M.stock_ranker_train.v2 已不再维护。你仍然可以使用,但建议升级到最新版本:请更新到 stock_ranker_train 最新版本
[2018-05-27 15:36:49.269252] INFO: bigquant: stock_ranker_train.v2 开始运行..
[2018-05-27 15:36:49.272192] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.273132] INFO: bigquant: stock_ranker_train.v2 运行完成[0.003886s].
[2018-05-27 15:36:49.280185] INFO: bigquant: stock_ranker_predict.v2 开始运行..
[2018-05-27 15:36:49.292517] INFO: bigquant: 命中缓存
[2018-05-27 15:36:49.294648] INFO: bigquant: stock_ranker_predict.v2 运行完成[0.014438s].
[2018-05-27 15:36:49.312760] WARNING: bigquant: 此模块版本 M.backtest.v5 已不再维护。你仍然可以使用,但建议升级到最新版本:请更新到 backtest 最新版本
[2018-05-27 15:36:49.314199] INFO: bigquant: backtest.v5 开始运行..
[2018-05-27 15:36:52.760221] INFO: algo: set price type:backward_adjusted
[2018-05-27 15:37:16.279093] INFO: Blotter: 2014-07-29 cancel order Equity(2598 [002404.SZA]) 
[2018-05-27 15:38:14.965454] INFO: Performance: Simulated 727 trading days out of 727.
[2018-05-27 15:38:14.966603] INFO: Performance: first open: 2014-07-21 01:30:00+00:00
[2018-05-27 15:38:14.967397] INFO: Performance: last close: 2017-07-12 07:00:00+00:00
[注意] 有 38 笔卖出是在多天内完成的。当日卖出股票超过了当日股票交易的2.5%会出现这种情况。
  • 收益率206.15%
  • 年化收益率47.38%
  • 基准收益率69.07%
  • 阿尔法0.34
  • 贝塔0.57
  • 夏普比率1.49
  • 胜率0.623
  • 盈亏比0.88
  • 收益波动率28.76%
  • 信息比率1.02
  • 最大回撤43.0%
[2018-05-27 15:38:19.499263] INFO: bigquant: backtest.v5 运行完成[90.18494s].

(feynman0825) #2

稍微简单介绍下思想?