编写策略常用代码集锦

策略分享
标签: #<Tag:0x00007fcf60757f88>

(iQuant) #1

股票池中去除创业板股票代码
# Fetch the instrument codes listed for the single day 2017-09-07.
all_ins = D.instruments(start_date='2017-09-07', end_date='2017-09-07')
# Drop ChiNext (Growth Enterprise Market) stocks; their codes generally start with '3'.
ins = [k for k in all_ins if k[0] != '3']  


获取指数成分股股票代码
# Reference date used for both the instrument list and the history query.
date='2017-05-23'
# One day of history for every instrument, carrying only the in_csi300 field.
df = D.history_data(D.instruments(date, date),date,date,fields=['in_csi300'])
# CSI 300 constituents are the rows whose in_csi300 equals 1. Per the original note,
# CSI 500 uses in_csi500 and CSI 800 uses in_csi800 (see the platform documentation).
hs300 = set(df[df['in_csi300']==1]['instrument'])
 

列举所有模块
# List all modules (per the section heading).
M.m_modules


查看模块
# Inspect a module
M.hellomodule
# Latest version of the module
M.hellomodule.m_latest_version
# A specific version of the module
M.hellomodule.v1
# Module documentation
print(M.hellomodule.v1.m_doc)
M.hellomodule.v1.m_help()
# Module source code (some modules have their source published)
print(M.hellomodule.v1.m_sourcecode)


使用模块
def foo1(name):
    """Print a greeting for ``name`` and return a small frame wrapped in ``Outputs``.

    The frame has one column ``a`` holding 0..9; it is written with
    ``DataSource.write_df`` and exposed as the ``bar`` field of the result.
    """
    print('hello, %s' % name)
    frame = pd.DataFrame({'a': range(0, 10)})
    return Outputs(bar=DataSource.write_df(frame))

# On the first run, foo1 is actually executed.
# NOTE(review): presumably later runs with the same arguments reuse the cached result — confirm.
m1 = M.cached.v2(run=foo1, kwargs=dict(name='quant'))


使用全局变量
# Module-level value that foo2 reads; it is also handed to M.cached through m_deps.
bar = '1234'

def foo2(name):
    """Print a greeting built from ``name`` and the global ``bar``; return a small frame.

    The frame has one column ``a`` holding 0..9; it is written with
    ``DataSource.write_df`` and exposed as the ``bar`` field of the result.
    """
    greeting = 'hello, %s, %s' % (name, bar)
    print(greeting)
    frame = pd.DataFrame({'a': range(0, 10)})
    return Outputs(bar=DataSource.write_df(frame))

# On the first run, foo2 is actually executed. The global it reads is declared via m_deps.
# NOTE(review): presumably m_deps makes the cache depend on the value of bar — confirm.
m1 = M.cached.v2(run=foo2, kwargs=dict(name='quant'), m_deps=[bar])


禁用缓存,m_cached=False
# Caching is disabled with m_cached=False (per the section heading).
# NOTE(review): presumably foo1 therefore runs on every call, not only the first — confirm.
m1 = M.cached.v2(run=foo1, kwargs=dict(name='quant'), m_cached=False)


获取指定时间段内某个交易市场的交易日历
# Trading calendar of market 'CN' between 2015-01-01 and 2016-01-01.
D.trading_days(market='CN',start_date='2015-01-01', end_date='2016-01-01')


获取指定时间段内有效的股票代码
# Stock codes valid in the period, restricted to market 'CN_STOCK_A'.
D.instruments(start_date='2015-01-01', end_date='2016-01-01', market='CN_STOCK_A')


获取指定时间段内的基金代码
# Fund codes for the period: same call with market='CN_FUND'.
D.instruments(start_date='2015-01-01', end_date='2016-01-01', market='CN_FUND')


获取交易数据
# Open/high/low/close fields for 000001.SZA from 2017-01-01 to 2017-01-07.
D.history_data(instruments=['000001.SZA'], start_date='2017-01-01', end_date='2017-01-07',
               fields=['open', 'high', 'low', 'close'])
          

获取指定行业的数据
instruments = D.instruments()   # full stock list
df = D.history_data(instruments, start_date='2017-05-19', end_date='2017-05-19',
                    fields=['industry_sw_level1'])  # one day of history
df[df['industry_sw_level1']== 710000]   # rows in the computer industry (code 710000 per the original note)


获取指数成分股
instruments = D.instruments()   # stock list
df = D.history_data(instruments, start_date='2017-05-19', end_date='2017-05-19',
                    fields=['in_csi300'])   # one day of history
list(df[df['in_csi300']== 1]['instrument']) # CSI 300 constituents


获取板块名称列表
instruments = D.instruments()   # stock list
df = D.history_data(instruments, start_date='2017-05-19', end_date='2017-05-19',
                    fields=['concept']) # one day of history
# Each non-empty 'concept' cell is split on ';'; the set collects every distinct name for the day.
set([y for x in df['concept'] if x for y in x.split(';')])


获取指定板块数据
instruments = D.instruments()   # stock list
df = D.history_data(instruments, start_date='2017-05-19', end_date='2017-05-19',
                    fields=['concept'])     # one day of history
# Keep rows whose ';'-separated concept list contains the smart-home concept;
# empty/falsy cells are treated as not matching.
df[df['concept'].apply(lambda x:'智能家居' in x.split(';')
                       if x else False)]


获取沪深300的数据:
# Price, volume and amount fields for 000300.SHA (CSI 300 per the heading), 2017-01-01 to 2017-01-07.
D.history_data(instruments=['000300.SHA'], start_date='2017-01-01', end_date='2017-01-07',
               fields=['open', 'high', 'low', 'close', 'volume', 'amount'])
               

获取财报数据
# Financial-statement fields (publish date, quarter, EPS, ROE) for 000001.SZA, 2010-01-01 to 2017-01-07.
D.financial_statements(instruments=['000001.SZA'], start_date='2010-01-01', end_date='2017-01-07',
                       fields=['date', 'instrument', 'fs_publish_date', 'fs_quarter', 'fs_eps', 'fs_roe'])
                       

获取宏观经济数据
# Macro-economic series 'gdp' and 'ppi' between 2015-01-01 and 2017-07-01.
D.macro_data(start_date='2015-01-01', end_date='2017-07-01', fields=['gdp','ppi'])


获取特征数据
# Feature data: the 'close_1' feature for 000001.SZA over the first week of 2017.
D.features(instruments=['000001.SZA'], start_date='2017-01-01', end_date='2017-01-07', fields=['close_1'])


绘制基于Highcharts的可交互图表
# Interactive Highcharts-based line chart (per the heading) of the 'close' column.
# NOTE(review): assumes df already holds a 'close' column — confirm df comes from a price query.
T.plot(df[['close']], title='收盘价', chart_type='line')


写入一个DataFrame
def foo1():
    """Write a one-column frame (``abc`` = 0..9) and return it as ``Outputs(data=...)``."""
    frame = pd.DataFrame({'abc': range(0, 10)})
    return Outputs(data=DataSource.write_df(frame))

# Run through M.cached so the resulting DataSource can be reused.
m1 = M.cached.v2(run=foo1)
print(m1)


读取一个DataFrame
# Read the stored DataFrame back from the 'data' output and show its head.
m1.data.read_df().head()


写入多个DataFrame
def foo2():
    """Write two DataFrames into a single DataSource and return it as ``Outputs(data=...)``.

    The frames are stored under the keys ``df_a`` (column ``abc`` = 0..9) and
    ``df_b`` (column ``def`` = 0..4).
    """
    ds = DataSource()

    df_store = ds.open_df_store()
    try:
        df_store['df_a'] = pd.DataFrame({'abc': range(0, 10)})
        df_store['df_b'] = pd.DataFrame({'def': range(0, 5)})
    finally:
        # Always release the store, even when one of the writes fails.
        ds.close_df_store()

    return Outputs(data=ds)

# Run through M.cached so the resulting DataSource can be reused.
m2 = M.cached.v2(run=foo2)
print(m2)


读取多个DataFrame
# Open the store, print both frames by key, and always close the store afterwards.
df_store = m2.data.open_df_store()
try:
    print(df_store['df_a'])
    print(df_store['df_b'])
finally:
    # Release the store even if a key is missing or printing fails.
    m2.data.close_df_store()

遍历读取多个DataFrame
# iter_df() yields (key, DataFrame) pairs for each frame stored in the DataSource.
for key, df in m2.data.iter_df():
    print(key)
    print(df)


写入任意数据
def foo3():
    """Write the text ``'Hello, BigQuant'`` into a new DataSource and return it.

    The DataSource is returned as the ``data`` field of ``Outputs``.
    """
    ds = DataSource()
    writer = ds.open_file()
    try:
        writer.write('Hello, BigQuant')
    finally:
        # Always release the file handle, even when the write fails.
        ds.close_file()

    return Outputs(data=ds)

# Run through M.cached so the resulting DataSource can be reused.
m3 = M.cached.v2(run=foo3)
print(m3)


读取任意数据
# Read the stored content back, always closing the file before printing it.
reader = m3.data.open_file()
try:
    text = reader.read()
finally:
    # Release the file handle even if the read fails.
    m3.data.close_file()
print(text)


数据标注
# Check the latest version and its definition
M.fast_auto_labeler.m_latest_version

# Classification labels
label_expr = [
    # Multiply the return by 100
    'return * 100',
    # where() clamps the score to [-20, 20]; adding 20 shifts it to [0, 40]
    'where(label > {0}, {0}, where(label < -{0}, -{0}, label)) + {0}'.format(20)
]
m = M.fast_auto_labeler.v6(
    instruments=['000001.SZA', '600519.SHA'], start_date='2017-01-01', end_date='2017-02-01',
    label_expr=label_expr, hold_days=5,
    benchmark='000300.SHA', sell_at='open', buy_at='open')

# Inspect the labeling result
m.label_counts # label counts

m.data.read_df().head() # the 'label' column (integer) holds the final score/label

m.plot_label_counts() # histogram of the label counts


回归标注
label_expr = [
    # Multiply the return by 10
    'return * 10',
    # where() clamps the score to [-1, 1]; adding 1 shifts it to [0, 2]
    'where(label > {0}, {0}, where(label < -{0}, -{0}, label)) + {0}'.format(1)
]
# Same labeler call as for classification, but over 2012-2017 and with is_regression=True.
m = M.fast_auto_labeler.v6(
    instruments=['000001.SZA', '600519.SHA'], start_date='2012-01-01', end_date='2017-02-01',
    label_expr=label_expr, hold_days=5,
    benchmark='000300.SHA', sell_at='open', buy_at='open', is_regression=True)
    

高级数据标注
# Label expressions; each one operates on the 'label' produced by the previous one.
conf.label_expr = [
    # Return: close 5 days ahead (sell price) over tomorrow's open (buy price),
    # minus the same ratio computed on the benchmark
    'shift(close, -5) / shift(open, -1) - shift(benchmark_close, -5) / shift(benchmark_open, -1)',
    # Outlier handling: clip at the 1% and 99% quantiles
    'clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))',
    # Map the score to classes; 20 classes are used here
    'all_wbins(label, 20)',
    # Filter out days where next-day high equals low (limit-up with no price range): the label
    # becomes NaN, and NaN labels are ignored by later processing and training
    'where(shift(high, -1) == shift(low, -1), NaN, label)'
]

# Label the training period (conf.start_date to conf.split_date) against the 000300.SHA benchmark.
m1 = M.advanced_auto_labeler.v1(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.split_date,
    label_expr=conf.label_expr, benchmark='000300.SHA')


基础特征(因子)抽取
# Check the latest version and its definition
M.general_feature_extractor.m_latest_version

# Extract an explicit list of features
m = M.general_feature_extractor.v5(
    instruments=['000001.SZA', '600519.SHA'],
    start_date='2017-01-01', end_date='2017-02-01',
    features=['close_5/close_0', 'close_10/close_0', 'close_20/close_0'])

# Show the result
m.data.read_df().head()

# Extract the feature data required by a given model (identified by model_id)
m = M.general_feature_extractor.v5(
    instruments=['000001.SZA', '600519.SHA'],
    start_date='2017-01-01', end_date='2017-02-01',
    model_id=m5.model_id)    # m5 = M.stock_ranker_train.v2()


衍生特征提取
# NOTE(review): this local list is never used below; the calls read conf.features instead.
# Presumably it is meant to be the value of conf.features — confirm.
features = [
    'return_5',  # 5-day return
    'return_10',  # 10-day return
    'return_20',  # 20-day return
    'avg_amount_0/avg_amount_5',  # today's / 5-day average turnover
    'avg_amount_5/avg_amount_20',  # 5-day / 20-day average turnover
]

# Extract the base features, e.g. return_5, return_10, ..
m2 = M.general_feature_extractor.v5(
    instruments=conf.instruments,
    start_date=conf.start_date, end_date=conf.split_date,
    features=conf.features)
# Compute the derived features, e.g. avg_amount_0/avg_amount_5 ..
m2_1 = M.derived_feature_extractor.v1(data=m2.data, features=conf.features)
# Compute one more derived feature, e.g. rank(return_10 / return_20).
# NOTE(review): this starts again from m2.data, not m2_1.data, so the result does not
# include the derived columns of m2_1 — confirm that is intended.
m2_2 = M.derived_feature_extractor.v1(
    data=m2.data,
    features=['rank(return_10 / return_20)'])


用户自定义特征抽取
# Check the latest version and its definition
M.user_feature_extractor.m_latest_version

# To avoid clashing with names in the factor library, prefix custom factor names with 'u_'
m1 = M.user_feature_extractor.v1(
    instruments=['000001.SZA', '600519.SHA'], start_date='2017-01-01', end_date='2017-02-01',
    history_data_fields=['close', 'open'], look_back_days=30,
    # Per-instrument features: 5-row rolling mean of close, and open over the previous row's close
    features_by_instrument={
        'u_ma5':lambda x:x.close.rolling(5).mean(),
        'u_opening_gap':lambda x:x.open/x.shift(1).close,
    },
    # Per-date features: percentile rank of the opening gap
    features_by_date={
        'u_rank_opening_gap':lambda x:x.u_opening_gap.rank(pct=True)
    }
)

# Show the result
m1.data.read_df().head()

# To feed factors into StockRanker their values must be non-negative integers.
m2 = M.transform.v2(
    data=m1.data,
    # StockRanker's default transforms map features into a non-negative integer range,
    # because StockRanker requires non-negative integer inputs; the custom factors are
    # scaled by 1000 and every other column ('.*') gets no transform
    transforms=T.get_stock_ranker_default_transforms()+
    [
        ('u_opening_gap',lambda x:x*1000),
        ('u_rank_opening_gap',lambda x:x*1000),
        ('.*',None)
    ],
    drop_null=True, # missing data: drop any row that has an empty column
    astype='int32', # data type conversion
    except_columns=['date', 'instrument'], # columns to skip, left unprocessed
    # Clip the final data so the inputs fall into this range
    clip_lower=0, clip_upper=200000000)

# m2.data.read_df().head()


数据变换
# Check the latest version and its definition
M.transform.m_latest_version

# Data transformation: extract features first, then transform them
m1 = M.general_feature_extractor.v5(
    instruments=['000001.SZA', '600519.SHA'],
    start_date='2017-01-01', end_date='2017-02-01',
    features=['close_5/close_0', 'close_10/close_0', 'close_20/close_0'])

m2 = M.transform.v2(
    data=m1.data,
    # StockRanker's default transforms map features into a non-negative integer range,
    # because StockRanker requires non-negative integer inputs
    transforms=T.get_stock_ranker_default_transforms(),
    drop_null=True, # missing data: drop any row that has an empty column
    astype='int32', # data type conversion
    except_columns=['date', 'instrument'], # columns to skip, left unprocessed
    # Clip the final data so the inputs fall into this range
    clip_lower=0, clip_upper=200000000)

# Show the result
m2.data.read_df().head()


数据连接
# Check the latest version and its definition
M.join.m_latest_version

# Data join: build two small test frames, then join them on column 'a'
def test_data():
    """Build two small frames for the join demo and return them as ``Outputs(d1=..., d2=...)``.

    ``d1`` has columns ``a`` and ``b`` (both 0..9); ``d2`` has column ``a`` with the
    even numbers 0..18.
    """
    left_frame = pd.DataFrame({'a': range(0, 10), 'b': range(0, 10)})
    left = DataSource.write_df(left_frame)
    right_frame = pd.DataFrame({'a': range(0, 20, 2)})
    right = DataSource.write_df(right_frame)
    return Outputs(d1=left, d2=right)
# Produce the two test DataSources, outer-join them on column 'a' (sorted), and show the result.
m1 = M.cached.v2(run=test_data)
m2 = M.join.v2(data1=m1.d1, data2=m1.d2, how='outer', on=['a'], sort=True)
m2.data.read_df()


数据过滤
# Check the latest version and its definition
M.filter.m_latest_version

# Data filtering: first produce some history data to filter
def gen_history_data(instruments, start_date, end_date):
    """Load the ``close`` and ``open`` history fields and return them as ``Outputs(data=...)``.

    Args:
        instruments: Instrument codes passed through to ``D.history_data``.
        start_date: Start of the date range.
        end_date: End of the date range.
    """
    wanted_fields = ['close', 'open']
    history = D.history_data(instruments, start_date, end_date, wanted_fields)
    stored = DataSource.write_df(history)
    return Outputs(data=stored)

# Produce the history data for two instruments over 2017-05-01 .. 2017-05-20.
m1 = M.cached.v2(run=gen_history_data, kwargs={
    'instruments': ['000001.SZA', '600000.SHA'],
    'start_date': '2017-05-01',
    'end_date': '2017-05-20'
})

# Filter by date (both bounds are exclusive)
m2 = M.filter.v2(data=m1.data, expr='"2017-05-08" < date < "2017-05-10"')
m2.data.read_df().head()

# Filter by price
m3 = M.filter.v2(data=m1.data, expr='open > 110 and close < 600')
m3.data.read_df().head()


数据增加列
# Check the latest version and its definition
M.add_columns.m_latest_version

# Compute feature expressions as new columns
features = ['close_5/close_0', 'close_10/close_0', 'close_20/close_0']
m1 = M.general_feature_extractor.v5(
    instruments=['000001.SZA', '600519.SHA'],
    start_date='2017-01-01', end_date='2017-02-01',
    features=features)

# The same expression list is passed as eval_list to add the computed columns.
m2 = M.add_columns.v1(data=m1.data, eval_list=features)

# Show the result
m2.data.read_df().head()


股票排序学习模型
# 基础参数配置
class conf:
    # Configuration namespace for the StockRanker example; attributes are read as conf.<name>.
    start_date = '2010-01-01'
    end_date='2017-01-01'
    # Data before split_date is used for training, data after it for evaluation
    split_date = '2015-01-01'
    # D.instruments: https://bigquant.com/docs/data_instruments.html
    instruments = D.instruments(start_date, split_date)

    # Labeling expressions for the machine-learning target.
    # Equivalent to min(max(holding-period return * 100, -20), 20) + 20
    # (M.fast_auto_labeler rounds to an integer afterwards).
    # The max/min clamps the score to [-20, 20]; adding 20 makes it non-negative
    # (StockRanker requires non-negative integer labels).
    label_expr = ['return * 100', 'where(label > {0}, {0}, where(label < -{0}, -{0}, label)) + {0}'.format(20)]
    # Holding period in days, used to compute the 'return' value in label_expr
    hold_days = 5

    # Features: https://bigquant.com/docs/data_features.html; any feature can be built from expressions
    features = [
        'close_5/close_0',  # 5-day return
        'close_10/close_0',  # 10-day return
        'close_20/close_0',  # 20-day return
        'avg_amount_0/avg_amount_5',  # today's / 5-day average turnover
        'avg_amount_5/avg_amount_20',  # 5-day / 20-day average turnover
        'rank_avg_amount_0/rank_avg_amount_5',  # today's / 5-day average turnover rank
        'rank_avg_amount_5/rank_avg_amount_10',  # 5-day / 10-day average turnover rank
        'rank_return_0',  # today's return rank
        'rank_return_5',  # 5-day return rank
        'rank_return_10',  # 10-day return rank
        'rank_return_0/rank_return_5',  # today's / 5-day return rank
        'rank_return_5/rank_return_10',  # 5-day / 10-day return rank
        'pe_ttm_0',  # price-to-earnings ratio (TTM)
    ]

# Label the data: score every row (sample); in general a higher score is better
m1 = M.fast_auto_labeler.v5(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.split_date,
    label_expr=conf.label_expr, hold_days=conf.hold_days,
    benchmark='000300.SHA', sell_at='open', buy_at='open')
# Compute the feature data
m2 = M.general_feature_extractor.v5(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.split_date,
    features=conf.features)
# Pre-processing: missing-data handling and normalization;
# T.get_stock_ranker_default_transforms prepares the data for the StockRanker model
m3 = M.transform.v2(
    data=m2.data, transforms=T.get_stock_ranker_default_transforms(),
    drop_null=True, astype='int32', except_columns=['date', 'instrument'],
    clip_lower=0, clip_upper=200000000)
# Merge the labels with the feature data
m4 = M.join.v2(data1=m1.data, data2=m3.data, on=['date', 'instrument'], sort=True)
# Train the StockRanker model
m5 = M.stock_ranker_train.v2(training_ds=m4.data, features=conf.features)


## 量化回测 https://bigquant.com/docs/strategy_backtest.html
# 回测引擎:初始化函数,只执行一次
def initialize(context):
    """Backtest-engine initialization hook (runs once): build predictions and set up sizing.

    Extracts features for the instruments between ``context.start_date`` and
    ``context.end_date``, applies the StockRanker default transforms, predicts with
    the model named by ``context.options['model_id']`` and stores the predictions
    and instrument list on ``context``. It then sets the commission and the
    position-sizing parameters read by ``handle_data``.
    """
    # context.start_date / end_date are the arguments given to the trader in a backtest;
    # in live trading the system replaces them with the live dates.
    universe = D.instruments(start_date=context.start_date, end_date=context.end_date)
    feature_run = M.general_feature_extractor.v5(
        instruments=universe,
        start_date=context.start_date,
        end_date=context.end_date,
        model_id=context.options['model_id'])
    transformed_run = M.transform.v2(
        data=feature_run.data,
        transforms=T.get_stock_ranker_default_transforms(),
        drop_null=True,
        astype='int32',
        except_columns=['date', 'instrument'],
        clip_lower=0,
        clip_upper=200000000)
    prediction_run = M.stock_ranker_predict.v2(
        model_id=context.options['model_id'], data=transformed_run.data)
    context.ranker_prediction = prediction_run.predictions.read_df()
    # Instrument codes needed by the backtest / live run.
    context.instruments = prediction_run.instruments

    # The system already sets default commission and slippage; override the commission here.
    commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission)

    # Number of stocks to buy: the top 5 of the predicted ranking.
    top_n = 5
    # Per-stock weights favouring higher ranks: 1/log(rank + 2), normalized by T.norm.
    raw_weights = [1 / math.log(rank + 2) for rank in range(0, top_n)]
    context.stock_weights = T.norm(raw_weights)
    # Maximum share of the portfolio value a single stock may occupy.
    context.max_cash_per_instrument = 0.2

# 回测引擎:每日数据处理函数,每天执行一次
def handle_data(context, data):
    """Backtest-engine daily hook: allocate cash, sell the worst-ranked holdings, buy the top-ranked.

    Args:
        context: Backtest context; reads ``ranker_prediction``, ``stock_weights`` and
            ``max_cash_per_instrument`` set in ``initialize``, plus ``options['hold_days']``.
        data: Bar data for the current day; only ``current_dt`` is read.
    """
    # Keep only today's predictions
    ranker_prediction = context.ranker_prediction[context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]

    # 1. Cash allocation
    # The average holding time is hold_days and stocks are bought every day, so each day
    # is expected to use 1/hold_days of the funds.
    # In practice buys are inexact, so during the first hold_days days an equal share is used;
    # afterwards as much of the remaining cash as possible is used (capped at 1.5x the equal share)
    is_staging = context.trading_day_index < context.options['hold_days'] # still building positions (first hold_days days)?
    cash_avg = context.portfolio.portfolio_value / context.options['hold_days']
    cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)
    cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)
    # Current value of each held position, keyed by symbol
    positions = {e.symbol: p.amount * p.last_sale_price
                 for e, p in context.perf_tracker.position_tracker.positions.items()}

    # 2. Sell orders: selling starts only after hold_days days; held stocks are dropped
    #    from the bottom of the StockRanker ranking
    if not is_staging and cash_for_sell > 0:
        equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}
        # Held instruments without an unfinished sell order, in reversed prediction order
        instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(
                lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))
        # print('rank order for sell %s' % instruments)
        for instrument in instruments:
            context.order_target(context.symbol(instrument), 0)
            cash_for_sell -= positions[instrument]
            # Stop once enough value has been queued for sale
            if cash_for_sell <= 0:
                break

    # 3. Buy orders: buy the first stock_count stocks of the StockRanker ranking
    buy_cash_weights = context.stock_weights
    buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])
    max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument
    for i, instrument in enumerate(buy_instruments):
        cash = cash_for_buy * buy_cash_weights[i]
        if cash > max_cash_per_instrument - positions.get(instrument, 0):
            # Make sure the position never exceeds the per-stock cash limit
            cash = max_cash_per_instrument - positions.get(instrument, 0)
        if cash > 0:
            context.order_value(context.symbol(instrument), cash)


# Invoke the trading engine
m6 = M.trade.v1(
    instruments=None,
    start_date=conf.split_date,
    end_date=conf.end_date,
    initialize=initialize,
    handle_data=handle_data,
    order_price_field_buy='open',       # buy at the open
    order_price_field_sell='close',     # sell at the close
    capital_base=1000000,               # initial capital
    benchmark='000300.SHA',             # comparison benchmark; does not affect the backtest result
    # Pass the prediction inputs and parameters to the backtest engine through options
    options={'hold_days': conf.hold_days, 'model_id': m5.model_id}
)


股票排序学习模型(滚动版)
# Rolling variant of StockRanker training (per the section heading): takes the joined
# label/feature data, an evaluation start date and model_update_days=180.
# NOTE(review): presumably the model is retrained every 180 days — confirm the unit and semantics.
m5 = M.stock_ranker_train_rolling.v2(
    data=m4.data,
    evaluation_start_date=conf.split_date,
    features=conf.features,
    model_update_days=180,
)


随机森林模型
# 基础参数配置
class conf:
    # Configuration namespace for the random-forest example; attributes are read as conf.<name>.
    start_date = '2009-01-01'
    end_date='2017-06-21'
    # Data before split_date is used for training, data after it for evaluation
    split_date = '2015-01-01'
    # D.instruments: https://bigquant.com/docs/data_instruments.html
    instruments = D.instruments(start_date, end_date)

    # Labeling expressions for the machine-learning target.
    # Equivalent to min(max(holding-period return * 30, -3), 3) + 3: the score is clamped
    # to [-3, 3] and shifted by 3 into [0, 6].
    # (Per the original note, M.fast_auto_labeler rounds to an integer afterwards.)
    label_expr = ['return* 30', 'where(label > {0}, {0}, where(label < -{0}, -{0}, label)) + {0}'.format(3)]
    # Holding period in days, used to compute the 'return' value in label_expr
    hold_days = 10
    # Technical-indicator features: SMA ratios, ATR and RSI at two window lengths each
    features = [
        'ta_sma_10_0/ta_sma_20_0',
        'ta_sma_20_0/ta_sma_30_0',
        'ta_sma_30_0/ta_sma_60_0',
        'ta_atr_14_0',
        'ta_atr_28_0',
        'ta_rsi_14_0',
        'ta_rsi_28_0',
    ]


# Label the data: score every row (sample); in general a higher score is better
m1 = M.fast_auto_labeler.v6(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.end_date,
    label_expr=conf.label_expr, hold_days=conf.hold_days,
    benchmark='000300.SHA', sell_at='close', buy_at='open', is_regression=False)
# Compute the feature data
m2 = M.general_feature_extractor.v5(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.end_date,
    features=conf.features)
# Pre-processing: evaluate the feature expressions as columns, then drop rows with nulls,
# cast to float32 and clip to [0, 200000000]. No transforms are applied here
# (transforms=None); the StockRanker default transforms are not used for this model.
m3=M.add_columns.v1(data=m2.data, eval_list=conf.features)
m4 = M.transform.v2(
    data=m3.data, transforms=None,
    drop_null=True, astype='float32', except_columns=['date', 'instrument'],
    clip_lower=0, clip_upper=200000000)
# Merge the labels with the feature data
m5 = M.join.v2(data1=m4.data, data2=m1.data, on=['date', 'instrument'], sort=True)

# Training set: rows before split_date
m6_training = M.filter.v2(data=m5.data, expr='date < "%s"' % conf.split_date)
# Evaluation set: rows on or after split_date
m6_evaluation = M.filter.v2(data=m5.data, expr='"%s" <= date' % conf.split_date)

# Train the random-forest classifier on the training set
m7 = M.random_forest_train.v1(training_ds=m6_training.data, features=conf.features, is_regression=False,n_jobs=4)

# Target number of holdings; read as a module-level global by handle_data below.
stock_num=40
# 3. Strategy functions
# Set up the virtual account state; runs only on the first trading day
def initialize(context):
    """Set the commission and precompute the per-day sorted predictions.

    Predicts on the module-level ``m6_evaluation`` data with the model from
    ``context.options['model']`` and stores the predictions on ``context.pred_df``,
    grouped by ``date`` and sorted by ``pred_label`` in descending order.
    """
    # Commission: 0.03% on buys, 0.13% on sells, with a minimum of 5 per order.
    commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission)

    prediction_run = M.random_forest_predict.v1(model=context.options['model'],data=m6_evaluation.data)
    predictions = prediction_run.predictions.read_df()

    def best_first(day_rows):
        # Highest predicted label first within one day.
        return day_rows.sort_values('pred_label', ascending=False)

    context.pred_df = predictions.groupby('date').apply(best_first)

# 策略交易逻辑,每个交易日运行一次
def handle_data(context,data):
    """Daily trading logic: exit aged positions, then fill free slots from today's predictions.

    Args:
        context: Backtest context; reads ``portfolio.positions``, ``pred_df`` (set in
            ``initialize``) and ``options['rebalance_period']``. An optional
            ``options['stock_num']`` overrides the module-level ``stock_num``.
        data: Bar data for the current day; provides ``current_dt`` and ``can_trade``.
    """
    today = data.current_dt
    today_str = str(today.date())

    # Positions currently held (amount > 0), keyed by symbol.
    equities = {e.symbol: p for e, p in context.portfolio.positions.items() if p.amount>0}

    # Rebalance: sell holdings whose last_sale_date is at least rebalance_period calendar days old.
    # NOTE(review): last_sale_date semantics are engine-defined — confirm it reflects the entry date.
    for instrument in equities:
        # Suspended stocks cannot be sold; they are handled on a later day.
        if data.can_trade(context.symbol(instrument)) and today-equities[instrument].last_sale_date>=datetime.timedelta(context.options['rebalance_period']):
            context.order_target_percent(context.symbol(instrument), 0)

    # Rebalance: buy new stocks, but only on days that have predictions.
    if today_str not in context.pred_df.index:
        return
    # .ix was removed in pandas 1.0; .loc performs the same label-based lookup.
    instruments_to_buy = context.pred_df.loc[today_str].instrument
    if len(instruments_to_buy) == 0:
        return

    # Prefer a value passed through options (the channel recommended for backtest inputs);
    # fall back to the module-level stock_num otherwise.
    target_count = context.options['stock_num'] if 'stock_num' in context.options else stock_num

    # Equal-weight allocation over the target number of holdings.
    weight = 1.0 / target_count
    can_buy_num = target_count - len(equities)
    for instrument in instruments_to_buy:
        if can_buy_num>0 and data.can_trade(context.symbol(instrument)) and instrument not in equities:
            context.order_target_percent(context.symbol(instrument), weight)
            can_buy_num -= 1

# 4. Strategy backtest: https://bigquant.com/docs/module_trade.html
m = M.trade.v1(
    instruments=conf.instruments,
    start_date=conf.split_date,
    end_date=conf.end_date,
    initialize=initialize,
    handle_data=handle_data,
    # Buy orders are priced from the 'open' field
    order_price_field_buy='open',
    # Sell orders are priced from the 'close' field
    order_price_field_sell='close',
    capital_base=1000000,
    benchmark='000300.SHA',
    # Data for the backtest: everything the backtest functions use should be passed in here
    # and read through context.options, otherwise caching problems may occur
    options={'rebalance_period': conf.hold_days, 'model':m7.model}
)


线性随机梯度下降模型
# 基础参数配置
class conf:
    # Configuration namespace for the linear SGD example; attributes are read as conf.<name>.
    start_date = '2009-01-01'
    end_date='2017-06-21'
    # Data before split_date is used for training, data after it for evaluation
    split_date = '2015-01-01'
    # D.instruments: https://bigquant.com/docs/data_instruments.html
    instruments = D.instruments(start_date, end_date)

    # Labeling expressions for the machine-learning target.
    # Equivalent to min(max(holding-period return * 30, -1), 1) + 1: the score is clamped
    # to [-1, 1] and shifted by 1 into [0, 2].
    # (Per the original note, M.fast_auto_labeler rounds to an integer afterwards.)
    label_expr = ['return* 30', 'where(label > {0}, {0}, where(label < -{0}, -{0}, label)) + {0}'.format(1)]
    # Holding period in days, used to compute the 'return' value in label_expr
    hold_days = 10
    # Technical-indicator features: SMA ratios, ATR and RSI at two window lengths each
    features = [
        'ta_sma_10_0/ta_sma_20_0',
        'ta_sma_20_0/ta_sma_30_0',
        'ta_sma_30_0/ta_sma_60_0',
        'ta_atr_14_0',
        'ta_atr_28_0',
        'ta_rsi_14_0',
        'ta_rsi_28_0',
    ]


# Label the data: score every row (sample); in general a higher score is better
m1 = M.fast_auto_labeler.v6(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.end_date,
    label_expr=conf.label_expr, hold_days=conf.hold_days,
    benchmark='000300.SHA', sell_at='close', buy_at='open', is_regression=False)

# Compute the feature data
m2 = M.general_feature_extractor.v5(
    instruments=conf.instruments, start_date=conf.start_date, end_date=conf.end_date,
    features=conf.features)

# Pre-processing: evaluate the feature expressions as columns, then drop rows with nulls,
# cast to float32 and clip to [0, 200000000]. No transforms are applied here
# (transforms=None); the StockRanker default transforms are not used for this model.
m3=M.add_columns.v1(data=m2.data, eval_list=conf.features)
m4 = M.transform.v2(
    data=m3.data, transforms=None,
    drop_null=True, astype='float32', except_columns=['date', 'instrument'],
    clip_lower=0, clip_upper=200000000)

# Merge the labels with the feature data
m5 = M.join.v2(data1=m4.data, data2=m1.data, on=['date', 'instrument'], sort=True)

# Training set: rows before split_date
m6_training = M.filter.v2(data=m5.data, expr='date < "%s"' % conf.split_date)

# Evaluation set: rows on or after split_date
m6_evaluation = M.filter.v2(data=m5.data, expr='"%s" <= date' % conf.split_date)

# Train the linear SGD classifier on the training set
m7 = M.linear_sgd_train.v1(training_ds=m6_training.data, features=conf.features, is_regression=False)

# Target number of holdings; read as a module-level global by handle_data below.
stock_num=40

# 3. Strategy functions
# Set up the virtual account state; runs only on the first trading day
def initialize(context):
    """Set the commission and precompute the per-day sorted predictions.

    Predicts on the module-level ``m6_evaluation`` data with the model from
    ``context.options['model']`` and stores the predictions on ``context.pred_df``,
    grouped by ``date`` and sorted by ``pred_label`` in descending order.
    """
    # Commission: 0.03% on buys, 0.13% on sells, with a minimum of 5 per order.
    commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission)

    prediction_run = M.linear_sgd_predict.v1(model=context.options['model'],data=m6_evaluation.data)
    predictions = prediction_run.predictions.read_df()

    def best_first(day_rows):
        # Highest predicted label first within one day.
        return day_rows.sort_values('pred_label', ascending=False)

    context.pred_df = predictions.groupby('date').apply(best_first)

# 策略交易逻辑,每个交易日运行一次
def handle_data(context,data):
    """Daily trading logic: exit aged positions, then fill free slots from today's predictions.

    Args:
        context: Backtest context; reads ``portfolio.positions``, ``pred_df`` (set in
            ``initialize``) and ``options['rebalance_period']``. An optional
            ``options['stock_num']`` overrides the module-level ``stock_num``.
        data: Bar data for the current day; provides ``current_dt`` and ``can_trade``.
    """
    today = data.current_dt
    today_str = str(today.date())

    # Positions currently held (amount > 0), keyed by symbol.
    equities = {e.symbol: p for e, p in context.portfolio.positions.items() if p.amount>0}

    # Rebalance: sell holdings whose last_sale_date is at least rebalance_period calendar days old.
    # NOTE(review): last_sale_date semantics are engine-defined — confirm it reflects the entry date.
    for instrument in equities:
        # Suspended stocks cannot be sold; they are handled on a later day.
        if data.can_trade(context.symbol(instrument)) and today-equities[instrument].last_sale_date>=datetime.timedelta(context.options['rebalance_period']):
            context.order_target_percent(context.symbol(instrument), 0)

    # Rebalance: buy new stocks, but only on days that have predictions.
    if today_str not in context.pred_df.index:
        return
    # .ix was removed in pandas 1.0; .loc performs the same label-based lookup.
    instruments_to_buy = context.pred_df.loc[today_str].instrument
    if len(instruments_to_buy) == 0:
        return

    # Prefer a value passed through options (the channel recommended for backtest inputs);
    # fall back to the module-level stock_num otherwise.
    target_count = context.options['stock_num'] if 'stock_num' in context.options else stock_num

    # Equal-weight allocation over the target number of holdings.
    weight = 1.0 / target_count
    can_buy_num = target_count - len(equities)
    for instrument in instruments_to_buy:
        if can_buy_num>0 and data.can_trade(context.symbol(instrument)) and instrument not in equities:
            context.order_target_percent(context.symbol(instrument), weight)
            can_buy_num -= 1

# 4. Strategy backtest: https://bigquant.com/docs/module_trade.html
m = M.trade.v1(
    instruments=conf.instruments,
    start_date=conf.split_date,
    end_date=conf.end_date,
    initialize=initialize,
    handle_data=handle_data,
    # Buy orders are priced from the 'open' field
    order_price_field_buy='open',
    # Sell orders are priced from the 'close' field
    order_price_field_sell='close',
    capital_base=1000000,
    benchmark='000300.SHA',
    # Data for the backtest: everything the backtest functions use should be passed in here
    # and read through context.options, otherwise caching problems may occur
    options={'rebalance_period': conf.hold_days, 'model':m7.model},
)


交易引擎
# 1. Basic strategy parameters

# Universe: all stocks are used here
instruments = D.instruments()
# Start date
start_date = '2016-01-01'
# End date
end_date = '2017-02-28'
# Initial capital
capital_base = 100000
# Benchmark the strategy is compared against; CSI 300 in this example
# NOTE(review): other snippets in this file use '000300.SHA' for the same index — confirm the code.
benchmark = '000300.INDX'
# Rebalance period (number of trading days between rebalances)
rebalance_period = 22
# Number of stocks bought at each rebalance
stock_num = 30


# 2. Stock selection: done here as one batch computation for better performance
# Logic of this sample strategy: on each rebalance day buy the 30 stocks with the smallest turnover
# Loading data: https://bigquant.com/docs/data_history_data.html
history_data = D.history_data(instruments, start_date, end_date, fields=['amount'])
# Filter out suspended stocks: rows whose amount is 0
selected_data = history_data[history_data.amount > 0]
# Group by day; within each day sort ascending by amount and keep the first stock_num rows
selected_data = selected_data.groupby('date').apply(lambda df: df.sort_values('amount')[:stock_num])


# 3. 策略主体函数
# 初始化虚拟账户状态,只在第一个交易日运行
def initialize(context):
    """Set the commission schedule for the backtest.

    Args:
        context: Backtest context whose ``set_commission`` method is called.
    """
    # Commission: 0.03% on buys, 0.13% on sells, with a minimum of 5 per order.
    # Called on the context, matching the other initialize functions in this file.
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))

# 策略交易逻辑,每个交易日运行一次
def handle_data(context,data):
    """Daily trading logic: every ``rebalance_period`` trading days, sell all and buy today's picks.

    Args:
        context: Backtest context; reads ``trading_day_index``, ``portfolio.positions``,
            ``options['rebalance_period']`` and ``options['selected_data']`` (per-day
            selection indexed by date).
        data: Bar data for the current day; provides ``current_dt`` and ``can_trade``.
    """
    today = data.current_dt.strftime('%Y-%m-%d')  # trading date
    # context.trading_day_index is the trading-day ordinal; the first trading day is 0.
    if context.trading_day_index % context.options['rebalance_period'] != 0:
        return

    # Rebalance: sell every held position.
    for equity in context.portfolio.positions:
        # Suspended stocks cannot be sold; they are handled at the next rebalance.
        if data.can_trade(equity):
            context.order_target_percent(equity, 0)

    # Rebalance: buy today's selection.
    # .ix was removed in pandas 1.0; .loc performs the same label-based lookup.
    instruments_to_buy = context.options['selected_data'].loc[today].instrument
    if len(instruments_to_buy) == 0:
        return
    # Split the funds equally across the selected stocks.
    weight = 1.0 / len(instruments_to_buy)
    for instrument in instruments_to_buy:
        asset = context.symbol(instrument)
        if data.can_trade(asset):
            context.order_target_percent(asset, weight)

# 4. Strategy backtest: https://bigquant.com/docs/strategy_backtest.html
m = M.trade.v1(
    instruments=instruments,
    start_date=start_date,
    end_date=end_date,
    initialize=initialize,
    handle_data=handle_data,
    # Buy orders are priced from the 'open' field
    order_price_field_buy='open',
    # Sell orders are priced from the 'open' field
    order_price_field_sell='open',
    capital_base=capital_base,
    benchmark=benchmark,
    # Data for the backtest: everything the backtest functions use should be passed in here
    # and read through context.options, otherwise caching problems may occur
    options={'selected_data': selected_data, 'rebalance_period': rebalance_period}
)

请问哪里有D.instruments()使用方法详解
BigQuant常见问题和经验整理合计(1.0版本)
(ylky_2000) #2

好的不错。


(Tinus) #3

好文收藏