【其他】提问:基础特征抽取 (v7)日期不匹配问题
由bq2sr4ov创建,最终由small_q 被浏览 20 用户
下面的 m16 是基础特征抽取 (v7) 模块,我想了解一下它输出数据的格式是如何定义的:为什么前面几行带有 0,0、1,1、2,2 这样的 index,而最后几行的 index 却是连续的?
猜测前几行和输入参数before_start_days有关,但是参数设置before_start_days为90天时,打印出来的数据中2018-11-13到2019-01-02之间似乎只有60天左右。
下面是策略的代码:
# 本代码由可视化策略环境自动生成 2024年1月8日 14:47
# 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
# 显式导入 BigQuant 相关 SDK 模块
from bigdatasource.api import DataSource
from bigdata.api.datareader import D
from biglearning.api import M
from biglearning.api import tools as T
from biglearning.module2.common.data import Outputs
import pandas as pd
import numpy as np
import math
import warnings
import datetime
from zipline.finance.commission import PerOrder
from zipline.api import get_open_orders
from zipline.api import symbol
from bigtrader.sdk import *
from bigtrader.utils.my_collections import NumPyDeque
from bigtrader.constant import OrderType
from bigtrader.constant import Direction
# <aistudiograph>
# @param(id="m6", name="custom_objects")
# User-defined custom layers have to be registered in this dict, for example
# {
# "MyLayer": MyLayer
# }
# It is empty here: no custom layer is registered. The dict is handed to the
# training module (m6) through its custom_objects argument.
m6_custom_objects_bigquant_run = {
}
# @param(id="m2", name="run")
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m2_run_bigquant_run(input_1, input_2, input_3):
    """Pair the model predictions with their dates and publish them.

    Args:
        input_1: Object whose ``read_pickle()`` returns the prediction array
            (it must expose ``shape`` and ``reshape``).
        input_2: Object exposing ``read_pickle()``. It is read, but the
            result is not used by this function.
        input_3: Object whose ``read_df()`` returns a frame with a ``date``
            column.

    Returns:
        An ``Outputs`` whose ``data_1`` is the DataSource written from a
        pandas Series of predictions indexed by date.
    """
    # Kept from the original flow: the pickle is read, its content is unused.
    input_2.read_pickle()
    pred_label = input_1.read_pickle()
    # Reshape to a 1-D array with one value per row of the prediction array.
    pred_result = pred_label.reshape(pred_label.shape[0])
    # Takes the trailing len(pred_result) dates. This assumes those rows are
    # the ones the predictions were produced for -- TODO confirm upstream.
    dt = input_3.read_df()['date'][-1 * len(pred_result):]
    pred_df = pd.Series(pred_result, index=dt)
    ds = DataSource.write_df(pred_df)
    return Outputs(data_1=ds)
# @param(id="m2", name="post_run")
# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
def m2_post_run_bigquant_run(outputs):
    """Post-processing hook for m2: hand the main function's outputs back as-is.

    Args:
        outputs: The value returned by the m2 main function.

    Returns:
        The same ``outputs`` object, unmodified.
    """
    return outputs
# @param(id="m1", name="initialize")
# 回测引擎:初始化函数,只执行一次
def m1_initialize_bigquant_run(context):
    """Backtest initialisation hook: load predictions and set commission.

    Args:
        context: Backtest context. ``context.options['data']`` must expose
            ``read_df()``, and ``context.set_commission`` must be callable.
    """
    # Load the prediction data that the daily handler looks up by date.
    context.prediction = context.options['data'].read_df()
    # The system already applies a default commission and slippage; this call
    # overrides the commission schedule.
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
# @param(id="m1", name="handle_data")
# 回测引擎:每日数据处理函数,每天执行一次
def m1_handle_data_bigquant_run(context, data):
    """Daily backtest hook: go fully long or fully flat on the first instrument.

    The prediction for the current date is looked up in ``context.prediction``
    with a ``'%Y-%m-%d'`` key. A value above 0.5 with no open position buys to
    100% of the portfolio; a value below 0.5 with an open position sells to
    0%. A value of exactly 0.5 triggers nothing.

    Args:
        context: Backtest context providing ``prediction``, ``instruments``,
            ``symbol``, ``portfolio.positions`` and ``order_target_percent``.
        data: Bar data object; only ``data.current_dt`` is read.
    """
    # Fetch today's prediction; a missing date means there is nothing to do.
    # NOTE(review): assumes context.prediction is keyed by date -- confirm
    # what read_df() returns for the Series written by m2.
    try:
        prediction = context.prediction[data.current_dt.strftime('%Y-%m-%d')]
    except KeyError:
        return

    instrument = context.instruments[0]
    sid = context.symbol(instrument)
    cur_position = context.portfolio.positions[sid].amount

    # Trading logic
    if prediction > 0.5 and cur_position == 0:
        context.order_target_percent(sid, 1)
        print(data.current_dt, '买入!')
    elif prediction < 0.5 and cur_position > 0:
        context.order_target_percent(sid, 0)
        print(data.current_dt, '卖出!')
# @param(id="m1", name="prepare")
# 回测引擎:准备数据,只执行一次
def m1_prepare_bigquant_run(context):
    """Backtest data-preparation hook; intentionally does nothing.

    Args:
        context: Backtest context (unused).
    """
    pass
# @param(id="m1", name="before_trading_start")
# 回测引擎:每个单位时间开始前调用一次,即每日开盘前调用一次。
def m1_before_trading_start_bigquant_run(context, data):
    """Pre-open hook for each trading period; intentionally does nothing.

    Args:
        context: Backtest context (unused).
        data: Bar data object (unused).
    """
    pass
# Model input layer. shape='50,5' lines up with window_size=50 in m25 and the
# five feature expressions listed in m8.
# @module(position="346,-367", comment='', comment_collapsed=True)
m3 = M.dl_layer_input.v1(
shape='50,5',
batch_shape='',
dtype='float32',
sparse=False,
name=''
)
# Reshape the '50,5' input to '50,5,1', i.e. add a trailing axis of size 1
# ahead of the channels_last 2-D convolution in m14.
# @module(position="279,-194", comment='', comment_collapsed=True)
m13 = M.dl_layer_reshape.v1(
inputs=m3.data,
target_shape='50,5,1',
name=''
)
# 2-D convolution: 32 filters, kernel '3,5', stride '1,1', 'valid' padding,
# ReLU activation, no regularizers or constraints.
# Assuming standard 'valid' convolution arithmetic on a 50x5x1 input, the
# output is 48x1x32, which agrees with the 1536-element reshape in m19.
# @module(position="280,-107", comment='', comment_collapsed=True)
m14 = M.dl_layer_conv2d.v1(
inputs=m13.data,
filters=32,
kernel_size='3,5',
strides='1,1',
padding='valid',
data_format='channels_last',
dilation_rate='1,1',
activation='relu',
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='Zeros',
kernel_regularizer='None',
kernel_regularizer_l1=0,
kernel_regularizer_l2=0,
bias_regularizer='None',
bias_regularizer_l1=0,
bias_regularizer_l2=0,
activity_regularizer='None',
activity_regularizer_l1=0,
activity_regularizer_l2=0,
kernel_constraint='None',
bias_constraint='None',
name=''
)
# Flatten the convolution output into a vector of 1536 values
# (1536 = 48 * 1 * 32) for the dense layers that follow.
# @module(position="275,-29", comment='', comment_collapsed=True)
m19 = M.dl_layer_reshape.v1(
inputs=m14.data,
target_shape='1536',
name=''
)
# First fully connected layer: 128 units, tanh activation, no regularizers
# or constraints.
# @module(position="276,57", comment='', comment_collapsed=True)
m29 = M.dl_layer_dense.v1(
inputs=m19.data,
units=128,
activation='tanh',
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='Zeros',
kernel_regularizer='None',
kernel_regularizer_l1=0,
kernel_regularizer_l2=0,
bias_regularizer='None',
bias_regularizer_l1=0,
bias_regularizer_l2=0,
activity_regularizer='None',
activity_regularizer_l1=0,
activity_regularizer_l2=0,
kernel_constraint='None',
bias_constraint='None',
name=''
)
# Dropout with rate=0.4 applied to the output of the 128-unit dense layer.
# @module(position="279,146", comment='', comment_collapsed=True)
m11 = M.dl_layer_dropout.v1(
inputs=m29.data,
rate=0.4,
noise_shape='',
name=''
)
# Second fully connected layer: 32 units, tanh activation, no regularizers
# or constraints.
# @module(position="279,218", comment='', comment_collapsed=True)
m10 = M.dl_layer_dense.v1(
inputs=m11.data,
units=32,
activation='tanh',
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='Zeros',
kernel_regularizer='None',
kernel_regularizer_l1=0,
kernel_regularizer_l2=0,
bias_regularizer='None',
bias_regularizer_l1=0,
bias_regularizer_l2=0,
activity_regularizer='None',
activity_regularizer_l1=0,
activity_regularizer_l2=0,
kernel_constraint='None',
bias_constraint='None',
name=''
)
# Dropout with rate=0.8 applied to the output of the 32-unit dense layer.
# NOTE(review): 0.8 is unusually high if the rate is the fraction of units
# dropped -- confirm this is intended and not meant as a keep probability.
# @module(position="282,301", comment='', comment_collapsed=True)
m12 = M.dl_layer_dropout.v1(
inputs=m10.data,
rate=0.8,
noise_shape='',
name=''
)
# Output layer: a single sigmoid unit. This matches the binary_crossentropy
# loss configured in m6 and the 0.5 threshold used by the daily handler.
# @module(position="281,387", comment='', comment_collapsed=True)
m9 = M.dl_layer_dense.v1(
inputs=m12.data,
units=1,
activation='sigmoid',
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='Zeros',
kernel_regularizer='None',
kernel_regularizer_l1=0,
kernel_regularizer_l2=0,
bias_regularizer='None',
bias_regularizer_l1=0,
bias_regularizer_l2=0,
activity_regularizer='None',
activity_regularizer_l1=0,
activity_regularizer_l2=0,
kernel_constraint='None',
bias_constraint='None',
name=''
)
# Assemble the model from the input layer (m3) to the sigmoid output (m9).
# @module(position="474,466", comment='', comment_collapsed=True)
m5 = M.dl_model_init.v1(
inputs=m3.data,
outputs=m9.data
)
# Feature list shared by training and prediction: five expressions, one per
# line, each of the form (x_0 / x_1 - 1) * 10 for close, high, low, open and
# volume. Presumably _0 is the current bar and _1 the previous one, making
# each feature a scaled bar-over-bar change -- TODO confirm the suffix rule.
# @module(position="1063,-225", comment='', comment_collapsed=True)
m8 = M.input_features.v1(
features="""(close_0/close_1-1)*10
(high_0/high_1-1)*10
(low_0/low_1-1)*10
(open_0/open_1-1)*10
(volume_0/volume_1-1)*10"""
)
# Training universe: the single instrument 600009.SHA in market CN_STOCK_A,
# from 2017-06-02 to 2017-10-30.
# @module(position="708,-325", comment='', comment_collapsed=True)
m24 = M.instruments.v2(
start_date='2017-06-02',
end_date='2017-10-30',
market='CN_STOCK_A',
instrument_list='600009.SHA',
max_count=0
)
# Label the training instruments. The expression yields 1 when
# shift(close, -10) / close - 1 is positive and 0 otherwise, then replaces
# the label with NaN where shift(high, -1) equals shift(low, -1).
# Rows with a NaN label are dropped (drop_na_label=True) and labels are cast
# to int (cast_label_int=True).
# NOTE(review): the comment embedded in label_expr talks about the 5-day
# close over the next-day open, which does not match the expression actually
# used (shift(close, -10) / close).
# @module(position="561,-48", comment='', comment_collapsed=True)
m21 = M.advanced_auto_labeler.v2(
instruments=m24.data,
label_expr="""# #号开始的表示注释
# 0. 每行一个,顺序执行,从第二个开始,可以使用label字段
# 1. 可用数据字段见 https://bigquant.com/docs/develop/datasource/deprecated/history_data.html
# 添加benchmark_前缀,可使用对应的benchmark数据
# 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/develop/bigexpr/usage.html>`_
# 计算收益:5日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
where(shift(close, -10) / close -1>0,1,0)
# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
where(shift(high, -1) == shift(low, -1), NaN, label)
""",
start_date='',
end_date='',
benchmark='000300.SHA',
drop_na_label=True,
cast_label_int=True,
user_functions={}
)
# Base feature extraction for the training instruments (m24) using the
# feature list in m8. before_start_days=90 presumably requests extra history
# ahead of the start date so lagged fields exist on the first day -- TODO
# confirm against the module documentation.
# @module(position="860,-82", comment='', comment_collapsed=True)
m22 = M.general_feature_extractor.v7(
instruments=m24.data,
features=m8.data,
start_date='',
end_date='',
before_start_days=90
)
# Derived feature extraction: evaluates the m8 expressions on the base data
# from m22. NaN rows are kept here (drop_na=False) and removed later by m18;
# extra columns are kept as well (remove_extra_columns=False).
# @module(position="871,10", comment='', comment_collapsed=True)
m23 = M.derived_feature_extractor.v3(
input_data=m22.data,
features=m8.data,
date_col='date',
instrument_col='instrument',
drop_na=False,
remove_extra_columns=False,
user_functions={}
)
# Inner-join the labels (m21) with the features (m23) on 'date', sorted.
# NOTE(review): the join key is the date only. Both sides come from the same
# single-instrument list (m24); with several instruments the key would
# presumably need to include the instrument as well -- confirm before reuse.
# @module(position="735,138", comment='标注特征连接', comment_collapsed=False)
m17 = M.join.v3(
data1=m21.data,
data2=m23.data,
on='date',
how='inner',
sort=True
)
# Drop the rows of the joined data that contain NaN values.
# @module(position="725,247", comment='去掉为nan的数据', comment_collapsed=True)
m18 = M.dropnan.v1(
input_data=m17.data
)
# Convert the cleaned training data into model input with window_size=50
# over the m8 features. With flatten=False the samples presumably keep a
# (window, feature) layout matching the '50,5' input layer, and
# feature_clip=5 presumably bounds the feature values -- TODO confirm both.
# @module(position="753,329", comment='', comment_collapsed=True)
m25 = M.dl_convert_to_bin.v2(
input_data=m18.data,
features=m8.data,
window_size=50,
feature_clip=5,
flatten=False,
window_along_col=''
)
# Train the model from m5 on the windows from m25: Adam optimizer,
# binary_crossentropy loss, accuracy metric, batch_size=2048, epochs=10,
# n_gpus=1. custom_objects is the (empty) m6_custom_objects_bigquant_run dict.
# @module(position="633,537", comment='', comment_collapsed=True)
m6 = M.dl_model_train.v1(
input_model=m5.data,
training_data=m25.data,
optimizer='Adam',
loss='binary_crossentropy',
metrics='accuracy',
batch_size=2048,
epochs=10,
custom_objects=m6_custom_objects_bigquant_run,
n_gpus=1,
verbose='1:输出进度条记录'
)
# Prediction/backtest universe: 600009.SHA in market CN_STOCK_A. Both dates
# go through T.live_run_param('trading_date', <default>), which presumably
# yields the live trading date in live runs and the default otherwise
# (2019-02-11 to 2019-08-01) -- TODO confirm.
# @module(position="1257,-312", comment='', comment_collapsed=True)
m28 = M.instruments.v2(
start_date=T.live_run_param('trading_date', '2019-02-11'),
end_date=T.live_run_param('trading_date', '2019-08-01'),
market='CN_STOCK_A',
instrument_list='600009.SHA',
max_count=0
)
# Base feature extraction for the prediction instruments (m28) using the
# feature list in m8.
# NOTE(review): before_start_days=90 looks like calendar days, not trading
# days. 2019-02-11 minus 90 calendar days is 2018-11-13, the first date
# quoted in the question above, and that span holds only about 60 trading
# days -- confirm against the module documentation.
# @module(position="1229,-86", comment='', comment_collapsed=True)
m16 = M.general_feature_extractor.v7(
instruments=m28.data,
features=m8.data,
start_date='',
end_date='',
before_start_days=90
)
\