# Pull the 'in_csi300' field for every instrument on the single day 2019-03-01.
# NOTE(review): 'in_csi300' is presumably a CSI 300 membership flag — confirm
# against the platform's field documentation.
df_300 = D.history_data(
    D.instruments(),
    '2019-03-01',
    '2019-03-01',
    ['in_csi300'],
)

# Distinct instrument codes of the rows whose flag equals 1.
stock_list = df_300.loc[df_300['in_csi300'] == 1, 'instrument'].unique().tolist()

# Notebook-style sanity check: number of selected instruments.
len(stock_list)
# Training universe: the instruments in stock_list over 2012-03-01 .. 2015-06-01.
# NOTE(review): stock_list is built from a 2019-03-01 snapshot while this window
# is 2012-2015, which may introduce survivorship / look-ahead bias — confirm
# that this is intended.
# NOTE(review): max_count=0 presumably means "no limit"; verify in module docs.
m1 = M.instruments.v2(
    start_date='2012-03-01',
    end_date='2015-06-01',
    market='CN_STOCK_A',
    instrument_list=stock_list,
    max_count=0,
)
[2019-04-29 21:41:43.066870] INFO: bigquant: instruments.v2 开始运行..
[2019-04-29 21:41:43.204275] INFO: bigquant: instruments.v2 运行完成[0.137414s].
# Label the training universe (m1). The label expression is evaluated line by
# line by the labeler module:
#   1. ratio of `close` shifted by -20 to `open` shifted by -1
#      (presumably a forward return: close 20 bars ahead / next bar's open);
#   2. clip the label to its 5% / 95% quantiles;
#   3. bucket the clipped label into 20 bins via all_wbins;
#   4. set the label to NaN where shifted high equals shifted low
#      (presumably bars with no price range, e.g. limit-locked days).
# Rows with NaN labels are dropped (drop_na_label=True) and labels are cast to
# int (cast_label_int=True). Empty start/end dates are passed through as-is;
# presumably the module then falls back to the dates carried by `instruments`.
m2 = M.advanced_auto_labeler.v2(
    instruments=m1.data,
    label_expr="""
shift(close, -20) / shift(open, -1)
clip(label, all_quantile(label, 0.05), all_quantile(label, 0.95))
all_wbins(label, 20)
where(shift(high, -1) == shift(low, -1), NaN, label)
""",
    start_date='',
    end_date='',
    benchmark='000300.SHA',
    drop_na_label=True,
    cast_label_int=True,
)
# Optional inspection helpers (left disabled):
# m2.data.read_df().tail()
# m2.plot_label_counts()
[2019-04-29 22:40:25.283766] INFO: bigquant: advanced_auto_labeler.v2 开始运行..
[2019-04-29 22:40:28.662532] INFO: 自动标注(股票): 加载历史数据: 194932 行
[2019-04-29 22:40:28.665049] INFO: 自动标注(股票): 开始标注 ..
[2019-04-29 22:40:29.910023] INFO: bigquant: advanced_auto_labeler.v2 运行完成[4.626259s].
# Feature definition reused by the extractors, the trainer and the test set:
# a single derived feature, the ratio rank_return_5 / rank_return_10.
m3 = M.input_features.v1(
    features="""
rank_return_5/rank_return_10
""",
)
# Base feature extraction for the training universe. before_start_days=120
# asks for extra history ahead of the start date (the join log further down
# shows 2011 feature rows that match no label rows).
m4 = M.general_feature_extractor.v7(
    instruments=m1.data,
    features=m3.data,
    start_date='',
    end_date='',
    before_start_days=120,
)

# Evaluate the derived feature expression(s) from m3 on top of the base data.
# NaN rows and extra columns are kept here (drop_na=False,
# remove_extra_columns=False); NaNs are removed later by a dropnan step.
m5 = M.derived_feature_extractor.v3(
    input_data=m4.data,
    features=m3.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False,
)
# Attach features (m5) to labels (m2): inner join on (date, instrument), so
# only rows present on both sides survive.
m7 = M.join.v3(
    data1=m2.data,
    data2=m5.data,
    on='date,instrument',
    how='inner',
    sort=False,
)

# Remove rows containing NaN values; the result is the training dataset.
m8 = M.dropnan.v1(input_data=m7.data)

# Disabled filter on listing age (list_days_0 >= 120), kept for reference:
# m50 = M.filter.v3(
#     input_data=m8.data,
#     expr='list_days_0 >=120',
#     output_left_data=False
# )
[2019-04-29 22:40:38.130978] INFO: bigquant: input_features.v1 开始运行..
[2019-04-29 22:40:38.164403] INFO: bigquant: 命中缓存
[2019-04-29 22:40:38.166390] INFO: bigquant: input_features.v1 运行完成[0.035358s].
[2019-04-29 22:40:38.208780] INFO: bigquant: general_feature_extractor.v7 开始运行..
[2019-04-29 22:40:38.239189] INFO: bigquant: 命中缓存
[2019-04-29 22:40:38.241155] INFO: bigquant: general_feature_extractor.v7 运行完成[0.032379s].
[2019-04-29 22:40:38.243984] INFO: bigquant: derived_feature_extractor.v3 开始运行..
[2019-04-29 22:40:38.285432] INFO: bigquant: 命中缓存
[2019-04-29 22:40:38.289200] INFO: bigquant: derived_feature_extractor.v3 运行完成[0.04519s].
[2019-04-29 22:40:38.293408] INFO: bigquant: join.v3 开始运行..
[2019-04-29 22:40:38.570245] INFO: join: /y_2011, 行数=0/10265, 耗时=0.060499s
[2019-04-29 22:40:38.662420] INFO: join: /y_2012, 行数=51364/60079, 耗时=0.090054s
[2019-04-29 22:40:38.786187] INFO: join: /y_2013, 行数=58928/58999, 耗时=0.120179s
[2019-04-29 22:40:38.900613] INFO: join: /y_2014, 行数=60035/60222, 耗时=0.110757s
[2019-04-29 22:40:38.971465] INFO: join: /y_2015, 行数=18887/24322, 耗时=0.067952s
[2019-04-29 22:40:39.394357] INFO: join: 最终行数: 189214
[2019-04-29 22:40:39.397797] INFO: bigquant: join.v3 运行完成[1.104384s].
[2019-04-29 22:40:39.401790] INFO: bigquant: dropnan.v1 开始运行..
[2019-04-29 22:40:39.522877] INFO: dropnan: /y_2011, 0/0
[2019-04-29 22:40:39.595974] INFO: dropnan: /y_2012, 51298/51364
[2019-04-29 22:40:39.672153] INFO: dropnan: /y_2013, 58908/58928
[2019-04-29 22:40:39.750428] INFO: dropnan: /y_2014, 59993/60035
[2019-04-29 22:40:39.802037] INFO: dropnan: /y_2015, 18859/18887
[2019-04-29 22:40:39.927139] INFO: dropnan: 行数: 189058/189214
[2019-04-29 22:40:39.931013] INFO: bigquant: dropnan.v1 运行完成[0.529212s].
# Out-of-sample universe: the same stock_list, over 2015-06-02 .. 2017-03-01
# (starts the day after the training window ends).
# NOTE(review): max_count=0 presumably means "no limit"; verify in module docs.
m10 = M.instruments.v2(
    start_date='2015-06-02',
    end_date='2017-03-01',
    market='CN_STOCK_A',
    instrument_list=stock_list,
    max_count=0,
)
# Feature pipeline for the out-of-sample universe (m10), configured the same
# way as the training-side extractors: base features with 120 days of
# look-back, then derived features, then NaN removal.
m11 = M.general_feature_extractor.v7(
    instruments=m10.data,
    features=m3.data,
    start_date='',
    end_date='',
    before_start_days=120,
)

m12 = M.derived_feature_extractor.v3(
    input_data=m11.data,
    features=m3.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False,
)

# Note: no labeler/join is applied on this side, so m13 carries features only.
m13 = M.dropnan.v1(input_data=m12.data)
[2019-04-29 22:40:48.587232] INFO: bigquant: instruments.v2 开始运行..
[2019-04-29 22:40:48.682126] INFO: bigquant: 命中缓存
[2019-04-29 22:40:48.684927] INFO: bigquant: instruments.v2 运行完成[0.097674s].
[2019-04-29 22:40:48.720773] INFO: bigquant: general_feature_extractor.v7 开始运行..
[2019-04-29 22:40:49.600813] INFO: 基础特征抽取: 年份 2015, 特征行数=54613
[2019-04-29 22:40:50.288109] INFO: 基础特征抽取: 年份 2016, 特征行数=63786
[2019-04-29 22:40:54.218678] INFO: 基础特征抽取: 年份 2017, 特征行数=10224
[2019-04-29 22:40:54.315474] INFO: 基础特征抽取: 总行数: 128623
[2019-04-29 22:40:54.319978] INFO: bigquant: general_feature_extractor.v7 运行完成[5.599192s].
[2019-04-29 22:40:54.324496] INFO: bigquant: derived_feature_extractor.v3 开始运行..
[2019-04-29 22:40:54.488027] INFO: general_feature_extractor: 提取完成 rank_return_5/rank_return_10, 0.009s
[2019-04-29 22:40:54.561598] INFO: general_feature_extractor: /y_2015, 54613
[2019-04-29 22:40:54.797047] INFO: general_feature_extractor: /y_2016, 63786
[2019-04-29 22:40:54.869449] INFO: general_feature_extractor: /y_2017, 10224
[2019-04-29 22:40:54.994072] INFO: bigquant: derived_feature_extractor.v3 运行完成[0.669556s].
[2019-04-29 22:40:54.999044] INFO: bigquant: dropnan.v1 开始运行..
[2019-04-29 22:40:55.161365] INFO: dropnan: /y_2015, 54516/54613
[2019-04-29 22:40:55.254977] INFO: dropnan: /y_2016, 63656/63786
[2019-04-29 22:40:55.300053] INFO: dropnan: /y_2017, 10202/10224
[2019-04-29 22:40:55.390661] INFO: dropnan: 行数: 128374/128623
[2019-04-29 22:40:55.398091] INFO: bigquant: dropnan.v1 运行完成[0.399021s].
# Train the StockRanker model on the labelled training set (m8) with the
# feature list from m3; m13 is supplied as the test dataset.
# learning_algorithm='排序' is the platform's literal option value (Chinese
# for "ranking") and must be passed verbatim.
# NOTE(review): the captured run of this call logged
#   "TypeError: unorderable types: NoneType() < int()".
# The cause is not visible in this notebook. One thing to check: test_ds
# (m13) is built without a labeler/join step, unlike training_ds — confirm
# whether the module requires a label column in test_ds.
m9 = M.stock_ranker_train.v5(
    training_ds=m8.data,
    features=m3.data,
    test_ds=m13.data,
    learning_algorithm='排序',
    number_of_leaves=32,
    minimum_docs_per_leaf=1024,
    number_of_trees=10,
    learning_rate=0.01,
    max_bins=512,
    feature_fraction=1,
    m_lazy_run=False,
)
# Optional inspection helpers (left disabled):
# m9.plot_model()
# m9.feature_gains.read_df()
# m9.ndcg.read_df()
[2019-04-29 22:41:01.079014] INFO: bigquant: stock_ranker_train.v5 开始运行..
[2019-04-29 22:41:01.556909] ERROR: bigquant: module name: cached, module version: v2, trackeback: Traceback (most recent call last): TypeError: unorderable types: NoneType() < int()
[2019-04-29 22:41:01.722725] ERROR: bigquant: module name: stock_ranker_train, module version: v5, trackeback: Traceback (most recent call last): TypeError: unorderable types: NoneType() < int()
# Read the trainer's `ndcg` output as a DataFrame (notebook display).
m9.ndcg.read_df()

# Score the out-of-sample feature set (m13) with the trained model.
# NOTE(review): the prediction log captured below is timestamped earlier than
# the failed training run above, so it likely came from a previous m9 —
# re-run once training succeeds.
m15 = M.stock_ranker_predict.v5(
    model=m9.model,
    data=m13.data,
    m_lazy_run=False,
)
[2019-04-29 22:03:17.078941] INFO: bigquant: stock_ranker_predict.v5 开始运行..
[2019-04-29 22:03:18.940930] INFO: StockRanker预测: /y_2015 ..
[2019-04-29 22:03:19.307737] INFO: StockRanker预测: /y_2016 ..
[2019-04-29 22:03:19.669848] INFO: StockRanker预测: /y_2017 ..
[2019-04-29 22:03:20.375913] INFO: bigquant: stock_ranker_predict.v5 运行完成[3.296971s].
# Read the trainer's `ndcg` output as a DataFrame (notebook display).
# NOTE(review): the captured stock_ranker_train run logged a TypeError, so m9
# may be stale or undefined here — confirm training succeeded first.
m9.ndcg.read_df()