克隆策略
In [2]:
df_300 = D.history_data(D.instruments(), '2019-03-01', '2019-03-01', ['in_csi300'])
stock_list = df_300[df_300['in_csi300']==1]['instrument'].unique().tolist()
len(stock_list)
Out[2]:
300
In [14]:
m1 = M.instruments.v2(
    start_date='2012-03-01',
    end_date='2015-06-01',
    market='CN_STOCK_A',
    instrument_list=stock_list,
    max_count=0
)
In [54]:
m2 = M.advanced_auto_labeler.v2(
    instruments=m1.data,
    label_expr="""
        shift(close, -20) / shift(open, -1)
        clip(label, all_quantile(label, 0.05), all_quantile(label, 0.95))
        all_wbins(label, 20)
        where(shift(high, -1) == shift(low, -1), NaN, label)
    """,
    start_date='',
    end_date='',
    benchmark='000300.SHA',
    drop_na_label=True,
    cast_label_int=True
)

# m2.data.read_df().tail()
# m2.plot_label_counts()
In [55]:
m3 = M.input_features.v1(
    features="""
       rank_return_5/rank_return_10
    """
)

m4 = M.general_feature_extractor.v7(
    instruments=m1.data,
    features=m3.data,
    start_date='',
    end_date='',
    before_start_days=120,
)

m5 = M.derived_feature_extractor.v3(
    input_data=m4.data,
    features=m3.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False
)

m7 = M.join.v3(
    data1=m2.data,
    data2=m5.data,
    on='date,instrument',
    how='inner',
    sort=False
)

m8 = M.dropnan.v1(input_data=m7.data)


# m50 = M.filter.v3(
#     input_data=m8.data,
#     expr='list_days_0 >=120',
#     output_left_data=False
# )
In [56]:
m10 = M.instruments.v2(
    start_date='2015-06-02',
    end_date='2017-03-01',
    market='CN_STOCK_A',
    instrument_list=stock_list,
    max_count=0
)

m11 = M.general_feature_extractor.v7(
    instruments=m10.data,
    features=m3.data,
    start_date='',
    end_date='',
    before_start_days=120
)

m12 = M.derived_feature_extractor.v3(
    input_data=m11.data,
    features=m3.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False
)

m13 = M.dropnan.v1(input_data=m12.data)
In [57]:
m9 = M.stock_ranker_train.v5(
    training_ds=m8.data,
    features=m3.data,
    test_ds=m13.data,
    learning_algorithm='排序',
    number_of_leaves=32,
    minimum_docs_per_leaf=1024,
    number_of_trees=10,
    learning_rate=0.01,
    max_bins=512,
    feature_fraction=1,
    m_lazy_run=False
)
# m9.plot_model()
# m9.feature_gains.read_df()
# m9.ndcg.read_df()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-57-7e65cab48e72> in <module>()
     10     max_bins=512,
     11     feature_fraction=1,
---> 12     m_lazy_run=False
     13 )
     14 # m9.plot_model()

TypeError: unorderable types: NoneType() < int()
In [50]:
m9.ndcg.read_df()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-50-6582fe7468dd> in <module>()
----> 1 m9.ndcg.read_df()

AttributeError: 'Outputs' object has no attribute 'ndcg'
In [34]:
m15 = M.stock_ranker_predict.v5(
    model=m9.model,
    data=m13.data,
    m_lazy_run=False
)
In [49]:
m9.ndcg.read_df()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-49-6582fe7468dd> in <module>()
----> 1 m9.ndcg.read_df()

AttributeError: 'Outputs' object has no attribute 'ndcg'