Strategy overview: StockRanker (learning-to-rank) - oversold-rebound defensive strategy

Goal: build a money-flow reversal strategy. Factor count: 11 (short-horizon price/volume factors plus money-flow factors)

Factor research: reproduce research reports to find ideas; decompose the price/volume and money-flow factors, study their correlations, and mine them with machine learning

Filtering: re-constrain the factor values to obtain a stock pool with a relatively stable style, and drop ST stocks

Label: 2-day forward return (no discretization; see the sketch after this summary)

Algorithm: StockRanker

Problem type: ranking

Training set: 2008 to 2021

Test set: 2021-03 to 2021-12

Stock selection: buy in descending order of predicted score

Holdings tested: 1, 5, 10; best holding count: 1, because StockRanker's excess return is most concentrated at the head of the predicted ranking.
Sharpe ratio: 5.61; annualized return: 612.66%; max drawdown: 11.55%; holding period: 1 day.
Trading rules: buy at the 9:30 open; sell before 15:00 the next trading day.
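A minimal pandas sketch of the 2-day forward-return label above, assuming a DataFrame df with date, instrument, open and close columns (column names are illustrative; the strategy itself builds the label with the auto-labeler expression in module m2 of the generated code below):

import pandas as pd

def label_future_2d_return(df):
    # Sell at the close two days ahead, buy at the next day's open
    df = df.sort_values(["instrument", "date"]).copy()
    grp = df.groupby("instrument")
    df["label"] = grp["close"].shift(-2) / grp["open"].shift(-1) - 1
    # Clip extremes at the 1%/99% quantiles, as the auto-labeler expression also does
    lo, hi = df["label"].quantile([0.01, 0.99])
    df["label"] = df["label"].clip(lo, hi)
    return df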

Next, we add the position-management function to the backtest engine.

The core idea: the calculate_score function computes a daily market score; after each day's close, that score determines how much capital may be deployed when opening positions the next day.
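A minimal sketch of that mapping (the function name and signature below are illustrative, not part of the generated strategy): the score is capped at 10, so score/10 is the fraction of available cash allowed for the next day's entry.

# Illustrative sketch only: map a daily market score (0-10) to a cash budget for tomorrow's entry.
def position_budget(available_cash: float, market_score: float) -> float:
    fraction = max(0.0, min(market_score, 10.0)) / 10.0  # score 10 -> 100% of cash, score 0 -> stay out
    return available_cash * fraction

# e.g. position_budget(100000, 7.5) -> 75000.0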

In [1]:
from biglearning.api.tools import view_model_result 

# View a module's run result; column_name selects the field to summarize.
# Note: m7 is the join module defined in the strategy-graph cell below, so run that cell first;
# otherwise this raises NameError: name 'm7' is not defined.
view_model_result(m7, column_name="date")
In [1]:
#series1[series1.values == 1].index  
#MultiIndex([], names=['date', 'industry_sw_level3'])
#f5[f5.values == 1].index

# for value in f5.items():  # iterate and get each (index, value) pair

#     print(value)
In [2]:
# cg5=m33.result#[7]['m28'].data_1.read()#[['score_1'],['score_2'],['score_3'],['score_4']][0]#.read_raw_perf()#['m34.score_1']#['m28'].data_1.read() 
# cg5

#cg5['score_4'].values[-1]#[::-1]#[1] #'m34.score_1'
# for k in range(len(m33.result)):# 'm34.score_1'
#     print('score_1>:',m33.result[k]['m21'].data.read()['score_1'].values[0])
#     print('score_2<=:',m33.result[k]['m21'].data.read()['score_2'].values[0])
#     print('score_3>:',m33.result[k]['m21'].data.read()['score_3'].values[-1])
#     print('score_4<=:',m33.result[k]['m21'].data.read()['score_4'].values[-1])
#     print('performance:',m33.result[k]['m20'].read_raw_perf()[['algorithm_period_return','alpha','beta','max_drawdown','sharpe']].tail(1))
In [ ]:

In [8]:
# This code was auto-generated by the visual strategy environment on 2023-12-24 17:53
# This cell can only be edited in visual mode. You can also copy the code into a new code cell or strategy and modify it there.

# Explicitly import the BigQuant SDK modules
from bigdatasource.api import DataSource
from bigdata.api.datareader import D
from biglearning.api import M
from biglearning.api import tools as T
from biglearning.module2.common.data import Outputs
 
import pandas as pd
import numpy as np
import math
import warnings
import datetime
 
from zipline.finance.commission import PerOrder
from zipline.api import get_open_orders
from zipline.api import symbol
 
from bigtrader.sdk import *
from bigtrader.utils.my_collections import NumPyDeque
from bigtrader.constant import OrderType
from bigtrader.constant import Direction

# <aistudiograph>

# @param(id="m24", name="run")
# Python entry function: input_1/2/3 are the three input ports, data_1/2/3 the three output ports
def m24_run_bigquant_run(input_1, input_2, input_3):
    # Read the index feature data produced upstream
    df = input_1.read()
    risk_score_df = df
    # Compute the features used by the risk-control (position-management) function
    risk_score_df = risk_score_df.sort_values('date', ascending=True).reset_index(drop=True)
    columns = ['bm_ma_3', 'bm_ma_10', 'bm_ma_20', 'bm_ma_30']
    tmp_df2 = risk_score_df[columns].shift(1)
    tmp_df3 = risk_score_df[columns].shift(2)
    for tmp_col in columns:
        risk_score_df[tmp_col + '_trend'] = 0

        # rate1: yesterday's MA vs. the day before; rate2: today's MA vs. yesterday's
        risk_score_df['rate1'] = (tmp_df2[tmp_col] - tmp_df3[tmp_col]) / (tmp_df3[tmp_col] + 0.00001)
        risk_score_df['rate2'] = (risk_score_df[tmp_col] - tmp_df2[tmp_col]) / (tmp_df2[tmp_col] + 0.00001)
        idx = (risk_score_df['rate1'] > 0.006) & (risk_score_df['rate2'] > 0.006)
        risk_score_df.loc[idx, tmp_col + '_trend'] = 1  # trend up

        idx = (risk_score_df['rate1'] < -0.003) & (risk_score_df['rate2'] < -0.003)
        risk_score_df.loc[idx, tmp_col + '_trend'] = 2  # trend down

    data_1 = DataSource.write_df(risk_score_df)
    return Outputs(data_1=data_1)

# @param(id="m24", name="post_run")
# Post-processing function (optional). Its input is the main function's output; you can transform the data here or return a friendlier outputs format. This output is not cached.
def m24_post_run_bigquant_run(outputs):
    return outputs

# @param(id="m20", name="initialize")
# Backtest engine: initialization, executed once
def m20_initialize_bigquant_run(context):
    # Load the prediction data (passed in via options) into memory as a DataFrame
    context.ranker_prediction = context.options['data'].read_df()

    # Default commission and slippage are already set; override the commission like this
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
    # Number of stocks to buy: only the top-ranked prediction is bought
    stock_count = 1
    # Per-stock weight; with a single holding the weight is simply [1]
    context.stock_weights = [1]
    # Maximum fraction of capital allowed per stock
    context.max_cash_per_instrument = 1
    context.options['hold_days'] = 1

# @param(id="m20", name="handle_data")
# 回测引擎:每日数据处理函数,每天执行一次
def m20_handle_data_bigquant_run(context, data):
    # 获取当前持仓
    positions = {e.symbol: p.amount * p.last_sale_price

                 
                 for e, p in context.portfolio.positions.items()}
    
    
    today = data.current_dt.strftime('%Y-%m-%d')
    # 按日期过滤得到今日的预测数据
    ranker_prediction = context.ranker_prediction[
        context.ranker_prediction.date == today]
    
    
    #把当天用来计算 score得分的 数据我们用一个dataframe存起来
    index_df=ranker_prediction
    
    #print(today,index_df['bm_close'].values[0]-index_df['bm_ma_3'].values[0])
#------------------------------- market-score function used for position sizing
    def calculate_score(index_df, i=0):
        score = 0
        # Close vs. the 3-day MA: at or above the MA adds 1
        tmp_rate = (index_df['bm_close'].values[i] - index_df['bm_ma_3'].values[i])
        if tmp_rate >= 0:
            score += 1
        # Close vs. the 10-day MA: at or above adds 2; mildly below (> -0.03) adds 1; (> -0.06) adds 0.5
        tmp_rate = (index_df['bm_close'].values[i] - index_df['bm_ma_10'].values[i])
        if tmp_rate >= 0:
            score += 2
        elif tmp_rate > -0.03:
            score += 1
        elif tmp_rate > -0.06:
            score += 0.5
        # Close vs. the 20-day MA: same tiering as the 10-day MA
        tmp_rate = (index_df['bm_close'].values[i] - index_df['bm_ma_20'].values[i])
        if tmp_rate >= 0:
            score += 2
        elif tmp_rate > -0.03:
            score += 1
        elif tmp_rate > -0.06:
            score += 0.5
        # Close vs. the 30-day MA: at or above adds 1
        tmp_rate = (index_df['bm_close'].values[i] - index_df['bm_ma_30'].values[i])
        if tmp_rate >= 0:
            score += 1

        # MA trend flags computed in m24: 1 = trend up, 2 = trend down, 0 = flat
        if index_df['bm_ma_3_trend'].values[i] == 1:
            score += 1

        if index_df['bm_ma_10_trend'].values[i] == 1:
            score += 2
        elif index_df['bm_ma_10_trend'].values[i] == 0:
            score += 1

        if index_df['bm_ma_20_trend'].values[i] == 1:
            score += 1.5
        elif index_df['bm_ma_20_trend'].values[i] == 0:
            score += 0.5

        # Cap the score at 10 so the later score/10 position fraction never exceeds 100%
        if score > 10:
            score = 10

        # Penalize recent index crashes (days in the last 4 with a drop below -2%)
        if index_df['bm_Collapse_sum'].values[i] > 0:
            # (an earlier variant subtracted 2 only when score > 8)
            score = score - 1.5
            if score < 0:
                score = 0

        return score
#------------------------------- score/10 is today's maximum allowed position fraction
    try:
        score = calculate_score(index_df, 0) / 10  # e.g. a market score of 10 gives 10/10 = 1, i.e. 100% of available cash
        print(today, 'market score today:', score)
    except:
        print('failed to read the score data!')

#     try:
#         # Market-level risk control driven by the market score
#         if score <= 0:
#             for instrument in positions.keys():
#                 context.order_target(context.symbol(instrument), 0)
#                 print(today, 'market risk-control stop triggered, selling everything')
#             return
#     except:
#         print('failed to read the market score!')
            
    


    
    #------------------------ position sizing and order preparation --------------------------

    # Cash available for today's trades
    cash_for_buy = context.portfolio.cash

    print('cash available today:', cash_for_buy)
    # Scale it by today's market score
    try:
        max_cash_per_instrument = cash_for_buy * score
        print('cash committed to new positions today:', max_cash_per_instrument)
    except:
        max_cash_per_instrument = cash_for_buy
        print('cash committed to new positions today:', max_cash_per_instrument)
    # Today's predicted stock pool from the model
    buy_instruments = list(ranker_prediction.instrument)
    # Current holdings
    current_hold_stock = [equity.symbol for equity in context.portfolio.positions]
    # Stocks to sell today (everything currently held)
    sell_instruments = [instrument.symbol for instrument in context.portfolio.positions.keys()]

    #---- Sell before buying, so cash is freed up and no unintended short orders are created
    totay_to_sell = [i for i in sell_instruments[:1]]  # only one stock is ever held, so it can simply be sold

    # Sell all current holdings
    for instrument in totay_to_sell:
        context.order_target(context.symbol(instrument), 0)

    # Buy from today's predicted pool buy_instruments
    totay_to_buy = [i for i in buy_instruments[:1]]  # buy only the top-ranked name
    # To hold several stocks instead, slice the top N names (e.g. N = 2 for two stocks)
    # and split the budget between them; see the illustrative sketch below.
    # For simple bookkeeping, the whole budget is placed on the stock bought today.
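    # Illustrative sketch (not part of the generated strategy): to hold the top N
    # predictions instead of a single stock, slice N names and split the budget, e.g.:
    #     N = 2
    #     totay_to_buy = buy_instruments[:N]
    #     for i in totay_to_buy:
    #         context.order_value(context.symbol(i), max_cash_per_instrument / len(totay_to_buy))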
  
    for i in totay_to_buy:
        print('score',index_df[index_df.instrument==i].score.values[0])
        #if ((index_df[index_df.instrument==i].score.values[0]>=0.0468)&(index_df[index_df.instrument==i].score.values[0]<=0.11)):
            #context.order_value(context.symbol(i),max_cash_per_instrument)
        #if ((index_df[index_df.instrument==i].score.values[0]>=-0.0561)&(index_df[index_df.instrument==i].score.values[0]<=-0.011)):
            #context.order_value(context.symbol(i),max_cash_per_instrument)
        #if     ((index_df[index_df.instrument==i].score.values[0]>=0.435)&(index_df[index_df.instrument==i].score.values[0]<=0.536)):
        #max_cash_per_instrument/len(totay_to_buy)

        context.order_value(context.symbol(i),max_cash_per_instrument)
    
    

# @param(id="m20", name="prepare")
def m20_prepare_bigquant_run(context):


     # 获取st状态和涨跌停状态
    
    context.status_df = D.features(instruments =context.instruments,start_date = context.start_date, end_date = context.end_date, 
                           fields=['st_status_0','price_limit_status_0','price_limit_status_1'])

# @param(id="m20", name="before_trading_start")
def m20_before_trading_start_bigquant_run(context, data):
    pass     
#     # 获取涨跌停状态数据
#     df_price_limit_status=context.status_df.set_index('date')
#     today=data.current_dt.strftime('%Y-%m-%d')
#     # 得到当前未完成订单
#     for orders in get_open_orders().values():
#         # 循环,撤销订单
#         for _order in orders:
#             ins=str(_order.sid.symbol)
#             try:
#                 #判断一下如果当日涨停,则取消卖单
#                 if  df_price_limit_status[df_price_limit_status.instrument==ins].price_limit_status_0.loc[today]>2 and _order.amount<0:
#                     cancel_order(_order)
#                     print(today,'尾盘涨停取消卖单',ins) 
#             except:
#                 continue
  
    
    

# @module(position="211,64", comment='', comment_collapsed=True)
m1 = M.instruments.v2(
    start_date='2005-01-01',
    end_date='2020-03-15',
    market='CN_STOCK_A',
    instrument_list='',
    max_count=0
)

# @module(position="56,181", comment='', comment_collapsed=True)
m2 = M.advanced_auto_labeler.v2(
    instruments=m1.data,
    label_expr="""# 计算收益:2日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
shift(close, -2) / shift(open, -1)-1

# 极值处理:用1%和99%分位的值做clip
clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))

# 将分数映射到分类,这里使用30个分类
all_wbins(label, 30)""",
    start_date='',
    end_date='',
    benchmark='000300.HIX',
    drop_na_label=True,
    cast_label_int=True
)

# @module(position="765,21", comment='', comment_collapsed=True)
m3 = M.input_features.v1(
    features="""ts_min(amount_0,20)/mean(amount_0,20)
rank_swing_volatility_5_0
rank(mean(mf_net_amount_xl_0,5))/rank(mean(mf_net_amount_xl_0,20))
rank(sum(high_0/close_0,20))/rank(sum(close_0/low_0,10))
mean(mf_net_amount_m_0,10)/mean(mf_net_amount_m_0,20)
rank(mean(amount_0/deal_number_0,5))/rank(mean(amount_0/deal_number_0,20))
rank(mean(mf_net_amount_s_0,5))/rank(mean(mf_net_amount_s_0,20))
rank(mean(mf_net_amount_m_0,5))/rank(mean(mf_net_amount_m_0,10))
rank(mean(mf_net_amount_l_0,5))/rank(mean(mf_net_amount_l_0,10))
correlation(sqrt(volume_0),return_0,5)
correlation(log(volume_0),abs(return_0-1),5)"""
)

# @module(position="629.280029296875,107.40000915527344", comment='这里放置要过滤的条件', comment_collapsed=True)
m4 = M.input_features.v1(
    features_ds=m3.data,
    features="""cond1=rank(((close_0-open_0)/open_0)/((close_0-open_4)/open_4))


cond3=rank(((high_0-low_0)/close_1)/ts_max(((high_0-low_0)/close_1), 20))"""
)

# @module(position="379,188", comment='', comment_collapsed=True)
m15 = M.general_feature_extractor.v7(
    instruments=m1.data,
    features=m4.data,
    start_date='',
    end_date='',
    before_start_days=0
)

# @module(position="374,257", comment='', comment_collapsed=True)
m16 = M.derived_feature_extractor.v3(
    input_data=m15.data,
    features=m4.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False
)

# @module(position="130.91998291015625,325.19000244140625", comment='', comment_collapsed=True)
m7 = M.join.v3(
    data1=m2.data,
    data2=m16.data,
    on='date,instrument',
    how='inner',
    sort=False
)

# @module(position="129.6599884033203,404.3000183105469", comment='', comment_collapsed=True)
m5 = M.filtet_st_stock.v7(
    input_1=m7.data
)

# @module(position="128.94000244140625,481.0600280761719", comment='', comment_collapsed=True)
m10 = M.filter.v3(
    input_data=m5.data_1,
    expr='cond1<0.01 & cond3>0.85',
    output_left_data=False
)

# @module(position="157,556", comment='', comment_collapsed=True)
m13 = M.dropnan.v1(
    input_data=m10.data
)

# @module(position="628.6600341796875,660.8200073242188", comment='', comment_collapsed=True)
m6 = M.stock_ranker_train.v5(
    training_ds=m13.data,
    features=m3.data,
    test_ds=m13.data,
    learning_algorithm='排序',
    number_of_leaves=30,
    minimum_docs_per_leaf=280,
    number_of_trees=21,
    learning_rate=0.1,
    max_bins=1023,
    feature_fraction=1,
    m_lazy_run=False
)

# @module(position="983.5699462890625,99.00999450683594", comment='检验过拟合', comment_collapsed=False)
m19 = M.input_features.v1(
    features_ds=m3.data,
    features="""cond1=rank(((close_0-open_0)/open_0)/((close_0-open_4)/open_4))


cond3=rank(((high_0-low_0)/close_1)/ts_max(((high_0-low_0)/close_1), 20))
#2-day forward return of the stock
return_2_day=(shift(close_0, -2)-shift(open_0, -1))/shift(open_0, -1)
#3-day forward return of the stock
return_3_day=(shift(close_0, -3)-shift(open_0, -1))/shift(open_0, -1)"""
)

# @module(position="1193.4801025390625,737.2899780273438", comment='预测数据,用于回测和模拟', comment_collapsed=False)
m9 = M.instruments.v2(
    start_date=T.live_run_param('trading_date', '2020-03-16'),
    end_date=T.live_run_param('trading_date', '2023-06-30'),
    market='CN_STOCK_A',
    instrument_list='',
    max_count=0
)

# @module(position="1028.4400634765625,196.30999755859375", comment='', comment_collapsed=True)
m17 = M.general_feature_extractor.v7(
    instruments=m9.data,
    features=m19.data,
    start_date='',
    end_date='',
    before_start_days=90
)

# @module(position="1024.25,264.9599914550781", comment='', comment_collapsed=True)
m18 = M.derived_feature_extractor.v3(
    input_data=m17.data,
    features=m19.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False
)

# @module(position="1030.9100341796875,400.510009765625", comment='', comment_collapsed=True)
m11 = M.filtet_st_stock.v7(
    input_1=m18.data
)

# @module(position="1041.260009765625,480.5500183105469", comment='', comment_collapsed=True)
m12 = M.filter.v3(
    input_data=m11.data_1,
    expr='cond1<0.01 & cond3>0.85',
    output_left_data=False
)

# @module(position="1039.2200927734375,564.1300048828125", comment='', comment_collapsed=True)
m14 = M.dropnan.v1(
    input_data=m12.data
)

# @module(position="776.6400146484375,748.5", comment='', comment_collapsed=True)
m8 = M.stock_ranker_predict.v5(
    model=m6.model,
    data=m14.data,
    m_lazy_run=False
)

# @module(position="1150,847", comment='', comment_collapsed=True)
m25 = M.join.v3(
    data1=m8.predictions,
    data2=m14.data,
    on='date,instrument',
    how='inner',
    sort=False
)

# @module(position="1571.5799560546875,624.9999694824219", comment='大盘风控的特征表达式', comment_collapsed=True)
m22 = M.input_features.v1(
    features="""# #号开始的表示注释
# 多个特征,每行一个,可以包含基础特征和衍生特征
#当天收盘价/昨日的收盘价
#--- 1.用指数的成交量(3.5日ma线死叉)  作为  全仓卖出风控的依据
#bm_0=where(ta_macd_dif(close,2,4,4)-ta_macd_dea(close,2,4,4)<0,1,0)#where(mean(volume, 5)-mean(volume, 10)<0,1,0)
#bm_0=where(ta_macd_dif(close,2,4,4)-ta_macd_dea(close,2,4,4)<0,1,0)

#--- 2.用指数的  MAAMT指标 作为 MAAMT指标择时策略  全仓卖出风控的依据
#成交量(金额)类
#求成交额的移动平均线。
#MAAMT=MA(AMOUNT,N)
#信号产生方式 如果成交额上穿 MAAMT,则产生买入信号;
#如果成交额下穿 MAAMT,则产生卖出信号。

#bm_1=where(mean(amount, 5)-mean(amount, 10)<0,1,0)

#--- 3.用指数的  MAAMT指标 作为 MAAMT指标择时策略  全仓卖出风控的依据

#------------------------------
#计算 昨日到今天的涨幅收益率 bm_rate
bm_close=close
bm_pre_close=shift(close, 1)
bm_rate=(close-shift(close, 1))/shift(close, 1)
bm_Collapse=bm_rate<-0.02
bm_Collapse_sum=sum(bm_Collapse,4)
bm_ma_3=mean(close, 3)
bm_ma_10=mean(close, 10)
bm_ma_20=mean(close, 20)
bm_ma_30=mean(close, 30)
"""
)

# @module(position="1553.3499755859375,828.7900390625", comment='', comment_collapsed=True)
m23 = M.index_feature_extract.v3(
    input_1=m9.data,
    input_2=m22.data,
    before_days=40,
    index='000852.HIX'
)

# @module(position="1559.1600341796875,902.5900268554688", comment="""抽取并计算我们需要的
计算风控函数的特征""", comment_collapsed=False)
m24 = M.cached.v3(
    input_1=m23.data_1,
    run=m24_run_bigquant_run,
    post_run=m24_post_run_bigquant_run,
    input_ports='',
    params='{}',
    output_ports=''
)

# @module(position="1264,1010", comment='l连接 大盘风控+个股择时', comment_collapsed=True)
m27 = M.join.v3(
    data1=m25.data,
    data2=m24.data_1,
    on='date,',
    how='left',
    sort=False
)

# @module(position="1193,1104", comment='', comment_collapsed=True)
m21 = M.sort.v5(
    input_ds=m27.data,
    sort_by='score',
    group_by='date',
    keep_columns='--',
    ascending=False
)

# @module(position="1171,1208", comment='', comment_collapsed=True)
m20 = M.trade.v4(
    instruments=m9.data,
    options_data=m21.sorted_data,
    start_date='',
    end_date='',
    initialize=m20_initialize_bigquant_run,
    handle_data=m20_handle_data_bigquant_run,
    prepare=m20_prepare_bigquant_run,
    before_trading_start=m20_before_trading_start_bigquant_run,
    volume_limit=0,
    order_price_field_buy='open',
    order_price_field_sell='close',
    capital_base=100000,
    auto_cancel_non_tradable_orders=True,
    data_frequency='daily',
    price_type='真实价格',
    product_type='股票',
    plot_charts=True,
    backtest_only=False,
    benchmark='000300.SHA'
)
# </aistudiograph>
In [ ]:
# import pandas as pd
# pd.set_option("display.max_columns",75)
# #data =m2.data.read_df()
# #data=m1.data_1.read()
# #dataabs.describe()
# df2=m25.data.read()
# #df2[]
# df2
In [ ]:
# df3=pd.DataFrame(df2,columns=['date','return_2_day','return_3_day','score','position'])#index=range(3),
# df3.set_index('date', inplace=True)
# #df3=df2[df2()]
# df3['rank_return_2_day']=df3.groupby(['date']).rank(method='first',ascending =0)['return_2_day']
# df3['rank_return_3_day']=df3.groupby(['date']).rank(method='first',ascending =0)['return_3_day']
# df3['rank_2_day_IC']=df3.groupby(pd.Grouper(freq='D')).apply(lambda x : x['score'].corr(x['rank_return_2_day']))#.mean()
# df3['rank_3_day_IC']=df3.groupby(pd.Grouper(freq='D')).apply(lambda x : x['score'].corr(x['rank_return_3_day']))#.mean()
# df3['rank_2_day_mean_IC']=df3['rank_2_day_IC'].mean()
# df3['rank_3_day_mean_IC']=df3['rank_3_day_IC'].mean()
# #df3['rank_return_2_day']=df3.groupby(['date']).rank(method='first',ascending =0)['return_2_day']
# #df3[df3['rank_2_day_IC']>-0.104603]#.describe()
# #df3[df3['return_2_day']>0]
# #df3[df3['score']>0.274052]
# df3['score区间']=pd.qcut(df3.score,q=20,)
# # df3[]
# #
# df3
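The commented-out cell above sketches a per-day Rank IC between the model score and the realized 2/3-day returns. A cleaner, runnable version (a sketch only, assuming a DataFrame df2 with columns date, score and return_2_day, e.g. read from m25.data) might look like:

import pandas as pd

def daily_rank_ic(df, ret_col="return_2_day"):
    """Rank correlation between the model score and the realized return, per trading day."""
    return df.groupby("date").apply(
        lambda g: g["score"].rank().corr(g[ret_col].rank())
    )

# ic = daily_rank_ic(df2)
# print("mean Rank IC:", ic.mean())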
In [ ]:
# df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)].describe()  # question: for which score range is return_2_day or return_3_day > 0?
#for i  in
In [ ]:
# df4=df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)]
#.score.groupby(pd.qcut(df3.score,q=3)).count()
#df4.score.groupby(pd.qcut(df4.score,q=20,)).count()

#---------- (-1.3059999999999998, -0.388] 0.93
#--------------(-0.388, -0.113] 0.9
#------------- -0.113, 0.11   1.6
#------ (0.11, 0.361] 0.22
#-------0.361, 1.546  2.03
#pd.qcut(df4.score,q=20,).unique()
#df3[['score']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()
#df3['return_2_day'][(df3['score']>=-1.3059999999999998)&(df3['score']<=0.714)].count()#.plot(kind='line',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()

# df3[(df3['score']>=-0.714)&(df3['score']<=0.537)]
# df3[(df3['score']>=-0.537)&(df3['score']<=-0.449)]
# df3[(df3['score']>=-0.449)&(df3['score']<=-0.388)]
# df3[(df3['score']>=-0.388)&(df3['score']<=-0.303)]
# df3[(df3['score']>=-0.303)&(df3['score']<=-0.233)]
# df3[(df3['score']>=-0.233)&(df3['score']<=-0.171)]
# df3[(df3['score']>=-0.171)&(df3['score']<=-0.113)]
# df3[(df3['score']>=-0.113)&(df3['score']<=-0.0561)]
# df3[(df3['score']>=-0.0561)&(df3['score']<=-0.011)]
# df3[(df3['score']>=-0.011)&(df3['score']<=0.0468)]
# df3[(df3['score']>=0.0468)&(df3['score']<= 0.11)]
# df3[(df3['score']>= 0.11)&(df3['score']<=0.169)]
# df3[(df3['score']>=0.169)&(df3['score']<=0.227)]
# df3[(df3['score']>=0.227)&(df3['score']<=0.287)]
# df3[(df3['score']>=0.287)&(df3['score']<=0.361)]
# #_--------

# df3[(df3['score']>=0.361)&(df3['score']<=0.435)]
# df3[(df3['score']>=0.435)&(df3['score']<=0.536)]
# df3[(df3['score']>=0.536)&(df3['score']<=0.698)]
# df3[(df3['score']>=0.698)&(df3['score']<=1.546)]
In [ ]:
# df3['return_2_day'][(df3['score']>=-1.3059999999999998)&(df3['score']<=0.714)].describe() # 2000 0.004158   0.026293 0.442982 #.plot(kind='line',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()
In [ ]:
#df3['return_2_day'][(df3['score']>=-0.537)&(df3['score']<=-0.449)].describe()    #0.002605    0.028501  0.1
#df3['return_2_day'][(df3['score']>=-0.449)&(df3['score']<=-0.388)].describe()  #0.005911   0.021751  0.239216
#df3['return_2_day'][(df3['score']>=-0.388)&(df3['score']<=-0.303)].describe() #135 -0.000586  0.012510  0.442982   8
#df3['return_2_day'][(df3['score']>=-0.303)&(df3['score']<=-0.233)].describe() #110  0.006159   0.202464  0.202464
#df3['return_2_day'][(df3['score']>=-0.233)&(df3['score']<=-0.171)].describe() #98   0.003723   0.035007   0.186347  7
#df3['return_2_day'][(df3['score']>=-0.171)&(df3['score']<=-0.113)].describe() # 101  0.010759   0.028823   0.261965  2
#df3['return_2_day'][(df3['score']>=-0.113)&(df3['score']<=-0.0561)].describe() #98   0.002214  0.026110   0.100441   3
#df3['return_2_day'][(df3['score']>=-0.0561)&(df3['score']<=-0.011)].describe() #94   0.010412  0.032637  0.189704    4
#df3['return_2_day'][(df3['score']>=-0.011)&(df3['score']<=0.0468)].describe()  #102   0.001492   0.022022   0.181818
#df3['return_2_day'][(df3['score']>=0.0468)&(df3['score']<= 0.11)].describe() #109  0.009523  0.015822  0.338358  6
#df3['return_2_day'][(df3['score']>= 0.11)&(df3['score']<=0.169)].describe() #102  0.003458   0.032862   0.130952
#df3['return_2_day'][(df3['score']>=0.169)&(df3['score']<=0.227)].describe() #96  0.007752  0.027923  0.151668
#df3['return_2_day'][(df3['score']>=0.227)&(df3['score']<=0.287)].describe()#105   0.002695  0.025961  0.131479
#df3['return_2_day'][(df3['score']>=0.287)&(df3['score']<=0.361)].describe() #113   0.004832  0.026992   0.209622
# #_--------

#df3['return_2_day'][(df3['score']>=0.361)&(df3['score']<=0.435)].describe()#92  0.008760 0.032824   0.100276  5
#df3['return_2_day'][(df3['score']>=0.435)&(df3['score']<=0.536)].describe()#93  0.011568  0.030636  0.235207  1
#df3['return_2_day'][(df3['score']>=0.536)&(df3['score']<=0.698)].describe()#103   0.006027 0.024931 0.208889  9
# df3['return_2_day'][(df3['score']>=0.698)&(df3['score']<=1.546)].describe()#96   0.005390  0.031579  0.186147 #  10
In [ ]:
# score=[0.004158,0.002605,0.005911,-0.000586,0.006159,0.003723,0.010759,0.002214,0.010412,0.001492,0.009523,0.003458,0.007752,0.002695,0.004832,0.008760,0.011568,0.006027,0.005390]
# name=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]
# cf5=dict({"name":name,'score':score})
# cf6=pd.DataFrame.from_dict(cf5)
# cf7=cf6#.T#.plot(kind='line',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()
# #cf7.set_index('name', inplace=True)
# cf8=cf7.T
# cf8.reset_index()
# #cf8.loc['score',0]
# #cf8.loc['1']
# #cf8.plot(kind = 'bar')
# # for i in range(1,19,1):
# #cf8.rename(columns = name,inplace=True)
# cf9=cf8.drop(index='name')

#     #plot.xticks(name, )
#     #cf8[i].plot(kind='hist',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()
# # #追加数据
# # # for i in score:
# # #     cf5.iloc[i] = score[i]
# # #cf5.iloc[0]
# cf9.plot(kind = 'bar',figsize=(30, 15),fontsize=22)
# # x = range(0, 19, 1)
# # plt.xticks(x, (name))
# #plt.show()
In [ ]:
# df6=pd.qcut(df4.score,q=5,).unique()
# #df4[df4['score'].df6[0]]
# df6
# score_1=[-0.113,-1.3059999999999998,0.361,-0.388,0.11,0.361]
# score_2=[-0.388,-0.113,0.11,0.361,1.546]
In [ ]:
# df4.score.groupby(pd.cut(df4.score,bins=[-1.305037,-0.303442,-0.010987,-0.005318,0.286986,1.545682],include_lowest=True)).count()
In [ ]:
#df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)]
# df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)].score.groupby(pd.qcut(df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)].score,q=5,)).count()
In [ ]:
# df3[['rank_2_day_IC',]].plot(kind='line',color='r',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['rank_3_day_IC',]].plot(kind='line',color='r',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['rank_2_day_mean_IC',]].plot(kind='line',color='g',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# #df3.show()
# df3[['rank_3_day_mean_IC',]].plot(kind='line',color='g',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
#    #data_act['act_sort']=data_act.groupby(['date']).rank(method='first',ascending =0)['return']
# # data=pd.merge(pred, data_act)
# #     df=pd.DataFrame({'pred_label':data.position, 'act_label':data.act_sort,'date':data.date})
# df3.score[df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()

#     print('平均RankIC:',df.groupby(['date']).apply(lambda x : x['pred_label'].corr(x['act_label'])).mean())
# df3[['return_2_day']][df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['score']][df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['position']][df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
#--------------

# df3[['return_2_day']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()
# df3[['score']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=42)#df2.describe()
# df3[['position']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()
#df3[['score区间']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()