目标:构建资金流反转策略。 因子数:11个 (包含短周期量价因子 和资金流因子)
因子研究:研报复现寻找思路 对量价因子和资金流因子进行分解,找出他们的相关性---机器学习挖掘
过滤:对因子值进行重新约束,寻找一个风格较为稳定的股票池,并过滤ST
标注:未来2日收益(不做离散化)
算法:StockRanker
类型:排序问题
训练集:08-21年
测试集:21.03-21.12年
选股依据:根据预测值降序排序买入
持股数:1,5,10
最佳持股数 :1只。 因为stockRanker算法预测值在头部的风格收益更为集中。
夏普比率:5.61
年化收益:612.66%
最大回撤:11.55%
持仓天数:1
交易规则---每日9:30开盘买入
次日15:00前卖出
核心逻辑:通过 calculate_score 函数计算每日的市场得分 score;每日收盘后,根据该得分决定第二天开仓的资金总量。
from biglearning.api.tools import view_model_result
# Inspect a module's run result; column_name selects the field to summarize.
# NOTE(review): m7 is only assigned further down in this file (the label/feature
# join), so this statement works only when executed after that assignment,
# e.g. as a separate notebook cell - confirm the intended execution order.
view_model_result(m7, column_name="date")
#series1[series1.values == 1].index
#MultiIndex([], names=['date', 'industry_sw_level3'])
#f5[f5.values == 1].index
# for value in f5.items():# 遍历并拿到每对索引和数据
# print(value)
# cg5=m33.result#[7]['m28'].data_1.read()#[['score_1'],['score_2'],['score_3'],['score_4']][0]#.read_raw_perf()#['m34.score_1']#['m28'].data_1.read()
# cg5
#cg5['score_4'].values[-1]#[::-1]#[1] #'m34.score_1'
# for k in range(len(m33.result)):# 'm34.score_1'
# print('score_1>:',m33.result[k]['m21'].data.read()['score_1'].values[0])
# print('score_2<=:',m33.result[k]['m21'].data.read()['score_2'].values[0])
# print('score_3>:',m33.result[k]['m21'].data.read()['score_3'].values[-1])
# print('score_4<=:',m33.result[k]['m21'].data.read()['score_4'].values[-1])
# print('绩效结果:',m33.result[k]['m20'].read_raw_perf()[['algorithm_period_return','alpha','beta','max_drawdown','sharpe']].tail(1))
# 本代码由可视化策略环境自动生成 2023年12月24日 17:53
# 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
# 显式导入 BigQuant 相关 SDK 模块
from bigdatasource.api import DataSource
from bigdata.api.datareader import D
from biglearning.api import M
from biglearning.api import tools as T
from biglearning.module2.common.data import Outputs
import pandas as pd
import numpy as np
import math
import warnings
import datetime
from zipline.finance.commission import PerOrder
from zipline.api import get_open_orders
from zipline.api import symbol
from bigtrader.sdk import *
from bigtrader.utils.my_collections import NumPyDeque
from bigtrader.constant import OrderType
from bigtrader.constant import Direction
# <aistudiograph>
# @param(id="m24", name="run")
# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
def m24_run_bigquant_run(input_1, input_2, input_3):
    """Add a trend flag for each benchmark moving-average column.

    For every column in ``bm_ma_3``, ``bm_ma_10``, ``bm_ma_20`` and
    ``bm_ma_30`` a ``<column>_trend`` column is written:

    * ``1`` - both of the last two row-over-row changes exceed +0.6%;
    * ``2`` - both of the last two row-over-row changes are below -0.3%;
    * ``0`` - anything else (including rows without enough history).

    Args:
        input_1: Data source whose ``read()`` returns a DataFrame holding a
            ``date`` column and the four moving-average columns.
        input_2: Unused input port.
        input_3: Unused input port.

    Returns:
        ``Outputs`` whose ``data_1`` is the date-sorted frame with the trend
        columns added. The scratch columns ``rate1``/``rate2`` are left in the
        frame; after the loop they hold the values computed for ``bm_ma_30``.
    """
    ma_columns = ['bm_ma_3', 'bm_ma_10', 'bm_ma_20', 'bm_ma_30']
    # Small constant added to the denominators below.
    epsilon = 0.00001

    frame = input_1.read()
    frame = frame.sort_values('date', ascending=True).reset_index(drop=True)

    # Moving averages one and two rows back (rows are ordered by date).
    lag_one = frame[ma_columns].shift(1)
    lag_two = frame[ma_columns].shift(2)

    for ma_name in ma_columns:
        trend_name = ma_name + '_trend'
        frame[trend_name] = 0

        # rate1: change between the two previous rows; rate2: change into the
        # current row.
        frame['rate1'] = (lag_one[ma_name] - lag_two[ma_name]) / (lag_two[ma_name] + epsilon)
        frame['rate2'] = (frame[ma_name] - lag_one[ma_name]) / (lag_one[ma_name] + epsilon)

        rising = (frame['rate1'] > 0.006) & (frame['rate2'] > 0.006)
        frame.loc[rising, trend_name] = 1  # trending up

        falling = (frame['rate1'] < -0.003) & (frame['rate2'] < -0.003)
        frame.loc[falling, trend_name] = 2  # trending down

    return Outputs(data_1=DataSource.write_df(frame))
# @param(id="m24", name="post_run")
# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
def m24_post_run_bigquant_run(outputs):
    """Post-processing hook for module m24; hands the outputs back unchanged.

    Args:
        outputs: The value returned by the module's main run function.

    Returns:
        The same ``outputs`` object.
    """
    return outputs
# @param(id="m20", name="initialize")
# 回测引擎:初始化函数,只执行一次
def m20_initialize_bigquant_run(context):
    """Backtest initialization hook (per the file's own note, run once).

    Loads the prediction table passed in through ``context.options['data']``,
    installs the commission model and stores the position-sizing settings on
    ``context``.

    Args:
        context: Backtest context object; mutated in place.
    """
    # Prediction data handed over via options, materialized as a DataFrame.
    context.ranker_prediction = context.options['data'].read_df()

    # Override the engine's default commission settings.
    commission_model = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission_model)

    # Single-stock portfolio: one weight entry.
    context.stock_weights = [1]
    # Maximum share of capital a single instrument may take.
    context.max_cash_per_instrument = 1
    context.options['hold_days'] = 1
# @param(id="m20", name="handle_data")
# 回测引擎:每日数据处理函数,每天执行一次
def m20_handle_data_bigquant_run(context, data):
    """Daily hook: liquidate current holdings and buy today's top-ranked stock.

    The cash committed to the new position is ``available cash * score`` where
    ``score`` is a market score in [0, 1] computed from the benchmark columns
    (``bm_*``) found on today's prediction rows.

    Changes compared with the previous revision:

    * The distance between the benchmark close and each moving average is now
      a relative deviation ``(close - ma) / ma``. It used to be the raw point
      difference, which made the -3% / -6% bands effectively unreachable.
    * Every held position is sold, not only the first one, so a holding whose
      earlier sell did not go through is no longer stranded.
    * A failure to compute the score is handled explicitly (narrow ``except``
      and a ``None`` marker) instead of relying on a ``NameError`` being
      swallowed by a bare ``except``. The fallback itself is unchanged: the
      full available cash is used.

    Args:
        context: Backtest context. Reads ``context.ranker_prediction``
            (DataFrame with ``date``, ``instrument``, ``score`` and the
            ``bm_*`` columns), ``context.portfolio.cash`` and
            ``context.portfolio.positions``; places orders through
            ``context.order_target`` and ``context.order_value``.
        data: Bar data object; only ``data.current_dt`` is read.
    """
    today = data.current_dt.strftime('%Y-%m-%d')
    # Keep only the prediction rows dated today.
    ranker_prediction = context.ranker_prediction[
        context.ranker_prediction.date == today]
    # The same rows also carry the benchmark features used for the score.
    index_df = ranker_prediction

    def calculate_score(index_df, i=0):
        """Return the market score (0 to 10) for row ``i`` of ``index_df``."""
        close = index_df['bm_close'].values[i]

        def deviation(ma_column):
            # Relative distance of the close from the given moving average.
            moving_average = index_df[ma_column].values[i]
            return (close - moving_average) / moving_average

        def banded_points(rate):
            # Full credit at/above the average, partial credit when the close
            # is less than 3% / 6% below it, nothing otherwise.
            if rate >= 0:
                return 2
            if rate > -0.03:
                return 1
            if rate > -0.06:
                return 0.5
            return 0

        score = 0
        if deviation('bm_ma_3') >= 0:
            score += 1
        score += banded_points(deviation('bm_ma_10'))
        score += banded_points(deviation('bm_ma_20'))
        if deviation('bm_ma_30') >= 0:
            score += 1

        # Trend flags: 1 = rising, 0 = flat, 2 = falling.
        if index_df['bm_ma_3_trend'].values[i] == 1:
            score += 1

        ma_10_trend = index_df['bm_ma_10_trend'].values[i]
        if ma_10_trend == 1:
            score += 2
        elif ma_10_trend == 0:
            score += 1

        ma_20_trend = index_df['bm_ma_20_trend'].values[i]
        if ma_20_trend == 1:
            score += 1.5
        elif ma_20_trend == 0:
            score += 0.5

        # The components can add up to 10.5; cap at 10 so score / 10 <= 1.
        if score > 10:
            score = 10
        # Penalty when bm_Collapse_sum is positive (recent sharp benchmark drop).
        if index_df['bm_Collapse_sum'].values[i] > 0:
            score = score - 1.5
        if score < 0:
            score = 0
        return score

    # score / 10 is the fraction of available cash allowed for today's entry.
    try:
        score = calculate_score(index_df, 0) / 10
        print(today,'今天市场的得分:',score)
    except (IndexError, KeyError):
        # No prediction rows for today, or a benchmark column is missing.
        score = None
        print('score数据读取出错!')

    # NOTE: a market-wide stop-loss (sell everything when score <= 0) existed
    # here as commented-out code and is intentionally not active.

    cash_for_buy = context.portfolio.cash
    print('今日原来仓位:',cash_for_buy)
    if score is None:
        # NOTE(review): without a score the full cash balance is used, as
        # before - confirm this is the desired fail-open behaviour.
        max_cash_per_instrument = cash_for_buy
    else:
        max_cash_per_instrument = cash_for_buy * score
    print('今日决定开仓仓位:',max_cash_per_instrument)

    # Sell first, then buy. Snapshot the symbols before placing any order.
    held_symbols = [asset.symbol for asset in context.portfolio.positions.keys()]
    for instrument in held_symbols:
        context.order_target(context.symbol(instrument), 0)

    # Buy only the first instrument of today's predictions; widen the slice
    # to hold more names.
    buy_instruments = list(ranker_prediction.instrument)
    for instrument in buy_instruments[:1]:
        print('score',index_df[index_df.instrument==instrument].score.values[0])
        context.order_value(context.symbol(instrument), max_cash_per_instrument)
# @param(id="m20", name="prepare")
def m20_prepare_bigquant_run(context):
    """Load ST-status and price-limit-status fields onto ``context.status_df``.

    The fields are requested for ``context.instruments`` over
    ``context.start_date`` .. ``context.end_date``.

    Args:
        context: Backtest context object; ``status_df`` is set on it.
    """
    status_fields = ['st_status_0', 'price_limit_status_0', 'price_limit_status_1']
    context.status_df = D.features(
        instruments=context.instruments,
        start_date=context.start_date,
        end_date=context.end_date,
        fields=status_fields,
    )
# @param(id="m20", name="before_trading_start")
def m20_before_trading_start_bigquant_run(context, data):
    """Pre-trading hook; currently a no-op.

    Args:
        context: Backtest context object (unused).
        data: Bar data object (unused).
    """
    # Disabled logic kept for reference: cancel open sell orders for
    # instruments flagged as limit-up today.
    # # load the price-limit status data
    # df_price_limit_status=context.status_df.set_index('date')
    # today=data.current_dt.strftime('%Y-%m-%d')
    # # iterate over the currently open orders
    # for orders in get_open_orders().values():
    #     # loop and cancel orders
    #     for _order in orders:
    #         ins=str(_order.sid.symbol)
    #         try:
    #             # if the stock is limit-up today, cancel the sell order
    #             if df_price_limit_status[df_price_limit_status.instrument==ins].price_limit_status_0.loc[today]>2 and _order.amount<0:
    #                 cancel_order(_order)
    #                 print(today,'尾盘涨停取消卖单',ins)
    #         except:
    #             continue
    return None
# ---------------------------------------------------------------------------
# Training pipeline: instruments -> labels + features -> join -> filters ->
# StockRanker training. The `# @module(...)` lines are editor annotations and
# are left exactly as generated.
# ---------------------------------------------------------------------------
# m1: CN A-share instruments for the training window 2005-01-01 .. 2020-03-15.
# @module(position="211,64", comment='', comment_collapsed=True)
m1 = M.instruments.v2(
start_date='2005-01-01',
end_date='2020-03-15',
market='CN_STOCK_A',
instrument_list='',
max_count=0
)
# m2: label definition. The expression below is passed as a string (its `#`
# lines belong to the string, not to Python): return from the next open to the
# close two rows ahead, clipped at the 1% / 99% quantiles, then mapped into
# 30 bins and cast to int.
# @module(position="56,181", comment='', comment_collapsed=True)
m2 = M.advanced_auto_labeler.v2(
instruments=m1.data,
label_expr="""# 计算收益:2日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
shift(close, -2) / shift(open, -1)-1
# 极值处理:用1%和99%分位的值做clip
clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
# 将分数映射到分类,这里使用30个分类
all_wbins(label, 30)""",
start_date='',
end_date='',
benchmark='000300.HIX',
drop_na_label=True,
cast_label_int=True
)
# m3: the 11 model features (one expression per line): amount, volatility,
# money-flow (mf_net_amount_*) and volume/return correlation factors.
# @module(position="765,21", comment='', comment_collapsed=True)
m3 = M.input_features.v1(
features="""ts_min(amount_0,20)/mean(amount_0,20)
rank_swing_volatility_5_0
rank(mean(mf_net_amount_xl_0,5))/rank(mean(mf_net_amount_xl_0,20))
rank(sum(high_0/close_0,20))/rank(sum(close_0/low_0,10))
mean(mf_net_amount_m_0,10)/mean(mf_net_amount_m_0,20)
rank(mean(amount_0/deal_number_0,5))/rank(mean(amount_0/deal_number_0,20))
rank(mean(mf_net_amount_s_0,5))/rank(mean(mf_net_amount_s_0,20))
rank(mean(mf_net_amount_m_0,5))/rank(mean(mf_net_amount_m_0,10))
rank(mean(mf_net_amount_l_0,5))/rank(mean(mf_net_amount_l_0,10))
correlation(sqrt(volume_0),return_0,5)
correlation(log(volume_0),abs(return_0-1),5)"""
)
# m4: m3's features plus two helper columns, cond1 and cond3, that are only
# used by the row filter (m10) further down.
# @module(position="629.280029296875,107.40000915527344", comment='这里放置要过滤的条件', comment_collapsed=True)
m4 = M.input_features.v1(
features_ds=m3.data,
features="""cond1=rank(((close_0-open_0)/open_0)/((close_0-open_4)/open_4))
cond3=rank(((high_0-low_0)/close_1)/ts_max(((high_0-low_0)/close_1), 20))"""
)
# m15: extract the base fields required by m4's expressions for m1's
# instruments.
# @module(position="379,188", comment='', comment_collapsed=True)
m15 = M.general_feature_extractor.v7(
instruments=m1.data,
features=m4.data,
start_date='',
end_date='',
before_start_days=0
)
# m16: evaluate the derived feature expressions; NaN rows and extra columns
# are kept at this stage.
# @module(position="374,257", comment='', comment_collapsed=True)
m16 = M.derived_feature_extractor.v3(
input_data=m15.data,
features=m4.data,
date_col='date',
instrument_col='instrument',
drop_na=False,
remove_extra_columns=False
)
# m7: inner-join labels (m2) with features (m16) on (date, instrument).
# @module(position="130.91998291015625,325.19000244140625", comment='', comment_collapsed=True)
m7 = M.join.v3(
data1=m2.data,
data2=m16.data,
on='date,instrument',
how='inner',
sort=False
)
# m5: ST-stock filter module (the module name is spelled `filtet_st_stock`);
# presumably removes ST-flagged rows - its implementation is not visible here.
# @module(position="129.6599884033203,404.3000183105469", comment='', comment_collapsed=True)
m5 = M.filtet_st_stock.v7(
input_1=m7.data
)
# m10: keep only rows matching the cond1 / cond3 expression below.
# @module(position="128.94000244140625,481.0600280761719", comment='', comment_collapsed=True)
m10 = M.filter.v3(
input_data=m5.data_1,
expr='cond1<0.01 & cond3>0.85',
output_left_data=False
)
# m13: drop rows containing NaN.
# @module(position="157,556", comment='', comment_collapsed=True)
m13 = M.dropnan.v1(
input_data=m10.data
)
# m6: train StockRanker on the filtered set using m3's features.
# NOTE(review): test_ds is the same dataset as training_ds, so any evaluation
# reported by this module is in-sample.
# @module(position="628.6600341796875,660.8200073242188", comment='', comment_collapsed=True)
m6 = M.stock_ranker_train.v5(
training_ds=m13.data,
features=m3.data,
test_ds=m13.data,
learning_algorithm='排序',
number_of_leaves=30,
minimum_docs_per_leaf=280,
number_of_trees=21,
learning_rate=0.1,
max_bins=1023,
feature_fraction=1,
m_lazy_run=False
)
# ---------------------------------------------------------------------------
# Prediction pipeline: instruments -> features -> filters -> StockRanker
# prediction -> join predictions back onto the feature rows.
# ---------------------------------------------------------------------------
# m19: m3's features plus the filter helpers (cond1, cond3) and two evaluation
# columns, return_2_day / return_3_day. Both use negative shifts, i.e. values
# from later rows.
# NOTE(review): these forward-looking columns pass through m14 (dropnan), so
# rows where they are not yet available are presumably dropped - confirm this
# is acceptable for the most recent dates / live runs.
# @module(position="983.5699462890625,99.00999450683594", comment='检验过拟合', comment_collapsed=False)
m19 = M.input_features.v1(
features_ds=m3.data,
features="""cond1=rank(((close_0-open_0)/open_0)/((close_0-open_4)/open_4))
cond3=rank(((high_0-low_0)/close_1)/ts_max(((high_0-low_0)/close_1), 20))
#未来两日股票的收益
return_2_day=(shift(close_0, -2)-shift(open_0, -1))/shift(open_0, -1)
#未来三日股票的收益
return_3_day=(shift(close_0, -3)-shift(open_0, -1))/shift(open_0, -1)"""
)
# m9: CN A-share instruments for the prediction/backtest window. Both dates go
# through T.live_run_param('trading_date', <default>); the literal dates are
# the defaults (2020-03-16 .. 2023-06-30).
# @module(position="1193.4801025390625,737.2899780273438", comment='预测数据,用于回测和模拟', comment_collapsed=False)
m9 = M.instruments.v2(
start_date=T.live_run_param('trading_date', '2020-03-16'),
end_date=T.live_run_param('trading_date', '2023-06-30'),
market='CN_STOCK_A',
instrument_list='',
max_count=0
)
# m17: extract base fields for m19's expressions, with 90 extra days requested
# before the start date.
# @module(position="1028.4400634765625,196.30999755859375", comment='', comment_collapsed=True)
m17 = M.general_feature_extractor.v7(
instruments=m9.data,
features=m19.data,
start_date='',
end_date='',
before_start_days=90
)
# m18: evaluate the derived feature expressions; NaN rows and extra columns
# are kept at this stage.
# @module(position="1024.25,264.9599914550781", comment='', comment_collapsed=True)
m18 = M.derived_feature_extractor.v3(
input_data=m17.data,
features=m19.data,
date_col='date',
instrument_col='instrument',
drop_na=False,
remove_extra_columns=False
)
# m11: ST-stock filter module (same module as m5 on the training side).
# @module(position="1030.9100341796875,400.510009765625", comment='', comment_collapsed=True)
m11 = M.filtet_st_stock.v7(
input_1=m18.data
)
# m12: same row filter expression as the training side (m10).
# @module(position="1041.260009765625,480.5500183105469", comment='', comment_collapsed=True)
m12 = M.filter.v3(
input_data=m11.data_1,
expr='cond1<0.01 & cond3>0.85',
output_left_data=False
)
# m14: drop rows containing NaN.
# @module(position="1039.2200927734375,564.1300048828125", comment='', comment_collapsed=True)
m14 = M.dropnan.v1(
input_data=m12.data
)
# m8: apply the trained model (m6) to the filtered prediction rows.
# @module(position="776.6400146484375,748.5", comment='', comment_collapsed=True)
m8 = M.stock_ranker_predict.v5(
model=m6.model,
data=m14.data,
m_lazy_run=False
)
# m25: inner-join predictions with their feature rows on (date, instrument).
# @module(position="1150,847", comment='', comment_collapsed=True)
m25 = M.join.v3(
data1=m8.predictions,
data2=m14.data,
on='date,instrument',
how='inner',
sort=False
)
# ---------------------------------------------------------------------------
# Market risk-control features: benchmark index features -> trend flags ->
# left-join onto the per-stock predictions by date.
#
# Fix in this revision: the `# @module(...)` annotation in front of m24 spans
# two lines and its second line had no leading `#`, which left an identifier
# followed by an unterminated triple-quoted string in the Python source. The
# second line is now a comment as well; no module argument was changed.
# ---------------------------------------------------------------------------
# m22: benchmark feature expressions, passed as a string (the `#` lines inside
# belong to the string). Active expressions: close, previous close, one-row
# return bm_rate, a "collapse" flag (bm_rate < -0.02) with its 4-row sum, and
# the 3/10/20/30-row means of close.
# @module(position="1571.5799560546875,624.9999694824219", comment='大盘风控的特征表达式', comment_collapsed=True)
m22 = M.input_features.v1(
features="""# #号开始的表示注释
# 多个特征,每行一个,可以包含基础特征和衍生特征
#当天收盘价/昨日的收盘价
#--- 1.用指数的成交量(3.5日ma线死叉) 作为 全仓卖出风控的依据
#bm_0=where(ta_macd_dif(close,2,4,4)-ta_macd_dea(close,2,4,4)<0,1,0)#where(mean(volume, 5)-mean(volume, 10)<0,1,0)
#bm_0=where(ta_macd_dif(close,2,4,4)-ta_macd_dea(close,2,4,4)<0,1,0)
#--- 2.用指数的 MAAMT指标 作为 MAAMT指标择时策略 全仓卖出风控的依据
#成交量(金额)类
#求成交额的移动平均线。
#MAAMT=MA(AMOUNT,N)
#信号产生方式 如果成交额上穿 MAAMT,则产生买入信号;
#如果成交额下穿 MAAMT,则产生卖出信号。
#bm_1=where(mean(amount, 5)-mean(amount, 10)<0,1,0)
#--- 3.用指数的 MAAMT指标 作为 MAAMT指标择时策略 全仓卖出风控的依据
#------------------------------
#计算 昨日到今天的涨幅收益率 bm_rate
bm_close=close
bm_pre_close=shift(close, 1)
bm_rate=(close-shift(close, 1))/shift(close, 1)
bm_Collapse=bm_rate<-0.02
bm_Collapse_sum=sum(bm_Collapse,4)
bm_ma_3=mean(close, 3)
bm_ma_10=mean(close, 10)
bm_ma_20=mean(close, 20)
bm_ma_30=mean(close, 30)
"""
)
# m23: compute m22's expressions for index 000852.HIX over m9's date range,
# with 40 extra days requested before the start.
# @module(position="1553.3499755859375,828.7900390625", comment='', comment_collapsed=True)
m23 = M.index_feature_extract.v3(
input_1=m9.data,
input_2=m22.data,
before_days=40,
index='000852.HIX'
)
# m24: run m24_run_bigquant_run on the index features to add the
# bm_ma_*_trend columns.
# @module(position="1559.1600341796875,902.5900268554688", comment="""抽取并计算我们需要的
# 计算风控函数的特征""", comment_collapsed=False)
m24 = M.cached.v3(
input_1=m23.data_1,
run=m24_run_bigquant_run,
post_run=m24_post_run_bigquant_run,
input_ports='',
params='{}',
output_ports=''
)
# m27: left-join the benchmark features onto the per-stock predictions by
# date, so every prediction row carries that date's bm_* values.
# NOTE(review): `on` is 'date,' with a trailing comma - confirm the join
# module ignores the empty key.
# @module(position="1264,1010", comment='l连接 大盘风控+个股择时', comment_collapsed=True)
m27 = M.join.v3(
data1=m25.data,
data2=m24.data_1,
on='date,',
how='left',
sort=False
)
# ---------------------------------------------------------------------------
# Ranking and backtest.
# ---------------------------------------------------------------------------
# m21: within each date, sort rows by score in descending order; handle_data
# buys the first instrument of each day's rows.
# @module(position="1193,1104", comment='', comment_collapsed=True)
m21 = M.sort.v5(
input_ds=m27.data,
sort_by='score',
group_by='date',
keep_columns='--',
ascending=False
)
# m20: backtest / trading module wired to the m20_* callbacks defined above.
# Buy orders are priced on the 'open' field and sell orders on the 'close'
# field; starting capital is 100000 and the benchmark is 000300.SHA.
# @module(position="1171,1208", comment='', comment_collapsed=True)
m20 = M.trade.v4(
instruments=m9.data,
options_data=m21.sorted_data,
start_date='',
end_date='',
initialize=m20_initialize_bigquant_run,
handle_data=m20_handle_data_bigquant_run,
prepare=m20_prepare_bigquant_run,
before_trading_start=m20_before_trading_start_bigquant_run,
volume_limit=0,
order_price_field_buy='open',
order_price_field_sell='close',
capital_base=100000,
auto_cancel_non_tradable_orders=True,
data_frequency='daily',
price_type='真实价格',
product_type='股票',
plot_charts=True,
backtest_only=False,
benchmark='000300.SHA'
)
# </aistudiograph>
# import pandas as pd
# pd.set_option("display.max_columns",75)
# #data =m2.data.read_df()
# #data=m1.data_1.read()
# #dataabs.describe()
# df2=m25.data.read()
# #df2[]
# df2
# df3=pd.DataFrame(df2,columns=['date','return_2_day','return_3_day','score','position'])#index=range(3),
# df3.set_index('date', inplace=True)
# #df3=df2[df2()]
# df3['rank_return_2_day']=df3.groupby(['date']).rank(method='first',ascending =0)['return_2_day']
# df3['rank_return_3_day']=df3.groupby(['date']).rank(method='first',ascending =0)['return_3_day']
# df3['rank_2_day_IC']=df3.groupby(pd.Grouper(freq='D')).apply(lambda x : x['score'].corr(x['rank_return_2_day']))#.mean()
# df3['rank_3_day_IC']=df3.groupby(pd.Grouper(freq='D')).apply(lambda x : x['score'].corr(x['rank_return_3_day']))#.mean()
# df3['rank_2_day_mean_IC']=df3['rank_2_day_IC'].mean()
# df3['rank_3_day_mean_IC']=df3['rank_3_day_IC'].mean()
# #df3['rank_return_2_day']=df3.groupby(['date']).rank(method='first',ascending =0)['return_2_day']
# #df3[df3['rank_2_day_IC']>-0.104603]#.describe()
# #df3[df3['return_2_day']>0]
# #df3[df3['score']>0.274052]
# df3['score区间']=pd.qcut(df3.score,q=20,)
# # df3[]
# #
# df3
# df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)].describe() #求解 的问题是 当 score > ? 时 return_2_day or return_3_day >0
#for i in
# df4=df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)]
#.score.groupby(pd.qcut(df3.score,q=3)).count()
#df4.score.groupby(pd.qcut(df4.score,q=20,)).count()
#---------- (-1.3059999999999998, -0.388] 0.93
#--------------(-0.388, -0.113] 0.9
#------------- -0.113, 0.11 1.6
#------ (0.11, 0.361] 0.22
#-------0.361, 1.546 2.03
#pd.qcut(df4.score,q=20,).unique()
#df3[['score']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()
#df3['return_2_day'][(df3['score']>=-1.3059999999999998)&(df3['score']<=0.714)].count()#.plot(kind='line',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()
# df3[(df3['score']>=-0.714)&(df3['score']<=0.537)]
# df3[(df3['score']>=-0.537)&(df3['score']<=-0.449)]
# df3[(df3['score']>=-0.449)&(df3['score']<=-0.388)]
# df3[(df3['score']>=-0.388)&(df3['score']<=-0.303)]
# df3[(df3['score']>=-0.303)&(df3['score']<=-0.233)]
# df3[(df3['score']>=-0.233)&(df3['score']<=-0.171)]
# df3[(df3['score']>=-0.171)&(df3['score']<=-0.113)]
# df3[(df3['score']>=-0.113)&(df3['score']<=-0.0561)]
# df3[(df3['score']>=-0.0561)&(df3['score']<=-0.011)]
# df3[(df3['score']>=-0.011)&(df3['score']<=0.0468)]
# df3[(df3['score']>=0.0468)&(df3['score']<= 0.11)]
# df3[(df3['score']>= 0.11)&(df3['score']<=0.169)]
# df3[(df3['score']>=0.169)&(df3['score']<=0.227)]
# df3[(df3['score']>=0.227)&(df3['score']<=0.287)]
# df3[(df3['score']>=0.287)&(df3['score']<=0.361)]
# #_--------
# df3[(df3['score']>=0.361)&(df3['score']<=0.435)]
# df3[(df3['score']>=0.435)&(df3['score']<=0.536)]
# df3[(df3['score']>=0.536)&(df3['score']<=0.698)]
# df3[(df3['score']>=0.698)&(df3['score']<=1.546)]
# df3['return_2_day'][(df3['score']>=-1.3059999999999998)&(df3['score']<=0.714)].describe() # 2000 0.004158 0.026293 0.442982 #.plot(kind='line',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()
#df3['return_2_day'][(df3['score']>=-0.537)&(df3['score']<=-0.449)].describe() #0.002605 0.028501 0.1
#df3['return_2_day'][(df3['score']>=-0.449)&(df3['score']<=-0.388)].describe() #0.005911 0.021751 0.239216
#df3['return_2_day'][(df3['score']>=-0.388)&(df3['score']<=-0.303)].describe() #135 -0.000586 0.012510 0.442982 8
#df3['return_2_day'][(df3['score']>=-0.303)&(df3['score']<=-0.233)].describe() #110 0.006159 0.202464 0.202464
#df3['return_2_day'][(df3['score']>=-0.233)&(df3['score']<=-0.171)].describe() #98 0.003723 0.035007 0.186347 7
#df3['return_2_day'][(df3['score']>=-0.171)&(df3['score']<=-0.113)].describe() # 101 0.010759 0.028823 0.261965 2
#df3['return_2_day'][(df3['score']>=-0.113)&(df3['score']<=-0.0561)].describe() #98 0.002214 0.026110 0.100441 3
#df3['return_2_day'][(df3['score']>=-0.0561)&(df3['score']<=-0.011)].describe() #94 0.010412 0.032637 0.189704 4
#df3['return_2_day'][(df3['score']>=-0.011)&(df3['score']<=0.0468)].describe() #102 0.001492 0.022022 0.181818
#df3['return_2_day'][(df3['score']>=0.0468)&(df3['score']<= 0.11)].describe() #109 0.009523 0.015822 0.338358 6
#df3['return_2_day'][(df3['score']>= 0.11)&(df3['score']<=0.169)].describe() #102 0.003458 0.032862 0.130952
#df3['return_2_day'][(df3['score']>=0.169)&(df3['score']<=0.227)].describe() #96 0.007752 0.027923 0.151668
#df3['return_2_day'][(df3['score']>=0.227)&(df3['score']<=0.287)].describe()#105 0.002695 0.025961 0.131479
#df3['return_2_day'][(df3['score']>=0.287)&(df3['score']<=0.361)].describe() #113 0.004832 0.026992 0.209622
# #_--------
#df3['return_2_day'][(df3['score']>=0.361)&(df3['score']<=0.435)].describe()#92 0.008760 0.032824 0.100276 5
#df3['return_2_day'][(df3['score']>=0.435)&(df3['score']<=0.536)].describe()#93 0.011568 0.030636 0.235207 1
#df3['return_2_day'][(df3['score']>=0.536)&(df3['score']<=0.698)].describe()#103 0.006027 0.024931 0.208889 9
# df3['return_2_day'][(df3['score']>=0.698)&(df3['score']<=1.546)].describe()#96 0.005390 0.031579 0.186147 # 10
# score=[0.004158,0.002605,0.005911,-0.000586,0.006159,0.003723,0.010759,0.002214,0.010412,0.001492,0.009523,0.003458,0.007752,0.002695,0.004832,0.008760,0.011568,0.006027,0.005390]
# name=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]
# cf5=dict({"name":name,'score':score})
# cf6=pd.DataFrame.from_dict(cf5)
# cf7=cf6#.T#.plot(kind='line',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()
# #cf7.set_index('name', inplace=True)
# cf8=cf7.T
# cf8.reset_index()
# #cf8.loc['score',0]
# #cf8.loc['1']
# #cf8.plot(kind = 'bar')
# # for i in range(1,19,1):
# #cf8.rename(columns = name,inplace=True)
# cf9=cf8.drop(index='name')
# #plot.xticks(name, )
# #cf8[i].plot(kind='hist',color='g',title='return_2_day',figsize=(30, 15),fontsize=22)#df2.describe()
# # #追加数据
# # # for i in score:
# # # cf5.iloc[i] = score[i]
# # #cf5.iloc[0]
# cf9.plot(kind = 'bar',figsize=(30, 15),fontsize=22)
# # x = range(0, 19, 1)
# # plt.xticks(x, (name))
# #plt.show()
# df6=pd.qcut(df4.score,q=5,).unique()
# #df4[df4['score'].df6[0]]
# df6
# score_1=[-0.113,-1.3059999999999998,0.361,-0.388,0.11,0.361]
# score_2=[-0.388,-0.113,0.11,0.361,1.546]
# df4.score.groupby(pd.cut(df4.score,bins=[-1.305037,-0.303442,-0.010987,-0.005318,0.286986,1.545682],include_lowest=True)).count()
#df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)]
# df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)].score.groupby(pd.qcut(df3[(df3['return_2_day']>0)|(df3['return_3_day']>0)].score,q=5,)).count()
# df3[['rank_2_day_IC',]].plot(kind='line',color='r',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['rank_3_day_IC',]].plot(kind='line',color='r',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['rank_2_day_mean_IC',]].plot(kind='line',color='g',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# #df3.show()
# df3[['rank_3_day_mean_IC',]].plot(kind='line',color='g',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# #data_act['act_sort']=data_act.groupby(['date']).rank(method='first',ascending =0)['return']
# # data=pd.merge(pred, data_act)
# # df=pd.DataFrame({'pred_label':data.position, 'act_label':data.act_sort,'date':data.date})
# df3.score[df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# print('平均RankIC:',df.groupby(['date']).apply(lambda x : x['pred_label'].corr(x['act_label'])).mean())
# df3[['return_2_day']][df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['score']][df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
# df3[['position']][df3['rank_2_day_IC']>-0.104603].plot(kind='line',color='b',title='数据变化',figsize=(20, 5),fontsize=12)#df2.describe()
#--------------
# df3[['return_2_day']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()
# df3[['score']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=42)#df2.describe()
# df3[['position']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()
#df3[['score区间']][df3['return_2_day']>0].plot(kind='line',color='g',title='数据变化',figsize=(30, 15),fontsize=22)#df2.describe()