【代码实现】广发高频因子构建的实现
由hiai创建,最终由hiai 被浏览 18 用户
根据文章:
高频价量数据的因子化方法-多因子Alpha系列报告之四十一-广发证券
将高频因子聚合成一天的时候使用
row_number() over (PARTITION BY day ORDER by day ORDER BY date) as _rn,
QUALIFY _rn = 1
这个降频的选择是应该选择哪一行的问题QUALIFY _rn = 1或者 QUALIFY _rn = 240?
因为
截取了文章中一段代码进行测试发现——
如果去掉不使用QUALIFY _rn 每一行的**==ratio_volumeH==**==1、real_skewnessH1等等这些值其实不一样==,应该选择哪一行?
(不太应该是按照时间的倒序排序数据才能计算全? 那么DESC应该加入排序 row_number() over (PARTITION BY day ORDER by day ORDER BY date ==DESC==) as _rn,)
SELECT
date,
instrument,
datetrunc('day', date) AS day,
strftime('%H:%M', date) AS min,
SUM(if(strftime('%H:%M', date) >= '09:30' and strftime('%H:%M', date) <= '10:00', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH1,
SUM(if(strftime('%H:%M', date) >= '10:00' and strftime('%H:%M', date) <= '10:30', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH2,
SUM(if(strftime('%H:%M', date) >= '10:30' and strftime('%H:%M', date) <= '11:00', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH3,
SUM(if(strftime('%H:%M', date) >= '11:00' and strftime('%H:%M', date) <= '11:30', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH4,
SUM(if(strftime('%H:%M', date) >= '13:00' and strftime('%H:%M', date) <= '13:30', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH5,
SUM(if(strftime('%H:%M', date) >= '13:30' and strftime('%H:%M', date) <= '14:00', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH6,
SUM(if(strftime('%H:%M', date) >= '14:00' and strftime('%H:%M', date) <= '14:30', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH7,
SUM(if(strftime('%H:%M', date) >= '14:30' and strftime('%H:%M', date) <= '15:00', volume, 0)) OVER(PARTITION BY day ORDER BY date)/SUM(volume) OVER(PARTITION BY day ORDER BY date) AS ratio_volumeH8,
close/open AS _ret_min,
CORR(volume,close) OVER(PARTITION BY day ORDER BY date) AS corr_VP,
CORR(volume,_ret_min) OVER(PARTITION BY day ORDER BY date) AS corr_VR,
COALESCE(LAG(_ret_min,1) OVER(PARTITION BY day ORDER BY date),0) AS _ret_LAG,
COALESCE(LEAD(_ret_min,1) OVER(PARTITION BY day ORDER BY date),0) AS _ret_LEAD,
CORR(volume,_ret_LAG) OVER(PARTITION BY day ORDER BY date) AS corr_VRlag,
CORR(volume,_ret_LEAD) OVER(PARTITION BY day ORDER BY date) AS corr_VRlead,
row_number() over (PARTITION BY day ORDER by day ORDER BY date) as _rn,
SUM(if(strftime('%H:%M', date) = '09:31', open, 0)) OVER(PARTITION BY day ORDER BY date) AS _o1,
SUM(if(strftime('%H:%M', date) = '10:00', close, 0)) OVER(PARTITION BY day ORDER BY date) AS _c1,
SUM(if(strftime('%H:%M', date) = '10:00', open, 0)) OVER(PARTITION BY day ORDER BY date) AS _o2,
SUM(if(strftime('%H:%M', date) = '15:00', close, 0)) OVER(PARTITION BY day ORDER BY date) AS _c2,
_c1/_o1 AS ret_H1,
_c2/_o2 AS ret_close2H1,
CORR(volume,close) FILTER(min>='09:30' AND min <= '10:00') OVER(PARTITION BY day ORDER BY date) AS corr_VPH1,
CORR(volume,_ret_min) FILTER(min>='09:30' AND min <= '10:00') OVER(PARTITION BY day ORDER BY date) AS corr_VRH1,
CORR(volume,_ret_LEAD) FILTER(min>='09:30' AND min <= '10:00') OVER(PARTITION BY day ORDER BY date) AS corr_VRleadH1,
CORR(volume,_ret_LAG) FILTER(min>='09:30' AND min <= '10:00') OVER(PARTITION BY day ORDER BY date) AS corr_VRlagH1,
nanvar_pop(_ret_min) FILTER(min>='09:30' AND min <= '10:00') OVER(PARTITION by day ORDER BY date) AS real_varH1,
kurtosis(_ret_min) FILTER(min>='09:30' AND min <= '10:00') OVER(PARTITION by day ORDER BY date) AS real_kurtosisH1,
skewness(_ret_min) FILTER(min>='09:30' AND min <= '10:00') OVER(PARTITION by day ORDER BY date) AS real_skewnessH1,
CORR(volume,close) FILTER(min>='14:30' AND min <= '15:00') OVER(PARTITION BY day ORDER BY date) AS corr_VPH8,
CORR(volume,_ret_min) FILTER(min>='14:30' AND min <= '15:00') OVER(PARTITION BY day ORDER BY date) AS corr_VRH8,
CORR(volume,_ret_LEAD) FILTER(min>='14:30' AND min <= '15:00') OVER(PARTITION BY day ORDER BY date) AS corr_VRleadH8,
CORR(volume,_ret_LAG) FILTER(min>='14:30' AND min <= '15:00') OVER(PARTITION BY day ORDER BY date) AS corr_VRlagH8,
nanvar_pop(_ret_min) FILTER(min>='14:30' AND min <= '15:00') OVER(PARTITION by day ORDER BY date) AS real_varH8,
kurtosis(_ret_min) FILTER(min>='14:30' AND min <= '15:00') OVER(PARTITION by day ORDER BY date) AS real_kurtosisH8,
skewness(_ret_min) FILTER(min>='14:30' AND min <= '15:00') OVER(PARTITION by day ORDER BY date) AS real_skewnessH8,
FROM
cn_stock_level2_bar1m
WHERE
date >='{0}'||' 00:00:00.000'
AND
date <= '{1}'||' 23:59:59.999'
AND
instrument = '{2}'
QUALIFY _rn = 1
ORDER by date
\