声明:以下代码请在 AIStudio 3.0.0 环境下运行
import dai
import pandas as pd
from datetime import datetime, timedelta
sd = '2023-01-01'
ed = datetime.now().date().strftime("%Y-%m-%d")
def get_factor_data(alpha_sql):
sql = f"""
WITH
data_base AS (
WITH
data_a AS (
SELECT
date,
instrument,
open,
close,
high,
low,
amount,
volume,
close / m_lag(close, 1) - 1 AS ret,
amount / volume AS vwap,
industry_level1_code,
industry_level2_code,
industry_level3_code,
IF(open<=m_lag(open, 1), 0, greatest(high-open, open-m_lag(open, 1))) AS DTM,
IF(open>=m_lag(open, 1), 0, greatest(open-low, open-m_lag(open, 1))) AS DBM,
m_lag(low, 1) - low AS LD,
high - m_lag(high, 1) AS HD,
greatest(greatest(high-low, ABS(high-m_lag(close, 1))), ABS(low-m_lag(close, 1))) AS TR,
FROM cn_stock_bar1d JOIN cn_stock_industry_component USING (date, instrument)
WHERE instrument NOT LIKE '%BJ%'
AND industry = 'sw2021'
),
data_b AS (
WITH
data_be AS (
SELECT
date,
instrument,
close / m_lag(close, 1) - 1 AS change_ratio,
float_market_cap,
total_owner_equity_lf,
FROM cn_stock_bar1d JOIN cn_stock_valuation USING (date, instrument) JOIN cn_stock_factors_financial_items USING (date, instrument)
),
data1 AS (
SELECT
date,
instrument,
change_ratio,
FROM data_be
),
data2 AS (
SELECT
date,
instrument,
c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS MKT
FROM data_be
),
data3 AS (
WITH
data3_0 AS (
SELECT
date,
instrument,
change_ratio,
float_market_cap,
c_pct_rank(float_market_cap) AS rank_sb,
c_pct_rank(float_market_cap / total_owner_equity_lf) AS rank_lmh,
CASE
WHEN rank_sb < 0.5 THEN 1
ELSE 2
END AS group_sb,
CASE
WHEN rank_lmh < 0.3 THEN 1
WHEN rank_lmh > 0.7 THEN 3
ELSE 2
END AS group_lmh,
FROM data_be
),
data3_sl AS (
SELECT DISTINCT
date,
c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SL
FROM data3_0
WHERE group_sb = 1 AND group_lmh = 1
),
data3_sm AS (
SELECT DISTINCT
date,
c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SM
FROM data3_0
WHERE group_sb = 1 AND group_lmh = 2
),
data3_sh AS (
SELECT DISTINCT
date,
c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SH
FROM data3_0
WHERE group_sb = 1 AND group_lmh = 3
),
data3_bl AS (
SELECT DISTINCT
date,
c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BL
FROM data3_0
WHERE group_sb = 2 AND group_lmh = 1
),
data3_bm AS (
SELECT DISTINCT
date,
c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BM
FROM data3_0
WHERE group_sb = 2 AND group_lmh = 2
),
data3_bh AS (
SELECT DISTINCT
date,
c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BH
FROM data3_0
WHERE group_sb = 2 AND group_lmh = 3
),
data3_merge AS (
SELECT
data3_0.date,
data3_0.instrument,
(1/3) * (SL + SM + SH) - (1/3) * (BL + BM + BH) AS SMB,
(1/2) * (SH + BH) - (1/2) * (SL + BL) AS HML,
FROM data3_0
JOIN data3_sl USING (date)
JOIN data3_sm USING (date)
JOIN data3_sh USING (date)
JOIN data3_bl USING (date)
JOIN data3_bm USING (date)
JOIN data3_bh USING (date)
)
SELECT * FROM data3_merge
)
SELECT
date,
instrument,
MKT,
SMB,
HML,
FROM data1 JOIN data2 USING (date, instrument) JOIN data3 USING (date, instrument)
),
data_c AS (
SELECT
date,
close AS bm_close,
open AS bm_open,
FROM cn_stock_index_bar1d
WHERE instrument = '000001.SH'
)
SELECT *
FROM data_a JOIN data_b USING (date, instrument) JOIN data_c USING (date)
QUALIFY COLUMNS(*) IS NOT NULL
),
data_factor AS (
{alpha_sql}
)
SELECT
*
FROM data_factor
QUALIFY COLUMNS(*) IS NOT NULL
ORDER BY date, instrument
"""
return dai.query(sql, filters={'date':[sd, ed]}).df()
#### Alpha191因子第181个
alpha_sql = f"""
SELECT
date,
instrument,
m_sum(((close/m_lag(close,1)-1)-m_avg((close/m_lag(close,1)-1),20))-(bm_close-m_avg(bm_close,20))^2,20)/m_sum((bm_close-m_avg(bm_close,20))^3,20) AS alpha_a191_f0181
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第182个
alpha_sql = f"""
SELECT
date,
instrument,
m_sum(IF((close > open AND bm_close > bm_open) OR (close < open AND bm_close < bm_open), 1, 0), 20) / 20 AS alpha_a191_f0182
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第183个
alpha_sql = f"""
SELECT
date,
instrument,
m_max(c_sum(close-m_avg(close,24)),24)-m_min(c_sum(close-m_avg(close,24)),24)/m_stddev(close,24) AS alpha_a191_f0183
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第184个
alpha_sql = f"""
SELECT
date,
instrument,
(c_pct_rank(m_corr(m_lag((open -close), 1), close, 200)) + c_pct_rank((open -close))) AS alpha_a191_f0184
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第185个
alpha_sql = f"""
SELECT
date,
instrument,
c_pct_rank((-1 * ((1 -(open / close))^2))) AS alpha_a191_f0185
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第186个
alpha_sql = f"""
SELECT
date,
instrument,
(m_lag(m_avg(abs(m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) - m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)) / (m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) + m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)),14) * 100, 6) + m_lag(m_avg(abs(m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) - m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)) / (m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) + m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)),14) * 100, 6)) / 2 AS alpha_a191_f0186
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第187个
alpha_sql = f"""
SELECT
date,
instrument,
m_sum(IF(open <= m_lag(open, 1), 0, greatest((high - open), (open - m_lag(open, 1)))), 20) AS alpha_a191_f0187
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第188个
alpha_sql = f"""
SELECT
date,
instrument,
((high-low-m_ta_ewm(high-low,11,2))/m_ta_ewm(high-low,11,2))*100 AS alpha_a191_f0188
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第189个
alpha_sql = f"""
SELECT
date,
instrument,
m_avg(abs(close-m_avg(close,6)),6) AS alpha_a191_f0189
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第190个(将外层log去掉避免log取0错误)
alpha_sql = f"""
SELECT
date,
instrument,
(((m_sum(IF(close / m_lag(close,1) - 1 > ((close / m_lag(close, 19))^(1/20) - 1), 1, 0), 20) - 1) * (m_sum(IF(close / m_lag(close,1) - 1 < (close / m_lag(close, 19))^(1/20) - 1, (close / m_lag(close,1) - 1 - (close / m_lag(close, 19))^(1/20) - 1)^2, 0), 20))) / ((m_sum(IF(close / m_lag(close,1) - 1 < (close / m_lag(close, 19))^(1/20) - 1, 1, 0), 20)) * (m_sum(IF(close / m_lag(close,1) - 1 > (close / m_lag(close, 19))^(1/20) - 1, (close / m_lag(close,1) - 1 - (close / m_lag(close, 19))^(1/20) - 1)^2, 0), 20)))) AS alpha_a191_f0190
FROM data_base
"""
get_factor_data(alpha_sql)
#### Alpha191因子第191个
alpha_sql = f"""
SELECT
date,
instrument,
((m_corr(m_avg(volume,20), low, 5) + ((high + low) / 2)) -close) AS alpha_a191_f0191
FROM data_base
"""
get_factor_data(alpha_sql)