声明:以下代码请在 AIStudio 3.0.0 环境下运行
import dai
import pandas as pd
from datetime import datetime
sd = '2022-07-01'
ed = datetime.now().date().strftime("%Y-%m-%d")
def get_factor_data(alpha_sql):
sql = f"""
WITH
data_base AS (
SELECT
date,
instrument,
open,
close,
high,
low,
amount,
volume,
close / m_lag(close, 1) - 1 AS returns,
amount / volume AS vwap,
m_nanavg(volume, 5) AS adv5,
m_nanavg(volume, 10) AS adv10,
m_nanavg(volume, 15) AS adv15,
m_nanavg(volume, 20) AS adv20,
m_nanavg(volume, 30) AS adv30,
m_nanavg(volume, 40) AS adv40,
m_nanavg(volume, 50) AS adv50,
m_nanavg(volume, 60) AS adv60,
m_nanavg(volume, 81) AS adv81,
m_nanavg(volume, 120) AS adv120,
m_nanavg(volume, 150) AS adv150,
m_nanavg(volume, 180) AS adv180,
float_market_cap,
industry_level1_code,
industry_level2_code,
industry_level3_code,
FROM cn_stock_bar1d JOIN cn_stock_industry_component USING (date, instrument) JOIN cn_stock_valuation USING (date, instrument)
WHERE industry = 'sw2021'
AND instrument NOT LIKE '%BJ%'
QUALIFY COLUMNS(*) IS NOT NULL
)
,
data_factor AS (
{alpha_sql}
)
SELECT
*
FROM data_factor
QUALIFY COLUMNS(*) IS NOT NULL
ORDER BY date, instrument
"""
return dai.query(sql, filters={'date':[sd, ed]}).df()
计算公式: ((rank(Log(product(rank((rank(correlation(vwap, sum(adv10, 49.6054), 8.47743))^4)), 14.9655))) < rank(correlation(rank(vwap), rank(volume), 5.07914))) * -1)
alpha_sql = f"""
SELECT
date,
instrument,
IF(((c_pct_rank(log(m_product(c_pct_rank((c_pct_rank(m_corr(vwap, m_sum(adv10, 49.6054), 8.47743))^4)), 14.9655))) < c_pct_rank(m_corr(c_pct_rank(vwap), c_pct_rank(volume), 5.07914)))),1,0) * -1 AS alpha_a101_f0081
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (min(rank(decay_linear(delta(open, 1.46063), 14.8717)), Ts_Rank(decay_linear(correlation(IndNeutralize(volume, IndClass.sector), ((open 0.634196) + (open (1 - 0.634196))), 17.4842), 6.92131), 13.4283)) * -1)
alpha_sql = f"""
SELECT
date,
instrument,
(least(c_pct_rank(m_decay_linear(m_delta(open, 1.46063), 14.8717)), m_rank(m_decay_linear(m_corr(c_indneutralize(volume, industry_level1_code), ((open * 0.634196) + (open * (1 - 0.634196))), 17.4842), 6.92131), 13.4283)) * -1) AS alpha_a101_f0082
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: ((rank(delay(((high - low) / (sum(close, 5) / 5)), 2)) * rank(rank(volume))) / (((high - low) / (sum(close, 5) / 5)) / (vwap - close)))
alpha_sql = f"""
SELECT
date,
instrument,
((c_pct_rank(m_lag(((high - low) / (m_sum(close, 5) / 5)), 2)) * c_pct_rank(c_pct_rank(volume))) / (((high - low) / (m_sum(close, 5) / 5)) / (vwap - close))) AS alpha_a101_f0083
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: SignedPower(Ts_Rank((vwap - ts_max(vwap, 15.3217)), 20.7127), delta(close, 4.96796))
改动说明:指数部分改为截面排名,避免数值爆炸
alpha_sql = f"""
SELECT
date,
instrument,
power(m_rank((vwap - m_max(vwap, 15.3217)), 20.7127), c_pct_rank(m_delta(close, 4.96796))) AS alpha_a101_f0084
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (rank(correlation(((high 0.876703) + (close (1 - 0.876703))), adv30, 9.61331))^rank(correlation(Ts_Rank(((high + low) / 2), 3.70596), Ts_Rank(volume, 10.1595), 7.11408)))
alpha_sql = f"""
SELECT
date,
instrument,
(c_pct_rank(m_corr(((high * 0.876703) + (close * (1 - 0.876703))), adv30, 9.61331))^c_pct_rank(m_corr(m_rank(((high + low) / 2), 3.70596), m_rank(volume, 10.1595), 7.11408))) AS alpha_a101_f0085
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: ((Ts_Rank(correlation(close, sum(adv20, 14.7444), 6.00049), 20.4195) < rank(((open + close) - (vwap + open)))) * -1)
alpha_sql = f"""
SELECT
date,
instrument,
IF((m_pct_rank(m_corr(close, m_sum(adv20, 14.7444), 6.00049), 20.4195) < c_pct_rank(((open + close) - (vwap + open)))), 1, 0) * -1 AS alpha_a101_f0086
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (max(rank(decay_linear(delta(((close 0.369701) + (vwap (1 - 0.369701))), 1.91233), 2.65461)), Ts_Rank(decay_linear(abs(correlation(IndNeutralize(adv81, IndClass.industry), close, 13.4132)), 4.89768), 14.4535)) * -1) 改动说明:adv81的行业中性化改为log(adv81)的行业中性化,避免数值爆炸
alpha_sql = f"""
SELECT
date,
instrument,
greatest(c_pct_rank(m_decay_linear(m_delta(((close * 0.369701) + (vwap * (1 - 0.369701))), 1.91233), 2.65461)),m_pct_rank(m_decay_linear(abs(m_corr(c_indneutralize(adv81, industry_level2_code), close, 13.4132)), 4.89768), 14.4535)) * -1 AS alpha_a101_f0087
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: min(rank(decay_linear(((rank(open) + rank(low)) - (rank(high) + rank(close))), 8.06882)), Ts_Rank(decay_linear(correlation(Ts_Rank(close, 8.44728), Ts_Rank(adv60, 20.6966), 8.01266), 6.65053), 2.61957))
alpha_sql = f"""
SELECT
date,
instrument,
least(c_pct_rank(m_decay_linear(((c_pct_rank(open) + c_pct_rank(low)) - (c_pct_rank(high) + c_pct_rank(close))), 8.06882)), m_rank(m_decay_linear(m_corr(m_rank(close, 8.44728), m_rank(adv60, 20.6966), 8.01266), 6.65053), 2.61957)) AS alpha_a101_f0088
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (Ts_Rank(decay_linear(correlation(((low 0.967285) + (low (1 - 0.967285))), adv10, 6.94279), 5.51607), 3.79744) - Ts_Rank(decay_linear(delta(IndNeutralize(vwap, IndClass.industry), 3.48158), 10.1466), 15.3012))
alpha_sql = f"""
SELECT
date,
instrument,
(m_rank(m_decay_linear(m_corr(((low * 0.967285) + (low * (1 - 0.967285))), adv10, 6.94279), 5.51607), 3.79744) - m_rank(m_decay_linear(m_delta(c_indneutralize(vwap, industry_level2_code), 3.48158), 10.1466), 15.3012)) AS alpha_a101_f0089
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: ((rank((close - ts_max(close, 4.66719)))^Ts_Rank(correlation(IndNeutralize(adv40, IndClass.subindustry), low, 5.38375), 3.21856)) * -1)
改动说明:adv40的行业中性化改为log(adv40)的行业中性化,避免数值爆炸
alpha_sql = f"""
SELECT
date,
instrument,
((c_pct_rank((close - m_max(close, 4.66719)))^m_rank(m_corr(c_indneutralize(adv40, industry_level3_code), low, 5.38375), 3.21856)) * -1) AS alpha_a101_f0090
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式:((Ts_Rank(decay_linear(decay_linear(correlation(IndNeutralize(close, IndClass.industry), volume, 9.74928), 16.398), 3.83219), 4.8667) - rank(decay_linear(correlation(vwap, adv30, 4.01303), 2.6809))) * -1)
改动说明:close的行业中性化改为log(close)的行业中性化,避免数值爆炸
alpha_sql = f"""
SELECT
date,
instrument,
((m_rank(m_decay_linear(m_decay_linear(m_corr(c_indneutralize(close, industry_level2_code), volume, 9.74928), 16.398), 3.83219), 4.8667) - c_pct_rank(m_decay_linear(m_corr(vwap, adv30, 4.01303), 2.6809))) * -1) AS alpha_a101_f0091
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: min(Ts_Rank(decay_linear(((((high + low) / 2) + close) < (low + open)), 14.7221), 18.8683), Ts_Rank(decay_linear(correlation(rank(low), rank(adv30), 7.58555), 6.94024), 6.80584))
alpha_sql = f"""
SELECT
date,
instrument,
least(m_rank(m_decay_linear(((((high + low) / 2) + close) < (low + open)), 14.7221), 18.8683), m_rank(m_decay_linear(m_corr(c_pct_rank(low), c_pct_rank(adv30), 7.58555), 6.94024), 6.80584)) AS alpha_a101_f0092
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (Ts_Rank(decay_linear(correlation(IndNeutralize(vwap, IndClass.industry), adv81, 17.4193), 19.848), 7.54455) / rank(decay_linear(delta(((close 0.524434) + (vwap (1 - 0.524434))), 2.77377), 16.2664)))
alpha_sql = f"""
SELECT
date,
instrument,
(m_rank(m_decay_linear(m_corr(c_indneutralize(vwap, industry_level2_code), adv81, 17.4193), 19.848), 7.54455) / c_pct_rank(m_decay_linear(m_delta(((close * 0.524434) + (vwap * (1 - 0.524434))), 2.77377), 16.2664))) AS alpha_a101_f0093
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: ((rank((vwap - ts_min(vwap, 11.5783)))^Ts_Rank(correlation(Ts_Rank(vwap, 19.6462), Ts_Rank(adv60, 4.02992), 18.0926), 2.70756)) * -1)
alpha_sql = f"""
SELECT
date,
instrument,
((c_pct_rank((vwap - m_min(vwap, 11.5783)))^m_rank(m_corr(m_rank(vwap, 19.6462), m_rank(adv60, 4.02992), 18.0926), 2.70756)) * -1) AS alpha_a101_f0094
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (rank((open - ts_min(open, 12.4105))) < Ts_Rank((rank(correlation(sum(((high + low) / 2), 19.1351), sum(adv40, 19.1351), 12.8742))^5), 11.7584))
alpha_sql = f"""
SELECT
date,
instrument,
IF((c_pct_rank((open - m_min(open, 12.4105))) < m_rank((c_pct_rank(m_corr(m_sum(((high + low) / 2), 19.1351), m_sum(adv40, 19.1351), 12.8742))^5), 11.7584)), 1, 0) AS alpha_a101_f0095
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (max(Ts_Rank(decay_linear(correlation(rank(vwap), rank(volume), 3.83878), 4.16783), 8.38151), Ts_Rank(decay_linear(Ts_ArgMax(correlation(Ts_Rank(close, 7.45404), Ts_Rank(adv60, 4.13242), 3.65459), 12.6556), 14.0365), 13.4143)) * -1)
alpha_sql = f"""
SELECT
date,
instrument,
greatest(m_rank(m_decay_linear(m_corr(c_pct_rank(vwap), c_pct_rank(volume), 3.83878), 4.16783), 8.38151), m_rank(m_decay_linear(m_imax(m_corr(m_rank(close, 7.45404), m_rank(adv60, 4.13242), 3.65459), 12.6556), 14.0365), 13.4143)) * -1 AS alpha_a101_f0096
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: ((rank(decay_linear(delta(IndNeutralize(((low 0.721001) + (vwap (1 - 0.721001))), IndClass.industry), 3.3705), 20.4523)) - Ts_Rank(decay_linear(Ts_Rank(correlation(Ts_Rank(low, 7.87871), Ts_Rank(adv60, 17.255), 4.97547), 18.5925), 15.7152), 6.71659)) * -1)
alpha_sql = f"""
SELECT
date,
instrument,
((c_pct_rank(m_decay_linear(m_delta(c_indneutralize(((low * 0.721001) + (vwap * (1 - 0.721001))), industry_level2_code), 3.3705), 20.4523)) - m_rank(m_decay_linear(m_rank(m_corr(m_rank(low, 7.87871), m_rank(adv60, 17.255), 4.97547), 18.5925), 15.7152), 6.71659)) * -1) AS alpha_a101_f0097
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (rank(decay_linear(correlation(vwap, sum(adv5, 26.4719), 4.58418), 7.18088)) - rank(decay_linear(Ts_Rank(Ts_ArgMin(correlation(rank(open), rank(adv15), 20.8187), 8.62571), 6.95668), 8.07206)))
alpha_sql = f"""
SELECT
date,
instrument,
(c_pct_rank(m_decay_linear(m_corr(vwap, m_sum(adv5, 26.4719), 4.58418), 7.18088)) - c_pct_rank(m_decay_linear(m_rank(m_imin(m_corr(c_pct_rank(open), c_pct_rank(adv15), 20.8187), 8.62571), 6.95668), 8.07206))) AS alpha_a101_f0098
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: ((rank(correlation(sum(((high + low) / 2), 19.8975), sum(adv60, 19.8975), 8.8136)) < rank(correlation(low, volume, 6.28259))) * -1)
alpha_sql = f"""
SELECT
date,
instrument,
IF(((c_pct_rank(m_corr(m_sum(((high + low) / 2), 19.8975), m_sum(adv60, 19.8975), 8.8136)) < c_pct_rank(m_corr(low, volume, 6.28259)))), 1, 0) * -1 AS alpha_a101_f0099
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: (0 - (1 (((1.5 scale(indneutralize(indneutralize(rank(((((close - low) - (high - close)) / (high - low)) volume)), IndClass.subindustry), IndClass.subindustry))) - scale(indneutralize((correlation(close, rank(adv20), 5) - rank(ts_argmin(close, 30))), IndClass.subindustry))) (volume / adv20))))
改动说明:对行业中性化的参数进行一定放缩,以防数值爆炸
alpha_sql = f"""
SELECT
date,
instrument,
(0 - (1 * (((1.5 * c_scale(c_indneutralize(c_indneutralize(c_pct_rank((((close - low) - (high - close)) / (high - low)) * volume) * 100, industry_level3_code),industry_level3_code), 1)) - c_scale(c_indneutralize((m_corr(close, c_pct_rank(adv20), 5) - c_pct_rank(m_imin(close, 30))), industry_level3_code),1)) * (volume / adv20)))) AS alpha_a101_f0100
FROM data_base
"""
get_factor_data(alpha_sql)
计算公式: ((close - open) / ((high - low) + .001))
alpha_sql = f"""
SELECT
date,
instrument,
((close - open) / ((high - low) + 0.001)) AS alpha_a101_f0101
FROM data_base
"""
get_factor_data(alpha_sql)