Alpha101因子构建(5)

声明:以下代码请在 AIStudio 3.0.0 环境下运行

In [44]:
import dai
import pandas as pd
from datetime import datetime

sd = '2022-07-01'
ed = datetime.now().date().strftime("%Y-%m-%d")
In [45]:
def get_factor_data(alpha_sql):

    sql = f"""

    WITH 
    data_base AS (
        SELECT
            date,
            instrument,
            open,
            close,
            high,
            low,
            amount,
            volume,
            close / m_lag(close, 1) - 1 AS returns,
            amount / volume AS vwap,
            m_nanavg(volume,   5) AS adv5,
            m_nanavg(volume,  10) AS adv10,
            m_nanavg(volume,  15) AS adv15,
            m_nanavg(volume,  20) AS adv20,
            m_nanavg(volume,  30) AS adv30,
            m_nanavg(volume,  40) AS adv40,
            m_nanavg(volume,  50) AS adv50,
            m_nanavg(volume,  60) AS adv60,
            m_nanavg(volume,  81) AS adv81,
            m_nanavg(volume, 120) AS adv120,
            m_nanavg(volume, 150) AS adv150,
            m_nanavg(volume, 180) AS adv180,
            float_market_cap,
            industry_level1_code,
            industry_level2_code,
            industry_level3_code,
        FROM cn_stock_bar1d JOIN cn_stock_industry_component USING (date, instrument) JOIN cn_stock_valuation USING (date, instrument)
        WHERE industry = 'sw2021'
        AND instrument NOT LIKE '%BJ%'

        QUALIFY COLUMNS(*) IS NOT NULL
    )
    ,
    data_factor AS (
        {alpha_sql}
    )

    SELECT 
        *
    FROM data_factor
    QUALIFY COLUMNS(*) IS NOT NULL
    ORDER BY date, instrument
    
    """

    return dai.query(sql, filters={'date':[sd, ed]}).df()

Alpha101因子第81个

计算公式: ((rank(Log(product(rank((rank(correlation(vwap, sum(adv10, 49.6054), 8.47743))^4)), 14.9655))) < rank(correlation(rank(vwap), rank(volume), 5.07914))) * -1)

In [46]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    IF(((c_pct_rank(log(m_product(c_pct_rank((c_pct_rank(m_corr(vwap, m_sum(adv10, 49.6054), 8.47743))^4)), 14.9655))) < c_pct_rank(m_corr(c_pct_rank(vwap), c_pct_rank(volume), 5.07914)))),1,0) * -1  AS alpha_a101_f0081
FROM data_base
"""

get_factor_data(alpha_sql)
Out[46]:
date instrument alpha_a101_f0081
0 2022-07-04 000001.SZ 0
1 2022-07-04 000002.SZ 0
2 2022-07-04 000004.SZ 0
3 2022-07-04 000005.SZ 0
4 2022-07-04 000006.SZ 0
... ... ... ...
2192699 2024-04-26 688799.SH -1
2192700 2024-04-26 688800.SH -1
2192701 2024-04-26 688819.SH -1
2192702 2024-04-26 688981.SH 0
2192703 2024-04-26 689009.SH -1

2192704 rows × 3 columns

Alpha101因子第82个

计算公式: (min(rank(decay_linear(delta(open, 1.46063), 14.8717)), Ts_Rank(decay_linear(correlation(IndNeutralize(volume, IndClass.sector), ((open 0.634196) + (open (1 - 0.634196))), 17.4842), 6.92131), 13.4283)) * -1)

In [47]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    (least(c_pct_rank(m_decay_linear(m_delta(open, 1.46063), 14.8717)), m_rank(m_decay_linear(m_corr(c_indneutralize(volume, industry_level1_code), ((open * 0.634196) + (open * (1 - 0.634196))), 17.4842), 6.92131), 13.4283)) * -1) AS alpha_a101_f0082
FROM data_base
"""

get_factor_data(alpha_sql)
Out[47]:
date instrument alpha_a101_f0082
0 2022-07-25 000001.SZ -0.002133
1 2022-07-25 000002.SZ -0.000640
2 2022-07-25 000004.SZ -0.256186
3 2022-07-25 000005.SZ -0.655930
4 2022-07-25 000006.SZ -0.058874
... ... ... ...
2115191 2024-04-26 688799.SH -0.075231
2115192 2024-04-26 688800.SH -0.905912
2115193 2024-04-26 688819.SH -0.651935
2115194 2024-04-26 688981.SH -0.354351
2115195 2024-04-26 689009.SH -0.965036

2115196 rows × 3 columns

Alpha101因子第83个

计算公式: ((rank(delay(((high - low) / (sum(close, 5) / 5)), 2)) * rank(rank(volume))) / (((high - low) / (sum(close, 5) / 5)) / (vwap - close)))

In [48]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    ((c_pct_rank(m_lag(((high - low) / (m_sum(close, 5) / 5)), 2)) * c_pct_rank(c_pct_rank(volume))) / (((high - low) / (m_sum(close, 5) / 5)) / (vwap - close))) AS alpha_a101_f0083
FROM data_base
"""

get_factor_data(alpha_sql)
Out[48]:
date instrument alpha_a101_f0083
0 2022-07-12 000001.SZ -12807.592296
1 2022-07-12 000002.SZ -40377.427378
2 2022-07-12 000004.SZ -292.193691
3 2022-07-12 000005.SZ -60.775798
4 2022-07-12 000006.SZ -2419.172978
... ... ... ...
2158184 2024-04-26 688799.SH -2.242810
2158185 2024-04-26 688800.SH -42.030969
2158186 2024-04-26 688819.SH -3.305534
2158187 2024-04-26 688981.SH -0.337910
2158188 2024-04-26 689009.SH 2.871764

2158189 rows × 3 columns

Alpha101因子第84个

计算公式: SignedPower(Ts_Rank((vwap - ts_max(vwap, 15.3217)), 20.7127), delta(close, 4.96796))

改动说明:指数部分改为截面排名,避免数值爆炸

In [49]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    power(m_rank((vwap - m_max(vwap, 15.3217)), 20.7127), c_pct_rank(m_delta(close, 4.96796)))  AS alpha_a101_f0084
FROM data_base
"""

get_factor_data(alpha_sql)
Out[49]:
date instrument alpha_a101_f0084
0 2022-08-19 000001.SZ 20.143428
1 2022-08-19 000002.SZ 20.865887
2 2022-08-19 000004.SZ 4.764642
3 2022-08-19 000005.SZ 8.333603
4 2022-08-19 000006.SZ 5.982180
... ... ... ...
2017149 2024-04-26 688799.SH 1.599554
2017150 2024-04-26 688800.SH 16.057393
2017151 2024-04-26 688819.SH 3.255342
2017152 2024-04-26 688981.SH 3.825792
2017153 2024-04-26 689009.SH 15.688228

2017154 rows × 3 columns

Alpha101因子第85个

计算公式: (rank(correlation(((high 0.876703) + (close (1 - 0.876703))), adv30, 9.61331))^rank(correlation(Ts_Rank(((high + low) / 2), 3.70596), Ts_Rank(volume, 10.1595), 7.11408)))

In [50]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    (c_pct_rank(m_corr(((high * 0.876703) + (close * (1 - 0.876703))), adv30, 9.61331))^c_pct_rank(m_corr(m_rank(((high + low) / 2), 3.70596), m_rank(volume, 10.1595), 7.11408))) AS alpha_a101_f0085
FROM data_base
"""

get_factor_data(alpha_sql)
Out[50]:
date instrument alpha_a101_f0085
0 2022-07-25 000001.SZ 0.193246
1 2022-07-25 000002.SZ 0.807643
2 2022-07-25 000004.SZ 0.568618
3 2022-07-25 000005.SZ 0.184912
4 2022-07-25 000006.SZ 0.956137
... ... ... ...
2072022 2024-04-26 688799.SH 0.346354
2072023 2024-04-26 688800.SH 0.944697
2072024 2024-04-26 688819.SH 0.729653
2072025 2024-04-26 688981.SH 0.529868
2072026 2024-04-26 689009.SH 0.930797

2072027 rows × 3 columns

Alpha101因子第86个

计算公式: ((Ts_Rank(correlation(close, sum(adv20, 14.7444), 6.00049), 20.4195) < rank(((open + close) - (vwap + open)))) * -1)

In [51]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    IF((m_pct_rank(m_corr(close, m_sum(adv20, 14.7444), 6.00049), 20.4195) < c_pct_rank(((open + close) - (vwap + open)))), 1, 0) * -1 AS alpha_a101_f0086
FROM data_base
"""

get_factor_data(alpha_sql)
Out[51]:
date instrument alpha_a101_f0086
0 2022-07-04 000001.SZ 0
1 2022-07-04 000002.SZ 0
2 2022-07-04 000004.SZ 0
3 2022-07-04 000005.SZ 0
4 2022-07-04 000006.SZ 0
... ... ... ...
2192699 2024-04-26 688799.SH 0
2192700 2024-04-26 688800.SH 0
2192701 2024-04-26 688819.SH 0
2192702 2024-04-26 688981.SH 0
2192703 2024-04-26 689009.SH 0

2192704 rows × 3 columns

Alpha101因子第87个

计算公式: (max(rank(decay_linear(delta(((close 0.369701) + (vwap (1 - 0.369701))), 1.91233), 2.65461)), Ts_Rank(decay_linear(abs(correlation(IndNeutralize(adv81, IndClass.industry), close, 13.4132)), 4.89768), 14.4535)) * -1) 改动说明:adv81的行业中性化改为log(adv81)的行业中性化,避免数值爆炸

In [52]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    greatest(c_pct_rank(m_decay_linear(m_delta(((close * 0.369701) + (vwap * (1 - 0.369701))), 1.91233), 2.65461)),m_pct_rank(m_decay_linear(abs(m_corr(c_indneutralize(adv81, industry_level2_code), close, 13.4132)), 4.89768), 14.4535)) * -1 AS alpha_a101_f0087
FROM data_base
"""

get_factor_data(alpha_sql)
Out[52]:
date instrument alpha_a101_f0087
0 2022-07-08 000001.SZ -0.008293
1 2022-07-08 000002.SZ -0.001063
2 2022-07-08 000004.SZ -0.862853
3 2022-07-08 000005.SZ -0.442909
4 2022-07-08 000006.SZ -0.157346
... ... ... ...
2172024 2024-04-26 688799.SH -0.769729
2172025 2024-04-26 688800.SH -0.956616
2172026 2024-04-26 688819.SH -0.589713
2172027 2024-04-26 688981.SH -0.540243
2172028 2024-04-26 689009.SH -0.978013

2172029 rows × 3 columns

Alpha101因子第88个

计算公式: min(rank(decay_linear(((rank(open) + rank(low)) - (rank(high) + rank(close))), 8.06882)), Ts_Rank(decay_linear(correlation(Ts_Rank(close, 8.44728), Ts_Rank(adv60, 20.6966), 8.01266), 6.65053), 2.61957))

In [53]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    least(c_pct_rank(m_decay_linear(((c_pct_rank(open) + c_pct_rank(low)) - (c_pct_rank(high) + c_pct_rank(close))), 8.06882)), m_rank(m_decay_linear(m_corr(m_rank(close, 8.44728), m_rank(adv60, 20.6966), 8.01266), 6.65053), 2.61957)) AS alpha_a101_f0088
FROM data_base
"""

get_factor_data(alpha_sql)
Out[53]:
date instrument alpha_a101_f0088
0 2022-07-13 000001.SZ 0.406177
1 2022-07-13 000002.SZ 0.370820
2 2022-07-13 000004.SZ 0.677316
3 2022-07-13 000005.SZ 0.264111
4 2022-07-13 000006.SZ 0.546539
... ... ... ...
2156520 2024-04-26 688799.SH 0.238320
2156521 2024-04-26 688800.SH 0.056144
2156522 2024-04-26 688819.SH 0.936985
2156523 2024-04-26 688981.SH 0.963094
2156524 2024-04-26 689009.SH 0.082646

2156525 rows × 3 columns

Alpha101因子第89个

计算公式: (Ts_Rank(decay_linear(correlation(((low 0.967285) + (low (1 - 0.967285))), adv10, 6.94279), 5.51607), 3.79744) - Ts_Rank(decay_linear(delta(IndNeutralize(vwap, IndClass.industry), 3.48158), 10.1466), 15.3012))

In [54]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    (m_rank(m_decay_linear(m_corr(((low * 0.967285) + (low * (1 - 0.967285))), adv10, 6.94279), 5.51607), 3.79744) - m_rank(m_decay_linear(m_delta(c_indneutralize(vwap, industry_level2_code), 3.48158), 10.1466), 15.3012)) AS alpha_a101_f0089
FROM data_base
"""

get_factor_data(alpha_sql)
Out[54]:
date instrument alpha_a101_f0089
0 2022-08-09 000001.SZ -14.0
1 2022-08-09 000002.SZ -5.0
2 2022-08-09 000004.SZ -2.0
3 2022-08-09 000005.SZ -8.0
4 2022-08-09 000006.SZ -10.0
... ... ... ...
2058132 2024-04-26 688799.SH -7.0
2058133 2024-04-26 688800.SH -12.0
2058134 2024-04-26 688819.SH -3.0
2058135 2024-04-26 688981.SH 3.0
2058136 2024-04-26 689009.SH -11.0

2058137 rows × 3 columns

Alpha101因子第90个

计算公式: ((rank((close - ts_max(close, 4.66719)))^Ts_Rank(correlation(IndNeutralize(adv40, IndClass.subindustry), low, 5.38375), 3.21856)) * -1)

改动说明:adv40的行业中性化改为log(adv40)的行业中性化,避免数值爆炸

In [55]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    ((c_pct_rank((close - m_max(close, 4.66719)))^m_rank(m_corr(c_indneutralize(adv40, industry_level3_code), low, 5.38375), 3.21856)) * -1) AS alpha_a101_f0090
FROM data_base
"""

get_factor_data(alpha_sql)
Out[55]:
date instrument alpha_a101_f0090
0 2022-07-12 000001.SZ -0.058261
1 2022-07-12 000002.SZ -0.008718
2 2022-07-12 000004.SZ -0.148841
3 2022-07-12 000005.SZ -0.881275
4 2022-07-12 000006.SZ -0.259196
... ... ... ...
2160986 2024-04-26 688799.SH -0.483617
2160987 2024-04-26 688800.SH -0.483617
2160988 2024-04-26 688819.SH -0.339223
2160989 2024-04-26 688981.SH -0.336320
2160990 2024-04-26 689009.SH -0.336320

2160991 rows × 3 columns

Alpha101因子第91个

计算公式:((Ts_Rank(decay_linear(decay_linear(correlation(IndNeutralize(close, IndClass.industry), volume, 9.74928), 16.398), 3.83219), 4.8667) - rank(decay_linear(correlation(vwap, adv30, 4.01303), 2.6809))) * -1)

改动说明:close的行业中性化改为log(close)的行业中性化,避免数值爆炸

In [56]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    ((m_rank(m_decay_linear(m_decay_linear(m_corr(c_indneutralize(close, industry_level2_code), volume, 9.74928), 16.398), 3.83219), 4.8667) - c_pct_rank(m_decay_linear(m_corr(vwap, adv30, 4.01303), 2.6809))) * -1) AS alpha_a101_f0091
FROM data_base
"""

get_factor_data(alpha_sql)
Out[56]:
date instrument alpha_a101_f0091
0 2022-08-16 000001.SZ -4.457727
1 2022-08-16 000002.SZ -2.884672
2 2022-08-16 000004.SZ -0.416403
3 2022-08-16 000005.SZ -0.792536
4 2022-08-16 000006.SZ -1.858950
... ... ... ...
2032620 2024-04-26 688799.SH -0.455241
2032621 2024-04-26 688800.SH -2.067334
2032622 2024-04-26 688819.SH -4.358265
2032623 2024-04-26 688981.SH -0.432862
2032624 2024-04-26 689009.SH -4.074205

2032625 rows × 3 columns

Alpha101因子第92个

计算公式: min(Ts_Rank(decay_linear(((((high + low) / 2) + close) < (low + open)), 14.7221), 18.8683), Ts_Rank(decay_linear(correlation(rank(low), rank(adv30), 7.58555), 6.94024), 6.80584))

In [57]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    least(m_rank(m_decay_linear(((((high + low) / 2) + close) < (low + open)), 14.7221), 18.8683), m_rank(m_decay_linear(m_corr(c_pct_rank(low), c_pct_rank(adv30), 7.58555), 6.94024), 6.80584)) AS alpha_a101_f0092
FROM data_base
"""

get_factor_data(alpha_sql)
Out[57]:
date instrument alpha_a101_f0092
0 2022-07-29 000001.SZ 7.0
1 2022-07-29 000002.SZ 1.0
2 2022-07-29 000004.SZ 2.0
3 2022-07-29 000005.SZ 7.0
4 2022-07-29 000006.SZ 1.0
... ... ... ...
2094512 2024-04-26 688799.SH 1.0
2094513 2024-04-26 688800.SH 6.0
2094514 2024-04-26 688819.SH 7.0
2094515 2024-04-26 688981.SH 7.0
2094516 2024-04-26 689009.SH 7.0

2094517 rows × 3 columns

Alpha101因子第93个

计算公式: (Ts_Rank(decay_linear(correlation(IndNeutralize(vwap, IndClass.industry), adv81, 17.4193), 19.848), 7.54455) / rank(decay_linear(delta(((close 0.524434) + (vwap (1 - 0.524434))), 2.77377), 16.2664)))

In [58]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    (m_rank(m_decay_linear(m_corr(c_indneutralize(vwap, industry_level2_code), adv81, 17.4193), 19.848), 7.54455) / c_pct_rank(m_decay_linear(m_delta(((close * 0.524434) + (vwap * (1 - 0.524434))), 2.77377), 16.2664))) AS alpha_a101_f0093
FROM data_base
"""

get_factor_data(alpha_sql)
Out[58]:
date instrument alpha_a101_f0093
0 2022-08-31 000001.SZ 8.052665
1 2022-08-31 000002.SZ 3.006342
2 2022-08-31 000004.SZ 2.259295
3 2022-08-31 000005.SZ 10.797267
4 2022-08-31 000006.SZ 4.318907
... ... ... ...
1975899 2024-04-26 688799.SH 147.003610
1975900 2024-04-26 688800.SH 8.955355
1975901 2024-04-26 688819.SH 11.454290
1975902 2024-04-26 688981.SH 2.704570
1975903 2024-04-26 689009.SH 8.256285

1975904 rows × 3 columns

Alpha101因子第94个

计算公式: ((rank((vwap - ts_min(vwap, 11.5783)))^Ts_Rank(correlation(Ts_Rank(vwap, 19.6462), Ts_Rank(adv60, 4.02992), 18.0926), 2.70756)) * -1)

In [59]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    ((c_pct_rank((vwap - m_min(vwap, 11.5783)))^m_rank(m_corr(m_rank(vwap, 19.6462), m_rank(adv60, 4.02992), 18.0926), 2.70756)) * -1) AS alpha_a101_f0094
FROM data_base
"""

get_factor_data(alpha_sql)
Out[59]:
date instrument alpha_a101_f0094
0 2022-08-25 000001.SZ -0.763330
1 2022-08-25 000002.SZ -0.069732
2 2022-08-25 000004.SZ -0.018414
3 2022-08-25 000005.SZ -0.210807
4 2022-08-25 000006.SZ -0.313381
... ... ... ...
1661829 2024-04-26 688799.SH -0.384900
1661830 2024-04-26 688800.SH -0.953064
1661831 2024-04-26 688819.SH -0.303245
1661832 2024-04-26 688981.SH -0.400630
1661833 2024-04-26 689009.SH -0.985467

1661834 rows × 3 columns

Alpha101因子第95个

计算公式: (rank((open - ts_min(open, 12.4105))) < Ts_Rank((rank(correlation(sum(((high + low) / 2), 19.1351), sum(adv40, 19.1351), 12.8742))^5), 11.7584))

In [60]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    IF((c_pct_rank((open - m_min(open, 12.4105))) < m_rank((c_pct_rank(m_corr(m_sum(((high + low) / 2), 19.1351), m_sum(adv40, 19.1351), 12.8742))^5), 11.7584)), 1, 0)  AS alpha_a101_f0095
FROM data_base
"""

get_factor_data(alpha_sql)
Out[60]:
date instrument alpha_a101_f0095
0 2022-07-04 000001.SZ 0
1 2022-07-04 000002.SZ 0
2 2022-07-04 000004.SZ 0
3 2022-07-04 000005.SZ 0
4 2022-07-04 000006.SZ 0
... ... ... ...
2192699 2024-04-26 688799.SH 1
2192700 2024-04-26 688800.SH 1
2192701 2024-04-26 688819.SH 1
2192702 2024-04-26 688981.SH 1
2192703 2024-04-26 689009.SH 1

2192704 rows × 3 columns

Alpha101因子第96个

计算公式: (max(Ts_Rank(decay_linear(correlation(rank(vwap), rank(volume), 3.83878), 4.16783), 8.38151), Ts_Rank(decay_linear(Ts_ArgMax(correlation(Ts_Rank(close, 7.45404), Ts_Rank(adv60, 4.13242), 3.65459), 12.6556), 14.0365), 13.4143)) * -1)

In [61]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    greatest(m_rank(m_decay_linear(m_corr(c_pct_rank(vwap), c_pct_rank(volume), 3.83878), 4.16783), 8.38151), m_rank(m_decay_linear(m_imax(m_corr(m_rank(close, 7.45404), m_rank(adv60, 4.13242), 3.65459), 12.6556), 14.0365), 13.4143)) * -1 AS alpha_a101_f0096
FROM data_base
"""

get_factor_data(alpha_sql)
Out[61]:
date instrument alpha_a101_f0096
0 2022-07-21 000001.SZ -6.0
1 2022-07-21 000002.SZ -6.0
2 2022-07-21 000004.SZ -5.0
3 2022-07-21 000005.SZ -7.0
4 2022-07-21 000006.SZ -7.0
... ... ... ...
2123744 2024-04-26 688799.SH -7.0
2123745 2024-04-26 688800.SH -8.0
2123746 2024-04-26 688819.SH -8.0
2123747 2024-04-26 688981.SH -1.0
2123748 2024-04-26 689009.SH -8.0

2123749 rows × 3 columns

Alpha101因子第97个

计算公式: ((rank(decay_linear(delta(IndNeutralize(((low 0.721001) + (vwap (1 - 0.721001))), IndClass.industry), 3.3705), 20.4523)) - Ts_Rank(decay_linear(Ts_Rank(correlation(Ts_Rank(low, 7.87871), Ts_Rank(adv60, 17.255), 4.97547), 18.5925), 15.7152), 6.71659)) * -1)

In [62]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    ((c_pct_rank(m_decay_linear(m_delta(c_indneutralize(((low * 0.721001) + (vwap * (1 - 0.721001))), industry_level2_code), 3.3705), 20.4523)) - m_rank(m_decay_linear(m_rank(m_corr(m_rank(low, 7.87871), m_rank(adv60, 17.255), 4.97547), 18.5925), 15.7152), 6.71659)) * -1)  AS alpha_a101_f0097
FROM data_base
"""

get_factor_data(alpha_sql)
Out[62]:
date instrument alpha_a101_f0097
0 2022-09-26 000001.SZ 0.835535
1 2022-09-26 000156.SZ 0.287002
2 2022-09-26 000159.SZ 2.794130
3 2022-09-26 000623.SZ 3.223585
4 2022-09-26 000638.SZ 6.176101
... ... ... ...
28129 2024-04-26 688501.SH 2.095126
28130 2024-04-26 688523.SH 0.304835
28131 2024-04-26 688548.SH 0.208333
28132 2024-04-26 688612.SH 0.634434
28133 2024-04-26 688677.SH 0.996855

28134 rows × 3 columns

Alpha101因子第98个

计算公式: (rank(decay_linear(correlation(vwap, sum(adv5, 26.4719), 4.58418), 7.18088)) - rank(decay_linear(Ts_Rank(Ts_ArgMin(correlation(rank(open), rank(adv15), 20.8187), 8.62571), 6.95668), 8.07206)))

In [63]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    (c_pct_rank(m_decay_linear(m_corr(vwap, m_sum(adv5, 26.4719), 4.58418), 7.18088)) - c_pct_rank(m_decay_linear(m_rank(m_imin(m_corr(c_pct_rank(open), c_pct_rank(adv15), 20.8187), 8.62571), 6.95668), 8.07206))) AS alpha_a101_f0098
FROM data_base
"""

get_factor_data(alpha_sql)
Out[63]:
date instrument alpha_a101_f0098
0 2022-08-30 000001.SZ -0.349680
1 2022-08-30 000002.SZ 0.141955
2 2022-08-30 000004.SZ 0.601080
3 2022-08-30 000005.SZ -0.673475
4 2022-08-30 000006.SZ -0.434301
... ... ... ...
1980635 2024-04-26 688799.SH -0.759819
1980636 2024-04-26 688800.SH 0.706919
1980637 2024-04-26 688819.SH 0.371035
1980638 2024-04-26 688981.SH -0.022921
1980639 2024-04-26 689009.SH -0.515187

1980640 rows × 3 columns

Alpha101因子第99个

计算公式: ((rank(correlation(sum(((high + low) / 2), 19.8975), sum(adv60, 19.8975), 8.8136)) < rank(correlation(low, volume, 6.28259))) * -1)

In [64]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    IF(((c_pct_rank(m_corr(m_sum(((high + low) / 2), 19.8975), m_sum(adv60, 19.8975), 8.8136)) < c_pct_rank(m_corr(low, volume, 6.28259)))), 1, 0) * -1  AS alpha_a101_f0099
FROM data_base
"""

get_factor_data(alpha_sql)
Out[64]:
date instrument alpha_a101_f0099
0 2022-07-04 000001.SZ 0
1 2022-07-04 000002.SZ 0
2 2022-07-04 000004.SZ 0
3 2022-07-04 000005.SZ 0
4 2022-07-04 000006.SZ 0
... ... ... ...
2192699 2024-04-26 688799.SH 0
2192700 2024-04-26 688800.SH -1
2192701 2024-04-26 688819.SH -1
2192702 2024-04-26 688981.SH -1
2192703 2024-04-26 689009.SH -1

2192704 rows × 3 columns

Alpha101因子第100个

计算公式: (0 - (1 (((1.5 scale(indneutralize(indneutralize(rank(((((close - low) - (high - close)) / (high - low)) volume)), IndClass.subindustry), IndClass.subindustry))) - scale(indneutralize((correlation(close, rank(adv20), 5) - rank(ts_argmin(close, 30))), IndClass.subindustry))) (volume / adv20))))

改动说明:对行业中性化的参数进行一定放缩,以防数值爆炸

In [65]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    (0 - (1 * (((1.5 * c_scale(c_indneutralize(c_indneutralize(c_pct_rank((((close - low) - (high - close)) / (high - low)) * volume) * 100, industry_level3_code),industry_level3_code), 1)) - c_scale(c_indneutralize((m_corr(close, c_pct_rank(adv20), 5) - c_pct_rank(m_imin(close, 30))), industry_level3_code),1)) * (volume / adv20)))) AS alpha_a101_f0100
FROM data_base
"""

get_factor_data(alpha_sql)
Out[65]:
date instrument alpha_a101_f0100
0 2022-08-12 000001.SZ -0.000223
1 2022-08-12 000002.SZ -0.000425
2 2022-08-12 000004.SZ 0.000498
3 2022-08-12 000005.SZ -0.000130
4 2022-08-12 000006.SZ -0.000237
... ... ... ...
2055641 2024-04-26 688799.SH 0.000126
2055642 2024-04-26 688800.SH 0.000671
2055643 2024-04-26 688819.SH 0.000183
2055644 2024-04-26 688981.SH -0.000163
2055645 2024-04-26 689009.SH 0.002413

2055646 rows × 3 columns

Alpha101因子第101个

计算公式: ((close - open) / ((high - low) + .001))

In [66]:
alpha_sql = f"""
SELECT
    date, 
    instrument, 
    ((close - open) / ((high - low) + 0.001))  AS alpha_a101_f0101
FROM data_base
"""

get_factor_data(alpha_sql)
Out[66]:
date instrument alpha_a101_f0101
0 2022-07-04 000001.SZ -0.043477
1 2022-07-04 000002.SZ -0.040816
2 2022-07-04 000004.SZ 0.000000
3 2022-07-04 000005.SZ 0.000000
4 2022-07-04 000006.SZ -0.416578
... ... ... ...
2192699 2024-04-26 688799.SH 0.680726
2192700 2024-04-26 688800.SH 0.445277
2192701 2024-04-26 688819.SH 0.815807
2192702 2024-04-26 688981.SH 0.940255
2192703 2024-04-26 689009.SH 0.192082

2192704 rows × 3 columns