Alpha191因子构建(10)

声明:以下代码请在 AIStudio 3.0.0 环境下运行

In [34]:
import dai
import pandas as pd
from datetime import datetime, timedelta 

sd = '2023-01-01'
ed = datetime.now().date().strftime("%Y-%m-%d")
In [35]:
def get_factor_data(alpha_sql):

    sql = f"""

    WITH 
    data_base AS (
        WITH
        data_a AS (
            SELECT
                date,
                instrument,
                open,
                close,
                high,
                low,
                amount,
                volume,
                close / m_lag(close, 1) - 1 AS ret,
                amount / volume AS vwap,
                industry_level1_code,
                industry_level2_code,
                industry_level3_code,
                IF(open<=m_lag(open, 1), 0, greatest(high-open, open-m_lag(open, 1))) AS DTM, 
                IF(open>=m_lag(open, 1), 0, greatest(open-low,  open-m_lag(open, 1))) AS DBM,
                m_lag(low, 1) - low   AS LD, 
                high - m_lag(high, 1) AS HD, 
                greatest(greatest(high-low, ABS(high-m_lag(close, 1))), ABS(low-m_lag(close, 1))) AS TR, 
            FROM cn_stock_bar1d JOIN cn_stock_industry_component USING (date, instrument)
            WHERE instrument NOT LIKE '%BJ%'
            AND industry = 'sw2021'
        ),
        data_b AS (
            WITH 
            data_be AS (
                SELECT
                    date,
                    instrument,
                    close / m_lag(close, 1) - 1 AS change_ratio,
                    float_market_cap,
                    total_owner_equity_lf,
                FROM cn_stock_bar1d JOIN cn_stock_valuation USING (date, instrument) JOIN cn_stock_factors_financial_items USING (date, instrument)
            ),
            data1 AS ( 
                SELECT 
                    date, 
                    instrument, 
                    change_ratio, 
                FROM data_be 
            ), 
            data2 AS ( 
                SELECT 
                    date, 
                    instrument,
                    c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS MKT
                FROM data_be 
            ), 
            data3 AS (
                WITH 
                data3_0 AS (
                    SELECT
                        date,
                        instrument,
                        change_ratio,
                        float_market_cap,
                        c_pct_rank(float_market_cap)                          AS rank_sb,
                        c_pct_rank(float_market_cap / total_owner_equity_lf)  AS rank_lmh,
                        CASE
                            WHEN rank_sb  < 0.5 THEN 1
                            ELSE 2
                        END AS group_sb,
                        CASE
                            WHEN rank_lmh < 0.3 THEN 1
                            WHEN rank_lmh > 0.7 THEN 3
                            ELSE 2
                        END AS group_lmh,
                    FROM data_be
                ),
                data3_sl AS (
                    SELECT DISTINCT
                        date,
                        c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SL
                    FROM data3_0
                    WHERE group_sb = 1 AND group_lmh = 1
                ),
                data3_sm AS (
                    SELECT DISTINCT
                        date,
                        c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SM
                    FROM data3_0
                    WHERE group_sb = 1 AND group_lmh = 2
                ),
                data3_sh AS (
                    SELECT DISTINCT
                        date,
                        c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SH
                    FROM data3_0
                    WHERE group_sb = 1 AND group_lmh = 3
                ),
                data3_bl AS (
                    SELECT DISTINCT
                        date,
                        c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BL
                    FROM data3_0
                    WHERE group_sb = 2 AND group_lmh = 1
                ),
                data3_bm AS (
                    SELECT DISTINCT
                        date,
                        c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BM
                    FROM data3_0
                    WHERE group_sb = 2 AND group_lmh = 2
                ),
                data3_bh AS (
                    SELECT DISTINCT
                        date,
                        c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BH
                    FROM data3_0
                    WHERE group_sb = 2 AND group_lmh = 3
                ),
                data3_merge AS (
                    SELECT 
                        data3_0.date,
                        data3_0.instrument,
                        (1/3) * (SL + SM + SH) - (1/3) * (BL + BM + BH) AS SMB,
                        (1/2) * (SH + BH)      - (1/2) * (SL + BL)      AS HML,
                    FROM data3_0
                    JOIN data3_sl USING (date)
                    JOIN data3_sm USING (date)
                    JOIN data3_sh USING (date)
                    JOIN data3_bl USING (date)
                    JOIN data3_bm USING (date)
                    JOIN data3_bh USING (date)
                )
                SELECT * FROM data3_merge
            )
            SELECT 
                date, 
                instrument, 
                MKT,
                SMB, 
                HML, 
            FROM data1 JOIN data2 USING (date, instrument) JOIN data3 USING (date, instrument)
        ),
        data_c AS (
            SELECT 
                date,
                close AS bm_close,
                open  AS bm_open,
            FROM cn_stock_index_bar1d
            WHERE instrument = '000001.SH'
        )
        SELECT * 
        FROM data_a JOIN data_b USING (date, instrument) JOIN data_c USING (date)
        QUALIFY COLUMNS(*) IS NOT NULL

    ),
    data_factor AS (
        {alpha_sql}
    )

    SELECT 
        *
    FROM data_factor
    QUALIFY COLUMNS(*) IS NOT NULL
    ORDER BY date, instrument
    
    """

    return dai.query(sql, filters={'date':[sd, ed]}).df()
In [36]:
#### Alpha191因子第181个

alpha_sql = f"""

SELECT
    date,
    instrument,
    m_sum(((close/m_lag(close,1)-1)-m_avg((close/m_lag(close,1)-1),20))-(bm_close-m_avg(bm_close,20))^2,20)/m_sum((bm_close-m_avg(bm_close,20))^3,20) AS alpha_a191_f0181
FROM data_base

"""

get_factor_data(alpha_sql)
Out[36]:
date instrument alpha_a191_f0181
0 2023-03-07 000001.SZ -0.026508
1 2023-03-07 000002.SZ -0.026508
2 2023-03-07 000004.SZ -0.026508
3 2023-03-07 000005.SZ -0.026508
4 2023-03-07 000006.SZ -0.026508
... ... ... ...
1378003 2024-04-24 688799.SH 0.060586
1378004 2024-04-24 688800.SH 0.060586
1378005 2024-04-24 688819.SH 0.060586
1378006 2024-04-24 688981.SH 0.060585
1378007 2024-04-24 689009.SH 0.060585

1378008 rows × 3 columns

In [37]:
#### Alpha191因子第182个

alpha_sql = f"""

SELECT
    date,
    instrument,
    m_sum(IF((close > open AND bm_close > bm_open) OR (close < open AND bm_close < bm_open), 1, 0), 20) / 20 AS alpha_a191_f0182
FROM data_base

"""

get_factor_data(alpha_sql)
Out[37]:
date instrument alpha_a191_f0182
0 2023-02-07 000001.SZ 0.75
1 2023-02-07 000002.SZ 0.55
2 2023-02-07 000004.SZ 0.70
3 2023-02-07 000005.SZ 0.40
4 2023-02-07 000006.SZ 0.60
... ... ... ...
1480729 2024-04-24 688799.SH 0.65
1480730 2024-04-24 688800.SH 0.70
1480731 2024-04-24 688819.SH 0.65
1480732 2024-04-24 688981.SH 0.70
1480733 2024-04-24 689009.SH 0.75

1480734 rows × 3 columns

In [38]:
#### Alpha191因子第183个

alpha_sql = f"""

SELECT
    date,
    instrument,
    m_max(c_sum(close-m_avg(close,24)),24)-m_min(c_sum(close-m_avg(close,24)),24)/m_stddev(close,24) AS alpha_a191_f0183
FROM data_base

"""

get_factor_data(alpha_sql)
Out[38]:
date instrument alpha_a191_f0183
0 2023-03-16 000001.SZ 18128.327863
1 2023-03-16 000002.SZ 17952.450767
2 2023-03-16 000004.SZ 44346.705974
3 2023-03-16 000005.SZ 93767.424177
4 2023-03-16 000006.SZ 19705.390757
... ... ... ...
1347655 2024-04-24 688799.SH 36887.892512
1347656 2024-04-24 688800.SH 31607.898745
1347657 2024-04-24 688819.SH 78371.994480
1347658 2024-04-24 688981.SH 38646.237836
1347659 2024-04-24 689009.SH 44254.544488

1347660 rows × 3 columns

In [39]:
#### Alpha191因子第184个

alpha_sql = f"""

SELECT
    date,
    instrument,
    (c_pct_rank(m_corr(m_lag((open -close), 1), close, 200)) + c_pct_rank((open -close))) AS alpha_a191_f0184
FROM data_base

"""

get_factor_data(alpha_sql)
Out[39]:
date instrument alpha_a191_f0184
0 2023-11-03 000001.SZ 1.824028
1 2023-11-03 000002.SZ 0.830204
2 2023-11-03 000005.SZ 1.540653
3 2023-11-03 000006.SZ 0.392427
4 2023-11-03 000008.SZ 0.584625
... ... ... ...
560878 2024-04-24 688799.SH 1.355444
560879 2024-04-24 688800.SH 1.079171
560880 2024-04-24 688819.SH 1.723480
560881 2024-04-24 688981.SH 1.313003
560882 2024-04-24 689009.SH 1.063586

560883 rows × 3 columns

In [40]:
#### Alpha191因子第185个

alpha_sql = f"""

SELECT
    date,
    instrument,
    c_pct_rank((-1 * ((1 -(open / close))^2))) AS alpha_a191_f0185
FROM data_base

"""

get_factor_data(alpha_sql)
Out[40]:
date instrument alpha_a191_f0185
0 2023-01-04 000001.SZ 0.055930
1 2023-01-04 000002.SZ 0.054501
2 2023-01-04 000004.SZ 0.607471
3 2023-01-04 000005.SZ 0.691366
4 2023-01-04 000006.SZ 0.634619
... ... ... ...
1578415 2024-04-24 688799.SH 0.846078
1578416 2024-04-24 688800.SH 0.052880
1578417 2024-04-24 688819.SH 0.709161
1578418 2024-04-24 688981.SH 0.984372
1578419 2024-04-24 689009.SH 0.605465

1578420 rows × 3 columns

In [41]:
#### Alpha191因子第186个

alpha_sql = f"""

SELECT
    date,
    instrument,
    (m_lag(m_avg(abs(m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) - m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)) / (m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) + m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)),14) * 100, 6) + m_lag(m_avg(abs(m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) - m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)) / (m_sum(IF(LD > 0 AND LD > HD, LD, 0),14) * 100 / m_sum(TR, 14) + m_sum(IF(HD > 0 AND HD > LD, HD, 0),14) * 100 / m_sum(TR, 14)),14) * 100, 6)) / 2 AS alpha_a191_f0186
FROM data_base

"""

get_factor_data(alpha_sql)
Out[41]:
date instrument alpha_a191_f0186
0 2023-02-24 000001.SZ 28.767496
1 2023-02-24 000002.SZ 39.522212
2 2023-02-24 000004.SZ 36.864594
3 2023-02-24 000005.SZ 38.889257
4 2023-02-24 000006.SZ 31.765472
... ... ... ...
1413948 2024-04-24 688799.SH 33.682369
1413949 2024-04-24 688800.SH 26.839940
1413950 2024-04-24 688819.SH 12.278197
1413951 2024-04-24 688981.SH 43.632087
1413952 2024-04-24 689009.SH 37.557814

1413953 rows × 3 columns

In [42]:
#### Alpha191因子第187个

alpha_sql = f"""

SELECT
    date,
    instrument,
    m_sum(IF(open <= m_lag(open, 1), 0, greatest((high - open), (open - m_lag(open, 1)))), 20) AS alpha_a191_f0187
FROM data_base

"""

get_factor_data(alpha_sql)
Out[42]:
date instrument alpha_a191_f0187
0 2023-02-07 000001.SZ 402.194824
1 2023-02-07 000002.SZ 710.306724
2 2023-02-07 000004.SZ 8.981135
3 2023-02-07 000005.SZ 2.780281
4 2023-02-07 000006.SZ 35.824385
... ... ... ...
1480729 2024-04-24 688799.SH 15.796639
1480730 2024-04-24 688800.SH 14.707648
1480731 2024-04-24 688819.SH 6.046750
1480732 2024-04-24 688981.SH 4.590000
1480733 2024-04-24 689009.SH 12.500000

1480734 rows × 3 columns

In [43]:
#### Alpha191因子第188个

alpha_sql = f"""

SELECT
    date,
    instrument,
    ((high-low-m_ta_ewm(high-low,11,2))/m_ta_ewm(high-low,11,2))*100 AS alpha_a191_f0188
FROM data_base

"""

get_factor_data(alpha_sql)
Out[43]:
date instrument alpha_a191_f0188
0 2023-01-04 000001.SZ 0.000000
1 2023-01-04 000002.SZ 0.000000
2 2023-01-04 000004.SZ 0.000000
3 2023-01-04 000005.SZ 0.000000
4 2023-01-04 000006.SZ 0.000000
... ... ... ...
1578396 2024-04-24 688799.SH -47.102031
1578397 2024-04-24 688800.SH 13.153119
1578398 2024-04-24 688819.SH -27.236386
1578399 2024-04-24 688981.SH -30.936837
1578400 2024-04-24 689009.SH -30.654749

1578401 rows × 3 columns

In [44]:
#### Alpha191因子第189个

alpha_sql = f"""

SELECT
    date,
    instrument,
    m_avg(abs(close-m_avg(close,6)),6) AS alpha_a191_f0189
FROM data_base

"""

get_factor_data(alpha_sql)
Out[44]:
date instrument alpha_a191_f0189
0 2023-01-18 000001.SZ 21.204795
1 2023-01-18 000002.SZ 23.475263
2 2023-01-18 000004.SZ 0.593775
3 2023-01-18 000005.SZ 0.213670
4 2023-01-18 000006.SZ 5.419087
... ... ... ...
1526989 2024-04-24 688799.SH 1.207414
1526990 2024-04-24 688800.SH 0.829662
1526991 2024-04-24 688819.SH 0.493398
1526992 2024-04-24 688981.SH 0.252222
1526993 2024-04-24 689009.SH 1.040556

1526994 rows × 3 columns

In [45]:
#### Alpha191因子第190个(将外层log去掉避免log取0错误)

alpha_sql = f"""

SELECT
    date,
    instrument,
    (((m_sum(IF(close / m_lag(close,1) - 1 > ((close / m_lag(close, 19))^(1/20) - 1), 1, 0), 20) - 1) * (m_sum(IF(close / m_lag(close,1) - 1 < (close / m_lag(close, 19))^(1/20) - 1, (close / m_lag(close,1) - 1 - (close / m_lag(close, 19))^(1/20) - 1)^2, 0), 20))) / ((m_sum(IF(close / m_lag(close,1) - 1 < (close / m_lag(close, 19))^(1/20) - 1, 1, 0), 20)) * (m_sum(IF(close / m_lag(close,1) - 1 > (close / m_lag(close, 19))^(1/20) - 1, (close / m_lag(close,1) - 1 - (close / m_lag(close, 19))^(1/20) - 1)^2, 0), 20)))) AS alpha_a191_f0190
FROM data_base

"""

get_factor_data(alpha_sql)
Out[45]:
date instrument alpha_a191_f0190
0 2023-02-08 000001.SZ 0.000000
1 2023-02-08 000004.SZ 0.000000
2 2023-02-08 000005.SZ 0.000000
3 2023-02-08 000007.SZ 0.000000
4 2023-02-08 000008.SZ 0.000000
... ... ... ...
1471912 2024-04-24 688799.SH 0.924794
1471913 2024-04-24 688800.SH 0.967159
1471914 2024-04-24 688819.SH 0.930602
1471915 2024-04-24 688981.SH 0.937772
1471916 2024-04-24 689009.SH 0.971346

1471917 rows × 3 columns

In [46]:
#### Alpha191因子第191个

alpha_sql = f"""

SELECT
    date,
    instrument,
    ((m_corr(m_avg(volume,20), low, 5) + ((high + low) / 2)) -close) AS alpha_a191_f0191
FROM data_base

"""

get_factor_data(alpha_sql)
Out[46]:
date instrument alpha_a191_f0191
0 2023-02-13 000001.SZ -5.268537
1 2023-02-13 000002.SZ -34.912554
2 2023-02-13 000004.SZ 0.203996
3 2023-02-13 000005.SZ 0.391143
4 2023-02-13 000006.SZ -1.507964
... ... ... ...
1459961 2024-04-24 688799.SH -0.821956
1459962 2024-04-24 688800.SH -1.889707
1459963 2024-04-24 688819.SH -0.436248
1459964 2024-04-24 688981.SH 0.284227
1459965 2024-04-24 689009.SH -0.893774

1459966 rows × 3 columns