    In [60]:
    # This code was auto-generated by the visual strategy environment on 2022-09-03 22:23.
    # This cell can only be edited in visual mode. You can also copy the code into a new code cell or strategy and modify it there.
    
    
    # Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    
    import pandas as pd
    def m2_run_bigquant_run(input_1):
        # input_1 is the training-set input (input_2 would be the test set, but it is not wired here)
        df1 = input_1.read_df()       # training set
        df = df1[['P1', 'P2', 'P3']]
        X_train = df.values.tolist()  # training data
    
        # Fit the standardizer on the training set only
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
    
        # Compress the three MACD features down to a single principal component
        pca = PCA(n_components=1)
        pca.fit(X_train)                              # fit PCA on the training set
        principalComponents = pca.transform(X_train)  # project the training data
    
        # Wrap the component in a DataFrame and name the column
        principalDf = pd.DataFrame(data=principalComponents, columns=['PMACD1'])
        df1.reset_index(drop=True, inplace=True)
        principalDf.reset_index(drop=True, inplace=True)
        finalDf = pd.concat([df1, principalDf], axis=1)
        finalDf = finalDf[['date', 'instrument', 'PMACD1']]
        data_1 = DataSource.write_df(finalDf)
    
        # Persist the fitted scaler and PCA so later data can be transformed without refitting
        pca_scaler_dict = {'scaler': scaler, 'pca': pca}
        data_2 = DataSource.write_pickle(pca_scaler_dict)
        return Outputs(data_1=data_1, data_2=data_2)
    
    # Optional post-processing function. Its input is the main function's output; it can
    # reshape the data or return a friendlier outputs format. Its output is not cached.
    def m2_post_run_bigquant_run(outputs):
        return outputs
    
    # Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
    def m5_run_bigquant_run(input_1):
        # Forward-fill the financial data, then drop rows that are still NaN
        df = input_1.read_df()
        df.fillna(method='ffill', inplace=True)
        df = df.dropna()
    
        data_1 = DataSource.write_df(df)
    
        return Outputs(data_1=data_1)
    
    # Optional post-processing function. Its input is the main function's output; it can
    # reshape the data or return a friendlier outputs format. Its output is not cached.
    def m5_post_run_bigquant_run(outputs):
        return outputs
    
    
    m1 = M.input_features.v1(
        features="""P1=ta_macd_macd_12_26_9_0
    P2=ta_macd_macdhist_12_26_9_0 
    P3=ta_macd_macdsignal_12_26_9_0"""
    )
    
    m7 = M.instruments.v2(
        start_date='2020-08-01',
        end_date='2022-08-20',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    m4 = M.general_feature_extractor.v7(
        instruments=m7.data,
        features=m1.data,
        start_date='',
        end_date='',
        before_start_days=90
    )
    
    m10 = M.derived_feature_extractor.v3(
        input_data=m4.data,
        features=m1.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    m9 = M.fillna_And_dropna.v2(
        input_1=m10.data
    )
    
    m2 = M.cached.v3(
        input_1=m9.data_1,
        run=m2_run_bigquant_run,
        post_run=m2_post_run_bigquant_run,
        input_ports='input_1',
        params='{}',
        output_ports='data_1,data_2'
    )
    
    m5 = M.cached.v3(
        input_1=m10.data,
        run=m5_run_bigquant_run,
        post_run=m5_post_run_bigquant_run,
        input_ports='input_1',
        params='{}',
        output_ports='data_1'
    )
    
    m6 = M.input_features.v1(
        features="""PMD1"""
    )
    
    m11 = M.input_features.v1(
        features="""PMD1=clip(normalize(PMACD1),-2,2)
    #PMD2=clip(normalize(PMACD2),-2,2)"""
    )
    
    m8 = M.derived_feature_extractor.v3(
        input_data=m2.data_1,
        features=m11.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=False,
        remove_extra_columns=False,
        user_functions={}
    )
    
    m3 = M.factorlens.v2(
        features=m6.data,
        user_factor_data=m8.data,
        title='因子分析: {factor_name}',
        start_date='2019-01-01',
        end_date='2019-12-31',
        rebalance_period=22,
        delay_rebalance_days=0,
        rebalance_price='close_0',
        stock_pool='全市场',
        quantile_count=5,
        commission_rate=0.0016,
        returns_calculation_method='累乘',
        benchmark='无',
        drop_new_stocks=60,
        drop_price_limit_stocks=True,
        drop_st_stocks=True,
        drop_suspended_stocks=True,
        cutoutliers=False,
        normalization=False,
        neutralization=[],
        metrics=['因子表现概览', '因子分布', '因子行业分布', '因子市值分布', 'IC分析', '买入信号重合分析', '因子估值分析', '因子拥挤度分析', '因子值最大/最小股票', '表达式因子值', '多因子相关性分析'],
        factor_coverage=0.5,
        user_data_merge='left'
    )
    
    ---------------------------------------------------------------------------
    ZeroDivisionError                         Traceback (most recent call last)
    <ipython-input-60-168a8f112cb1> in <module>
        111 )
        112 
    --> 113 m3 = M.factorlens.v2(
        114     features=m6.data,
        115     user_factor_data=m8.data,
    
    ZeroDivisionError: float division
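    The ZeroDivisionError is most likely an empty-window problem: the factor lens is set to
    2019-01-01 through 2019-12-31, while the instruments module only covers 2020-08-01 to
    2022-08-20, so the lens receives no factor rows for its window and ends up dividing by
    zero. Aligning the two date ranges should clear the error.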
    In [33]:
    df=m9.data_1.read_df()
    df.columns
    
    Out[33]:
    Index(['close_0', 'date', 'industry_sw_level1_0', 'instrument', 'open_0',
           'rank_swing_volatility_10_0', 'rank_swing_volatility_30_0',
           'rank_swing_volatility_5_0', 'rank_volatility_10_0',
           'rank_volatility_30_0', 'rank_volatility_5_0', '_P1', 'P1', '_P2', 'P2',
           '_P3', 'P3', '_P4', 'P4', '_P5', 'P5', '_P6', 'P6', 'F_Return'],
          dtype='object')
    In [35]:
    df.columns[12]
     
    
    Out[35]:
    'P1'
    In [37]:
    i = 3
    F_index = i + 11                  # factor columns start at index 11 ('_P1'); index 14 is 'P2'
    T1 = df.iloc[:, F_index]          # the factor column
    T2 = df.iloc[:, -1]               # F_Return
    T3 = pd.concat([T1, T2], axis=1)
    s = T3.corr('spearman')           # Spearman rank correlation (rank IC)
    s
    
    Out[37]:
                     P2  F_Return
    P2         1.000000 -0.038027
    F_Return  -0.038027  1.000000
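    The same check can be run over every factor column in one pass; a small sketch, with the
    column pattern taken from the column listing above:
    
    # Spearman rank IC of each P-factor against F_Return
    factor_cols = [c for c in df.columns if c.startswith('P')]   # 'P1'..'P6'; '_P*' columns are skipped
    ic = {c: df[c].corr(df['F_Return'], method='spearman') for c in factor_cols}
    pd.Series(ic).sort_values()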
    In [15]:
     
    scalerDict=m2.data_2.read_pickle()
    pca=scalerDict['pca']
    pca.explained_variance_ratio_.sum()
    
    Out[15]:
    1.0000000000000222
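    A single component explaining ~100% of the variance is plausible here: macdhist is by
    construction the difference of macd and macdsignal, and the signal line is a smoothed copy
    of the MACD line, so the three standardized inputs are almost perfectly collinear.
    
    Because the fitted scaler and PCA are pickled to data_2, later data can be transformed
    without refitting. A minimal sketch, where new_df is a hypothetical test-set DataFrame
    with the same P1/P2/P3 columns:
    
    artifacts = m2.data_2.read_pickle()             # {'scaler': ..., 'pca': ...}
    X_new = new_df[['P1', 'P2', 'P3']].values       # new_df is hypothetical
    X_new = artifacts['scaler'].transform(X_new)    # reuse the training-set standardization
    pc_new = artifacts['pca'].transform(X_new)      # project onto the training-set component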
    In [14]:
    # 2-D scatter (draft from an earlier run in which the PCA kept two
    # components named PM1/PM2; the current m2 output only has PMACD1)
    import matplotlib.pyplot as plt
    df = m2.data_1.read_df()
    
    plt.figure(figsize=(8, 8))
    plt.xlabel('PM1')
    plt.ylabel('PM2')
    plt.title("Volatility factors after PCA", size=20)
    
    plt.scatter(df.PM1, df.PM2, c='r', s=2)
    
    Out[14]:
    <matplotlib.collections.PathCollection at 0x7f23398015b0>
    In [11]:
    # Draft: pull the filled data and standardize a wider set of factor columns
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    
    import pandas as pd
    
    df1 = m9.data_1.read_df()
    df = df1[['P8', 'P9', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15']]
    X = df.values.tolist()
    X = StandardScaler().fit_transform(X)
    # the PCA compression itself happens in the next cell
    
    In [21]:
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    
    import pandas as pd
    
    # compress the 8 standardized factors from the previous cell to 3 components
    pca = PCA(n_components=3)
    principalComponents = pca.fit_transform(X)
    # principalDf = pd.DataFrame(data=principalComponents, columns=['PJ1', 'PJ2', 'PJ3'])
    print(pca.explained_variance_ratio_.sum())
    
    0.5910563958040396
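    Three components keep only ~59% of the variance here. When a variance target matters more
    than a fixed component count, scikit-learn can choose the count itself; a small sketch,
    reusing the X from above:
    
    # keep the smallest number of components that reaches 90% explained variance
    pca90 = PCA(n_components=0.90, svd_solver='full')
    pca90.fit(X)
    print(pca90.n_components_, pca90.explained_variance_ratio_.sum())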
    
    In [29]:
    from mpl_toolkits.mplot3d import Axes3D
    
    In [5]:
    # Draft 3-D scatter of the three raw MACD features
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    
    df = m9.data_1.read_df()
    
    fig = plt.figure()   # create the figure canvas
    ax = Axes3D(fig)     # attach a 3-D axes to the canvas
    # 3-D scatter
    ax.scatter(df.P1, df.P2, df.P3, c='g', marker='*')
    
    # 2-D version for reference
    # plt.scatter(df.PJ2, df.PJ3, c='r', s=2)
    
    Out[5]:
    <mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f23399b2dc0>
    In [86]:
    df1=m9.data_1.read_df()
    
    In [101]:
    df2=df1[['P4','P8','P9','P10','P12','P15','P18','P21','P22']]
    df2.reset_index(drop=True)
    
    Out[101]:
    P4 P8 P9 P10 P12 P15 P18 P21 P22
    0 0.230210 0.016158 0.145120 0.923573 -66.818604 0.233985 0.128719 0.840237 0.008853
    1 0.229962 0.016339 0.146741 0.933886 -66.818604 0.236598 0.130157 0.849620 0.008952
    2 0.229363 0.016158 0.145120 0.923573 -66.818604 0.233985 0.128719 0.840237 0.008853
    3 0.229901 0.016066 0.144296 0.918327 -66.818604 0.232657 0.127988 0.835465 0.008803
    4 0.229486 0.016364 0.146967 0.935327 -66.818604 0.236963 0.130357 0.850931 0.008966
    ... ... ... ... ... ... ... ... ... ...
    2438226 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438227 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438228 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438229 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438230 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190

    2438231 rows × 9 columns

    In [102]:
    # PCA dimensionality-reduction draft
    df = df1[['P4', 'P8', 'P9', 'P10', 'P12', 'P15', 'P18', 'P21', 'P22']]
    X = df.values.tolist()
    
    from sklearn.preprocessing import StandardScaler
    
    X = StandardScaler().fit_transform(X)
    
    from sklearn.decomposition import PCA
    import pandas as pd
    
    # Compress to two components (two dimensions)
    pca = PCA(n_components=2)
    
    # Fit and transform in one step
    principalComponents = pca.fit_transform(X)
    
    # Wrap the components in a DataFrame and name the columns
    principalDf = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2'])
    
    # Spot-check five random rows
    principalDf.sample(5)
    
    # 2-D visualization: if the points separate well, the two components
    # have preserved most of the structure of the original factors
    import matplotlib.pyplot as plt
    
    plt.figure(figsize=(8, 8))
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.title("Volatility factors after PCA", size=20)
    
    plt.scatter(principalDf.PC1, principalDf.PC2, c='r', s=2)
    
    Out[102]:
    <matplotlib.collections.PathCollection at 0x7faa7cc95b80>
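    With about 2.4 million rows, the scatter above draws every point. A quicker variant is to
    sample before plotting; a minimal sketch:
    
    sample = principalDf.sample(100_000, random_state=0)   # random subsample
    plt.scatter(sample.PC1, sample.PC2, c='r', s=2)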
    In [115]:
    df1.reset_index(drop=True,inplace=True)
    principalDf.reset_index(drop=True,inplace=True)
    finalDf = pd.concat([df1,principalDf], axis = 1)
    finalDf
    
    Out[115]:
    date fs_account_payable_0 fs_account_receivable_0 fs_cash_equivalents_0 fs_current_liabilities_0 fs_deducted_profit_ttm_0 fs_eqy_belongto_parcomsh_0 fs_gross_profit_margin_ttm_0 fs_net_cash_flow_ttm_0 fs_net_profit_margin_ttm_0 ... P15 P16 P17 P18 P19 P20 P21 P22 PC1 PC2
    0 2020-05-06 2.493776e+11 2.627513e+09 1.732716e+11 1.265385e+12 3.820315e+10 1.886639e+11 35.756901 6.944555e+10 14.799000 ... 0.233985 35.756901 0.635672 0.128719 4.263509 0.583810 0.840237 0.008853 2.132572 -0.603988
    1 2020-05-07 2.493776e+11 2.627513e+09 1.732716e+11 1.265385e+12 3.820315e+10 1.886639e+11 35.756901 6.944555e+10 14.799000 ... 0.236598 35.756901 0.642771 0.130157 4.311118 0.590330 0.849620 0.008952 2.168061 -0.610801
    2 2020-05-08 2.493776e+11 2.627513e+09 1.732716e+11 1.265385e+12 3.820315e+10 1.886639e+11 35.756901 6.944555e+10 14.799000 ... 0.233985 35.756901 0.635672 0.128719 4.263509 0.583810 0.840237 0.008853 2.132628 -0.604162
    3 2020-05-11 2.493776e+11 2.627513e+09 1.732716e+11 1.265385e+12 3.820315e+10 1.886639e+11 35.756901 6.944555e+10 14.799000 ... 0.232657 35.756901 0.632062 0.127988 4.239293 0.580495 0.835465 0.008803 2.114551 -0.600612
    4 2020-05-12 2.493776e+11 2.627513e+09 1.732716e+11 1.265385e+12 3.820315e+10 1.886639e+11 35.756901 6.944555e+10 14.799000 ... 0.236963 35.756901 0.643762 0.130357 4.317768 0.591240 0.850931 0.008966 2.173047 -0.611843
    ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
    2438226 2022-08-15 3.311792e+09 4.920151e+09 8.059961e+10 2.899598e+10 7.245396e+09 1.122484e+11 33.228298 2.787745e+10 35.001801 ... 0.086067 33.228298 0.346550 0.022369 0.089521 0.248839 0.010225 0.015190 -0.333674 -0.318910
    2438227 2022-08-16 3.311792e+09 4.920151e+09 8.059961e+10 2.899598e+10 7.245396e+09 1.122484e+11 33.228298 2.787745e+10 35.001801 ... 0.086067 33.228298 0.346550 0.022369 0.089521 0.248839 0.010225 0.015190 -0.333674 -0.318910
    2438228 2022-08-17 3.311792e+09 4.920151e+09 8.059961e+10 2.899598e+10 7.245396e+09 1.122484e+11 33.228298 2.787745e+10 35.001801 ... 0.086067 33.228298 0.346550 0.022369 0.089521 0.248839 0.010225 0.015190 -0.333674 -0.318910
    2438229 2022-08-18 3.311792e+09 4.920151e+09 8.059961e+10 2.899598e+10 7.245396e+09 1.122484e+11 33.228298 2.787745e+10 35.001801 ... 0.086067 33.228298 0.346550 0.022369 0.089521 0.248839 0.010225 0.015190 -0.333674 -0.318910
    2438230 2022-08-19 3.311792e+09 4.920151e+09 8.059961e+10 2.899598e+10 7.245396e+09 1.122484e+11 33.228298 2.787745e+10 35.001801 ... 0.086067 33.228298 0.346550 0.022369 0.089521 0.248839 0.010225 0.015190 -0.333674 -0.318910

    2438231 rows × 49 columns

    In [80]:
    principalDf
    
    Out[80]:
    PC1 PC2
    0 2.132572 -0.603988
    1 2.168061 -0.610801
    2 2.132628 -0.604162
    3 2.114551 -0.600612
    4 2.173047 -0.611843
    ... ... ...
    2438226 -0.333674 -0.318910
    2438227 -0.333674 -0.318910
    2438228 -0.333674 -0.318910
    2438229 -0.333674 -0.318910
    2438230 -0.333674 -0.318910

    2438231 rows × 2 columns

    In [85]:
    df.reset_index(drop=True)   # returns a copy; df itself keeps its 'index' column, as shown below
    df
    
    Out[85]:
    index P4 P8 P9 P10 P12 P15 P18 P21 P22
    0 164 0.230210 0.016158 0.145120 0.923573 -66.818604 0.233985 0.128719 0.840237 0.008853
    1 165 0.229962 0.016339 0.146741 0.933886 -66.818604 0.236598 0.130157 0.849620 0.008952
    2 166 0.229363 0.016158 0.145120 0.923573 -66.818604 0.233985 0.128719 0.840237 0.008853
    3 167 0.229901 0.016066 0.144296 0.918327 -66.818604 0.232657 0.127988 0.835465 0.008803
    4 168 0.229486 0.016364 0.146967 0.935327 -66.818604 0.236963 0.130357 0.850931 0.008966
    ... ... ... ... ... ... ... ... ... ... ...
    2438226 2438390 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438227 2438391 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438228 2438392 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438229 2438393 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190
    2438230 2438394 0.339563 0.011270 0.026493 0.516484 15.540200 0.086067 0.022369 0.010225 0.015190

    2438231 rows × 10 columns

    In [4]:
    ############################## Factor-correlation analysis template ##############################
    import pandas as pd
    import matplotlib.pyplot as plt  # plotting
    import seaborn as sns            # plotting
    df1 = m9.data_1.read_df()
    
    df = df1[['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9', 'P10', 'P11', 'P18', 'P19', 'P20']]
    _, ax = plt.subplots(figsize=(12, 10))  # 12 x 10 inch figure
    corr = df.corr(method='spearman')  # Spearman rank correlation between the columns
    # corr = df.corr(method='kendall')
    # corr = df.corr(method='pearson')
    cmap = sns.diverging_palette(220, 10, as_cmap=True)  # diverging palette between HUSL hues 220 and 10, returned as a matplotlib colormap
    _ = sns.heatmap(
        corr,                      # DataFrame whose index/columns label the rows and columns
        cmap=cmap,                 # mapping from values to colors
        square=True,               # square cells
        cbar_kws={'shrink': .9},   # keyword arguments for fig.colorbar
        ax=ax,                     # axes to draw on
        annot=True,                # write the value inside each cell
        annot_kws={'fontsize': 12})  # draw the matrix as a color-coded grid
    
    plt.show()
    
    In [11]:
    # PCA dimensionality-reduction example on the iris dataset
    from sklearn.datasets import load_iris
    
    iris = load_iris()
    
    X = iris.data
    target = iris.target
    
    from sklearn.preprocessing import StandardScaler
    
    X = StandardScaler().fit_transform(X)
    # print(X)
    
    from sklearn.decomposition import PCA
    import pandas as pd
    
    # Compress to two components (two dimensions)
    pca = PCA(n_components=2)
    
    # Fit and transform in one step
    principalComponents = pca.fit_transform(X)
    
    # Wrap the components in a DataFrame and name the columns
    principalDf = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2'])
    
    # Spot-check five random rows
    principalDf.sample(5)
    
    # Fold the target back into a DataFrame
    target_df = pd.DataFrame(target, columns=['target'])
    
    # Concatenate the two frames along axis=1 (columns)
    finalDf = pd.concat([principalDf, target_df], axis=1)
    
    finalDf.sample(5)
    
    # 2-D visualization: the three species separate cleanly, so the two
    # components preserve the structure of the original four features
    import matplotlib.pyplot as plt
    
    plt.figure(figsize=(8, 8))
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.title("2-component PCA", size=20)
    
    # the three iris species
    targets = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
    # plot them in red, green, and blue respectively
    colors = ['r', 'g', 'b']
    
    # split the rows by species; finalDf[finalDf['target']==0] selects species 0
    flower_datas = [finalDf[finalDf['target'] == 0],
                    finalDf[finalDf['target'] == 1],
                    finalDf[finalDf['target'] == 2]]
    
    for flower_data, color in zip(flower_datas, colors):
        plt.scatter(flower_data.PC1, flower_data.PC2, c=color, s=50)
    plt.legend(targets)
    plt.grid()
    
    In [33]:
    m4d = m4.data.read_df()
    m4d.fillna(method='ffill', inplace=True)   # fills in place and returns None
    m4d                                        # echo the filled frame (shown below)
    
    Out[33]:
    date instrument rank_fs_cash_ratio_0 rank_fs_eps_0 rank_fs_net_profit_qoq_0 rank_fs_operating_revenue_qoq_0 rank_fs_roa_ttm_0 rank_fs_roe_ttm_0
    0 2020-05-06 000001.SZA NaN 0.960813 0.753904 0.908944 NaN 0.632272
    1 2020-05-07 000001.SZA NaN 0.960586 0.753362 0.909042 NaN 0.632300
    2 2020-05-08 000001.SZA NaN 0.960597 0.753159 0.909066 NaN 0.632401
    3 2020-05-11 000001.SZA NaN 0.960597 0.753428 0.909066 NaN 0.632127
    4 2020-05-12 000001.SZA NaN 0.960576 0.753296 0.909018 NaN 0.632300
    ... ... ... ... ... ... ... ... ...
    729283 2022-08-15 873223.BJA 0.904749 0.866456 0.339563 0.815215 0.586064 0.738683
    729284 2022-08-16 873223.BJA 0.904749 0.866456 0.339563 0.815215 0.586064 0.738683
    729285 2022-08-17 873223.BJA 0.904749 0.866456 0.339563 0.815215 0.586064 0.738683
    729286 2022-08-18 873223.BJA 0.904749 0.866456 0.339563 0.815215 0.586064 0.738683
    729287 2022-08-19 873223.BJA 0.904749 0.866456 0.339563 0.815215 0.586064 0.738683

    2438395 rows × 8 columns
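    One caveat with the blanket forward-fill above: the frame stacks all instruments, so a
    plain ffill can carry one stock's last reported value into the next stock's first rows.
    A minimal sketch of a per-instrument fill, assuming the 'date' and 'instrument' columns
    shown above:
    
    m4d = m4.data.read_df().sort_values(['instrument', 'date'])
    value_cols = [c for c in m4d.columns if c not in ('date', 'instrument')]
    m4d[value_cols] = m4d.groupby('instrument')[value_cols].ffill()   # fill within each stock only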