克隆策略
In [39]:
import numpy as np
import pandas as pd
import seaborn as sns
# Read the 'financial_statement_CN_STOCK_A' data source for the requested date
# window and drop every row that contains a missing value, then preview it.
df = (
    DataSource('financial_statement_CN_STOCK_A')
    .read(start_date='2017-01-01', end_date='2019-01-02')
    .dropna()
)
df.head()
Out[39]:
date instrument fs_account_payable fs_account_receivable fs_bps fs_capital_reserves fs_paicl_up_capital fs_cash_ratio fs_construction_in_process fs_deducted_profit ... fs_gross_revenues fs_total_profit fs_undistributed_profit fs_common_equity fs_eps_yoy fs_net_profit_yoy fs_operating_revenue_yoy fs_quarter fs_quarter_year fs_quarter_index
53 2017-02-23 600749.SHA 1.544971e+07 1.103501e+07 2.8530 4.398794e+08 1.891379e+08 0.103494 1.366620e+08 -89887352.0 ... 1.262485e+08 -98192512.0 -95045096.0 5.396186e+08 -1877.031860 -1876.277588 -16.968201 20161231 2016 4
88 2017-02-28 600644.SHA 1.168269e+08 1.686708e+07 2.2686 1.373118e+09 5.384006e+08 0.327066 2.471629e+08 71962240.0 ... 1.905601e+09 256792256.0 -791861376.0 1.221431e+09 83.038208 83.066200 15.147800 20161231 2016 4
110 2017-02-28 000952.SZA 1.430234e+08 9.772068e+07 2.5984 6.079381e+07 2.517055e+08 0.164716 8.226689e+07 143457216.0 ... 7.147240e+08 155638368.0 265709344.0 6.540252e+08 579.518066 577.960205 27.548300 20161231 2016 4
128 2017-02-28 002168.SZA 7.757095e+07 1.150886e+08 1.6402 4.594037e+08 8.257448e+08 0.253697 9.032186e+05 39723848.0 ... 2.886184e+08 80488400.0 426517696.0 1.354351e+09 -50.000000 -43.917400 37.850700 20161231 2016 4
156 2017-03-03 000050.SZA 2.670553e+09 1.966606e+09 9.8201 1.133765e+10 1.401099e+09 1.073765 3.688779e+09 254128928.0 ... 1.073676e+10 693072192.0 938929408.0 1.375896e+10 -17.118956 2.614500 1.963500 20161231 2016 4

5 rows × 63 columns

In [40]:
# Plot the univariate distribution of the fs_roe column.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 (histplot/displot
# replace it) — confirm the platform's seaborn version before switching.
sns.distplot(df['fs_roe'])
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f587dcb70>
In [41]:
# Joint plot of fs_roe against fs_roa_ttm, using the 'reg' (regression) kind.
sns.jointplot(
    data=df,
    x='fs_roe',
    y='fs_roa_ttm',
    kind='reg',
)
Out[41]:
<seaborn.axisgrid.JointGrid at 0x7f3f6417a278>
In [42]:
# Read the 'west_CN_STOCK_A' data source for the same date window, drop rows
# with missing values, and draw pairwise plots over the remaining columns.
df_0 = (
    DataSource('west_CN_STOCK_A')
    .read(start_date='2017-01-01', end_date='2019-01-02')
    .dropna()
)
sns.pairplot(df_0)
Out[42]:
<seaborn.axisgrid.PairGrid at 0x7f3f586c5f28>
In [43]:
# Rug plot of the fs_roe column.
sns.rugplot(df['fs_roe'])
Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f56ecb128>
In [44]:
# Bar plot of fs_operating_revenue per instrument, aggregated with seaborn's
# default estimator.
sns.barplot(
    data=df,
    x='instrument',
    y='fs_operating_revenue',
)
Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f642e5208>
In [45]:
# Bar plot of fs_operating_revenue per instrument, aggregating each group with
# np.std rather than the default estimator.
sns.barplot(
    data=df,
    x='instrument',
    y='fs_operating_revenue',
    estimator=np.std,
)
Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f56b8f240>
In [46]:
# Count the number of rows for each distinct value of the date column.
sns.countplot(
    data=df,
    x='date',
)
Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f563c0048>
In [47]:
# Box plot of fs_roe grouped by fs_quarter_year, on the un-clipped data in df.
sns.boxplot(
    data=df,
    x='fs_quarter_year',
    y='fs_roe',
)
Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f5634da58>
In [48]:
# Keep only the columns used by the categorical plots below: the two grouping
# keys (fs_quarter_year, fs_quarter_index) and the plotted value (fs_roe).
df_1 = df[[
    'fs_quarter_year',
    'fs_roe',
    'fs_quarter_index',
]]
def winsorize(df, width=3, columns=None):
    """Clip outliers in factor columns to ``mean ± width * std``.

    Each selected column is clipped independently, using that column's own
    mean and standard deviation.

    Args:
        df: Input DataFrame. It is not modified; a clipped copy is returned.
        width: Half-width of the allowed band, in standard deviations.
        columns: Optional iterable of column names to clip. When omitted,
            every numeric column except ``date`` and ``instrument`` is
            clipped; non-numeric columns are left untouched.

    Returns:
        A copy of ``df`` with the selected columns clipped.
    """
    df = df.copy()
    if columns is None:
        # Mean/std are only meaningful for numeric data; skipping other dtypes
        # avoids a failure when the frame carries string/label columns.
        numeric_columns = df.select_dtypes(include='number').columns
        factor_columns = [
            name for name in numeric_columns
            if name not in ('date', 'instrument')
        ]
    else:
        factor_columns = list(columns)
    for factor in factor_columns:
        mean = df[factor].mean()
        sigma = df[factor].std()
        df[factor] = df[factor].clip(mean - width * sigma, mean + width * sigma)
    return df
# Clip df_1 to mean ± 3 standard deviations and redraw the fs_roe box plot on
# the clipped data.
# NOTE(review): winsorize is applied to the whole of df_1, so the grouping
# columns fs_quarter_year / fs_quarter_index are clipped too — confirm that is
# intended.
df_1 = winsorize(df=df_1, width=3)
sns.boxplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
)
Out[48]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f56294d68>
In [49]:
# Box plot of clipped fs_roe per fs_quarter_year, split by fs_quarter_index.
sns.boxplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
    hue='fs_quarter_index',
)
Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f562af1d0>
In [50]:
# Violin plot of clipped fs_roe per fs_quarter_year.
sns.violinplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
)
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f5620ab70>
In [51]:
# Violin plot of clipped fs_roe per fs_quarter_year, split by fs_quarter_index.
sns.violinplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
    hue='fs_quarter_index',
)
Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f562063c8>
In [52]:
# Violin plot of clipped fs_roe per fs_quarter_year, split by fs_quarter_index.
# NOTE(review): this is the same call as the preceding cell (In [51]).
sns.violinplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
    hue='fs_quarter_index',
)
Out[52]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f561eada0>
In [53]:
# Strip plot of the individual clipped fs_roe values per fs_quarter_year.
sns.stripplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
)
Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f56138208>
In [54]:
# Strip plot of clipped fs_roe per fs_quarter_year with jitter explicitly
# enabled.
sns.stripplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
    jitter=True,
)
Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f56551be0>
In [55]:
# Jittered strip plot of clipped fs_roe per fs_quarter_year, coloured by
# fs_quarter_index.
sns.stripplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
    hue='fs_quarter_index',
    jitter=True,
)
Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f56153278>
In [56]:
# Swarm plot of clipped fs_roe per fs_quarter_year.
sns.swarmplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
)
Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f5614f160>
In [58]:
# Swarm plot of clipped fs_roe per fs_quarter_year, coloured by
# fs_quarter_index.
sns.swarmplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
    hue='fs_quarter_index',
)
Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f56106828>
In [61]:
# Draw the violin plot first, then a black swarm plot of the same data as a
# second call in the same cell.
sns.violinplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
)
sns.swarmplot(
    data=df_1,
    x='fs_quarter_year',
    y='fs_roe',
    color='black',
)
Out[61]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f5608e400>