import numpy as np
import pandas as pd
import seaborn as sns
# Shared query window for both DataSource reads below.
_date_range = dict(start_date='2017-01-01', end_date='2019-01-02')

# Financial-statement table; rows with any missing value are dropped.
df = DataSource('financial_statement_CN_STOCK_A').read(**_date_range).dropna()
df.head()

# Distribution of the fs_roe column.
# NOTE(review): seaborn documents distplot as deprecated since 0.11 —
# confirm the installed version before upgrading.
_roe = df['fs_roe']
sns.distplot(_roe)

# Scatter of fs_roe against fs_roa_ttm with a regression fit.
sns.jointplot(data=df, x='fs_roe', y='fs_roa_ttm', kind='reg')

# Second table over the same window, again without incomplete rows.
df_0 = DataSource('west_CN_STOCK_A').read(**_date_range).dropna()
sns.pairplot(df_0)

# Rug plot of the same fs_roe values.
sns.rugplot(_roe)

# Operating revenue per instrument: default estimator first, then the
# standard deviation as the bar height.
_revenue_by_instrument = dict(x='instrument', y='fs_operating_revenue', data=df)
sns.barplot(**_revenue_by_instrument)
sns.barplot(estimator=np.std, **_revenue_by_instrument)

# Number of rows per date.
sns.countplot(data=df, x='date')

# fs_roe grouped by fs_quarter_year, before any outlier clipping.
sns.boxplot(data=df, x='fs_quarter_year', y='fs_roe')

# Keep only the columns needed for the categorical plots that follow.
_categorical_plot_columns = ['fs_quarter_year', 'fs_roe', 'fs_quarter_index']
df_1 = df[_categorical_plot_columns]
def winsorize(df, width=3):
    """Clip each numeric factor column of *df* to ``mean ± width * std``.

    The ``date`` and ``instrument`` columns are treated as identifiers and
    are never modified.  Non-numeric and boolean columns are also left
    untouched, since a mean/standard-deviation band is not meaningful for
    them and computing one would raise for string data.

    Args:
        df: DataFrame containing the factor columns to clip.
        width: Half-width of the allowed band, expressed as a multiple of
            the column's standard deviation (as returned by
            ``Series.std()``).  The mean and standard deviation are computed
            per column over all rows.

    Returns:
        A new DataFrame with the same columns; the input frame is not
        modified.
    """
    # Work on a copy so the caller's frame is never mutated.
    clipped = df.copy()
    id_columns = {'date', 'instrument'}

    # Iterate in column order (not set order) so processing is deterministic.
    for factor in clipped.columns:
        if factor in id_columns:
            continue

        values = clipped[factor]
        is_numeric = pd.api.types.is_numeric_dtype(values)
        is_bool = pd.api.types.is_bool_dtype(values)
        if not is_numeric or is_bool:
            # Labels, strings, flags, etc. cannot be meaningfully clipped.
            continue

        mean = values.mean()
        sigma = values.std()
        clipped[factor] = values.clip(mean - width * sigma, mean + width * sigma)

    return clipped
# Clip the selected columns to mean ± 3 standard deviations before plotting.
df_1 = winsorize(df_1, width=3)

# Every plot below shows fs_roe grouped by fs_quarter_year on the clipped
# frame; the optional hue splits each group by fs_quarter_index.
_roe_by_year = dict(x='fs_quarter_year', y='fs_roe', data=df_1)
_quarter_hue = 'fs_quarter_index'

# Box plots: plain, then split by quarter index.
sns.boxplot(**_roe_by_year)
sns.boxplot(hue=_quarter_hue, **_roe_by_year)

# Violin plots: plain, then split by quarter index (drawn twice, as in the
# original sequence).
sns.violinplot(**_roe_by_year)
sns.violinplot(hue=_quarter_hue, **_roe_by_year)
sns.violinplot(hue=_quarter_hue, **_roe_by_year)

# Strip plots: default, with explicit jitter, and jittered with hue.
sns.stripplot(**_roe_by_year)
sns.stripplot(jitter=True, **_roe_by_year)
sns.stripplot(jitter=True, hue=_quarter_hue, **_roe_by_year)

# Swarm plots: plain, then split by quarter index.
sns.swarmplot(**_roe_by_year)
sns.swarmplot(hue=_quarter_hue, **_roe_by_year)

# Violin plot followed by a black swarm plot of the same data.
sns.violinplot(**_roe_by_year)
sns.swarmplot(color='black', **_roe_by_year)