# 数据分析及图形化实现代码
# 由 small_q 创建,最终由 small_q 修改,已被 68 位用户浏览
import dai
import bigcharts
import pandas as pd
from bigcharts import opts
# Plot a 10-bin histogram of total market cap for one day, after trimming
# the extreme tails of the distribution.

# Load the total market cap of every instrument returned for the queried date.
market_cap = dai.query("""
SELECT date, instrument, total_market_cap
FROM cn_stock_valuation
WHERE date = '2023-12-07';
""").df()

# Quantile bounds of the range to keep: the 2nd to the 98th percentile.
lower_quantile = market_cap['total_market_cap'].quantile(0.02)
upper_quantile = market_cap['total_market_cap'].quantile(0.98)

# Drop rows below the lower bound or above the upper bound (both bounds are
# inclusive). Rows with a missing market cap never fall inside the range, so
# they are dropped as well.
market_cap = market_cap[
    market_cap['total_market_cap'].between(lower_quantile, upper_quantile)
]

# pd.cut cannot bin an empty column; fail here with a message that points at
# the likely cause instead of an opaque error from the binning step.
if market_cap.empty:
    raise ValueError(
        "no total_market_cap values left to plot; "
        "check that the query returned rows for the requested date"
    )

# Split the remaining value range into 10 equal-width intervals.
bins = pd.cut(market_cap['total_market_cap'], bins=10)

# Count the rows that fall into each interval. observed=False is passed
# explicitly so that empty intervals stay in the result with a count of 0
# regardless of the pandas version's default for categorical group keys.
bin_counts = market_cap.groupby(bins, observed=False)['total_market_cap'].count()

# Prepare the chart data.
categories = bin_counts.index.astype(str).tolist()  # interval labels as strings
values = bin_counts.values.tolist()  # row count per interval
data = pd.DataFrame({'categories': categories, 'values': values})

# Render one bar per interval.
bigcharts.Chart(
    data=data,
    type_="bar",
    x='categories',
    y=['values']
).render()