我正在制作标准化考试的熟练程度分组条形图。以下是我的代码:
# Count students per proficiency level at the beginning of the year (BOY).
# NOTE(review): groupby sorts its keys by default, so this Series is indexed
# alphabetically (Advanced, Below Proficient, Proficient, Remedial) -- not in
# the order of `labels` defined below.
bush_prof_boy = bush.groupby(['BOY Prof'])['BOY Prof'].count()
# Convert the counts to a percentage of all non-null BOY entries.
bush_prof_pct_boy = bush_prof_boy/bush['BOY Prof'].count() * 100
# Same computation for the end of the year (EOY); same alphabetical ordering.
bush_prof_eoy = bush.groupby(['EOY Prof'])['EOY Prof'].count()
bush_prof_pct_eoy = bush_prof_eoy/bush['EOY Prof'].count() * 100
# Tick labels in the intended display order. The bar heights below are passed
# positionally, so they are NOT matched to these labels by name -- this is the
# source of the mislabelled bars described in the question.
labels = ['Remedial', 'Below Proficient', 'Proficient', 'Advanced']
fig, ax = plt.subplots()
# NOTE(review): `x` and `width` are not defined in this snippet -- presumably
# x = np.arange(len(labels)) and a bar width such as 0.35; confirm.
rects1 = ax.bar(x - width/2, bush_prof_pct_boy, width, label='BOY',
color='mediumorchid')
rects2 = ax.bar(x + width/2, bush_prof_pct_eoy, width, label='EOY', color='teal')
ax.set_ylabel('% of Students at Proficiency Level', fontsize=18)
ax.set_title('Bushwick Middle Change in Proficiency Levels', fontsize=25)
ax.set_xticks(x)
ax.set_xticklabels(labels, fontsize=25)
ax.legend(fontsize=25)
plt.yticks(fontsize=15)
# NOTE(review): plt.figure() opens a new, empty figure; called here, after the
# bars are drawn, it does not resize `fig`. A figsize would normally be passed
# to plt.subplots() instead.
plt.figure(figsize=(5,15))
plt.show()
“BOY”代表“年初”(Beginning of Year),“EOY”代表“年终”(End of Year),因此条形图旨在显示在年初和年终落入各个熟练程度级别的学生的百分比。这张图看起来不错,但当我深入查看数字时,发现 EOY 的标签是不正确的。这是我的图表:
BOY 的百分比绘制正确,但 EOY 的百分比标签错误。以下是实际的百分比,我确信它们是正确的:
BOY %
Advanced 14.0
Below Proficient 38.0
Proficient 34.0
Remedial 14.0
EOY %
Advanced 39.0
Below Proficient 18.0
Proficient 32.0
Remedial 11.0
使用 pd.cut 将分数划分为熟练程度等级;
使用 .melt 将 DataFrame 转换为长格式,然后使用 .groupby 计算年初(BOY)和年终(EOY)各等级的百分比;
使用 .pivot 整形,并用 pandas.DataFrame.plot 绘图。
已在 python 3.8、pandas 1.3.1 和 matplotlib 3.4.2 中测试。
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
# Sample data: raw numeric test scores keyed by test window ('BOY' / 'EOY').
# Both lists are loaded as the two columns of `df`.
# presumably one entry per student, BOY = beginning of year and EOY = end of
# year (per the surrounding article) -- confirm against the real data source.
# NOTE: the BOY list contains a single 0.0 score.
data = {'BOY': [11.0, 11.0, 11.0, 11.0, 11.0, 8.0, 11.0, 14.0, 12.0, 13.0, 11.0, 14.0, 10.0, 9.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 12.0, 11.0, 9.0, 12.0, 16.0, 12.0, 12.0, 12.0, 15.0, 10.0, 10.0, 10.0, 8.0, 11.0, 12.0, 14.0, 10.0, 8.0, 11.0, 12.0, 14.0, 12.0, 13.0, 15.0, 13.0, 8.0, 8.0, 11.0, 10.0, 11.0, 13.0, 11.0, 13.0, 15.0, 10.0, 8.0, 10.0, 9.0, 8.0, 11.0, 13.0, 11.0, 8.0, 11.0, 15.0, 11.0, 12.0, 17.0, 12.0, 11.0, 18.0, 14.0, 15.0, 16.0, 7.0, 11.0, 15.0, 16.0, 13.0, 13.0, 13.0, 0.0, 11.0, 15.0, 14.0, 11.0, 13.0, 16.0, 14.0, 12.0, 8.0, 13.0, 13.0, 14.0, 7.0, 10.0, 16.0, 10.0, 13.0, 10.0, 14.0, 8.0, 16.0, 13.0, 12.0, 14.0, 12.0, 14.0, 16.0, 15.0, 13.0, 13.0, 10.0, 14.0, 8.0, 10.0, 10.0, 11.0, 12.0, 10.0, 12.0, 14.0, 17.0, 13.0, 14.0, 16.0, 15.0, 13.0, 16.0, 9.0, 16.0, 15.0, 11.0, 11.0, 15.0, 14.0, 12.0, 15.0, 11.0, 16.0, 14.0, 14.0, 15.0, 14.0, 14.0, 14.0, 16.0, 15.0, 12.0, 12.0, 14.0, 15.0, 13.0, 14.0, 13.0, 17.0, 14.0, 13.0, 14.0, 13.0, 13.0, 12.0, 10.0, 15.0, 14.0, 12.0, 12.0, 14.0, 12.0, 14.0, 13.0, 15.0, 13.0, 14.0, 14.0, 12.0, 11.0, 15.0, 14.0, 14.0, 10.0], 'EOY': [16.0, 16.0, 16.0, 14.0, 10.0, 14.0, 16.0, 14.0, 15.0, 15.0, 15.0, 11.0, 11.0, 15.0, 10.0, 14.0, 17.0, 14.0, 9.0, 15.0, 14.0, 16.0, 14.0, 13.0, 11.0, 13.0, 12.0, 14.0, 15.0, 13.0, 14.0, 15.0, 12.0, 19.0, 9.0, 13.0, 11.0, 14.0, 17.0, 17.0, 14.0, 13.0, 14.0, 10.0, 16.0, 15.0, 12.0, 11.0, 12.0, 14.0, 15.0, 10.0, 15.0, 14.0, 14.0, 15.0, 18.0, 15.0, 10.0, 10.0, 15.0, 15.0, 13.0, 15.0, 19.0, 13.0, 18.0, 20.0, 21.0, 17.0, 18.0, 17.0, 18.0, 17.0, 12.0, 16.0, 15.0, 18.0, 19.0, 17.0, 20.0, 11.0, 18.0, 19.0, 11.0, 12.0, 17.0, 20.0, 17.0, 15.0, 13.0, 18.0, 14.0, 17.0, 12.0, 12.0, 16.0, 12.0, 14.0, 15.0, 14.0, 10.0, 20.0, 13.0, 18.0, 20.0, 11.0, 20.0, 17.0, 20.0, 13.0, 17.0, 15.0, 18.0, 14.0, 13.0, 13.0, 18.0, 10.0, 13.0, 12.0, 18.0, 20.0, 20.0, 16.0, 18.0, 15.0, 20.0, 22.0, 18.0, 21.0, 18.0, 18.0, 18.0, 17.0, 16.0, 19.0, 16.0, 20.0, 19.0, 19.0, 20.0, 20.0, 14.0, 18.0, 20.0, 20.0, 18.0, 16.0, 21.0, 20.0, 
18.0, 15.0, 14.0, 17.0, 19.0, 21.0, 14.0, 18.0, 15.0, 18.0, 21.0, 19.0, 17.0, 16.0, 16.0, 15.0, 20.0, 19.0, 16.0, 21.0, 17.0, 19.0, 15.0, 18.0, 20.0, 18.0, 20.0, 18.0, 16.0, 16.0]}
# Build the wide-form frame: one column per test window.
df = pd.DataFrame(data)
# Score-to-level mapping: pd.cut assigns each score to one of four
# right-closed intervals, (1, 11], (11, 13], (13, 15], (15, inf].
# NOTE: a score <= 1 lies outside every interval and is turned into NaN.
labels = ['Remedial', 'Below Proficient', 'Proficient', 'Advanced']
bins = [1, 11, 13, 15, np.inf]

# Overwrite each score column in place with its categorical level
# (separate new columns could be created instead).
for col in ('BOY', 'EOY'):
    df[col] = pd.cut(x=df[col], labels=labels, bins=bins, right=True)

# Long form: one row per (test window, level) observation.
dfm = pd.melt(df, var_name='Tested', value_name='Proficiency')

# Pin the category order so the x-axis follows `labels` instead of
# alphabetical order.
dfm['Proficiency'] = pd.Categorical(dfm['Proficiency'], categories=labels, ordered=True)

# Number of rows at each level within each test window.
level_counts = dfm.groupby('Tested')['Proficiency'].value_counts()
dfg = level_counts.reset_index(level=1, name='Size')
# The reset level can surface as an unnamed 'level_1' column; normalise it.
dfg = dfg.rename(columns={'level_1': 'Proficiency'})

# Percentage of each window's rows, rounded to one decimal place.
# NOTE: the denominator counts every row of the window, including rows
# whose level is NaN, so a window's percentages may sum to less than 100.
rows_per_window = dfm['Tested'].value_counts()
dfg['percent'] = dfg['Size'].div(rows_per_window).mul(100).round(1)

# Wide form for plotting: levels as rows, test windows as columns.
long_table = dfg.reset_index()
dfp = long_table.pivot(index='Proficiency', columns='Tested', values='percent')
# display(dfp)
Tested BOY EOY
Proficiency
Remedial 34.8 9.9
Below Proficient 28.7 12.7
Proficient 27.1 25.4
Advanced 8.8 51.9
# Grouped bar chart: one group per proficiency level (rows of dfp) and one
# bar per test window (columns of dfp).
ax = dfp.plot(kind='bar', figsize=(15, 5), rot=0, color=['orchid', 'teal'])

# formatting
# dfp already holds values on a 0-100 scale, which matches the default
# scale of PercentFormatter.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.set_ylabel('Students at Proficiency Level', fontsize=18)
ax.set_xlabel('')
ax.set_title('Bushwick Middle Change in Proficiency Levels', fontsize=25)
ax.set_xticklabels(ax.get_xticklabels(), fontsize=25)
ax.legend(fontsize=25)
_ = plt.yticks(fontsize=15)

# add bar labels: each container holds the bars of one dfp column
for container in ax.containers:
    ax.bar_label(container, fmt='%.1f%%', label_type='edge', fontsize=12)

# pad the spacing between the number and the edge of the figure
ax.margins(y=0.2)

# notebook-style echo of the plotted table (no effect when run as a script)
dfp
这篇关于如何按正确顺序绘制分组条形图的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持跟版网!