# 1. 考察长表变宽表的知识点
df = pd.read_csv('data/drugs.csv').sort_values(['State','COUNTY','SubstanceName'],ignore_index=True)
df_pivot = df.pivot(index=['State','COUNTY','SubstanceName'],columns='YYYY',values='DrugReports')
# 需要重置索引,以符合题目的要求
df_1 = df_pivot.reset_index()
===================================
# 2. 考察melt知识点
df_2 = df_1.melt(id_vars=['State','COUNTY','SubstanceName'], var_name='YYYY',value_name='DrugReports' )
# 按'State','COUNTY','SubstanceName'排序
df_2 = df_2.sort_values(['State','COUNTY','SubstanceName'],ignore_index=True)
# 删掉YYYY空值所在行
df_2.dropna(subset=['DrugReports'],inplace=True)
===================================
# 3.
# pivot_table 这个比较简单
df.pivot_table(index='YYYY', columns='State',values='DrugReports',aggfunc='sum')
# groupby+unstack
df_3 = df.groupby(['YYYY','State'])['DrugReports'].sum()
# groupby多个列名时记得加[]
df_3.unstack()
答案思路
# 1.
res = df.pivot(index=['State','COUNTY','SubstanceName'], columns='YYYY',values='DrugReports').reset_index().rename_axis(columns={
'YYYY':''})
# 我忘记rename_axis了!!
===================================
# 2.
res_melted = res.melt(id_vars = ['State','COUNTY','SubstanceName'], value_vars = res.columns[-8:],var_name = 'YYYY',value_name = 'DrugReports').dropna(subset=['DrugReports'])
# value_vars = res.columns[-8:] 妙啊~~ 这里当时写时并不知道应该写什么。原来如此。
res_melted = res_melted[df.columns].sort_values(['State','COUNTY','SubstanceName'],ignore_index=True).astype({
'YYYY':'int64', 'DrugReports':'int64'})
心得
[]
rename_axis
value_vars = res.columns[-8:]
的写法从功能上看, melt 方法应当属于 wide_to_long 的一种特殊情况,即 stubnames 只有一类。请使用 wide_to_long 生成 melt 一节中的 df_melted 。(提示:对列名增加适当的前缀)
df = pd.DataFrame({
'Class':[1,2],'Name':['San Zhang', 'Si Li'],'Chinese':[80, 90],'Math':[80, 75]})
df_melted = df.melt(id_vars = ['Class', 'Name'],value_vars = ['Chinese', 'Math'], var_name = 'Subject',value_name = 'Grade')
df.columns=['Class','Name','Grade_Chinese','Grade_Math']
df_w_to_l = pd.wide_to_long(df, stubnames='Grade', i=['Class','Name'], j='Subject',sep='_', suffix='.+')
df_w_to_l.reset_index().sort_values('Subject')
写在后面
吓死我了…差一点就以为学不完了,多次压抑住了想"参考"下答案的念头,吊着一口仙气把题独立写完hhh,以后一定一定要有提前量!! = =