def pivot_table(self, values=None, index=None, columns=None, aggfunc='mean',
                fill_value=None, margins=False, dropna=True, margins_name='All'):
    '''
    Signature reference for the pandas DataFrame.pivot_table method.

    This is a documentation stub only: it has no implementation and
    returns None.

    values: column name(s) to aggregate. The original note says this
        defaults to all numeric columns.
        NOTE(review): that matches older pandas, which dropped columns it
        could not aggregate; confirm against the pandas version in use.
    index: column name(s) used as group keys along the rows of the
        resulting pivot table.
    columns: column name(s) used as group keys along the columns of the
        resulting pivot table.
    aggfunc: aggregation function or list of functions; defaults to 'mean'.
    fill_value: value that replaces missing entries in the resulting table.
    margins: when True, add a summary row and column computed with aggfunc
        (so with the default aggfunc they hold means, not sums).
    dropna: whether to drop columns whose entries are all NaN.
    margins_name: label of the summary row/column added when margins is
        True; defaults to 'All'.
    '''
# Load the tips dataset used by the examples.
tips = pd.read_csv('tips.csv')

# Only numeric columns can be averaged. Naming them explicitly keeps the
# example working on pandas >= 2.0, which raises instead of silently dropping
# non-numeric columns such as `time`.
numeric_cols = ['size', 'tip', 'total_bill']

# The next two statements produce the same table: a pivot table with only row
# keys is a group-by followed by the default 'mean' aggregation.
result1 = tips.pivot_table(values=numeric_cols, index=['day', 'smoker'])
groupby_result = tips.groupby(by=['day', 'smoker'])[numeric_cols].agg('mean')
print(result1)
'''
size tip total_bill
day smoker
Fri No 2.250000 2.812500 18.420000
Yes 2.066667 2.714000 16.813333
Sat No 2.555556 3.102889 19.661778
Yes 2.476190 2.875476 21.276667
Sun No 2.929825 3.167895 20.506667
Yes 2.578947 3.516842 24.120000
Thur No 2.488889 2.673778 17.113111
Yes 2.352941 3.030000 19.190588
'''
# Mean tip and party size for every (day, time) pair, split into one column
# per smoker value; margins=True adds the 'all_mean' summary row and column.
result1 = tips.pivot_table(
    values=['tip', 'size'],
    index=['day', 'time'],
    columns='smoker',
    margins=True,
    margins_name='all_mean',
)
'''
size tip
smoker No Yes all_mean No Yes all_mean
day time
Fri Dinner 2.000000 2.222222 2.166667 2.750000 3.003333 2.940000
Lunch 3.000000 1.833333 2.000000 3.000000 2.280000 2.382857
Sat Dinner 2.555556 2.476190 2.517241 3.102889 2.875476 2.993103
Sun Dinner 2.929825 2.578947 2.842105 3.167895 3.516842 3.255132
Thur Dinner 2.000000 NaN 2.000000 3.000000 NaN 3.000000
Lunch 2.500000 2.352941 2.459016 2.666364 3.030000 2.767705
all_mean 2.668874 2.408602 2.569672 2.991854 3.008710 2.998279
'''
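# A cross-tabulation is a special case of a pivot table: it counts how often
# each combination of group keys occurs (a frequency table).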
def crosstab(index, columns, values=None, rownames=None, colnames=None, aggfunc=None,
             margins=False, margins_name='All', dropna=True, normalize=False):
    '''
    Signature reference for pandas.crosstab.

    This is a documentation stub only: it has no implementation and
    returns None.

    index: values (or a list of them) used as group keys along the rows of
        the resulting table.
    columns: values (or a list of them) used as group keys along the
        columns of the resulting table.
    values: values to aggregate for each combination of keys; defaults to
        None.
        NOTE(review): the original note described this as "column names,
        defaulting to all numeric columns", which looks copied from
        pivot_table; pandas.crosstab expects an array here, used together
        with aggfunc. Confirm.
    rownames: names to give the row (index) levels.
    colnames: names to give the column levels.
    aggfunc: aggregation function; when left as None a frequency table is
        computed.
    margins: when True, add a summary row and column.
    margins_name: label of the summary row/column added when margins is
        True; defaults to 'All'.
    dropna: whether to drop columns whose entries are all NaN.
    normalize: whether to turn the counts into proportions; pandas.crosstab
        also accepts 'all', 'index' or 'columns'. Defaults to False.
    '''
# Count the rows for every (time, day) pair, one column per smoker value;
# margins=True appends the 'All' row and column.
result = pd.crosstab(
    index=[tips['time'], tips['day']],
    columns=tips['smoker'],
    margins=True,
)
'''
smoker No Yes All
time day
Dinner Fri 3 9 12
Sat 45 42 87
Sun 57 19 76
Thur 1 0 1
Lunch Fri 1 6 7
Thur 44 17 61
All 151 93 244
'''