参考链接:link
# Walkthrough of pandas ``groupby``: build a small student table, group it by
# one and by two keys, then inspect group sizes, individual groups and a few
# aggregations.
import pandas as pd

SEPARATOR = '====' * 10

data = {
    'Id': [0, 1, 2, 3, 4, 5, 6, 7],
    'Name': ['Alen', 'Bob', 'Cidy', 'Daniel', 'Ellen', 'Frankie', 'Gate', 'Hebe'],
    'Gender': ['Male', 'Male', 'Female', 'Male', 'Female', 'Male', 'Male', 'Female'],
    'Age': [18, 19, 18, 20, 17, 21, 20, 22],
    'Score': [80, 90, 93, 87, 96, 100, 88, 98],
    # NOTE: a 'Timestamp' column used to be sketched here but stayed disabled;
    # it only had 6 values while every other column has 8, so it could not be
    # part of this DataFrame as written.
}
df = pd.DataFrame(data)
# Show the source table first so the grouped results below can be compared
# against it.
print(df)

# Group once by a single key and once by a pair of keys.
grouped = df.groupby('Gender')
grouped_muti = df.groupby(['Gender', 'Age'])

# A groupby call returns a lazy DataFrameGroupBy object, not a DataFrame.
print(type(grouped))
print(grouped)

# size() counts the rows in each group and returns a Series indexed by the
# group key(s).
print(SEPARATOR)
print(grouped.size())
print(SEPARATOR)
print(grouped_muti.size())

# get_group() extracts the rows of one group as a plain DataFrame; for a
# multi-key grouping the key is a tuple.
print(SEPARATOR)
print(grouped.get_group('Female'))
print(SEPARATOR)
print(grouped_muti.get_group(('Female', 17)))

# reset_index() renumbers the rows from 0 and keeps the old labels in a new
# 'index' column. `grouped` still refers to the original table even though
# `df` is rebound here.
df = grouped.get_group('Female').reset_index()
print(SEPARATOR)
print(df)

# Aggregations such as count()/max()/mean() return a DataFrame whose index is
# the grouping key (Gender) and whose columns are the remaining columns.
print(SEPARATOR)
print(grouped.count())

# Restrict the aggregation to specific columns by selecting them on the
# groupby object before aggregating.
print(SEPARATOR)
print(grouped[['Age', 'Score']].max())

# Without a column selection every non-key column is aggregated.
print(SEPARATOR)
print(grouped.max())

# mean() is only defined for numeric data. Selecting the numeric columns first
# avoids the TypeError that recent pandas versions raise when the string
# column 'Name' is included in the aggregation.
mean_by_gender = grouped[['Age', 'Score']].mean()
print(SEPARATOR)
print(mean_by_gender)
输出结果值:
Id Name Gender Age Score
0 0 Alen Male 18 80
1 1 Bob Male 19 90
2 2 Cidy Female 18 93
3 3 Daniel Male 20 87
4 4 Ellen Female 17 96
5 5 Frankie Male 21 100
6 6 Gate Male 20 88
7 7 Hebe Female 22 98
<class 'pandas.core.groupby.generic.DataFrameGroupBy'>
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fba4045a710>
========================================
Gender
Female 3
Male 5
dtype: int64
========================================
Gender Age
Female 17 1
18 1
22 1
Male 18 1
19 1
20 2
21 1
dtype: int64
========================================
Id Name Gender Age Score
2 2 Cidy Female 18 93
4 4 Ellen Female 17 96
7 7 Hebe Female 22 98
========================================
Id Name Gender Age Score
4 4 Ellen Female 17 96
========================================
index Id Name Gender Age Score
0 2 2 Cidy Female 18 93
1 4 4 Ellen Female 17 96
2 7 7 Hebe Female 22 98
========================================
Id Name Age Score
Gender
Female 3 3 3 3
Male 5 5 5 5
========================================
Age Score
Gender
Female 22 98
Male 21 100
========================================
Id Name Age Score
Gender
Female 7 Hebe 22 98
Male 6 Gate 21 100
========================================
Age Score
Gender
Female 19.0 95.666667
Male 19.6 89.000000
Process finished with exit code 0