import pandas as pd

# Sample data: two label columns (key1, key2) and two value columns.
# The values are deliberately kept as strings, exactly as in the example.
records = {
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': ['1', '3', '5', '7', '9'],
    'data2': ['2', '4', '6', '8', '10'],
}
df = pd.DataFrame(records)
print(df)

# Group on key1 with the default as_index=True: the group key becomes the
# index of the result, and count() reports the non-null entries per column.
by_key1 = df.groupby(['key1'])
grouped = by_key1.count()
print(grouped)
输出:
data1 data2 key1 key2
0 1 2 a one
1 3 4 a two
2 5 6 b one
3 7 8 b two
4 9 10 a one
data1 data2 key2
key1
a 3 3 3
b 2 2 2
改变groupby的参数as_index(设为False后,分组键key1不再作为索引,而是保留为普通列):
输入:
import pandas as pd

# Same sample data as before; values are kept as strings.
records = {
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': ['1', '3', '5', '7', '9'],
    'data2': ['2', '4', '6', '8', '10'],
}
df = pd.DataFrame(records)
print(df)

# as_index=False keeps key1 as an ordinary column, so the result carries a
# plain 0..n-1 integer index instead of being indexed by the group key.
by_key1 = df.groupby(['key1'], as_index=False)
grouped = by_key1.count()
print(grouped)
输出(注意标签的位置差异):
data1 data2 key1 key2
0 1 2 a one
1 3 4 a two
2 5 6 b one
3 7 8 b two
4 9 10 a one
key1 data1 data2 key2
0 a 3 3 3
1 b 2 2 2
最终统计的代码:
import pandas as pd

# Sample data: two label columns (key1, key2) and two value columns.
df = pd.DataFrame({
    'key1': list('aabba'),
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': ['1', '3', '5', '7', '9'],
    'data2': ['2', '4', '6', '8', '10'],
})
print(df)

# as_index=False keeps key1 as a regular column so it can be selected below.
grouped = df.groupby(['key1'], as_index=False).count()
print(grouped)

# Keep only the group key and one count column, then rename that *column*
# (not the index) to 'count'. An explicit rename mapping is used instead of
# assigning to .columns positionally, so the label cannot drift onto the
# wrong column if the selection order changes; `grouped` is left untouched.
group2 = grouped[['key1', 'data1']].rename(columns={'data1': 'count'})
print(group2)
输出:
data1 data2 key1 key2
0 1 2 a one
1 3 4 a two
2 5 6 b one
3 7 8 b two
4 9 10 a one
key1 data1 data2 key2
0 a 3 3 3
1 b 2 2 2
key1 count
0 a 3
1 b 2
Process finished with exit code 0