接上:对数据做基本统计分析(各统计量不再逐一解释,主要是均值、方差等的计算)
>>> df
num class name sex english sport army math possity space
0 10 1 mary woman 80 80 90 75.0 60 65
1 28 1 land man 80 50 69 70.0 58 70
2 15 2 asnx man 80 69 80 75.0 90 94
3 18 4 david man 90 80 86 85.0 95 62
4 19 2 gry woman 90 50 64 NaN 64 85
5 20 2 kitty woman 84 58 97 94.0 63 21
6 14 3 lury woman 98 77 88 0.0 55 40
7 21 1 facy man 55 68 94 52.0 36 48
>>> df['sport'].describe()
count 8.000000
mean 66.500000
std 12.535663
min 50.000000
25% 56.000000
50% 68.500000
75% 77.750000
max 80.000000
Name: sport, dtype: float64
>>> df.describe()
num class english ... math possity space
count 8.000000 8.000000 8.000000 ... 7.000000 8.000000 8.000000
mean 18.125000 2.000000 82.125000 ... 64.428571 65.125000 60.625000
std 5.383507 1.069045 12.699128 ... 31.245571 19.067081 23.820384
min 10.000000 1.000000 55.000000 ... 0.000000 36.000000 21.000000
25% 14.750000 1.000000 80.000000 ... 61.000000 57.250000 46.000000
50% 18.500000 2.000000 82.000000 ... 75.000000 61.500000 63.500000
75% 20.250000 2.250000 90.000000 ... 80.000000 70.500000 73.750000
max 28.000000 4.000000 98.000000 ... 94.000000 95.000000 94.000000
[8 rows x 8 columns]
>>> df['english'].size
8
>>> df['english'].max()
98
>>> df['english'].min()
55
>>> df['english'].sum()
657
>>> df['english'].mean()
82.125
>>> df['english'].var()
161.26785714285714
>>> df['english'].std()
12.699128204048383
>>> np.mean(df['english'])
82.125
>>> np.average(df['english'])
82.125
>>> df.median(numeric_only=True)
num 18.5
class 2.0
english 82.0
sport 68.5
army 87.0
math 75.0
possity 61.5
space 63.5
dtype: float64
>>> df.mode()
num class name sex english sport army math possity space
0 10 1.0 asnx man 80.0 50.0 64 75.0 36 21
1 14 2.0 david woman NaN 80.0 69 NaN 55 40
2 15 NaN facy NaN NaN NaN 80 NaN 58 48
3 18 NaN gry NaN NaN NaN 86 NaN 60 62
4 19 NaN kitty NaN NaN NaN 88 NaN 63 65
5 20 NaN land NaN NaN NaN 90 NaN 64 70
6 21 NaN lury NaN NaN NaN 94 NaN 90 85
7 28 NaN mary NaN NaN NaN 97 NaN 95 94
>>> df.mode()
num class name sex english sport army math possity space
0 10 1.0 asnx man 80.0 50.0 64 75.0 36 21
1 14 2.0 david woman NaN 80.0 69 NaN 55 40
2 15 NaN facy NaN NaN NaN 80 NaN 58 48
3 18 NaN gry NaN NaN NaN 86 NaN 60 62
4 19 NaN kitty NaN NaN NaN 88 NaN 63 65
5 20 NaN land NaN NaN NaN 90 NaN 64 70
6 21 NaN lury NaN NaN NaN 94 NaN 90 85
7 28 NaN mary NaN NaN NaN 97 NaN 95 94
>>> df
num class name sex english sport army math possity space
0 10 1 mary woman 80 80 90 75.0 60 65
1 28 1 land man 80 50 69 70.0 58 70
2 15 2 asnx man 80 69 80 75.0 90 94
3 18 4 david man 90 80 86 85.0 95 62
4 19 2 gry woman 90 50 64 NaN 64 85
5 20 2 kitty woman 84 58 97 94.0 63 21
6 14 3 lury woman 98 77 88 0.0 55 40
7 21 1 facy man 55 68 94 52.0 36 48
>>> df.groupby('class')[['english','sport','army']].mean()
english sport army
class
1 71.666667 66.0 84.333333
2 84.666667 59.0 80.333333
3 98.000000 77.0 88.000000
4 90.000000 80.0 86.000000
>>> df.groupby(['class','sex'])['english'].agg(total='sum', number='size', mean='mean', var='var')
total number mean var
class sex
1 man 135 2 67.5 312.5
woman 80 1 80.0 NaN
2 man 80 1 80.0 NaN
woman 174 2 87.0 18.0
3 woman 98 1 98.0 NaN
4 man 90 1 90.0 NaN
>>> #建立透视表
>>> df.drop(columns='sex').pivot_table(index=['class','name'])
army english math num possity space sport
class name
1 facy 94 55 52.0 21 36 48 68
land 69 80 70.0 28 58 70 50
mary 90 80 75.0 10 60 65 80
2 asnx 80 80 75.0 15 90 94 69
gry 64 90 NaN 19 64 85 50
kitty 97 84 94.0 20 63 21 58
3 lury 88 98 0.0 14 55 40 77
4 david 86 90 85.0 18 95 62 80
>>> df
num class name sex english sport army math possity space
0 10 1 mary woman 80 80 90 75.0 60 65
1 28 1 land man 80 50 69 70.0 58 70
2 15 2 asnx man 80 69 80 75.0 90 94
3 18 4 david man 90 80 86 85.0 95 62
4 19 2 gry woman 90 50 64 NaN 64 85
5 20 2 kitty woman 84 58 97 94.0 63 21
6 14 3 lury woman 98 77 88 0.0 55 40
7 21 1 facy man 55 68 94 52.0 36 48
>>> #相关系数
>>> df['english'].corr(df['sport'])
0.0785215353368861
>>> df['english'].corr(df['army'])
-0.28518424251841296
>>> df.loc[:,['english','sport','army','math','possity','space']].corr()
english sport army math possity space
english 1.000000 0.078522 -0.285184 -0.210888 0.486667 0.020484
sport 0.078522 1.000000 0.604026 -0.275197 0.239372 -0.140894
army -0.285184 0.604026 1.000000 -0.010708 -0.191855 -0.744345
math -0.210888 -0.275197 -0.010708 1.000000 0.449533 0.180691
possity 0.486667 0.239372 -0.191855 0.449533 1.000000 0.445185
space 0.020484 -0.140894 -0.744345 0.180691 0.445185 1.000000