Series
Series的创建
import pandas as pd
import numpy as np
pd.Series(np.arange(9))
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
dtype: int32
pd.Series([11,22,33,44],index=[1,2,3,4])
1 11
2 22
3 33
4 44
dtype: int64
# Build a Series from a dict: the keys become the index labels,
# the values become the data.
color_count = pd.Series(data={"red": 10, "green": 20, "blue": 140})
color_count
red 10
green 20
blue 140
dtype: int64
Series的属性
color_count.index
Index(['red', 'green', 'blue'], dtype='object')
color_count.values
array([ 10, 20, 140], dtype=int64)
# Positional access goes through .iloc: color_count has string labels, and
# using an integer key with [] as a position is deprecated in recent pandas.
color_count.iloc[0]
10
color_count.iloc[1]
20
DataFrame
DataFrame创建
pd.DataFrame(np.random.randn(2,3))
|  | 0 | 1 | 2 |
|---|---|---|---|
| 0 | -1.691948 | -1.686023 | -1.712956 |
| 1 | 0.942306 | -0.128536 | -0.652079 |
# Random integer scores: 10 rows x 5 columns, each value drawn from [40, 100).
score = np.random.randint(low=40, high=100, size=(10, 5))
score
array([[71, 82, 53, 41, 66],
[93, 95, 85, 46, 55],
[47, 94, 99, 70, 91],
[64, 77, 78, 84, 58],
[54, 72, 91, 75, 51],
[60, 95, 88, 67, 78],
[99, 62, 75, 67, 95],
[69, 83, 65, 42, 75],
[97, 99, 89, 77, 44],
[75, 46, 87, 60, 83]])
score_df = pd.DataFrame(score)
score_df
|  | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
| 0 | 71 | 82 | 53 | 41 | 66 |
| 1 | 93 | 95 | 85 | 46 | 55 |
| 2 | 47 | 94 | 99 | 70 | 91 |
| 3 | 64 | 77 | 78 | 84 | 58 |
| 4 | 54 | 72 | 91 | 75 | 51 |
| 5 | 60 | 95 | 88 | 67 | 78 |
| 6 | 99 | 62 | 75 | 67 | 95 |
| 7 | 69 | 83 | 65 | 42 | 75 |
| 8 | 97 | 99 | 89 | 77 | 44 |
| 9 | 75 | 46 | 87 | 60 | 83 |
# Column labels: one subject name per column of the score array.
subjects = ["语文", "数学", "英语", "政治", "体育"]
# Row labels: "student0", "student1", ... one per row of score_df.
stu = [f"student{i}" for i in range(len(score_df))]
# Same score values, now with named rows and columns.
data = pd.DataFrame(score, index=stu, columns=subjects)
stu
['student0',
'student1',
'student2',
'student3',
'student4',
'student5',
'student6',
'student7',
'student8',
'student9']
data
|  | 语文 | 数学 | 英语 | 政治 | 体育 |
|---|---|---|---|---|---|
| student0 | 71 | 82 | 53 | 41 | 66 |
| student1 | 93 | 95 | 85 | 46 | 55 |
| student2 | 47 | 94 | 99 | 70 | 91 |
| student3 | 64 | 77 | 78 | 84 | 58 |
| student4 | 54 | 72 | 91 | 75 | 51 |
| student5 | 60 | 95 | 88 | 67 | 78 |
| student6 | 99 | 62 | 75 | 67 | 95 |
| student7 | 69 | 83 | 65 | 42 | 75 |
| student8 | 97 | 99 | 89 | 77 | 44 |
| student9 | 75 | 46 | 87 | 60 | 83 |
DataFrame的属性
data.shape
(10, 5)
data.index
Index(['student0', 'student1', 'student2', 'student3', 'student4', 'student5',
'student6', 'student7', 'student8', 'student9'],
dtype='object')
data.values
array([[71, 82, 53, 41, 66],
[93, 95, 85, 46, 55],
[47, 94, 99, 70, 91],
[64, 77, 78, 84, 58],
[54, 72, 91, 75, 51],
[60, 95, 88, 67, 78],
[99, 62, 75, 67, 95],
[69, 83, 65, 42, 75],
[97, 99, 89, 77, 44],
[75, 46, 87, 60, 83]])
data.T
|  | student0 | student1 | student2 | student3 | student4 | student5 | student6 | student7 | student8 | student9 |
|---|---|---|---|---|---|---|---|---|---|---|
| 语文 | 71 | 93 | 47 | 64 | 54 | 60 | 99 | 69 | 97 | 75 |
| 数学 | 82 | 95 | 94 | 77 | 72 | 95 | 62 | 83 | 99 | 46 |
| 英语 | 53 | 85 | 99 | 78 | 91 | 88 | 75 | 65 | 89 | 87 |
| 政治 | 41 | 46 | 70 | 84 | 75 | 67 | 67 | 42 | 77 | 60 |
| 体育 | 66 | 55 | 91 | 58 | 51 | 78 | 95 | 75 | 44 | 83 |
data.head()
|  | 语文 | 数学 | 英语 | 政治 | 体育 |
|---|---|---|---|---|---|
| student0 | 71 | 82 | 53 | 41 | 66 |
| student1 | 93 | 95 | 85 | 46 | 55 |
| student2 | 47 | 94 | 99 | 70 | 91 |
| student3 | 64 | 77 | 78 | 84 | 58 |
| student4 | 54 | 72 | 91 | 75 | 51 |
data.tail()
|  | 语文 | 数学 | 英语 | 政治 | 体育 |
|---|---|---|---|---|---|
| student5 | 60 | 95 | 88 | 67 | 78 |
| student6 | 99 | 62 | 75 | 67 | 95 |
| student7 | 69 | 83 | 65 | 42 | 75 |
| student8 | 97 | 99 | 89 | 77 | 44 |
| student9 | 75 | 46 | 87 | 60 | 83 |
DataFrame索引值的设置
# New row labels: "同学_0", "同学_1", ... one per row of score_df.
stu = [f"同学_{i}" for i in range(len(score_df))]
# Assigning to .index replaces all row labels of data at once.
data.index = stu
data
|  | 语文 | 数学 | 英语 | 政治 | 体育 |
|---|---|---|---|---|---|
| 同学_0 | 71 | 82 | 53 | 41 | 66 |
| 同学_1 | 93 | 95 | 85 | 46 | 55 |
| 同学_2 | 47 | 94 | 99 | 70 | 91 |
| 同学_3 | 64 | 77 | 78 | 84 | 58 |
| 同学_4 | 54 | 72 | 91 | 75 | 51 |
| 同学_5 | 60 | 95 | 88 | 67 | 78 |
| 同学_6 | 99 | 62 | 75 | 67 | 95 |
| 同学_7 | 69 | 83 | 65 | 42 | 75 |
| 同学_8 | 97 | 99 | 89 | 77 | 44 |
| 同学_9 | 75 | 46 | 87 | 60 | 83 |
data.reset_index()
|  | index | 语文 | 数学 | 英语 | 政治 | 体育 |
|---|---|---|---|---|---|---|
| 0 | 同学_0 | 71 | 82 | 53 | 41 | 66 |
| 1 | 同学_1 | 93 | 95 | 85 | 46 | 55 |
| 2 | 同学_2 | 47 | 94 | 99 | 70 | 91 |
| 3 | 同学_3 | 64 | 77 | 78 | 84 | 58 |
| 4 | 同学_4 | 54 | 72 | 91 | 75 | 51 |
| 5 | 同学_5 | 60 | 95 | 88 | 67 | 78 |
| 6 | 同学_6 | 99 | 62 | 75 | 67 | 95 |
| 7 | 同学_7 | 69 | 83 | 65 | 42 | 75 |
| 8 | 同学_8 | 97 | 99 | 89 | 77 | 44 |
| 9 | 同学_9 | 75 | 46 | 87 | 60 | 83 |
data.reset_index(drop=True)
|  | 语文 | 数学 | 英语 | 政治 | 体育 |
|---|---|---|---|---|---|
| 0 | 71 | 82 | 53 | 41 | 66 |
| 1 | 93 | 95 | 85 | 46 | 55 |
| 2 | 47 | 94 | 99 | 70 | 91 |
| 3 | 64 | 77 | 78 | 84 | 58 |
| 4 | 54 | 72 | 91 | 75 | 51 |
| 5 | 60 | 95 | 88 | 67 | 78 |
| 6 | 99 | 62 | 75 | 67 | 95 |
| 7 | 69 | 83 | 65 | 42 | 75 |
| 8 | 97 | 99 | 89 | 77 | 44 |
| 9 | 75 | 46 | 87 | 60 | 83 |
# Small sales table: one list of values per column.
df = pd.DataFrame(
    {
        "month": [1, 4, 7, 10],
        "year": [2012, 2014, 2013, 2014],
        "sale": [55, 40, 84, 31],
    }
)
# Show the frame with the "year" column used as the row index; the result is
# not assigned, so df itself keeps its default integer index.
df.set_index(keys="year")
|  | month | sale |
|---|---|---|
| year |  |  |
| 2012 | 1 | 55 |
| 2014 | 4 | 40 |
| 2013 | 7 | 84 |
| 2014 | 10 | 31 |
MultiIndex与Panel
略(注:Panel 已在 pandas 1.0 中被移除,多维数据请改用 MultiIndex。)