1. 基础:数据导入,读取,索引
1.1 数据导入,读取
import pandas as pa
titan=pa.read_csv('./titanic_train911.csv')
titan.head(5)
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
titan.info()
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId 891 non-null int64
Survived 891 non-null int64
Pclass 891 non-null int64
Name 891 non-null object
Sex 891 non-null object
Age 714 non-null float64
SibSp 891 non-null int64
Parch 891 non-null int64
Ticket 891 non-null object
Fare 891 non-null float64
Cabin 204 non-null object
Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
titan.columns
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
titan.dtypes
PassengerId int64
Survived int64
Pclass int64
Name object
Sex object
Age float64
SibSp int64
Parch int64
Ticket object
Fare float64
Cabin object
Embarked object
dtype: object
titan.values
array([[1, 0, 3, ..., 7.25, nan, 'S'],
[2, 1, 1, ..., 71.2833, 'C85', 'C'],
[3, 1, 3, ..., 7.925, nan, 'S'],
...,
[889, 0, 3, ..., 23.45, nan, 'S'],
[890, 1, 1, ..., 30.0, 'C148', 'C'],
[891, 0, 3, ..., 7.75, nan, 'Q']], dtype=object)
titan.index
RangeIndex(start=0, stop=891, step=1)
titan=titan.set_index('PassengerId')
titan.head(8)
|
Survived |
Pclass |
Age |
SibSp |
Parch |
Ticket |
Cabin |
Embarked |
PassengerId |
|
|
|
|
|
|
|
|
1 |
0 |
3 |
22.0 |
1 |
0 |
A/5 21171 |
NaN |
S |
2 |
1 |
1 |
38.0 |
1 |
0 |
PC 17599 |
C85 |
C |
3 |
1 |
3 |
26.0 |
0 |
0 |
STON/O2. 3101282 |
NaN |
S |
4 |
1 |
1 |
35.0 |
1 |
0 |
113803 |
C123 |
S |
5 |
0 |
3 |
35.0 |
0 |
0 |
373450 |
NaN |
S |
6 |
0 |
3 |
NaN |
0 |
0 |
330877 |
NaN |
Q |
7 |
0 |
1 |
54.0 |
0 |
0 |
17463 |
E46 |
S |
8 |
0 |
3 |
2.0 |
3 |
1 |
349909 |
NaN |
S |
titan.describe()
|
Survived |
Pclass |
Age |
SibSp |
Parch |
count |
891.000000 |
891.000000 |
714.000000 |
891.000000 |
891.000000 |
mean |
0.383838 |
2.308642 |
29.699118 |
0.523008 |
0.381594 |
std |
0.486592 |
0.836071 |
14.526497 |
1.102743 |
0.806057 |
min |
0.000000 |
1.000000 |
0.420000 |
0.000000 |
0.000000 |
25% |
0.000000 |
2.000000 |
20.125000 |
0.000000 |
0.000000 |
50% |
0.000000 |
3.000000 |
28.000000 |
0.000000 |
0.000000 |
75% |
1.000000 |
3.000000 |
38.000000 |
1.000000 |
0.000000 |
max |
1.000000 |
3.000000 |
80.000000 |
8.000000 |
6.000000 |
1.2 索引
titan['Survived'][:8]
PassengerId
1 0
2 1
3 1
4 1
5 0
6 0
7 0
8 0
Name: Survived, dtype: int64
findlist=['Survived','Age']
titan[findlist][:8]
|
Survived |
Age |
PassengerId |
|
|
1 |
0 |
22.0 |
2 |
1 |
38.0 |
3 |
1 |
26.0 |
4 |
1 |
35.0 |
5 |
0 |
35.0 |
6 |
0 |
NaN |
7 |
0 |
54.0 |
8 |
0 |
2.0 |
titan=titan.set_index('Age')
titan.loc[38]
|
Survived |
Pclass |
SibSp |
Parch |
Ticket |
Cabin |
Embarked |
Age |
|
|
|
|
|
|
|
38.0 |
1 |
1 |
1 |
0 |
PC 17599 |
C85 |
C |
38.0 |
1 |
3 |
1 |
5 |
347077 |
NaN |
S |
38.0 |
1 |
1 |
0 |
0 |
113572 |
B28 |
NaN |
38.0 |
0 |
3 |
0 |
0 |
349249 |
NaN |
S |
38.0 |
1 |
1 |
1 |
0 |
19943 |
C93 |
S |
38.0 |
0 |
1 |
0 |
1 |
PC 17582 |
C91 |
S |
38.0 |
0 |
2 |
0 |
0 |
237671 |
NaN |
S |
38.0 |
0 |
3 |
0 |
0 |
SOTON/O.Q. 3101306 |
NaN |
S |
38.0 |
0 |
3 |
0 |
0 |
315089 |
NaN |
S |
38.0 |
1 |
1 |
0 |
0 |
PC 17757 |
C45 |
C |
38.0 |
0 |
1 |
0 |
0 |
19972 |
NaN |
S |
titan.iloc[666]
Survived 0
Pclass 2
SibSp 0
Parch 0
Ticket 234686
Cabin NaN
Embarked S
Name: 25.0, dtype: object
titan.iloc[666:668,2:5]
|
SibSp |
Parch |
Ticket |
Age |
|
|
|
25.0 |
0 |
0 |
234686 |
NaN |
0 |
0 |
312993 |
titan=pa.read_csv('./titanic_train911.csv')
titan.head()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
titan=titan.set_index('Name')
titan.head()
|
PassengerId |
Survived |
Pclass |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
Name |
|
|
|
|
|
|
|
|
|
|
|
Braund, Mr. Owen Harris |
1 |
0 |
3 |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) |
2 |
1 |
1 |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
Heikkinen, Miss. Laina |
3 |
1 |
3 |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
4 |
1 |
1 |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
Allen, Mr. William Henry |
5 |
0 |
3 |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
titan[titan['Age']>18][2:8]
|
PassengerId |
Survived |
Pclass |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
Name |
|
|
|
|
|
|
|
|
|
|
|
Heikkinen, Miss. Laina |
3 |
1 |
3 |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
4 |
1 |
1 |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
Allen, Mr. William Henry |
5 |
0 |
3 |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
McCarthy, Mr. Timothy J |
7 |
0 |
1 |
male |
54.0 |
0 |
0 |
17463 |
51.8625 |
E46 |
S |
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) |
9 |
1 |
3 |
female |
27.0 |
0 |
2 |
347742 |
11.1333 |
NaN |
S |
Bonnell, Miss. Elizabeth |
12 |
1 |
1 |
female |
58.0 |
0 |
0 |
113783 |
26.5500 |
C103 |
S |
titan[titan['Age']==18][:8]
|
PassengerId |
Survived |
Pclass |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
Name |
|
|
|
|
|
|
|
|
|
|
|
Vander Planke, Miss. Augusta Maria |
39 |
0 |
3 |
female |
18.0 |
2 |
0 |
345764 |
18.0000 |
NaN |
S |
Arnold-Franchi, Mrs. Josef (Josefine Franchi) |
50 |
0 |
3 |
female |
18.0 |
1 |
0 |
349237 |
17.8000 |
NaN |
S |
Andrew, Mr. Edgardo Samuel |
145 |
0 |
2 |
male |
18.0 |
0 |
0 |
231945 |
11.5000 |
NaN |
S |
Klasen, Mr. Klas Albin |
176 |
0 |
3 |
male |
18.0 |
1 |
1 |
350404 |
7.8542 |
NaN |
S |
Cohen, Mr. Gurshon "Gus" |
205 |
1 |
3 |
male |
18.0 |
0 |
0 |
A/5 3540 |
8.0500 |
NaN |
S |
Fahlstrom, Mr. Arne Jonas |
229 |
0 |
2 |
male |
18.0 |
0 |
0 |
236171 |
13.0000 |
NaN |
S |
Ryerson, Miss. Emily Borie |
312 |
1 |
1 |
female |
18.0 |
2 |
2 |
PC 17608 |
262.3750 |
B57 B59 B63 B66 |
C |
Wiklund, Mr. Jakob Alfred |
372 |
0 |
3 |
male |
18.0 |
1 |
0 |
3101267 |
6.4958 |
NaN |
S |
1.3 排序
titan.groupby('Sex').sum()
|
PassengerId |
Survived |
Pclass |
Age |
SibSp |
Parch |
Fare |
Sex |
|
|
|
|
|
|
|
female |
135343 |
233 |
678 |
7286.00 |
218 |
204 |
13966.6628 |
male |
262043 |
109 |
1379 |
13919.17 |
248 |
136 |
14727.2865 |
titan.groupby('Sex')['Age'].mean()
Sex
female 27.915709
male 30.726645
Name: Age, dtype: float64
2 统计计算
mac=pa.read_csv('macrodata.csv')
mac.head()
|
year |
quarter |
realgdp |
realcons |
realinv |
realgovt |
realdpi |
cpi |
m1 |
tbilrate |
unemp |
pop |
infl |
realint |
0 |
1959.0 |
1.0 |
2710.349 |
1707.4 |
286.898 |
470.045 |
1886.9 |
28.98 |
139.7 |
2.82 |
5.8 |
177.146 |
0.00 |
0.00 |
1 |
1959.0 |
2.0 |
2778.801 |
1733.7 |
310.859 |
481.301 |
1919.7 |
29.15 |
141.7 |
3.08 |
5.1 |
177.830 |
2.34 |
0.74 |
2 |
1959.0 |
3.0 |
2775.488 |
1751.8 |
289.226 |
491.260 |
1916.4 |
29.35 |
140.5 |
3.82 |
5.3 |
178.657 |
2.74 |
1.09 |
3 |
1959.0 |
4.0 |
2785.204 |
1753.7 |
299.356 |
484.052 |
1931.3 |
29.37 |
140.0 |
4.33 |
5.6 |
179.386 |
0.27 |
4.06 |
4 |
1960.0 |
1.0 |
2847.699 |
1770.5 |
331.722 |
462.199 |
1955.5 |
29.54 |
139.6 |
3.50 |
5.2 |
180.007 |
2.31 |
1.19 |
mac['realinv'].sum()
205611.364
mac.sum(axis=0)
year 402727.000
quarter 506.000
realgdp 1465897.896
realcons 979534.500
realinv 205611.364
realgovt 134655.714
realdpi 1078039.800
cpi 21330.385
m1 135589.300
tbilrate 1078.290
unemp 1194.600
pop 48664.003
infl 804.150
realint 271.310
dtype: float64
mac.mean(axis=1)
0 669.717000
1 681.807214
2 681.973643
3 684.259143
4 692.140500
5 691.066643
6 692.083643
7 687.265786
8 690.154643
9 699.882857
10 708.679857
11 719.592571
12 728.865429
13 734.630857
14 740.307857
15 743.272714
16 748.771071
17 754.663357
18 765.761786
19 770.859429
20 783.005000
21 792.656357
22 801.510857
23 804.556643
24 818.246500
25 825.562357
26 841.191571
27 857.239786
28 871.669929
29 873.893786
...
173 2461.527500
174 2469.052643
175 2475.373643
176 2485.468214
177 2513.350214
178 2550.004357
179 2570.842071
180 2588.743786
181 2611.854143
182 2632.526786
183 2659.255071
184 2667.557429
185 2680.025786
186 2699.507571
187 2714.498857
188 2750.110786
189 2761.561000
190 2766.796214
191 2785.227357
192 2794.254500
193 2807.221143
194 2824.258929
195 2828.893143
196 2820.595929
197 2840.309786
198 2816.360214
199 2802.594286
200 2767.597071
201 2774.576071
202 2788.397500
Length: 203, dtype: float64
mac.sum(axis=1)
0 9376.038
1 9545.301
2 9547.631
3 9579.628
4 9689.967
5 9674.933
6 9689.171
7 9621.721
8 9662.165
9 9798.360
10 9921.518
11 10074.296
12 10204.116
13 10284.832
14 10364.310
15 10405.818
16 10482.795
17 10565.287
18 10720.665
19 10792.032
20 10962.070
21 11097.189
22 11221.152
23 11263.793
24 11455.451
25 11557.873
26 11776.682
27 12001.357
28 12203.379
29 12234.513
...
173 34461.385
174 34566.737
175 34655.231
176 34796.555
177 35186.903
178 35700.061
179 35991.789
180 36242.413
181 36565.958
182 36855.375
183 37229.571
184 37345.804
185 37520.361
186 37793.106
187 38002.984
188 38501.551
189 38661.854
190 38735.147
191 38993.183
192 39119.563
193 39301.096
194 39539.625
195 39604.504
196 39488.343
197 39764.337
198 39429.043
199 39236.320
200 38746.359
201 38844.065
202 39037.565
Length: 203, dtype: float64
mac.cov()
|
year |
quarter |
realgdp |
realcons |
realinv |
realgovt |
realdpi |
cpi |
m1 |
tbilrate |
unemp |
pop |
infl |
realint |
year |
215.702580 |
-0.186558 |
4.647762e+04 |
3.329587e+04 |
8.096633e+03 |
1782.201530 |
3.510097e+04 |
889.385028 |
6.510849e+03 |
-7.164087 |
0.914942 |
547.621880 |
-6.729093 |
-0.093007 |
quarter |
-0.186558 |
1.251183 |
2.187058e+01 |
1.421728e+01 |
4.628355e+00 |
1.913510 |
1.622233e+01 |
0.345575 |
2.667532e+00 |
0.035261 |
-0.006797 |
0.348573 |
-0.008926 |
0.065246 |
realgdp |
46477.622176 |
21.870583 |
1.033594e+07 |
7.431573e+06 |
1.839145e+06 |
393386.359633 |
7.783769e+06 |
194353.924345 |
1.432731e+06 |
-2484.948926 |
-301.503958 |
119402.877291 |
-2006.862002 |
-416.061514 |
realcons |
33295.865483 |
14.217276 |
7.431573e+06 |
5.351571e+06 |
1.321452e+06 |
283735.957690 |
5.601336e+06 |
139453.560568 |
1.029089e+06 |
-1906.458928 |
-189.504759 |
85662.398005 |
-1496.161105 |
-367.448608 |
realinv |
8096.633424 |
4.628355 |
1.839145e+06 |
1.321452e+06 |
3.423447e+05 |
65506.893003 |
1.377683e+06 |
33957.136254 |
2.486486e+05 |
-448.762616 |
-158.177573 |
20952.267936 |
-366.256662 |
-72.505352 |
realgovt |
1782.201530 |
1.913510 |
3.933864e+05 |
2.837360e+05 |
6.550689e+04 |
19842.569221 |
2.977704e+05 |
7619.553691 |
5.711414e+04 |
-119.590326 |
-5.058225 |
4539.121687 |
-135.480431 |
18.526286 |
realdpi |
35100.969912 |
16.222333 |
7.783769e+06 |
5.601336e+06 |
1.377683e+06 |
297770.416433 |
5.873430e+06 |
146538.249776 |
1.078988e+06 |
-1834.742182 |
-108.863828 |
90094.189290 |
-1471.848862 |
-315.850267 |
cpi |
889.385028 |
0.345575 |
1.943539e+05 |
1.394536e+05 |
3.395714e+04 |
7619.553691 |
1.465382e+05 |
3755.100856 |
2.759093e+04 |
-40.682744 |
2.210069 |
2268.750456 |
-44.328471 |
4.689722 |
m1 |
6510.849454 |
2.667532 |
1.432731e+06 |
1.029089e+06 |
2.486486e+05 |
57114.142869 |
1.078988e+06 |
27590.930981 |
2.073403e+05 |
-421.744940 |
-10.890270 |
16668.489012 |
-395.866750 |
-18.638259 |
tbilrate |
-7.164087 |
0.035261 |
-2.484949e+03 |
-1.906459e+03 |
-4.487626e+02 |
-119.590326 |
-1.834742e+03 |
-40.682744 |
-4.217449e+02 |
7.857206 |
0.940225 |
-22.802668 |
5.658413 |
2.233145 |
unemp |
0.914942 |
-0.006797 |
-3.015040e+02 |
-1.895048e+02 |
-1.581776e+02 |
-5.058225 |
-1.088638e+02 |
2.210069 |
-1.089027e+01 |
0.940225 |
2.127439 |
0.487778 |
0.308501 |
0.632511 |
pop |
547.621880 |
0.348573 |
1.194029e+05 |
8.566240e+04 |
2.095227e+04 |
4539.121687 |
9.009419e+04 |
2268.750456 |
1.666849e+04 |
-22.802668 |
0.487778 |
1398.045724 |
-19.926368 |
-2.014960 |
infl |
-6.729093 |
-0.008926 |
-2.006862e+03 |
-1.496161e+03 |
-3.662567e+02 |
-135.480431 |
-1.471849e+03 |
-44.328471 |
-3.958667e+02 |
5.658413 |
0.308501 |
-19.926368 |
10.583418 |
-4.870398 |
realint |
-0.093007 |
0.065246 |
-4.160615e+02 |
-3.674486e+02 |
-7.250535e+01 |
18.526286 |
-3.158503e+02 |
4.689722 |
-1.863826e+01 |
2.233145 |
0.632511 |
-2.014960 |
-4.870398 |
7.122486 |
mac.corr()
|
year |
quarter |
realgdp |
realcons |
realinv |
realgovt |
realdpi |
cpi |
m1 |
tbilrate |
unemp |
pop |
infl |
realint |
year |
1.000000 |
-0.011356 |
0.984331 |
0.979991 |
0.942204 |
0.861450 |
0.986156 |
0.988215 |
0.973572 |
-0.174020 |
0.042711 |
0.997223 |
-0.140837 |
-0.002373 |
quarter |
-0.011356 |
1.000000 |
0.006082 |
0.005494 |
0.007072 |
0.012144 |
0.005984 |
0.005042 |
0.005237 |
0.011246 |
-0.004166 |
0.008334 |
-0.002453 |
0.021856 |
realgdp |
0.984331 |
0.006082 |
1.000000 |
0.999229 |
0.977708 |
0.868651 |
0.999008 |
0.986524 |
0.978696 |
-0.275745 |
-0.064297 |
0.993297 |
-0.191880 |
-0.048492 |
realcons |
0.979991 |
0.005494 |
0.999229 |
1.000000 |
0.976290 |
0.870713 |
0.999091 |
0.983735 |
0.976946 |
-0.294004 |
-0.056163 |
0.990350 |
-0.198804 |
-0.059517 |
realinv |
0.942204 |
0.007072 |
0.977708 |
0.976290 |
1.000000 |
0.794797 |
0.971564 |
0.947084 |
0.933281 |
-0.273622 |
-0.185347 |
0.957720 |
-0.192416 |
-0.046433 |
realgovt |
0.861450 |
0.012144 |
0.868651 |
0.870713 |
0.794797 |
1.000000 |
0.872241 |
0.882714 |
0.890436 |
-0.302875 |
-0.024619 |
0.861811 |
-0.295641 |
0.049280 |
realdpi |
0.986156 |
0.005984 |
0.999008 |
0.999091 |
0.971564 |
0.872241 |
1.000000 |
0.986721 |
0.977752 |
-0.270082 |
-0.030797 |
0.994238 |
-0.186683 |
-0.048834 |
cpi |
0.988215 |
0.005042 |
0.986524 |
0.983735 |
0.947084 |
0.882714 |
0.986721 |
1.000000 |
0.988812 |
-0.236846 |
0.024727 |
0.990182 |
-0.222361 |
0.028676 |
m1 |
0.973572 |
0.005237 |
0.978696 |
0.976946 |
0.933281 |
0.890436 |
0.977752 |
0.988812 |
1.000000 |
-0.330426 |
-0.016397 |
0.979025 |
-0.267236 |
-0.015337 |
tbilrate |
-0.174020 |
0.011246 |
-0.275745 |
-0.294004 |
-0.273622 |
-0.302875 |
-0.270082 |
-0.236846 |
-0.330426 |
1.000000 |
0.229969 |
-0.217566 |
0.620508 |
0.298516 |
unemp |
0.042711 |
-0.004166 |
-0.064297 |
-0.056163 |
-0.185347 |
-0.024619 |
-0.030797 |
0.024727 |
-0.016397 |
0.229969 |
1.000000 |
0.008944 |
0.065015 |
0.162489 |
pop |
0.997223 |
0.008334 |
0.993297 |
0.990350 |
0.957720 |
0.861811 |
0.994238 |
0.990182 |
0.979025 |
-0.217566 |
0.008944 |
1.000000 |
-0.163815 |
-0.020192 |
infl |
-0.140837 |
-0.002453 |
-0.191880 |
-0.198804 |
-0.192416 |
-0.295641 |
-0.186683 |
-0.222361 |
-0.267236 |
0.620508 |
0.065015 |
-0.163815 |
1.000000 |
-0.560965 |
realint |
-0.002373 |
0.021856 |
-0.048492 |
-0.059517 |
-0.046433 |
0.049280 |
-0.048834 |
0.028676 |
-0.015337 |
0.298516 |
0.162489 |
-0.020192 |
-0.560965 |
1.000000 |
mac['cpi'].value_counts(ascending = True,bins=4)
(76.388, 123.795] 38
(171.203, 218.61] 38
(123.795, 171.203] 44
(28.788999999999998, 76.388] 83
Name: cpi, dtype: int64
3. 对象操作
3.1 Dataframe结构
import pandas as pa
data = [[1,2,3],[4,5,6]]
index = ['a','b']
columns = ['A','B','C']
df = pa.DataFrame(data=data,index=index,columns = columns)
df
增:df.loc()/pd.concat()(练接两个dataframe)/直接添加;删:df.drop()/del df[]/;改:通过loc和index重新定义;查:iloc/loc/索引;
df.loc['c']=[7,8,9]
df
|
A |
B |
C |
a |
1 |
2 |
3 |
b |
4 |
5 |
6 |
D |
7 |
7 |
7 |
c |
7 |
8 |
9 |
df.drop(['D'],axis=0,inplace=True)
df
|
A |
B |
C |
a |
1 |
2 |
3 |
b |
4 |
5 |
6 |
c |
7 |
8 |
9 |
df.loc['a']['B']=20
df
|
A |
B |
C |
a |
1 |
20 |
3 |
b |
4 |
5 |
6 |
c |
7 |
8 |
9 |
df.loc['b2']
B 5
C 6
D 200
Name: b2, dtype: int64
df.iloc[2]
B 8
C 9
D 300
Name: c3, dtype: int64
df['D']=[100,200,300]
df
|
A |
B |
C |
D |
a |
1 |
20 |
3 |
100 |
b |
4 |
5 |
6 |
200 |
c |
7 |
8 |
9 |
300 |
del df['A']
df
|
B |
C |
D |
a |
20 |
3 |
100 |
b |
5 |
6 |
200 |
c |
8 |
9 |
300 |
df.index=['a1','b2','c3']
df
|
B |
C |
D |
a1 |
20 |
3 |
100 |
b2 |
5 |
6 |
200 |
c3 |
8 |
9 |
300 |
df['D']
a1 100
b2 200
c3 300
Name: D, dtype: int64
data = [[10,20,30],[40,50,60],[70,80,90]]
index = ['j','k','l']
columns = ['B','C','D']
df1 = pa.DataFrame(data=data,index=index,columns = columns)
df1
|
B |
C |
D |
j |
10 |
20 |
30 |
k |
40 |
50 |
60 |
l |
70 |
80 |
90 |
df2=pa.concat([df,df1])
df2
|
B |
C |
D |
a1 |
20 |
3 |
100 |
b2 |
5 |
6 |
200 |
c3 |
8 |
9 |
300 |
j |
10 |
20 |
30 |
k |
40 |
50 |
60 |
l |
70 |
80 |
90 |
df2=pa.concat([df,df1],axis=1)
df2
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
"""Entry point for launching an IPython kernel.
|
B |
C |
D |
B |
C |
D |
a1 |
20.0 |
3.0 |
100.0 |
NaN |
NaN |
NaN |
b2 |
5.0 |
6.0 |
200.0 |
NaN |
NaN |
NaN |
c3 |
8.0 |
9.0 |
300.0 |
NaN |
NaN |
NaN |
j |
NaN |
NaN |
NaN |
10.0 |
20.0 |
30.0 |
k |
NaN |
NaN |
NaN |
40.0 |
50.0 |
60.0 |
l |
NaN |
NaN |
NaN |
70.0 |
80.0 |
90.0 |
left = pa.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3']})
right = pa.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']})
|
key |
A |
B |
0 |
K0 |
A0 |
B0 |
1 |
K1 |
A1 |
B1 |
2 |
K2 |
A2 |
B2 |
3 |
K3 |
A3 |
B3 |
merge=pa.merge(left,right)
merge
|
key |
A |
B |
C |
D |
0 |
K0 |
A0 |
B0 |
C0 |
D0 |
1 |
K1 |
A1 |
B1 |
C1 |
D1 |
2 |
K2 |
A2 |
B2 |
C2 |
D2 |
3 |
K3 |
A3 |
B3 |
C3 |
D3 |
merge=pa.merge(left,right,on='key')
merge
|
key |
A |
B |
C |
D |
0 |
K0 |
A0 |
B0 |
C0 |
D0 |
1 |
K1 |
A1 |
B1 |
C1 |
D1 |
2 |
K2 |
A2 |
B2 |
C2 |
D2 |
3 |
K3 |
A3 |
B3 |
C3 |
D3 |
3.2 series结构
data = [10,11,12]
index = ['a','b','c']
s = pa.Series(data = data,index = index)
s
a 10
b 11
c 12
dtype: int64
data = [100,110]
index = ['h','k']
s2 = pa.Series(data = data,index = index)
s3=s.append(s2)
s3
a 10
b 11
c 12
h 100
k 110
dtype: int64
s3['L']=900
s3
a 10
b 11
c 12
h 100
k 110
L 900
dtype: int64
del s2['h']
s2
k 110
dtype: int64
s.drop(['b','a'],inplace = True)
s
c 12
dtype: int64
s3.replace(to_replace = 100,value = 101,inplace = True)
s3
a 10
b 11
c 12
h 101
k 110
L 900
dtype: int64
s3.rename(index = {'a':'A'},inplace = True)
s3
A 10
b 11
c 12
h 101
k 110
L 900
dtype: int64
s3.loc['b']
11
s3.iloc[2]