import pandas as pd
# Column labels for the demo frame.
columns = ['name', 'age', 'sex']
# One inner list per person, in the same order as `columns`.
data = [
    ['wang', 15, 'male'],
    ['li', 20, 'female'],
]
df = pd.DataFrame(data, columns=columns)
df
|
name |
age |
sex |
0 |
wang |
15 |
male |
1 |
li |
20 |
female |
bool 过滤数据
# Element-wise comparison: yields a boolean Series, True where sex is 'male'.
df['sex'] == 'male'
0 True
1 False
Name: sex, dtype: bool
# Boolean-mask assignment: every column of each matching row is set to 1.
# Work on a copy so that `df` itself keeps its original values for the
# cells that follow (they still expect the 'wang' / 15 / 'male' row).
overwritten = df.copy()
overwritten[overwritten.sex == "male"] = 1
overwritten
|
name |
age |
sex |
0 |
1 |
1 |
1 |
1 |
li |
20 |
female |
# Replace the current row labels with one readable label per student.
row_labels = ['student1', 'student2']
df.index = row_labels
df
|
name |
age |
sex |
student1 |
wang |
15 |
male |
student2 |
li |
20 |
female |
loc 索引
# A single row label returns that row as a Series (named after the label).
df.loc['student1']
name wang
age 15
sex male
Name: student1, dtype: object
# One row label plus a list of column labels: a Series holding only those columns.
df.loc['student1',['name','sex']]
name wang
sex male
Name: student1, dtype: object
# Passing the row label inside a list keeps the result a DataFrame, even for one row.
df.loc[['student1'],['name','sex']]
|
name |
sex |
student1 |
wang |
male |
# Label lists on both axes select the listed rows and columns as a DataFrame.
df.loc[['student1','student2'],['name','sex']]
|
name |
sex |
student1 |
wang |
male |
student2 |
li |
female |
iloc 索引
# iloc selects by integer position: row 0, columns 0 and 2 (name, sex), returned as a Series.
df.iloc[0,[0,2]]
name wang
sex male
Name: student1, dtype: object
# Position lists on both axes return a DataFrame with rows 0-1 and columns 0 and 2.
df.iloc[[0,1],[0,2]]
|
name |
sex |
student1 |
wang |
male |
student2 |
li |
female |
# Second demo frame: numeric id / age / sex columns, one row per student.
columns = ['id', 'age', 'sex']
data = [
    [13281, 15, 1],
    [11111, 20, 0],
    [2222, 18, 1],
    [543232, 9, 0],
]
# Row labels are supplied directly to the constructor.
df = pd.DataFrame(
    data,
    index=['student1', 'student2', 'student3', 'student4'],
    columns=columns,
)
df
|
id |
age |
sex |
student1 |
13281 |
15 |
1 |
student2 |
11111 |
20 |
0 |
student3 |
2222 |
18 |
1 |
student4 |
543232 |
9 |
0 |
# Positional slices exclude the stop value: rows 0-1 and columns 1-2 (age, sex).
df.iloc[:2,1:3]
|
age |
sex |
student1 |
15 |
1 |
student2 |
20 |
0 |
数据过滤和筛选
# Slice first, then filter using the slice's own `sex` column.
# A mask built from the full-length df.sex does not match the slice's index
# and makes pandas emit "Boolean Series key will be reindexed to match
# DataFrame index"; a mask built from the slice is already aligned.
first_two = df.iloc[:2, 1:3]
first_two[first_two.sex == 0]
D:\Anaconda3\envs\data\lib\site-packages\ipykernel_launcher.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
# Label slices include both endpoints: student1 through student3.
# Build the mask from the sliced frame so its index already matches;
# masking with the full-length df.sex triggers pandas' "Boolean Series key
# will be reindexed to match DataFrame index" UserWarning.
first_three = df.loc['student1':'student3', ['age', 'sex']]
first_three[first_three.sex == 1]
D:\Anaconda3\envs\data\lib\site-packages\ipykernel_launcher.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
"""Entry point for launching an IPython kernel.
|
age |
sex |
student1 |
15 |
1 |
student3 |
18 |
1 |