import pandas as pd
score={'student':['LiLei','HanMeiMei','David','Mary','oldwang'],'chinese':[90,65,72,38,42],'math':[68,32,75,60,92],
'english':[63,75,82,74,91],'physics':[93,32,71,24,85]}
data=pd.DataFrame(score)
print(data)
out:
student chinese math english physics
0 LiLei 90 68 63 93
1 HanMeiMei 65 32 75 32
2 David 72 75 82 71
3 Mary 38 60 74 24
4 oldwang 42 92 91 85
#筛选数学大于60分且物理大于80分
data1=data.loc[(data.math>60)&(data.physics>80)]
print(data1)
out:
student chinese math english physics
0 LiLei 90 68 63 93
4 oldwang 42 92 91 85
#筛选语文大于80分或者物理低于60分
data2=data.loc[(data.chinese>80)|(data.physics<60)]
print(data2)
out:
student chinese math english physics
0 LiLei 90 68 63 93
1 HanMeiMei 65 32 75 32
3 Mary 38 60 74 24
#筛选数学高于平均分,并且按照分数从高到低排列
data3=data[data['math']>data['math'].mean()].sort_values(by='math',ascending=False)
print(data3)
out:
student chinese math english physics
4 oldwang 42 92 91 85
2 David 72 75 82 71
0 LiLei 90 68 63 93
import pandas as pd
score={'student':['LiLei','HanMeiMei','David','Mary','oldwang'],'chinese':[90,65,72,38,42],'math':[68,32,75,60,92],
'english':[63,75,82,74,91],'physics':[93,32,71,24,85],'label':['A','C','B','C','A']}
data=pd.DataFrame(score)
print(data)
out:
student chinese math english physics label
0 LiLei 90 68 63 93 A
1 HanMeiMei 65 32 75 32 C
2 David 72 75 82 71 B
3 Mary 38 60 74 24 C
4 oldwang 42 92 91 85 A
#筛选label为B或者C
data4=data.loc[data['label'].isin(['B','C']),:]
print(data4)
out:
student chinese math english physics label
1 HanMeiMei 65 32 75 32 C
2 David 72 75 82 71 B
3 Mary 38 60 74 24 C
#当然也可以加取反~,查找不在B和C的
data5=data.loc[~data['label'].isin(['B','C']),:]
print(data5)
out:
student chinese math english physics label
0 LiLei 90 68 63 93 A
4 oldwang 42 92 91 85 A
pandas可以用str.contains来模糊筛选,有点像SQL中的like
#查找姓名中包含wang的同学
data6=data.loc[data['student'].str.contains('wang'),:]
print(data6)
out:
student chinese math english physics label
4 oldwang 42 92 91 85 A
#查找语文大于70
data7=data.query('chinese>70')
print(data7)
out:
student chinese math english physics label
0 LiLei 90 68 63 93 A
2 David 72 75 82 71 B
# data.query('chinese>70')==data[data.chinese>70]
filter不筛选具体的数据,而是筛选特定的行和列。它支持三种筛选模式:
items:固定列名
regex:正则表达式
like:模糊查询
#筛选语文和英语列
data8=data.filter(['chinese','english'])
print(data8)
out:
chinese english
0 90 63
1 65 75
2 72 82
3 38 74
4 42 91
#筛选列名中包含a的
data9=data.filter(regex='a',axis=1)
print(data9)
out:
math label
0 68 A
1 32 C
2 75 B
3 60 C
4 92 A
#筛选索引中包含2的
data10=data.filter(like='2',axis=0)
print(data10)
out:
student chinese math english physics label
2 David 72 75 82 71 B