在实际工作中经常会遇到按条件过滤数据的场景。这时可以使用 isin 或取反运算符 ~ 一步完成筛选,而不必编写多个条件语句,从而让代码更简洁、高效。
import numpy as np
import pandas as pd
df=pd.DataFrame(np.random.randn(4,4),columns=['A','B','C','D'])
df
Out[189]:
A B C D
0 0.289595 0.202207 -0.850390 0.197016
1 0.403254 -1.287074 0.916361 0.055136
2 -0.359261 -1.266615 -0.733625 -0.790208
3 0.164862 -0.649637 0.716620 1.447703
df['E'] = ['aa', 'bb', 'cc', 'cc']
df
Out[191]:
A B C D E
0 0.289595 0.202207 -0.850390 0.197016 aa
1 0.403254 -1.287074 0.916361 0.055136 bb
2 -0.359261 -1.266615 -0.733625 -0.790208 cc
3 0.164862 -0.649637 0.716620 1.447703 cc
df.E.isin(['aa','cc'])
Out[192]:
0 True
1 False
2 True
3 True
Name: E, dtype: bool
df[df.E.isin(['aa','cc'])]
Out[193]:
A B C D E
0 0.289595 0.202207 -0.850390 0.197016 aa
2 -0.359261 -1.266615 -0.733625 -0.790208 cc
3 0.164862 -0.649637 0.716620 1.447703 cc
df[df.E.isin(['aa'])|df.E.isin(['cc'])]
Out[194]:
A B C D E
0 0.289595 0.202207 -0.850390 0.197016 aa
2 -0.359261 -1.266615 -0.733625 -0.790208 cc
3 0.164862 -0.649637 0.716620 1.447703 cc
df[df.E.isin(['aa'])]
Out[195]:
A B C D E
0 0.289595 0.202207 -0.85039 0.197016 aa
df['D'] = [1,2,3,4]
df[df.isin({'D':[0,3],'E':['aa','cc']})]
Out[200]:
A B C D E
0 NaN NaN NaN NaN aa
1 NaN NaN NaN NaN NaN
2 NaN NaN NaN 3.0 cc
3 NaN NaN NaN NaN cc
df[~(df.E=='cc')]
Out[202]:
A B C D E
0 0.289595 0.202207 -0.850390 1 aa
1 0.403254 -1.287074 0.916361 2 bb