replace(to_replace=None, value=None, regex=False, inplace=False)
import pandas as pd
import numpy as np
# Build a small integer frame, then swap every 1 and every 4 (in any column)
# for a string label. The two lists are paired positionally, and
# inplace=True mutates `data` instead of returning a new frame.
data = pd.DataFrame(
    {
        'qu1': [1, 7, 41, 3, 4],
        'qu2': [1, 9, 4, 37, 4],
        'qu3': [1, 12, 25, 3, 37],
    }
)
print(data)
data.replace(
    to_replace=[1, 4],
    value=['replace1', 'replace4'],
    inplace=True,
)
print(data)
out:
qu1 qu2 qu3
0 1 1 1
1 7 9 12
2 41 4 25
3 3 37 3
4 4 4 37
qu1 qu2 qu3
0 replace1 replace1 replace1
1 7 9 12
2 41 replace4 25
3 3 37 3
4 replace4 replace4 37
# Column-scoped replacement: the keys of both dicts are column names, so
# only the value 1 in 'qu1' and the value 9 in 'qu2' are rewritten; the
# same numbers appearing in other columns are left alone.
data1 = pd.DataFrame(
    {
        'qu1': [1, 7, 41, 3, 4],
        'qu2': [1, 9, 4, 37, 4],
        'qu3': [1, 12, 25, 3, 37],
    }
)
print(data1)
data1.replace(
    to_replace={'qu1': 1, 'qu2': 9},
    value={'qu1': 'replace1', 'qu2': 'replace9'},
    inplace=True,
)
print(data1)
out:
qu1 qu2 qu3
0 1 1 1
1 7 9 12
2 41 4 25
3 3 37 3
4 4 4 37
qu1 qu2 qu3
0 replace1 1 1
1 7 replace9 12
2 41 4 25
3 3 37 3
4 4 4 37
import pandas as pd
import numpy as np
# Sample 4x3 frame with NaN scattered through it, one inner list per row.
df_obj = pd.DataFrame(
    [
        [1, 6.5, 3],
        [4.6, np.nan, 2.4],
        [np.nan, np.nan, 3.9],
        [np.nan, 8.5, np.nan],
    ],
    columns=['col1', 'col2', 'col3'],
)
print(df_obj)
out:
col1 col2 col3
0 1.0 6.5 3.0
1 4.6 NaN 2.4
2 NaN NaN 3.9
3 NaN 8.5 NaN
# Boolean mask of the same shape as df_obj: True where the cell is NaN.
missing_mask = df_obj.isnull()
print(missing_mask)
out:
col1 col2 col3
0 False False False
1 False True False
2 True True False
3 True False True
# Inverse of isnull(): True where the cell holds a real (non-NaN) value.
present_mask = df_obj.notnull()
print(present_mask)
out:
col1 col2 col3
0 True True True
1 True False True
2 False False True
3 False True False
import pandas as pd
import numpy as np
# Rebuild the NaN-laden sample frame (one inner list per row) used by the
# dropna / fillna / replace calls that follow.
df_obj = pd.DataFrame(
    [
        [1, 6.5, 3],
        [4.6, np.nan, 2.4],
        [np.nan, np.nan, 3.9],
        [np.nan, 8.5, np.nan],
    ],
    columns=['col1', 'col2', 'col3'],
)
print(df_obj)
out:
col1 col2 col3
0 1.0 6.5 3.0
1 4.6 NaN 2.4
2 NaN NaN 3.9
3 NaN 8.5 NaN
# dropna parameters:
#   how='any'  drops a row/column as soon as it contains one NaN;
#   how='all'  drops it only when every inspected value is NaN.
#   axis=0 works on rows, axis=1 works on columns.
#   thresh=N   keeps only rows/columns that have at least N non-NaN values.
#   subset     limits which labels are inspected for NaN.
complete_rows = df_obj.dropna(axis=0, how='any')
print(complete_rows)
out:
col1 col2 col3
0 1.0 6.5 3.0
# Only col2 is inspected, so a row is dropped exactly when its col2 is NaN.
rows_with_col2 = df_obj.dropna(subset=['col2'], how='all')
print(rows_with_col2)
out:
col1 col2 col3
0 1.0 6.5 3.0
3 NaN 8.5 NaN
# Keep only the rows that have at least two non-NaN values.
# `thresh` is passed on its own: recent pandas versions raise TypeError when
# `how` and `thresh` are supplied together, because the two are alternative
# ways of deciding which rows to drop.
print(df_obj.dropna(thresh=2))
out:
col1 col2 col3
0 1.0 6.5 3.0
1 4.6 NaN 2.4
# A dict maps column name -> fill value, so filling is done per column;
# columns that are not listed (col3 here) keep their NaN.
column_fill_values = {'col1': 9, 'col2': 8}
print(df_obj.fillna(column_fill_values))
col1 col2 col3
0 1.0 6.5 3.0
1 4.6 8.0 2.4
2 9.0 8.0 3.9
3 9.0 8.5 NaN
# ffill carries the last valid value downward; bfill pulls the next valid
# value upward. limit=1 fills at most one consecutive NaN per gap.
# DataFrame.ffill is used instead of fillna(method='ffill') because the
# `method` argument of fillna is deprecated in newer pandas releases.
print(df_obj.ffill(limit=1))
col1 col2 col3
0 1.0 6.5 3.0
1 4.6 6.5 2.4
2 4.6 NaN 3.9
3 NaN 8.5 3.9
# The lists are paired positionally: 4.6 -> 'replace0' and 3 -> '5a'
# (the integer 3 also matches the float 3.0 stored in col3).
# Without inplace=True a new frame is returned and df_obj is unchanged.
replaced = df_obj.replace(to_replace=[4.6, 3], value=['replace0', '5a'])
print(replaced)
out:
col1 col2 col3
0 1 6.5 5a
1 replace0 NaN 2.4
2 NaN NaN 3.9
3 NaN 8.5 NaN