import pandas as pd
import numpy as np
# Sample records: name, age, country, score, continent.
data = [
    ['mark', 55, 'Italy', 4.5, 'Europe'],
    ['John', 33, 'China', 3.8, 'Asian'],
    ['mary', 40, 'Japan', 2.3, 'Asian'],
]
df = pd.DataFrame(
    data=data,
    columns=['name', 'age', 'country', 'score', 'continent'],
    index=[1001, 1002, 1003],
)

# Introduce missing values: one missing score (row 1001) and one row that is
# missing in every column (row 1004, created by assigning to a new label).
df.loc[1001, 'score'] = None
df.loc[1004, :] = None
print(df)

# Note: dropna() does not modify the original DataFrame; its result has to be
# assigned. It is stored under its own name so that `df` still contains the
# missing values that the following steps work on.
complete_rows = df.dropna()

# Drop only the rows in which every column is missing (row 1004).
df = df.dropna(how='all')

# Boolean mask of the cells that are still missing. Kept in a separate
# variable so the data in `df` is not replaced by the mask.
missing_mask = df.isna()

# Fill the remaining missing score with the mean of the scores that are
# present (mean() skips missing values by default).
df = df.fillna({'score': df['score'].mean()})
import pandas as pd
import numpy as np
# Sample records; 'China' / 'Asian' appears twice (rows 1002 and 1004).
data = [
    ['mark', 55, 'Italy', 4.5, 'Europe'],
    ['John', 33, 'China', 3.8, 'Asian'],
    ['mary', 40, 'Japan', 2.3, 'Asian'],
    ['fiona', 35, 'China', 5.6, 'Asian'],
]
columns = ['name', 'age', 'country', 'score', 'continent']
row_ids = [1001, 1002, 1003, 1004]
df = pd.DataFrame(data, index=row_ids, columns=columns)

# Keep the first row of each (country, continent) combination; the result is
# assigned back because drop_duplicates() returns a new DataFrame.
df = df.drop_duplicates(subset=['country', 'continent'])
print(df)
import pandas as pd
import numpy as np
# Sample records; 'China' appears twice in the country column.
data = [
    ['mark', 55, 'Italy', 4.5, 'Europe'],
    ['John', 33, 'China', 3.8, 'Asian'],
    ['mary', 40, 'Japan', 2.3, 'Asian'],
    ['fiona', 35, 'China', 5.6, 'Asian'],
]
columns = ['name', 'age', 'country', 'score', 'continent']
row_ids = [1001, 1002, 1003, 1004]
df = pd.DataFrame(data, index=row_ids, columns=columns)

# is_unique reports whether every value in the column occurs only once.
country = df['country']
print(country.is_unique)

# unique() returns the distinct values in order of first appearance; note
# that `df` is rebound to that array of values here, not to a DataFrame.
df = country.unique()
print(df)
import pandas as pd
import numpy as np
# Sample records; 'China' appears twice in the country column.
data = [
    ['mark', 55, 'Italy', 4.5, 'Europe'],
    ['John', 33, 'China', 3.8, 'Asian'],
    ['mary', 40, 'Japan', 2.3, 'Asian'],
    ['fiona', 35, 'China', 5.6, 'Asian'],
]
columns = ['name', 'age', 'country', 'score', 'continent']
row_ids = [1001, 1002, 1003, 1004]
df = pd.DataFrame(data, index=row_ids, columns=columns)

# keep=False flags every occurrence of a repeated value; keep='first' leaves
# the first occurrence unflagged and flags only the later ones.
for keep_mode in (False, 'first'):
    print(df['country'].duplicated(keep=keep_mode))
import pandas as pd
import numpy as np
# Sample records; 'China' appears twice in the country column.
data = [
    ['mark', 55, 'Italy', 4.5, 'Europe'],
    ['John', 33, 'China', 3.8, 'Asian'],
    ['mary', 40, 'Japan', 2.3, 'Asian'],
    ['fiona', 35, 'China', 5.6, 'Asian'],
]
columns = ['name', 'age', 'country', 'score', 'continent']
row_ids = [1001, 1002, 1003, 1004]
df = pd.DataFrame(data, index=row_ids, columns=columns)

# Keep only the rows whose country occurs more than once; keep=False marks
# every occurrence of a repeated value, not just the later ones.
repeated_country = df['country'].duplicated(keep=False)
df = df[repeated_country]
print(df)