dfkong=df1.dropna()
dfkong = df1.fillna(value=100)
pd.isnull(df1)
pd.isnull(df1).any()
pd.isnull(df1).any().any()
空数据不参与计算
df1.mean()
df1.cumsum()
表1.sub(表2,axis='index')
表.apply(np.cumsum)
表.apply(lambda x : x.max()-x.min())
def _sum(x):
    """Print the type of ``x`` and return ``x.sum()``.

    Written as a callback for ``DataFrame.apply``; the printed type shows
    what kind of object each call receives.
    """
    print(type(x))
    return x.sum()
print(df.apply(_sum))
s.value_counts()
s.mode()
表1.append(表2,ignore_index=True)
表.groupby('列').sum()
pd.merge(表1,表2,on='共同列')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
# Build a 6x4 frame of random values indexed by six consecutive days.
dates = pd.date_range('20200209', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print(df)

# Reindex: keep only the first four dates and add a new column 'e',
# which starts out entirely NaN because the source frame has no such column.
df1 = df.reindex(index=dates[:4], columns=[*df.columns, 'e'])
print(df1)

# Fill 'e' for the second and third dates only; the other two rows stay NaN.
df1.loc[dates[1:3], 'e'] = 2
print(df1)
# Handling missing data
# Option 1: drop every row that contains at least one NaN.
dfkong = df1.dropna()
print('处理空数据')
print(dfkong)
# Option 2: replace each NaN with a default value.
dfkong = df1.fillna(value=100)
print(dfkong)
# Detection: element-wise mask, then one flag per column, then a single
# overall flag.
print(pd.isnull(df1))
print(pd.isnull(df1).any())
# The trailing any() has to be called; without the parentheses the bound
# method object is printed instead of the boolean answer.
print(pd.isnull(df1).any().any())
# Missing values do not take part in the calculations below.
print(df1.mean())
print(df1.cumsum())
# Table arithmetic
## Subtraction
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
# Shifting by two rows leaves NaN in the first two positions of s.
s = pd.Series([1, 2, 3, np.nan, 5, 6], index=dates).shift(2)
print('表格运算', '减', sep='\n')
print(df, s, sep='\n')
# Subtract s from every column, matching values on the row index.
a = df.sub(s, axis='index')
print(a)

## Cumulative sum applied through apply()
b = df.apply(np.cumsum)
print('累加', b, sep='\n')
# Spread (max minus min) computed by the same apply() mechanism.
c = df.apply(lambda col: col.max() - col.min())
print(c)
def _sum(x):
    """Print the type of ``x`` and return ``x.sum()``.

    Written as a callback for ``DataFrame.apply``; the printed type shows
    what kind of object each call receives.
    """
    print(type(x))
    return x.sum()
# Run _sum over the frame through apply() and show what comes back.
column_totals = df.apply(_sum)
print(column_totals)
# How many times each value occurs
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
s = pd.Series([1, 2, 3, 1, np.nan])
print('每个数字有多少个')
print(s.value_counts())
# Most frequent value(s)
print('最多的数字多少')
# mode is a method and has to be called; printing s.mode without parentheses
# shows the bound method object rather than the result.
most_frequent = s.mode()
print(most_frequent)
# Insert: add s to df as one extra row.
# DataFrame.append was removed in pandas 2.0. Turning the Series into a
# one-row frame and concatenating gives the same result: the labels of s
# become additional columns and the row index is renumbered.
df2 = pd.concat([df, s.to_frame().T], ignore_index=True)
print('插入')
print(df2)
# Grouping
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
    'C': np.random.randn(8),
    'D': np.random.randn(8),
})
# Sum only the numeric columns within each group of A. Without
# numeric_only=True, pandas 2.x also "sums" the string column B by
# concatenating its values, so the output would depend on the pandas version.
aa = df.groupby('A').sum(numeric_only=True)
print('分组')
print(aa)
# Merging: join two small frames on their shared key column 'aa'.
a = pd.DataFrame({'aa': ['1', '2'], 'bb': ['22', '33']})
b = pd.DataFrame({'aa': ['1', '2'], 'bb': ['55', '33']})
print('融合')
# Both frames also carry a 'bb' column, so the joined result holds one copy
# from each side.
print(a.merge(b, on='aa'))
处理空数据