data cleansing

import numpy as np

import pandas as pd

# Database handle handed to pandas.read_sql. Its value is missing from this
# snippet. TODO: assign a DB-API connection or SQLAlchemy engine before running.
conn = None

# Query text kept exactly as written in the snippet, now quoted as a string.
sql1 = "select * from table"

data = pd.read_sql(sql1, conn)

print(data.describe())

# cleaning missing numbers

# Treat a price of 0 as missing. Series.mask builds a new column, so the result
# is assigned back explicitly instead of relying on chained indexing
# (data["price"][...] = None), which may modify a temporary copy.
data["price"] = data["price"].mask(data["price"] == 0)

# x counts the missing cells across all columns; it is computed before the
# fill because afterwards no nulls remain.
x = int(data.isnull().sum().sum())

# Replace every missing cell with the string "36" (same fill value as before).
data = data.fillna("36")

print(x)

 

pandas 空值定义为numpy.nan

对整体的Series或DataFrame判断是否为空,用isnull()
eg:
pd.isnull(df1) #df1是dataframe变量

对单独的某个值判断,可以用 np.isnan()
eg: np.isnan(df1.iloc[0,3]) #对df1的第0行第3列判断(.ix 已在 pandas 1.0 中移除,改用 .iloc)

 

 

# Convert every non-null cell of data_1 (row labels 1..439) to an integer and
# then to its string form; null cells are left untouched.
# data_1 is defined outside this fragment.
# NOTE(review): the row range 1..439 is hard-coded and skips label 0 -
# presumably specific to the author's dataset; confirm before reuse.
# NOTE(review): .at is label-based, so this assumes data_1 has an integer index
# containing labels 1..439 - confirm.
data_2 = data_1.isnull()
print(data_2.head())
for i in data_2.columns:
    print(i)
    print(data_2.at[1, i])
    # Cast the column to object up front so that string values can be stored
    # next to NaN without a dtype-coercion error on assignment. This also
    # applies to columns that end up with no converted cells.
    data_1[i] = data_1[i].astype(object)
    for j in range(1, 440):
        print(data_2.at[j, i])
        if not data_2.at[j, i]:
            # Write through .at on the frame itself; the chained form
            # data_1[i][j] = ... is not guaranteed to modify data_1.
            data_1.at[j, i] = str(int(data_1.at[j, i]))

转载于:https://www.cnblogs.com/rabbittail/p/7859309.html

你可能感兴趣的:(data cleansing)