# Data source: https://forge.educoder.net/attachments/download/376915/input.zip
# Missing-value count for each column of `train`, largest count first
# (assumes `train` is a pandas DataFrame defined earlier -- confirm).
train.isna().sum().sort_values(ascending=False)
def solve_nan_str(data, columns, first_label=0):
    """Fill missing values in the selected columns of ``data``, in place.

    Columns whose dtype is ``float64`` get their missing entries replaced by
    the column mean. Every other column is treated as categorical: missing
    entries become ``0`` and each distinct value is replaced by an integer
    label, numbered in order of first appearance. Values equal to ``0`` are
    never relabelled.

    Args:
        data: pandas DataFrame (or compatible object) to clean; it is
            modified in place.
        columns: Iterable of column labels to process.
        first_label: Label assigned to the first category (default ``0``).
            NOTE: with the default, the first category shares the code ``0``
            with missing values; pass ``1`` to keep them distinguishable.

    Returns:
        The same ``data`` object that was passed in.
    """
    for column in columns:
        series = data[column]
        if series.dtype != 'float64':  # non-float column: label-encode it
            filled = series.fillna(0)  # missing values become 0
            # 0 marks "missing", so it is not a category of its own.
            categories = [
                value for value in filled.unique().tolist() if value != 0
            ]
            # Build the whole value -> label table from the ORIGINAL values
            # and apply it in one pass. Relabelling the column one category
            # at a time lets a freshly written label be mistaken for a later
            # category's raw value (e.g. [7, 8, 1] would end up as [0, 2, 2]).
            codes = {
                value: label
                for label, value in enumerate(categories, start=first_label)
            }
            encoded = filled.map(lambda value: codes.get(value, value))
            if filled.dtype == 'object':
                # Keep object columns as object so dtype-based logic
                # downstream sees the same column type as before.
                encoded = encoded.astype(object)
            data[column] = encoded
        else:
            # float64 column: impute with the mean of the observed values.
            data[column] = series.fillna(series.mean())
    return data