Python训练营打卡DAY5

离散特征的独热编码

读取数据

import pandas as pd
# Load the dataset into a DataFrame.
# NOTE(review): the original path was "data.scv", presumably a typo for the
# ".csv" extension -- confirm against the actual file name on disk.
data = pd.read_csv(r"data.csv")

找到所有离散特征

# Show the column index (bare expression: only displays in a notebook cell).
data.columns
# Print the name of every column stored with the object dtype; these are
# treated as the discrete features in the rest of the script.
for discrete_features in data.columns:
    if data[discrete_features].dtype != object:
        continue
    print(discrete_features)

对Home Ownership列进行独热编码

# Inspect the raw column and its category frequencies (bare expressions:
# they only display when run as notebook cells).
data['Home Ownership']
data['Home Ownership'].value_counts()
# Replace 'Home Ownership' with one indicator column per category.
data = pd.get_dummies(data, columns=['Home Ownership'])
# Cast the indicator to integers and write it back to the SAME column.
# The original assigned to the misspelled key "..._Have Mortagage", which
# added a stray extra column and left the real indicator column uncast.
data['Home Ownership_Have Mortgage'] = data['Home Ownership_Have Mortgage'].astype(int)

采用循环对所有离散特征进行独热编码（drop_first=True，每个特征丢弃第一个类别）

# Reload the raw data so the encoding starts from the unmodified columns.
# NOTE(review): the original path was "data.scv", presumably a typo for the
# ".csv" extension -- confirm against the actual file name on disk.
data = pd.read_csv(r"data.csv")
# Collect the names of all columns stored with the object dtype.
discrete_lists = [col for col in data.columns if data[col].dtype == object]
# Encode every collected column at once. drop_first=True removes the first
# category of each feature, so k categories yield k-1 indicator columns.
data = pd.get_dummies(data, columns=discrete_lists, drop_first=True)
# Show the resulting column index (notebook-style display).
data.columns

对比独热编码前后的列名，并将新增的列转换为int类型

# Load a fresh copy of the raw data to compare column names against.
# NOTE(review): the original path was "data.scv", presumably a typo for the
# ".csv" extension -- confirm against the actual file name on disk.
data2 = pd.read_csv(r"data.csv")
# Columns present in the encoded frame but absent from the raw frame, i.e.
# the columns that the encoding step added.
list_final = [col for col in data.columns if col not in data2.columns]
list_final
# Cast each added column to int.
for col in list_final:
    data[col] = data[col].astype(int)
# Preview the first rows (notebook-style display).
data.head()

用均值填补每一列的缺失值

# Fill the missing values of every column with that column's mean.
for col in data.columns:
    # The original wrote `data[i].pd.isnull()`, which raises AttributeError
    # because a Series has no `.pd` attribute; isnull() is a Series method.
    if data[col].isnull().sum() > 0:
        mean_value = data[col].mean()
        # Assign the result back instead of calling fillna(inplace=True) on
        # the selected column: the in-place form acts on an intermediate
        # object and is not guaranteed to update `data` itself.
        data[col] = data[col].fillna(mean_value)
# Show the remaining missing-value count per column (notebook-style display).
data.isnull().sum()

@浙大疏锦行

你可能感兴趣的:(Python打卡训练营内容,python,开发语言)