Python DataFrame one-hot 编码:一列变多列

import pandas as pd
# Carrier code -> index of that carrier's slot in the encoded output.
carria_code_dict = {"CZ": 0, "SC": 1, "Y8": 2, "3U": 3, "MU": 4, "NS": 5}
# Number of slots in every encoded row (one per known carrier code).
carria_len = len(carria_code_dict)


def encode_data(str):
    """Spread a ``"CODE:value,CODE:value"`` string into fixed carrier slots.

    The input is split on ``,`` and each token on ``:``.  A token is used
    only when it has exactly two parts and its code is a key of
    ``carria_code_dict``; every other token is silently skipped.  When a
    code occurs more than once, the last occurrence wins.

    Args:
        str: Comma-separated ``CODE:value`` pairs.  The name shadows the
            builtin ``str``; it is kept unchanged so that existing keyword
            callers keep working.

    Returns:
        A comma-joined string with ``carria_len`` fields.  Field ``i`` holds
        the value given for the carrier whose slot index is ``i``, or
        ``'0'`` when that carrier was not present in the input.
    """
    encode_result = ['0'] * carria_len
    for data in str.split(','):
        temp = data.split(":")
        if len(temp) != 2:
            # Not a single CODE:value pair (no colon, or more than one).
            continue
        code, value = temp
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        if code in carria_code_dict:
            encode_result[carria_code_dict[code]] = value
    return ','.join(encode_result)
# Demo: expand the packed 'carrier' column into one column per carrier code.
df2 = pd.DataFrame(
    [["CZ:1,SC:4,Y8:1"], ["3U:2,CZ:1,MU:6,NS:3"]],
    columns=['carrier'],
)
# encode_data() places each value at the slot index stored in
# carria_code_dict, so the column labels must be listed in that same slot
# order.  Plain dict.keys() order is not guaranteed to match it on older
# interpreters, and a real list is a safer column indexer than a keys view.
carrier_columns = sorted(carria_code_dict, key=carria_code_dict.get)
df2[carrier_columns] = (
    df2['carrier'].map(encode_data).str.split(',', expand=True)
)
# Parenthesised form is valid in both Python 2 and Python 3.
print(df2)

 

你可能感兴趣的:(python)