# Data analysis library
import pandas as pd
# Scientific computing library
import numpy as np
from pandas import Series,DataFrame
# Load the Titanic training and test sets from CSV.
# NOTE: these are absolute, machine-specific paths; adjust them when running
# on another machine.
TRAIN_CSV_PATH = "/home/yuan/下载/titanic_train.csv"
TEST_CSV_PATH = "/home/yuan/下载/titanic_test.csv"
data_train = pd.read_csv(TRAIN_CSV_PATH)
data_test = pd.read_csv(TEST_CSV_PATH)

# Impute missing ages in the training set with the column median.
# The author's note for this step says the median is the chosen statistic
# (the mean reportedly gave lower accuracy here, although a decision tree
# scored higher with it -- the reason was left as an open question), but the
# code was calling .mean(). Use the median so the code matches the stated
# choice. Series.median() skips NaN, so it is computed from known ages only.
age_median = data_train["Age"].median()
data_train["Age"] = data_train["Age"].fillna(age_median)
# Linear regression
from sklearn.linear_model import LinearRegression
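# Cross-validate on the training set and take the average.
# Legacy import kept for reference: sklearn.cross_validation was removed in
# scikit-learn 0.20; KFold now lives in sklearn.model_selection.
#from sklearn.cross_validation import KFold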
from sklearn.model_selection import KFold
# Encode the Sex column numerically: male -> 1, female -> 0.
# NOTE(review): this step was previously documented as male -> 0,
# female -> 1, which contradicted the values actually assigned. The runtime
# encoding is kept as it was; make sure any other frame (e.g. data_test) is
# encoded the same way.
#
# A single .map() replaces two in-place .loc writes of ints into a string
# column. Those writes left the column with a mixed object dtype, and pandas
# versions that infer a strict string dtype reject them. With .map() the
# result is a numeric column; any value other than "male"/"female"
# (including missing values) becomes NaN instead of staying a raw string.
SEX_CODES = {"male": 1, "female": 0}
data_train["Sex"] = data_train["Sex"].map(SEX_CODES)
# Fill missing Embarked values with the most frequent port of embarkation.
# The note for this step says to use the most common value (expected to be
# "S"), but the code was hard-coding "Q". Derive the value from the data so
# the fill always matches the stated intent. Series.mode() ignores NaN and
# returns its results sorted, so .iloc[0] is deterministic on ties.
embarked_modes = data_train["Embarked"].mode()
# Fall back to "S" (the value named in the original note) if the column has
# no non-missing entries at all.
most_common_port = embarked_modes.iloc[0] if not embarked_modes.empty else "S"
data_train["Embarked"] = data_train["Embarked"].fillna(most_common_port)
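# (A stray Vim status line, "-- INSERT -- 1,1 Top", was pasted here; it is not valid Python and has been commented out.)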