# Kaggle房价预测(参考DanB)
# 链接:https://www.kaggle.com/learn/machine-learning
# 以下是用到的输出语句(均已注释):
#print(original_data.isnull().sum()) #统计na的总数
#print(original_data.describe()) #显示描述
#print(original_data.columns) #显示列
#print(original_data.isnull()) #bool显示是否为na
#data_without_missing_values = original_data.dropna(axis=1) #删除含na的列
#print(melbourne_price_data.head()) #默认是前五行
#print(mean_absolute_error(y,predicted_house_prices)) #计算平均绝对误差(MAE)
# 下面用随机森林模型预测,随意选取了五个特征来做;决策树的用法与随机森林相同
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
# Train a random forest on a handful of features from the Kaggle house-price
# training set, predict SalePrice for the test set, print the predictions and
# write them to a submission CSV.

DATA_DIR = 'D:/NOTEBOOK'  # directory holding train.csv and test.csv
predictor_x = ['LotArea', 'YearBuilt', 'OverallQual', '1stFlrSF', 'FullBath']  # feature columns

# Load the training data and split it into target and features.
train = pd.read_csv(DATA_DIR + '/train.csv')
train_y = train['SalePrice']
train_x = train[predictor_x]

# Seed the forest so repeated runs produce the same submission file; without
# a random_state each run draws different bootstrap samples and the
# predictions change from run to run.
my_model = RandomForestRegressor(random_state=0)
my_model.fit(train_x, train_y)

# Predict on the test set using the same feature columns as in training.
test = pd.read_csv(DATA_DIR + '/test.csv')
test_x = test[predictor_x]
pre_test_y = my_model.predict(test_x)
print(pre_test_y)

# Build the submission: one row per test Id with its predicted SalePrice.
my_submission = pd.DataFrame({'Id': test['Id'], 'SalePrice': pre_test_y})
my_submission.to_csv('submission2.csv', index=False)