机器学习第二篇:简单线性回归

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
# Absolute Windows path of the input CSV; adjust it to wherever Data.csv lives locally.
DATA_PATH = 'F:\\机器学习100天\\007-010 线性回归\\008 简单线性回归-实战\\code\\Data.csv'
data = pd.read_csv(DATA_PATH)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Feature/target selection.
# Alternative (positional) selection, kept for reference:
#   X = data.iloc[:, :-1]
#   y = data.iloc[:, 1]
# Here the columns are picked by name instead. The single feature column is
# reshaped to (n_samples, 1) because the estimator is fed a 2-D feature matrix.
# NOTE(review): 'polulation' looks like a misspelling of 'population' (the plot
# labels use that spelling) -- confirm against the CSV header before changing it.
feature_column = data['polulation']
X = feature_column.to_numpy().reshape(-1, 1)
y = data['median_house_value']

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Build the model: ordinary least-squares fit on the training split.
lr = LinearRegression()
lr.fit(x_train, y_train)

# score() returns the coefficient of determination (R^2) for the given split.
train_score = lr.score(x_train, y_train)
print('训练集', train_score)
test_score = lr.score(x_test, y_test)
print('测试集', test_score)

# Predict the target for the held-out test rows.
y_pred = lr.predict(x_test)
print('预测', y_pred)

# Value of the loss function on the training split:
# J = (1 / m) * sum((y_hat - y) ** 2), i.e. the mean squared error.
n_train = x_train.shape[0]
train_residuals = lr.predict(x_train) - y_train
J = 1 / n_train * np.sum(train_residuals ** 2)
print('损失函数的值', J)

# Learned parameters: with a single feature, coef_ holds exactly one weight
# (the slope); intercept_ is the bias term.
w, b = lr.coef_[0], lr.intercept_
print('权重', w)
print('截距', b)

def _plot_fit(x, y):
    """Scatter the observed points of one split and overlay the fitted line."""
    plt.scatter(x, y, color='red')
    plt.plot(x, lr.predict(x), color='blue')
    plt.title('population VS median_house_value')
    plt.xlabel('population')
    plt.ylabel('median_house_value')
    plt.show()


# Visualise the training split first, then the test split. Both figures use the
# same title and axis labels.
for split_x, split_y in ((x_train, y_train), (x_test, y_test)):
    _plot_fit(split_x, split_y)

你可能感兴趣的:(机器学习,python,数据可视化)