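# Linear regression on the ENB2012 energy-efficiency data set: loads the
# workbook, fits a model for 'Heating Load' and prints the training-set MSE.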
import pandas as pd
import matplotlib.pyplot as plt
# Workbook to load; a relative path, so it is resolved against the current
# working directory.
filename = 'ENB2012_data.xlsx'

# Labels applied to the ten sheet columns, in file order.
# NOTE(review): several labels are misspelled ('Ralative', 'Heirgt',
# 'grazubg'). They are kept unchanged because renaming them would break any
# code that selects columns by label.
columns = [
    'Ralative Compactness', 'Surface Area', 'wall area', 'Roof Area',
    'Overall Heirgt', 'orientation', 'glazing area',
    'grazubg area distribution', 'Heating Load', 'Cooling Load',
]

# Read through the documented top-level entry point, pd.read_excel, instead of
# the internal pd.io.excel path; both names refer to the same function.
# skiprows=1 skips the first row of the sheet (the original author notes it is
# a useless header row) and header=None/names=columns applies the labels
# above, so statistics such as the mean can be computed on the frame directly.
data = pd.read_excel(filename, header=None, names=columns, skiprows=1)
# print(data)
# data_std=data.std()
# data_mean=data.mean()
# data_median=data.median()
# print('std\n',data_std)
# print('mean\n',data_mean)
# print('median\n',data_median)
# data.plot(kind='scatter',x=['Surface Area','Surface Area'],y=['Heating Load','Cooling Load'],title='scartter',grid=True)
# # This effectively overlays both series in a single plot; it does not show two separate scatter plots in one figure.
# plt.show()
# from matplotlib import animation  # package required for animated plots
# import numpy as np
# fig,ax = plt.subplots()  # subplots
# x = np.arange(0,2*np.pi,0.01)
# line, = ax.plot(x,np.sin(x))
# def animate(i):
#     line.set_ydata(np.sin(x+i/10))  # updates the y values
#     return line,
# def init():
#     line.set_ydata(np.sin(x))  # initial frame of the animation
#     return line,
# ani = animation.FuncAnimation(fig=fig,func=animate,init_func=init,interval=20)  # animation method: func is the per-frame update function, init_func the initialization function, interval the time between frame updates
# plt.show()
# ax1=plt.subplot(2,1,1)
# ax1.scatter(data['Surface Area'],data['Heating Load'],marker='*',label='sandiantu1')
# ax2=plt.subplot(2,1,2)
# ax2.scatter(data['Surface Area'],data['Cooling Load'],marker='*',label='sandiantu2')
# plt.show()
# x=data.iloc[:,0:8].values.astype(float)
# y1=data.iloc[:,8].values.astype(float)
# y2=data.iloc[:,9].values.astype(float)
# x_train,x_test,y_train,y_test=model_selection.train_test_split(x,y1,test_size=0.40)
# linreg = LinearRegression()  # build the linear model
# linregTr = linreg.fit(x_train,y_train)  # model fitted on the training set
# y_train_pred = linregTr.predict(x_train)  # model predictions
# y_test_pred = linregTr.predict(x_test)  # test the model on the test set
# mse_train = metrics.mean_squared_error(y_train,y_train_pred)
# mse_test = metrics.mean_squared_error(y_test,y_test_pred)
# decision_score = linregTr.score(x_train,y_train)
# predict_score = linregTr.score(x_test,y_test)  # accuracy score after prediction
# print(data)
# print('This is trained data\'s mean_squared_error:',mse_train)
# print('This is tested data\'s mean_squared_error:',mse_test)
# print('This is decision_score:',decision_score)
# print('This is predict_score:',predict_score)
from sklearn import model_selection
from sklearn import preprocessing
# from sklearn import LinearRegression  # importing the linear regression model this way does not work; it lives in sklearn.linear_model
from sklearn.linear_model import LinearRegression
# Feature matrix: the first eight columns of the sheet.
x = data.iloc[:, 0:8]
# Targets: column 8 is the one modelled below; column 9 is extracted here but
# not used in this section.
y1 = data.iloc[:, 8]
y2 = data.iloc[:, 9]

# Hold out 40% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore every score computed below, reproducible from run to
# run; without it each execution produced a different split.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y1, test_size=0.40, random_state=0)

# Train the linear model on the training split. fit() returns the estimator
# itself, so linregTr and linreg refer to the same fitted model.
linreg = LinearRegression()
linregTr = linreg.fit(x_train, y_train)

# score() on the training split: how well the model fits the data it saw.
decision_score = linregTr.score(x_train, y_train)
# score() on the held-out split: the model's actual performance.
predict_score = linregTr.score(x_test, y_test)
# decision_score_pred=linregTr.predict(x_train)  # accuracy prediction
# predict_score_pred=linregTr.predict(x_test)
# x_train_predict=linreg.predict(x_train)  # how the parameters change during prediction
# x_test_predict=linreg.predict(x_test)
# print(decision_score_pred)
# print(predict_score)
# print(predict_score)
# print(decision_score)
# print(predict_score)
# from sklearn import metrics
# Import only the metric that is used; a wildcard import would pull every
# public name of sklearn.metrics into this module's namespace.
from sklearn.metrics import mean_squared_error

# Predictions for the same rows the model was trained on.
y_train_pred = linregTr.predict(x_train)

# Training-set mean squared error. Arguments follow the documented
# (y_true, y_pred) order; the value is the same either way because the
# squared difference is symmetric.
mean_error = mean_squared_error(y_train, y_train_pred)
print(mean_error)