机器学习——最小二乘法
问题
数据集记录了从1896年开始,每届奥林匹克运动会(简称奥运会)男子100米比赛赢得金牌所需的比赛时间。
我们的目标是用这些数据学习一个函数模型,用来描述100米获胜时间如何依赖于奥运会举办年份,
并且用这个模型预测将来比赛中的获胜时间。
1.数据处理
def get_initial_data(filename):
    """Load (year, winning time) records from a comma-separated text file.

    Every non-blank line is split on commas; the first field is parsed with
    ``int`` and the second with ``float``. Any further fields are ignored.
    Records are collected in a dict keyed by the first field, so when the
    same key occurs more than once the last value wins while the record
    keeps the position of its first occurrence.

    Blank or whitespace-only lines are skipped so that a stray empty line
    in the input does not abort the load.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ``(int, float)`` tuples, one per distinct first field.

    Raises:
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line contains no comma.
    """
    records = {}
    with open(filename, "r") as f:
        # Iterate the file lazily instead of materialising all lines first.
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Nothing to parse on an empty line; int('') would raise.
                continue
            fields = line.split(',')
            records[int(fields[0])] = float(fields[1])
    return list(records.items())
def reckon_xt(data, file_path):
    """Write each record plus its derived products to a text file.

    For every record, ``x`` is taken from index 0 and ``t`` from index 1,
    and one line ``x,t,x*t,x*x`` is written. After all records have been
    written, the number of records is printed.

    The output file is managed by a ``with`` block so that it is flushed
    and closed as soon as writing finishes (or fails), which matters
    because the same path is read back later.

    Args:
        data: Iterable of indexable records whose first two items are
            numbers.
        file_path: Path of the output file; it is opened in ``'w'`` mode,
            so existing content is replaced.

    Returns:
        None.
    """
    count = 0
    with open(file_path, 'w') as out:
        for record in data:
            x = record[0]
            t = record[1]
            # f-string formatting of numbers matches str(), so the file
            # content is the same as explicit str() concatenation.
            out.write(f"{x},{t},{x * t},{x * x}\n")
            count += 1
    print("已计算 %d 条数据" % count)
# Step 1 driver: load the raw records, then write them back out with the
# additional x*t and x*x columns that the fitting step reads.
filename = "Olympic_100_initial_data.txt"
data = get_initial_data(filename)
file_path = "Olympic_100_data_.txt"
reckon_xt(data,file_path)
2.建立模型画图
import matplotlib.pyplot as plt
import numpy as np
from pylab import *
# Select SimHei as the sans-serif font. The plot title and axis labels in
# this file are Chinese; presumably the default font cannot render them.
mpl.rcParams['font.sans-serif'] = ['SimHei']
%matplotlib inline
def get_data(filename):
    """Read a four-column comma-separated file into four parallel lists.

    Every non-blank line is split on commas. Field 0 is parsed with
    ``int``; fields 1, 2 and 3 are parsed with ``float``. Blank or
    whitespace-only lines are skipped so that a stray empty line does not
    abort the load.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(x_list, t_list, xt_list, xx_list)`` holding, in file
        order, the values of columns 0, 1, 2 and 3 respectively.

    Raises:
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than four fields.
    """
    x_list, t_list, xt_list, xx_list = [], [], [], []
    with open(filename, "r") as f:
        # Iterate the file lazily instead of materialising all lines first.
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Nothing to parse on an empty line; int('') would raise.
                continue
            fields = line.split(',')
            # Parse the whole row before appending so the four lists can
            # never end up with different lengths.
            x = int(fields[0])
            t = float(fields[1])
            xt = float(fields[2])
            xx = float(fields[3])
            x_list.append(x)
            t_list.append(t)
            xt_list.append(xt)
            xx_list.append(xx)
    return x_list, t_list, xt_list, xx_list
def averagenum(num):
    """Return the arithmetic mean of a sequence of numbers.

    Args:
        num: A sized collection of numbers (it must support ``len``).

    Returns:
        The sum of the items divided by their count, as produced by true
        division.

    Raises:
        ZeroDivisionError: If ``num`` is empty.
    """
    # Built-in sum replaces the manual index loop.
    return sum(num) / len(num)
def reckon_w0_w1(filenames):
    """Fit the line ``t = w0 + w1 * x`` to the data file by least squares.

    The file is read with ``get_data`` and the closed-form solution is
    evaluated from the column means:

        w1 = (mean(x*t) - mean(x) * mean(t)) / (mean(x*x) - mean(x)**2)
        w0 = mean(t) - w1 * mean(x)

    ``w1`` is rounded to 7 decimal places first, and ``w0`` is computed
    from that rounded slope and then rounded to 7 decimal places itself.

    Args:
        filenames: Path of the four-column data file.

    Returns:
        A tuple ``(x_list, t_list, w0, w1)``: the x and t columns as read
        from the file, followed by the intercept and the slope.
    """
    x_list, t_list, xt_list, xx_list = get_data(filenames)

    mean_x, mean_t, mean_xt, mean_xx = (
        averagenum(column) for column in (x_list, t_list, xt_list, xx_list)
    )

    # Numerator and denominator of the slope formula, named for clarity.
    covariance = mean_xt - mean_x * mean_t
    variance = mean_xx - mean_x * mean_x

    slope = round(covariance / variance, 7)
    intercept = round(mean_t - slope * mean_x, 7)
    return x_list, t_list, intercept, slope
def picture_display(x_list, t_list, w0, w1):
    """Plot the observations as a scatter and overlay the fitted line.

    Args:
        x_list: x coordinates of the observations (horizontal axis).
        t_list: t coordinates of the observations (vertical axis).
        w0: Intercept of the line to draw.
        w1: Slope of the line to draw.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Fixed viewing window and captions.
    plt.xlim((1880, 2020))
    plt.ylim((9.5, 12))
    plt.title('奥运百米赛跑获奖时间(s)与年份的微妙关系')
    plt.xlabel('年份')
    plt.ylabel('时间(S)')

    # Raw observations.
    plt.scatter(x_list, t_list)

    # Fitted line, evaluated at the observed x values; an array is needed
    # so that w1 * years is element-wise.
    years = np.array(x_list)
    fitted = w0 + w1 * years
    plt.plot(years, fitted, color='red', label=' y = w0 + w1 * x')

    plt.legend()
    plt.show()
# Step 2 driver: fit the line on the four-column file written in step 1
# and display the scatter plot with the fitted line.
filenames = "Olympic_100_data_.txt"
x_list,t_list,w0, w1 = reckon_w0_w1(filenames)
picture_display(x_list, t_list,w0,w1)
##运行结果: