简单来说,K折交叉验证就是:将数据集平均划分成K份,每次取其中1份作为测试集,其余K-1份作为训练集,重复K次,使每一份都恰好做一次测试集,最后综合K次的结果来评估模型。
留一交叉验证是K折交叉验证的特殊情况:令K等于数据集的样本总数N,即将数据集划分成N份,每次只留一个样本作为测试集,其余N-1个样本作为训练集。
'''留一交叉验证'''
import numpy as np
# K-fold cross-validation: sample dataset.
# Each row is [time, year]; Get_line reads index 0 as the time and index 1
# as the year.
# NOTE(review): presumably Olympic men's 100 m winning times in seconds -
# confirm against the data source.
data = [
    [12, 1896],
    [11, 1900],
    [11, 1904],
    [10.8, 1908],
    [10.8, 1912],
    [10.8, 1920],
    [10.6, 1924],
    [10.8, 1928],
    [10.3, 1932],
    [10.3, 1936],
    [10.3, 1948],
    [10.4, 1952],
    [10.5, 1956],
    [10.2, 1960],
    [10.0, 1964],
    [9.95, 1968],
    [10.14, 1972],
    [10.06, 1976],
    [10.25, 1980],
    [9.99, 1984],
    [9.92, 1988],
    [9.96, 1992],
    [9.84, 1996],
    [9.87, 2000],
    [9.85, 2004],
    [9.69, 2008],
]
# Total number of samples; the driver loop runs one fold per sample.
length = len(data)
# 得到训练集和测试集
def Get_test_train(length, data, i):
    """Split ``data`` into a training list and a single held-out sample.

    Args:
        length: Unused by this function; kept so existing callers still work.
        data: List of samples. It is not modified.
        i: Index of the sample to hold out as the test sample.

    Returns:
        Tuple ``(train_data, test_data)``: a shallow copy of ``data`` without
        element ``i``, and element ``i`` itself.
    """
    held_out = data[i]
    # Work on a shallow copy so the caller's list keeps all of its samples.
    remaining = data[:]
    del remaining[i]
    return remaining, held_out
# 得到线性回归直线
def Get_line(train_data):
    """Fit the least-squares line ``time = w0 + w1 * year`` to the training rows.

    Args:
        train_data: Non-empty sequence of ``[time, year]`` pairs.

    Returns:
        Tuple ``(w0, w1)``: the intercept and the slope of the fitted line.

    Raises:
        ValueError: If ``train_data`` is empty, or if every year is identical
            (the slope is undefined in that case).
    """
    if len(train_data) == 0:
        raise ValueError('train_data must contain at least one sample')

    samples = np.asarray(train_data, dtype=float)
    time = samples[:, 0]
    year = samples[:, 1]

    # Average over the rows actually passed in. Dividing by the size of the
    # full dataset (one more than the training set after a sample has been
    # held out) would bias both the slope and the intercept.
    mean_time = time.mean()
    mean_year = year.mean()

    # Centred form of the least-squares slope: algebraically the same as
    # (mean(xy) - mean(x)mean(y)) / (mean(xx) - mean(x)^2), but it avoids
    # subtracting two large, nearly equal numbers.
    year_dev = year - mean_year
    spread = np.dot(year_dev, year_dev)
    if spread == 0:
        raise ValueError('cannot fit a line: all year values are identical')

    w1 = np.dot(year_dev, time - mean_time) / spread
    w0 = mean_time - w1 * mean_year
    return w0, w1
# 得到损失函数
def Get_loss_func(w0, w1, test_data, dic=None):
    """Record the squared prediction error of a line on one held-out sample.

    The line is ``t = w0 + w1 * x``. It is evaluated at ``test_data[1]`` and
    compared with the observed value ``test_data[0]``.

    Args:
        w0: Intercept of the line.
        w1: Slope of the line.
        test_data: One ``[time, year]`` sample.
        dic: Optional dict to store the result in. When omitted, the
            module-level ``dic`` is used, and created if it does not exist
            yet, so three-argument callers keep accumulating into one dict.

    Returns:
        The dict that was updated. Its key is the line formatted as
        ``'t = {w0} + {w1}x'`` and its value is the squared error.
    """
    if dic is None:
        # Fall back to the shared module-level dict, creating it on first use
        # so the function also works when this file is imported rather than
        # run as a script.
        dic = globals().setdefault('dic', {})

    time_real = test_data[0]
    # Plain arithmetic gives the same value as formatting the numbers into a
    # string and calling eval on it, without executing generated code.
    time_predict = w0 + w1 * test_data[1]
    # Store a built-in float even when w0 / w1 are NumPy scalars.
    loss = float((time_predict - time_real) ** 2)
    dic['t = {} + {}x'.format(w0, w1)] = loss
    return dic
if __name__ == '__main__':
    # Maps each fitted regression line (formatted as text) to its squared
    # error on the sample that was held out when the line was fitted.
    dic = {}
    for i in range(length):
        train_data, test_data = Get_test_train(length, data, i)
        w0, w1 = Get_line(train_data)
        # One call per fold is enough: the function stores the loss in the
        # shared dict and returns that same dict.
        dic = Get_loss_func(w0, w1, test_data)
    # Pick the first line with the smallest held-out loss in a single pass.
    best_line = min(dic, key=dic.get)
    min_loss = dic[best_line]
    print('最佳回归直线:', best_line)
    print('最小损失函数:', min_loss)