第3章
3.1 通过线性回归预测牧羊犬体重
3.1.1 数据录入
train_x = [27, 29, 34, 40, 42, 47, 48, 49, 50, 52, 52, 52, 54]
train_y = [6, 7.5, 9, 10.7, 12.8, 15.1, 16, 18.5, 19.4, 18.4, 19.7, 21.8, 21.7]
print_shape(train_x)
print_shape(train_y)
3.1.2 定义线性回归模型
model = linear_regressor()
3.1.3 训练线性回归模型
model.train(train_x, train_y)
3.1.4 模型可视化
model.show()
3.1.5 模型预测-predict()函数
x = 40
pred_y = model.predict(x)
print(pred_y)
3.1.6 模型中的参数
# Read the fitted parameters from the model: the first entry is kept as k and
# the second as b, the two coefficients of the line y = k * x + b.
weights = model.get_weights()
print(weights)
k, b = weights[0], weights[1]
print("k=", k)
print("b=", b)
3.1.7 定义线性函数
def linear_function(x, k, b):
    """Evaluate the straight line ``y = k * x + b`` at ``x``.

    Args:
        x: Input value.
        k: Slope of the line.
        b: Intercept of the line.

    Returns:
        The value ``k * x + b``.
    """
    return k * x + b
3.1.8 使用线性函数进行预测
x = 40
pred_y = linear_function(x, k, b)
print(pred_y)
3.1.9 模型分析 - 异常数据对拟合结果的影响
# Overwrite the sixth training target (index 5, which is 15.1 in the data
# entered above) with 30, then fit a fresh linear model on the modified data
# and plot it. Note that this mutates train_y in place.
train_y[5] = 30
model = linear_regressor()
model.train(train_x, train_y)
model.show()
3.2 通过多项式回归预测牧羊犬体重
3.2.1 数据录入
train_x = [27, 29, 34, 40, 42, 47, 48, 49, 50, 52, 52, 52, 54]
train_y = [6, 7.5, 9, 10.7, 12.8, 15.1, 16, 18.5, 19.4, 18.4, 19.7, 21.8, 21.7]
3.2.2 定义多项式回归模型
model = poly_regressor(2)
3.2.3 训练多项式回归模型
model.train(train_x,train_y)
3.2.4 模型可视化
model.show()
3.2.5 模型预测
x = 40
pred_y = model.predict(x)
print(pred_y)
3.2.6 模型次数对拟合效果的影响
# Fit and plot polynomial models built with arguments 3, 10 and 30 on the same
# training data. `model` is rebound each time, so after this run it refers to
# the last model, poly_regressor(30).
model = poly_regressor(3)
model.train(train_x,train_y)
model.show()
model = poly_regressor(10)
model.train(train_x,train_y)
model.show()
model = poly_regressor(30)
model.train(train_x,train_y)
model.show()
3.2.7 异常数据对拟合效果的影响
# Overwrite the sixth training target (index 5) with 30, mutating train_y in
# place, then fit and plot poly_regressor(2) and poly_regressor(30) on the
# modified data.
train_y[5] = 30
model = poly_regressor(2)
model.train(train_x, train_y)
model.show()
model = poly_regressor(30)
model.train(train_x,train_y)
model.show()
3.3 线性回归模型评估与测试集
3.3.1 训练线性模型
train_x = [27, 29, 34, 40, 42, 47, 48, 49, 50, 52, 52, 52, 54]
train_y = [6, 7.5, 9, 10.7, 12.8, 15.1, 16, 18.5, 19.4, 18.4, 19.7, 21.8, 21.7]
model = linear_regressor()
model.train(train_x, train_y)
model.show()
3.3.2 定义误差函数
def mse_error(pred, y):
    """Return the mean squared error between predictions and targets.

    Args:
        pred: Indexable sequence of predicted values.
        y: Indexable sequence of target values; it is read at every index
            of ``pred``, so it must be at least as long as ``pred``.

    Returns:
        The sum of ``(y[i] - pred[i]) ** 2`` over all indices of ``pred``,
        divided by ``len(pred)``.

    Raises:
        ZeroDivisionError: If ``pred`` is empty.
        IndexError: If ``y`` is shorter than ``pred``.
    """
    squared_sum = 0
    for i, predicted in enumerate(pred):
        squared_sum = squared_sum + (y[i] - predicted) ** 2
    return squared_sum / len(pred)
print(mse_error(train_x, train_y))
3.3.3 计算拟合误差
pred_y = model.predict(train_x)
error = mse_error(pred_y, train_y)
print(error)
3.3.4 把误差计算流程写进函数
def compute_error(model, x, y):
    """Return the mean squared error of ``model``'s predictions for ``x``.

    Args:
        model: Object exposing ``predict(x)``.
        x: Inputs passed unchanged to ``model.predict``.
        y: Target values compared against the predictions.

    Returns:
        ``mse_error(model.predict(x), y)``.
    """
    return mse_error(model.predict(x), y)
3.3.5 模型比较
model2 = poly_regressor(3)
model2.train(train_x, train_y)
model2.show()
print(compute_error(model2, train_x, train_y))
model3 = poly_regressor(30)
model3.train(train_x, train_y)
model3.show()
print(compute_error(model3, train_x, train_y))
3.3.6 训练集、测试集和过拟合问题
# Test data, followed by the error of each previously trained model on it.
test_x = [23, 31, 32, 38, 40, 45, 49, 50, 50, 51, 51, 53, 55]
test_y = [6.3, 7.2, 9.1, 10.5, 12.9, 15.5, 15.9, 18.3, 19.7, 18.9, 19.3, 21.3, 22.1]
for label, fitted in (
    ("线性回归误差:", model),
    ("3次多项式误差:", model2),
    ("30次多项式误差:", model3),
):
    print(label, compute_error(fitted, test_x, test_y))
3.4 线性分类预测性别
3.4.1 读取数据
train_x = [60, 56, 60, 55, 60, 57, 65, 60, 62, 59, 43, 52, 41, 45, 43, 50, 46, 52, 56, 56]
train_y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
3.4.2 定义线性分类模型
model = linear_classifier()
3.4.3 训练线性分类模型
model.train(train_x, train_y)
3.4.4 模型可视化
model.show()
3.4.5 模型应用 - predict()函数
x = 60
pred_y = model.predict(x)
print(pred_y)
3.4.6 模型中的参数
# Read the fitted parameters from the classifier: the first entry is kept as k
# and the second as b.
weights = model.get_weights()
print(weights)
k, b = weights[0], weights[1]
print("k=", k)
print("b=", b)
3.4.7 定义线性分类函数
def decision_function(x, k, b):
    """Classify ``x`` by the sign of the linear score ``k * x + b``.

    Args:
        x: Input value.
        k: Weight applied to ``x``.
        b: Bias added to the weighted input.

    Returns:
        ``1`` if ``k * x + b`` is strictly positive, otherwise ``-1``
        (a score of exactly zero therefore maps to ``-1``).
    """
    return 1 if k * x + b > 0 else -1
pred = decision_function(3, 2, -5.5)
print(pred)
pred = decision_function(3, 2, -6.5)
print(pred)
3.4.8 使用线性分类函数进行预测
k, b = model.get_weights()
x = 60
pred_y = decision_function(x, k, b)
print(pred_y)
3.4.9 准确率计算
def accuracy(pred, y):
    """Return the fraction of predictions that equal their labels.

    Args:
        pred: Indexable sequence of predicted labels.
        y: Indexable sequence of true labels; it is read at every index of
            ``pred``, so it must be at least as long as ``pred``.

    Returns:
        The number of indices where ``pred[i] == y[i]`` divided by
        ``len(pred)``.

    Raises:
        ZeroDivisionError: If ``pred`` is empty.
        IndexError: If ``y`` is shorter than ``pred``.
    """
    right = 0
    for i, predicted in enumerate(pred):
        if predicted == y[i]:
            right += 1
    # Every prediction is counted once, so the total is simply len(pred).
    return right / len(pred)
pred_y = model.predict(train_x)
acc = accuracy(pred_y, train_y)
print(acc)
3.4.10 线性分类与线性回归的比较
model2 = linear_regressor()
model2.train(train_x, train_y)
model2.show()
3.5 利用身高和体重预测性别
3.5.1 读取数据
train_x_m = [[163, 60], [164, 56], [165, 60], [168, 55], [169, 60],[170, 57], [170, 65], [171, 60], [170, 62], [169, 59],[153, 43], [158, 52], [156, 41], [158, 45], [159, 43],[160, 50], [159, 46], [158, 52], [157, 56], [158, 55],[167, 53], [168, 52], [163, 65], [171, 52], [169, 52],[170, 57], [170, 60], [168, 52], [166, 60], [165, 51],[153, 43], [158, 55], [156, 41], [156, 57], [159, 43],[163, 41], [162, 56], [155, 52], [152, 56], [153, 55]]
train_x_s = [60, 56, 60, 55, 60, 57, 65, 60, 62, 59, 43, 52, 41, 45, 43, 50, 46, 52, 56, 55, 53, 52, 65, 52, 52, 57, 60, 52, 60, 51, 43, 55, 41, 57, 43, 41, 56, 52, 56, 55]
train_y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
test_x_m = [[166, 58], [162, 56], [178, 66], [153, 50], [140, 60], [160, 55]]
test_x_s = [58, 56, 66, 50, 60, 55]
test_y = [1, 1, 1, -1, -1, -1]
3.5.2 数据可视化
visualize_data2D(train_x_s, train_y)
visualize_data3D(train_x_m, train_y)
3.5.3 多变量分类训练与验证
model = linear_classifier()
model.train(train_x_m, train_y)
pred_y = model.predict(test_x_m)
acc = accuracy(pred_y, test_y)
print(acc)
3.5.4 多变量分类器 vs 单变量分类器
# Train a second classifier on the single-variable data for comparison.
model2 = linear_classifier()
model2.train(train_x_s, train_y)
# The predictions must be stored in pred_y: the accuracy call below reads
# pred_y, and with any other name it would silently score stale predictions
# from an earlier model instead of model2.
pred_y = model2.predict(test_x_s)
acc = accuracy(pred_y, test_y)
print(acc)
3.6 利用感知器完成鸢尾花分类
3.6.1 加载数据集
iris = data.get('iris-simple')
3.6.2 展示数据集
fig() + plot(iris)
3.6.3 创建分类器
blc = binary_linear_classifier()
3.6.4 训练分类器
blc.train(iris, alg=Perceptron())
3.6.5 设置感知器的学习率
blc1 = binary_linear_classifier()
blc1.train(iris, alg=Perceptron(lr=0.4))
blc2 = binary_linear_classifier()
blc2.train(iris, alg=Perceptron(lr=0.05))
blc3 = binary_linear_classifier()
blc3.train(iris, alg=Perceptron(w=[1 ,1], b=1))
3.6.6 比较训练结果
fig() + plot(iris) + plot(blc) + plot(blc1) + plot(blc2)
3.7 利用支持向量机完成鸢尾花分类
3.7.1 加载数据集
iris=data.get('iris-simple')
fig() + plot(iris)
3.7.2 创建分类器
blc = binary_linear_classifier()
3.7.3 用支持向量机训练分类器
blc.train(iris, alg=SVM())
fig() + plot(iris) + plot(blc)
3.8 分类器的测试与应用
3.8.1 加载数据集
iris=data.get('iris-simple')
fig() + plot(iris)
3.8.2 数据分割
iris_train, iris_test = iris.split(7,3)
fig() + plot(iris_train)
3.8.3 获取分类器
blc1=binary_linear_classifier()
blc2=binary_linear_classifier()
blc1.train(iris_train, alg=Perceptron(lr=0.2))
blc2.train(iris_train, alg=SVM())
fig() + plot(iris_train) + plot(blc1) + plot(blc2)
3.8.4 测试分类器
# Score both trained classifiers on the test split and report the results.
acc1, acc2 = blc1.accuracy(iris_test), blc2.accuracy(iris_test)
print('Perceptron Accuracy:', acc1)
print('SVM Accuracy:', acc2)
3.8.5 分类器应用
point = [2, 0.7]
fig() + plot(iris) + plot(blc1) + plot(blc2) + plot([point])
label1 = blc1.predict(point)
label2 = blc2.predict(point)
print('Perceptron Prediction: ', label1)
print('SVM Prediction: ', label2)
3.9 理解K均值算法
3.9.1 获取数据集
iris = data.get('iris')
feature, label = iris[0]
print("Feature : ", feature)
3.9.2 数据集特征选取
def select_features(feature):
    """Return the elements of ``feature`` at positions 2 and 3.

    Args:
        feature: Sliceable sequence of feature values.

    Returns:
        The slice ``feature[2:4]``.
    """
    return feature[2:4]
iris2 = iris.map(select_features, on_field=0)
fig() + plot(iris2, type='scatter')
3.9.3 创建K均值聚类模型
model = KMeans(K=3)
3.9.4 模型训练并观察训练效果
def select_features(feature):
    """Return the elements of ``feature`` at positions 2 and 3.

    Args:
        feature: Sliceable sequence of feature values.

    Returns:
        The slice ``feature[2:4]``.
    """
    return feature[2:4]
iris2 = iris.map(select_features, on_field=0)
model.train(iris2.field(0))
fig() + plot(model, iris2, type='cluster_statistics')
3.9.5 重复并比较
model2 = KMeans(K=3)
model2.train(iris.field(0))
fig() + plot(model2, iris, type='cluster_statistics')