酶活性预测(多项式回归模型)

单项式模型(一阶线性回归)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
%matplotlib inline

# Load the training and test sets from CSV (paths are relative to the
# current working directory).
train_data = pd.read_csv(filepath_or_buffer="datas/T-R-train.csv")
test_data = pd.read_csv(filepath_or_buffer="datas/T-R-test.csv")
# [image.png: image placeholder left by the page export; the picture itself is not in this file]
# Define x and y: column "T" is the feature, column "rate" is the target,
# taken from the training and test frames respectively.
x_train, y_train = train_data["T"], train_data["rate"]
x_test, y_test = test_data["T"], test_data["rate"]
# [image.png: image placeholder left by the page export; the picture itself is not in this file]
# Reshape the features into (n_samples, 1) column arrays: reshape(-1, 1)
# yields a 2-D array with a single column, which is the X layout the
# scikit-learn estimators below are fitted on.
x_train = np.array(x_train).reshape(-1, 1)
x_test = np.array(x_test).reshape(-1, 1)

# Train the baseline model: ordinary least squares on the raw feature,
# i.e. a straight line in T.
line_model = LinearRegression()
line_model.fit(x_train, y_train)

# Predict on both the training and the test split.
y_train_predict = line_model.predict(x_train)
y_test_predict = line_model.predict(x_test)

# Check the model's R^2 score on both splits.
from sklearn.metrics import r2_score
r2_train = r2_score(y_train, y_train_predict)
r2_test = r2_score(y_test, y_test_predict)
print(r2_train, r2_test)
# Output recorded in the original post (train, test):
#   0.016665703886981964   -0.758336343735132
# Both scores are near or below zero, so the straight line does not
# describe this data.

# Generate new data points: 300 evenly spaced T values in [40, 90], and the
# baseline model's prediction for each (presumably used to draw the fitted
# line in the figure that followed).
x_range = np.linspace(40, 90, 300).reshape(-1, 1)
new_y_predict = line_model.predict(x_range)
# [image.png: image placeholder left by the page export; the picture itself is not in this file]

多项式模型——二阶

# Polynomial model, second order: expand the single feature T into
# degree-2 polynomial features. The degree is spelled out (2 is also the
# library default) so this block reads the same way as the degree-5 one.
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
# Fit the transformer on the training features only, then reuse it on the
# test features so both go through the same expansion.
x_2_train = poly.fit_transform(x_train)
x_2_test = poly.transform(x_test)

# Train a linear regression on the expanded features.
line_model2 = LinearRegression()
line_model2.fit(x_2_train, y_train)
# Predict on both splits.
y2_train_predict = line_model2.predict(x_2_train)
y2_test_predict = line_model2.predict(x_2_test)
# R^2 score on both splits.
r2_train2 = r2_score(y_train, y2_train_predict)
r2_test2 = r2_score(y_test, y2_test_predict)
print(r2_train2, r2_test2)
# Output recorded in the original post (train, test):
#   0.970051540068942   0.9963954556468684

# Generate new data points: 300 evenly spaced T values in [40, 90]. The name
# x2_range is first bound to the raw grid and then rebound to its degree-2
# expansion, which is what line_model2 expects as input.
x2_range = np.linspace(40, 90, 300).reshape(-1, 1)
x2_range = poly.transform(x2_range)
new_y2_predict = line_model2.predict(x2_range)
# [image.png: image placeholder left by the page export; the picture itself is not in this file]

多项式模型——五阶

# Fifth-order model: same pipeline as the second-order one, but with
# degree-5 polynomial features.
poly5 = PolynomialFeatures(degree=5)
x_5_train = poly5.fit_transform(x_train)
x_5_test = poly5.transform(x_test)
# Train a linear regression on the expanded features.
line_model5 = LinearRegression()
line_model5.fit(x_5_train, y_train)
# Predict on both splits.
y5_train_predict = line_model5.predict(x_5_train)
y5_test_predict = line_model5.predict(x_5_test)
# R^2 score on both splits.
r2_train5 = r2_score(y_train, y5_train_predict)
r2_test5 = r2_score(y_test, y5_test_predict)
print(r2_train5, r2_test5)
# Output recorded in the original post (train, test):
#   0.9978527267187658   0.5437837627379174
# NOTE: the training score is higher than the second-order model's while the
# test score is much lower, which suggests the degree-5 fit is overfitting.
# Generate new data points: 300 evenly spaced T values in [40, 90], expanded
# with the same degree-5 transformer before prediction.
x5_range = np.linspace(40, 90, 300).reshape(-1, 1)
x5_range = poly5.transform(x5_range)
new_y5_predict = line_model5.predict(x5_range)
# [image.png: image placeholder left by the page export; the picture itself is not in this file]

你可能感兴趣的:(酶活性预测(多项式回归模型))