系统:Windows
CPU:i7-6820HK 超频至 3.8 GHz(NumPy 默认使用 CPU 运算)
Python:3.8.6
NumPy:1.19.2
import numpy as np # 用于矩阵运算
from sklearn.metrics import f1_score, accuracy_score # 用于模型评估
from time import time # 用于计算程序用时
# File locations.
Train_Path = "train_data.txt"  # training samples; the label is the last column
Test_Date_Path = "test_data.txt"  # test samples
Test_Label_Path = "answer.txt"  # labels for the test samples

# Load every comma-separated file into a float NumPy array.
Train = np.genfromtxt(Train_Path, dtype='float', delimiter=',')
Test_Date = np.genfromtxt(Test_Date_Path, dtype='float', delimiter=',')
Test_Label = np.genfromtxt(Test_Label_Path, dtype='float', delimiter=',')

# Split the training array into features (all columns but the last) and
# labels (the last column). Column slicing does this in one step instead of
# rebuilding the arrays row by row in Python; both results are views of Train.
Train_Date = Train[:, :-1]
Train_Label = Train[:, -1]
class Lr:
    """Binary logistic-regression classifier trained by batch gradient descent.

    Parameters:
        alpha: learning rate, i.e. the step size of each parameter update.
        max_iter: number of gradient-descent iterations to run.

    After ``fit`` the learned weights are stored in ``W`` (shape ``(1, nx)``)
    and the bias in ``b``; ``m`` and ``nx`` hold the number of training
    samples and features.
    """

    def __init__(self, alpha=0.01, max_iter=10000):
        """Store the hyper-parameters; no training happens here."""
        self.alpha = alpha
        self.max_iter = max_iter

    @staticmethod
    def _as_training_arrays(X, Y):
        """Validate a training set and return ``(X, Y)`` as float arrays.

        ``X`` must be a non-empty 2-D array of shape ``(m, nx)``. ``Y`` is
        flattened to a ``(1, m)`` row so that labels given as ``(m,)``,
        ``(m, 1)`` or ``(1, m)`` all behave the same.

        Raises:
            ValueError: if ``X`` is not a non-empty 2-D array or the number
                of labels differs from the number of samples.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (m, nx)")
        Y = np.asarray(Y, dtype=float).reshape(1, -1)
        if Y.shape[1] != X.shape[0]:
            raise ValueError(
                "Y has {} labels but X has {} samples".format(
                    Y.shape[1], X.shape[0]
                )
            )
        return X, Y

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        The exponential is only ever taken of a non-positive number, so very
        negative inputs do not overflow. For ``z >= 0`` the expression is the
        textbook formula; for ``z < 0`` the algebraically equal form
        ``exp(z) / (1 + exp(z))`` is used.
        """
        z = np.asarray(z)
        decay = np.exp(-np.abs(z))  # always in (0, 1], never overflows
        out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
        # Indexing with () turns a 0-d result back into a NumPy scalar and
        # leaves arrays of any other rank unchanged.
        return out[()]

    def gd(self, X, Y, W, b):
        """Run ``max_iter`` steps of batch gradient descent.

        Parameters:
            X: data matrix of shape ``(m, nx)``.
            Y: the ``m`` labels, in any shape that flattens to ``m`` values.
            W: initial weights of shape ``(1, nx)``.
            b: initial bias.

        Returns:
            The updated ``(W, b)`` pair.

        Raises:
            ValueError: if ``X`` / ``Y`` are empty or inconsistent in size.
        """
        X, Y = self._as_training_arrays(X, Y)
        # Take the sample count from X itself so that gd also works when it
        # is called directly, without a prior fit().
        inv_m = 1 / X.shape[0]
        for _ in range(self.max_iter):
            z = np.dot(W, X.T) + b          # (1, m) linear scores
            dz = self.sigmoid(z) - Y        # (1, m) prediction error
            dw = inv_m * np.dot(dz, X)      # (1, nx) gradient w.r.t. W
            db = inv_m * np.sum(dz)         # scalar gradient w.r.t. b
            W = W - self.alpha * dw
            b = b - self.alpha * db
        return W, b

    def fit(self, X, Y):
        """Train the model on samples ``X`` (``(m, nx)``) and labels ``Y``.

        The weights start as small random values in ``[0, 0.1)`` and the bias
        at 0; the trained parameters are stored in ``self.W`` and ``self.b``.

        Raises:
            ValueError: if ``X`` / ``Y`` are empty or inconsistent in size.
        """
        X, Y = self._as_training_arrays(X, Y)
        m, nx = X.shape
        self.m = m
        self.nx = nx
        W = np.random.random((1, nx)) * 0.1
        b = 0
        self.W, self.b = self.gd(X, Y, W, b)

    def predict(self, testX):
        """Return the predicted class (0 or 1) of every sample as a list.

        ``testX`` is a ``(k, nx)`` matrix; a single 1-D sample of length
        ``nx`` is also accepted and treated as one row. A sample is labelled
        1 when its predicted probability is strictly greater than 0.5.
        """
        samples = np.atleast_2d(np.asarray(testX, dtype=float))
        prob = self.sigmoid(np.dot(self.W, samples.T) + self.b)  # (1, k)
        return (prob[0] > 0.5).astype(int).tolist()
def score(y, ym):
    """Print the accuracy and the F1 score of predictions ``ym`` against ``y``.

    The accuracy is printed as a percentage with two decimals, the F1 score
    with four decimals. Nothing is returned.
    """
    accuracy_pct = accuracy_score(y, ym) * 100
    print("Accuracy:{:.2f}%".format(accuracy_pct))

    f1 = f1_score(y, ym)
    print("F1 Score:{:.4f}".format(f1))
# Time the whole run: training, prediction and evaluation.
start = time()

# Build a model with the default hyper-parameters and train it.
model = Lr()
model.fit(Train_Date, Train_Label)

# Predict the test samples and report the metrics against the true labels.
Ym = model.predict(Test_Date)
score(Test_Label, Ym)

# Stop the clock and print the elapsed wall-clock time in seconds.
end = time()
print("Time: {:.2f}s".format(end - start))
Accuracy:85.00%
F1 Score:0.7475
Time: 43.30s