2021-11-13 偏最小二乘法应用实例:Python 程序代码

本文中的 Python 代码,是在余胜威老师编写的《MATLAB优化算法案例分析与应用》第五章偏最小二乘法案例的 MATLAB 代码基础上直接修改得到的。具体内容请参考原书。

原MATLAB代码:

% Environment reset, sample data, and standardization for the partial least
% squares (PLS) regression that follows.
clc % clear the command window
clear all; % delete all workspace variables
close all; % close all open figure windows
format long 
% pz holds one sample per row; columns 1..n are used below as the independent
% variables and the remaining columns as the dependent variables.
pz=[191 36 50 5 162 60 
189 37 52 2 110 60 
193 38 58 12 101 101 
162 35 62 12 105 37 
189 35 46 13 155 58 
182 36 56 4 101 42 
211 38 56 8 101 38 
167 34 60 6 125 40 
176 31 74 15 200 40 
154 33 56 17 251 250 
169 34 50 17 120 38 
166 33 52 13 210 115 
154 34 64 14 215 105 
247 46 50 1 50 50 
193 36 46 6 70 31 
202 37 62 12 210 120 
176 37 54 4 60 25 
157 32 52 11 230 80 
156 33 54 15 225 73 
138 33 68 2 110 43]; 
mu=mean(pz); % mean of each column
sig=std(pz); % standard deviation of each column
rr=corrcoef(pz); % correlation coefficient matrix
data=zscore(pz); % standardize the data
n=3; % n is the number of independent variables
m=3; % m is the number of dependent variables
x0=pz(:,1:n);y0=pz(:,n+1:end); 
e0=data(:,1:n);f0=data(:,n+1:end); % standardized independent / dependent variable matrices
num=size(e0,1); % number of sample points
chg=eye(n); % initialize the w -> w* transformation matrix
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
% Extract up to n components one at a time; after each one, compute the
% residual sum of squares ss(i), the leave-one-out prediction error p(i) and
% the ratio Q_h2(i), and stop once Q_h2(i) falls below 0.0975.
for i=1:n 
 % compute w, w* and the score vector t
 matrix=e0'*f0*f0'*e0;   % matrix used by the partial least squares regression
 [vec,val]=eig(matrix); % eigenvalues and eigenvectors
 val=diag(val); % extract the diagonal elements
 [val,ind]=sort(val,'descend'); % sort in descending order
 w(:,i)=vec(:,ind(1)); % eigenvector belonging to the largest eigenvalue
 w_star(:,i)=chg*w(:,i); % compute w*
 t(:,i)=e0*w(:,i); % scores of component t_i
 alpha=e0'*t(:,i)/(t(:,i)'*t(:,i)); % compute alpha_i
 chg=chg*(eye(n)-w(:,i)*alpha'); % update the w -> w* transformation matrix
 e=e0-t(:,i)*alpha'; % residual matrix
 e0=e; 
 % compute ss(i), the error sum of squares of y
 beta=[t(:,1:i),ones(num,1)]\f0; % regression coefficients
 beta(end,:)=[]; % drop the constant term of the regression
 cancha=f0-t(:,1:i)*beta; % residual matrix
 ss(i)=sum(sum(cancha.^2)); % error sum of squares
 % compute p(i), the prediction error sum of squares
 for j=1:num 
 t1=t(:,1:i);f1=f0;  
 she_t=t1(j,:);she_f=f1(j,:); % keep the j-th sample point that is left out
 t1(j,:)=[];f1(j,:)=[]; % delete the j-th observation
 beta1=[t1,ones(num-1,1)]\f1; % regression coefficients without sample j
 beta1(end,:)=[]; % drop the constant term of the regression
 cancha=she_f-she_t*beta1; % residual vector for the left-out sample
 p_i(j)=sum(cancha.^2); 
 end 
 p(i)=sum(p_i); 
 if i>1 
 Q_h2(i)=1-p(i)/ss(i-1); 
 else 
 Q_h2(1)=1; 
 end 
 % NOTE(review): r is assigned only inside this branch; if Q_h2 never drops
 % below the threshold, r is undefined where t(:,1:r) is used after the loop.
 if Q_h2(i)<0.0975 
 fprintf('提出的成分个数 r=%d',i); 
 r=i; 
 break 
 end 
end 
% Regress the standardized Y on the first r components, then convert the
% coefficients back to the scale of the original (unstandardized) data.
beta_z=[t(:,1:r),ones(num,1)]\f0; % regression coefficients of Y on t
beta_z(end,:)=[]; % drop the constant term
xishu=w_star(:,1:r)*beta_z;
% regression coefficients of Y on X for the standardized data; each column is one regression equation
mu_x=mu(1:n);mu_y=mu(n+1:end); 
sig_x=sig(1:n);sig_y=sig(n+1:end); 
for i=1:m 
 ch0(i)=mu_y(i)-mu_x./sig_x*sig_y(i)*xishu(:,i); 
 % constant term of the regression equation for the original data
end 
for i=1:m 
 xish(:,i)=xishu(:,i)./sig_x'*sig_y(i); 
 % coefficients of the regression equation for the original data; each column is one equation
end 
sol=[ch0;xish] % display the coefficients: one equation per column, the first entry of each column is the constant term
w1=w(:,1)
w2=w(:,2)
wx1=w_star(:,1)
wx2=w_star(:,2)
tx1=t(:,1)'
tx2=t(:,2)' 
% Plots: a bar chart of the standardized regression coefficients and, for each
% dependent variable, predicted values against observed values.
% regression coefficients beta_z
%clc % clear the command window
%clear all; % delete all workspace variables
%close all; % close all open figure windows
%format short 
%load('mydata.m') 
figure, 
bar(xishu') 
%axis tight 
hold on 
% text boxes placed under the three bar groups
annotation('textbox',[0.26 0.14 0.086 0.07],'String',{' 单 杠 '}, 'FitBoxToText','off'); 
annotation('textbox',[0.56 0.14 0.086 0.07],'String',{' 弯 曲 '}, 'FitBoxToText','off'); 
annotation('textbox',[0.76 0.14 0.086 0.07],'String',{' 跳 高 '}, 'FitBoxToText','off'); 
ch0=repmat(ch0,num,1); 
yhat=ch0+x0*xish; % predicted values of y
y1max=max(yhat); 
y2max=max(y0); 
ymax=max([y1max;y2max]) 
cancha=yhat-y0; % residuals
% plotting: the line 0:ymax against itself is the y = x reference
figure, 
subplot(2,2,1) 
plot(0:ymax(1),0:ymax(1),yhat(:,1),y0(:,1),'*')
title('单杠成绩预测') 
subplot(2,2,2) 
plot(0:ymax(2),0:ymax(2),yhat(:,2),y0(:,2),'O') 
title('弯曲成绩预测') 
subplot(2,1,2) 
plot(0:ymax(3),0:ymax(3),yhat(:,3),y0(:,3),'H') 
title('跳高成绩预测') 

修改后的python代码:

import numpy as np
def backslash(a,b):
    """Solve ``a @ x = b`` for ``x``, in the spirit of MATLAB's ``a \\ b``.

    A square ``a`` is solved directly. Any other shape is solved in the
    least-squares sense through the normal equations
    ``(a.T @ a) @ x = a.T @ b``.

    Args:
        a: 2-D coefficient array of shape ``(rows, cols)``.
        b: Right-hand side with ``rows`` entries along its first axis; it
            may be 1-D (a single vector) or 2-D (one system per column).

    Returns:
        The solution array, with ``cols`` entries along its first axis.

    Raises:
        ValueError: If ``a`` is not 2-D or the row counts of ``a`` and ``b``
            differ.
        numpy.linalg.LinAlgError: If the system to solve is singular.
    """
    # asanyarray keeps ndarray subclasses (e.g. np.matrix) intact so the type
    # of the result is the same as before for existing callers.
    a = np.asanyarray(a)
    b = np.asanyarray(b)
    if a.ndim != 2:
        raise ValueError('coefficient array must be 2-D, got %d-D' % a.ndim)
    rows, cols = a.shape
    if b.ndim == 0 or b.shape[0] != rows:
        # Raising here replaces the old "print and return -1" sentinel, which
        # let callers continue with an int and fail later in unrelated code.
        raise ValueError(
            'dimensions do not match: a has %d rows, b has shape %s'
            % (rows, b.shape)
        )
    if rows == cols:
        return np.linalg.solve(a, b)
    gram = a.T.dot(a)
    projected = a.T.dot(b)
    return np.linalg.solve(gram, projected)

def slash(a,b):
    """Return ``x`` such that ``x @ a = b``, using ``backslash`` on transposes.

    The equation ``x @ a = b`` is transposed to ``a.T @ x.T = b.T``, solved
    with ``backslash``, and the solution is transposed back. Note the operand
    order: in MATLAB notation the result corresponds to ``b / a``.
    """
    transposed_solution = backslash(a.T, b.T)
    return transposed_solution.T
import numpy as np
# Sample matrix: one observation per row, six variables per column.
pz = np.array(
    [
        [191, 36, 50, 5, 162, 60],
        [189, 37, 52, 2, 110, 60],
        [193, 38, 58, 12, 101, 101],
        [162, 35, 62, 12, 105, 37],
        [189, 35, 46, 13, 155, 58],
        [182, 36, 56, 4, 101, 42],
        [211, 38, 56, 8, 101, 38],
        [167, 34, 60, 6, 125, 40],
        [176, 31, 74, 15, 200, 40],
        [154, 33, 56, 17, 251, 250],
        [169, 34, 50, 17, 120, 38],
        [166, 33, 52, 13, 210, 115],
        [154, 34, 64, 14, 215, 105],
        [247, 46, 50, 1, 50, 50],
        [193, 36, 46, 6, 70, 31],
        [202, 37, 62, 12, 210, 120],
        [176, 37, 54, 4, 60, 25],
        [157, 32, 52, 11, 230, 80],
        [156, 33, 54, 15, 225, 73],
        [138, 33, 68, 2, 110, 43],
    ]
)
# Mean of every column.
mu = np.mean(pz, axis=0)
print('mu', mu)
# Standard deviation of every column (NumPy default normalization, ddof=0).
sig = np.std(pz, axis=0)
print('sig', sig)
# Correlation coefficients between the columns.
rr = np.corrcoef(pz, rowvar=False)
print('rr', rr)
def zscoremalization(x):
    """Return the column-wise z-score standardization of ``x``.

    Every column is shifted to zero mean and scaled by its own standard
    deviation (NumPy default normalization, ``ddof=0``). The statistics are
    taken from ``x`` itself, so the function works for any 2-D input rather
    than only for the module-level sample matrix.

    Args:
        x: 2-D array-like with one sample per row.

    Returns:
        A float array with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    return (x - x.mean(axis=0)) / x.std(axis=0)
data = zscoremalization(pz)
# Column-wise standardization of pz. NumPy's default std (ddof=0) is used, so
# the standardized values differ slightly from MATLAB's zscore (N-1 divisor).
n = 3  # number of independent-variable columns
m = 3  # number of dependent-variable columns
x0 = pz[:, :n]
y0 = pz[:, n:]
e0 = data[:, :n]
f0 = data[:, n:]
num = e0.shape[0]  # number of samples
chg = np.eye(n)  # running w -> w* transformation matrix
w = np.zeros((n, n))
w_star = np.zeros((n, n))
t = np.zeros((num, n))
ss = np.zeros(n)  # residual sum of squares per component count
p_i = np.zeros(num)  # squared leave-one-out error of every sample
p = np.zeros(n)  # prediction error sum of squares per component count
Q_h2 = np.zeros(n)
r = n  # fall back to all n components if the stopping rule never triggers
for i in range(n):
    k = i + 1  # number of components extracted so far
    matrix = e0.T @ f0 @ f0.T @ e0
    # The matrix is symmetric, so eigh is used: it always returns real values.
    val, vec = np.linalg.eigh(matrix)
    ind = np.argmax(val)
    w[:, i] = vec[:, ind]
    w_star[:, i] = chg @ w[:, i]
    t[:, i] = e0 @ w[:, i]
    ga = t[:, i] @ t[:, i]
    alaph = e0.T @ t[:, i] / ga
    chg = chg @ (np.eye(n) - np.outer(w[:, i], alaph))
    e = e0 - np.outer(t[:, i], alaph)  # deflated predictor matrix
    e0 = e
    # Residual sum of squares of f0 regressed on the first k score columns.
    betao = np.hstack((t[:, :k], np.ones((num, 1))))
    beta = backslash(betao, f0)[:-1, :]  # drop the constant term
    cancha = f0 - t[:, :k] @ beta
    ss[i] = np.sum(np.square(cancha))
    # Leave-one-out prediction error, using the same k score columns.
    for j in range(num):
        she_t = t[j, :k]  # held-out sample
        she_f = f0[j, :]
        t1 = np.delete(t[:, :k], j, axis=0)
        f1 = np.delete(f0, j, axis=0)
        beta1o = np.hstack((t1, np.ones((num - 1, 1))))
        beta1 = backslash(beta1o, f1)[:-1, :]  # drop the constant term
        cancha = she_f - she_t @ beta1
        p_i[j] = np.sum(np.square(cancha))
    p[i] = np.sum(p_i)
    # Q_h2 = 1 - PRESS(k) / SS(k-1); the first component is always accepted.
    Q_h2[i] = 1 - p[i] / ss[i - 1] if i > 0 else 1
    if Q_h2[i] < 0.0975:
        r = k
        break
# Regression of the standardized responses on the first r components.
beta_zo = np.hstack((t[:, :r], np.ones((num, 1))))
beta_z = backslash(beta_zo, f0)[:-1, :]  # drop the constant term
xishu = w_star[:, :r] @ beta_z  # coefficients for standardized data, one equation per column
mu_x = mu[:n]
mu_y = mu[n:]
sig_x = sig[:n]
sig_y = sig[n:]
# Back-transform to the original data scale:
# xish[j, i] = xishu[j, i] / sig_x[j] * sig_y[i], ch0[i] = mu_y[i] - mu_x @ xish[:, i].
xish = xishu / sig_x[:, np.newaxis] * sig_y
ch0 = (mu_y - mu_x @ xish).reshape(1, m)
sol = np.vstack((ch0, xish))  # first row: constant terms; one equation per column
w1 = w[:, 0]
w2 = w[:, 1]
wx1 = w_star[:, 0]
wx2 = w_star[:, 1]
tx1 = t[:, 0]
tx2 = t[:, 1]

# Dump the inputs, the state left by the last loop iteration and the results.
for label, value in (
    ('pz', pz), ('data', data), ('x0', x0), ('y0', y0),
    ('e0', e0), ('f0', f0), ('num', num), ('chg', chg),
    ('matrix', matrix), ('vec', vec), ('val', val), ('ind', ind),
    ('w', w), ('w_star', w_star), ('t', t), ('ga', ga),
    ('alaph', alaph), ('e', e), ('betao', betao), ('beta', beta),
    ('ss', ss), ('t1', t1), ('f1', f1), ('she_t', she_t),
    ('she_f', she_f), ('beta1o', beta1o), ('beta1', beta1),
    ('cancha', cancha), ('p_i', p_i), ('p', p), ('Q_h2', Q_h2),
    ('r', r), ('beta_zo', beta_zo), ('beta_z', beta_z),
    ('xishu', xishu), ('mu_x', mu_x), ('mu_y', mu_y),
    ('sig_x', sig_x), ('sig_y', sig_y), ('ch0', ch0),
    ('xish', xish), ('sol', sol), ('w1', w1), ('w2', w2),
    ('wx1', wx1), ('wx2', wx2), ('tx1', tx1), ('tx2', tx2),
):
    print(label, value)

import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Figure 1: standardized coefficients, one bar group per dependent variable
# and one bar per independent variable (computed values, not typed-in ones).
plt.figure(1)
biaoqian = ('单杠', '弯曲', '跳高')
bar_colors = ('b', 'r', 'g')
bar_width = 0.3
index_male = np.arange(m)
for k in range(n):
    plt.bar(index_male + k * bar_width, height=xishu[k, :], width=bar_width,
            color=bar_colors[k % len(bar_colors)], label='x%d' % (k + 1))
plt.xticks(index_male + bar_width * (n - 1) / 2, biaoqian)
plt.legend()
plt.show()

# Figure 2: predicted against observed values with a y = x reference line.
yhat = ch0 + x0 @ xish
print('yhat', yhat)
y1max = np.max(yhat, axis=0)
print('y1max', y1max)
y2max = np.max(y0, axis=0)
print('y2max', y2max)
ymax = np.maximum(y1max, y2max)
print('ymax', ymax)
cancha = yhat - y0
print('cancha', cancha)
plt.figure(2)
biaoti = ('单杠成绩预测', '弯曲成绩预测', '跳高成绩预测')
for k in range(m):
    plt.subplot(2, 2, k + 1)
    plt.title(biaoti[k])
    plt.plot([0, ymax[k]], [0, ymax[k]], linewidth=0.5)
    plt.xlim(0, ymax[k])
    plt.ylim(0, ymax[k])
    plt.scatter(yhat[:, k], y0[:, k])
plt.show()

新手修改,如有不足之处,多谢批评指正。

你可能感兴趣的:(python,最小二乘法,matlab)