利用差分法得到AX=b,求解 F ( x ) = 0.5 ∗ x T A x − b T x F(x)=0.5*x^{T}Ax-b^{T}x F(x)=0.5∗xTAx−bTx
首先明确里面的A是对称正定矩阵,然后我们开始构造A,b。
import torch
import numpy as np
import time
def UU(X, order,prob):#X表示(x,t)
if prob==1:
temp = 10*(X[:,0]+X[:,1])**2 + (X[:,0]-X[:,1])**2 + 0.5
if order[0]==0 and order[1]==0:
return torch.log(temp)
if order[0]==1 and order[1]==0:#对x求偏导
return temp**(-1) * (20*(X[:,0]+X[:,1]) + 2*(X[:,0]-X[:,1]))
if order[0]==0 and order[1]==1:#对t求偏导
return temp**(-1) * (20*(X[:,0]+X[:,1]) - 2*(X[:,0]-X[:,1]))
if order[0]==2 and order[1]==0:
return - temp**(-2) * (20*(X[:,0]+X[:,1])+2*(X[:,0]-X[:,1])) ** 2 \
+ temp**(-1) * (22)
if order[0]==1 and order[1]==1:
return - temp**(-2) * (20*(X[:,0]+X[:,1])+2*(X[:,0]-X[:,1])) \
* (20*(X[:,0]+X[:,1])-2*(X[:,0]-X[:,1])) \
+ temp**(-1) * (18)
if order[0]==0 and order[1]==2:
return - temp**(-2) * (20*(X[:,0]+X[:,1])-2*(X[:,0]-X[:,1])) ** 2 \
+ temp**(-1) * (22)
if prob==2:
if order[0]==0 and order[1]==0:
return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
0.5*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
if order[0]==1 and order[1]==0:
return (3*X[:,0]*X[:,0]-1) * \
0.5*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
if order[0]==0 and order[1]==1:
return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
(np.exp(2*X[:,1])-np.exp(-2*X[:,1]))
if order[0]==2 and order[1]==0:
return (6*X[:,0]) * \
0.5*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
if order[0]==1 and order[1]==1:
return (3*X[:,0]*X[:,0]-1) * \
(np.exp(2*X[:,1])-np.exp(-2*X[:,1]))
if order[0]==0 and order[1]==2:
return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
2*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
if prob==3:
temp1 = X[:,0]*X[:,0] - X[:,1]*X[:,1]
temp2 = X[:,0]*X[:,0] + X[:,1]*X[:,1] + 0.1
if order[0]==0 and order[1]==0:
return temp1 * temp2**(-1)
if order[0]==1 and order[1]==0:
return (2*X[:,0]) * temp2**(-1) + \
temp1 * (-1)*temp2**(-2) * (2*X[:,0])
if order[0]==0 and order[1]==1:
return (-2*X[:,1]) * temp2**(-1) + \
temp1 * (-1)*temp2**(-2) * (2*X[:,1])
if order[0]==2 and order[1]==0:
return (2) * temp2**(-1) + \
2 * (2*X[:,0]) * (-1)*temp2**(-2) * (2*X[:,0]) + \
temp1 * (2)*temp2**(-3) * (2*X[:,0])**2 + \
temp1 * (-1)*temp2**(-2) * (2)
if order[0]==1 and order[1]==1:
return (2*X[:,0]) * (-1)*temp2**(-2) * (2*X[:,1]) + \
(-2*X[:,1]) * (-1)*temp2**(-2) * (2*X[:,0]) + \
temp1 * (2)*temp2**(-3) * (2*X[:,0]) * (2*X[:,1])
if order[0]==0 and order[1]==2:
return (-2) * temp2**(-1) + \
2 * (-2*X[:,1]) * (-1)*temp2**(-2) * (2*X[:,1]) + \
temp1 * (2)*temp2**(-3) * (2*X[:,1])**2 + \
temp1 * (-1)*temp2**(-2) * (2)
def F(prob,X):
return -UU(X,[0,2],prob) - UU(X,[2,0],prob)
np.random.seed(1234)
torch.manual_seed(1234)
class FD():
def __init__(self,bound,hx,prob):
self.prob = prob
self.dim = 2
self.hx = hx
self.nx = [int((bound[0,1] - bound[0,0])/self.hx[0]) + 1,int((bound[1,1] - bound[1,0])/self.hx[1]) + 1]
self.size = self.nx[0]*self.nx[1]
self.X = np.zeros([self.size,self.dim])
m = 0
for i in range(self.nx[0]):
for j in range(self.nx[1]):
self.X[m,0] = bound[0,0] + i*self.hx[0]
self.X[m,1] = bound[1,0] + j*self.hx[1]
m = m + 1
def matrix(self):
self.A = np.zeros([self.nx[0]*self.nx[1],self.nx[0]*self.nx[1]])
dx = self.hx[0];dy = self.hx[1]
for i in range(self.nx[0]):
for j in range(self.nx[1]):
dx = self.hx[0];dy = self.hx[1]
if i== 0 or i == self.nx[0] - 1 or j == 0 or j == self.nx[1] - 1:
self.A[i*self.nx[1]+j,i*self.nx[1]+j] = 1
else:
self.A[i*self.nx[1]+j,i*self.nx[1]+j] = 2*(dx/dy + dy/dx)
self.A[i*self.nx[1]+j,i*self.nx[1]+j-1] = -dx/dy
self.A[i*self.nx[1]+j,i*self.nx[1]+j+1] = -dx/dy
self.A[i*self.nx[1]+j,(i-1)*self.nx[1]+j] = -dy/dx
self.A[i*self.nx[1]+j,(i+1)*self.nx[1]+j] = -dy/dx
return self.A
def right(self,rig):
self.b = np.zeros([self.nx[0]*self.nx[1],1])
dx = self.hx[0];dy = self.hx[1]
for i in range(self.nx[0]):
for j in range(self.nx[1]):
if i== 0 or i == self.nx[0] - 1 or j == 0 or j == self.nx[1] - 1:
self.b[i*self.nx[1]+j] = UU(self.X[i*self.nx[1]+j:i*self.nx[1]+j+1,:],[0,0],self.prob)
else:
self.b[i*self.nx[1]+j] = rig[:,(i - 1)*(self.nx[1] - 2) + j - 1]*dx*dy
return self.b
def allsolve(self,rig):#第0个维度放数值,第1个维度放数量
u = np.zeros([rig.shape[0],self.size])
A = self.matrix()
for i in range(rig.shape[0]):
b = self.right(rig[i:i + 1])
#print(b)
tmp = np.linalg.solve(A,b)
u[i:i + 1,:] = tmp.T
return u
def error(u_pred,u_acc):
temp = ((u_pred - u_acc)**2).sum()/(u_acc**2).sum()
return temp**(0.5)
根据上面这个代码其实可以直接求解近似解,不过这里为了展示BFGS算法,所以我们还是取出A和b来求解优化问题。
def FF(X):#X = [1,n],b = [1,n]
return 0.5*np.dot(np.dot(X,A),X.T) - np.dot(X,b.T)
def GG(X):
tmp = np.dot(A,X.T) - b.T
return tmp.T
def HH(X):
return A
def gold(FF,GG,d,X):
rho = 0.4
am = 0;aM = 1;al = 0.5*(am + aM)
for i in range(100):
rig1 = FF(X) + rho*al*GG(X)@d.T
rig2 = FF(X) + (1 - rho)*al*GG(X)@d.T
lef = FF(X + al*d)
if lef <= rig1:
if lef >= rig2:
break
else:
am = al
al = am + 0.618*(aM - am)
else:
aM = al
al = am + 0.618*(aM - am)
al_k = al
#print(i)
return al_k,2*(i + 1)
def BFGS(FF,GG,HH,X,m,N):
tic = time.time()
eps = 1e-20
n = X.shape[1]
fun_iter = 0
X_new = np.zeros([1,n])
S = np.zeros([m,n])
Y = np.zeros([m,n])
alpha = np.zeros(m)
for i in range(N):
if i <= m:
if i == 0:
al_k,ite = gold(FF,GG,-GG(X),X)
fun_iter += ite
X_new = X - GG(X)*al_k
else:
s = X_new - X
y = GG(X_new) - GG(X)
S[i - 1:i,:] = s
Y[i - 1:i,:] = y
#d = np.linalg.solve(HH(X_new),-GG(X_new).T)
d = -GG(X).T
al_k,ite = gold(FF,GG,d.T,X_new)
#print(np.linalg.norm(X))
X = X_new.copy()
X_new = X_new - al_k*GG(X_new)
fun_iter += ite
#print(i,al_k,np.linalg.norm(X_new - X))
#print(i,al_k,np.linalg.norm(GG(X_new),np.linalg.norm(GG(X_new)))
#print(i,al_k,np.linalg.norm(GG(X_new)))
else:
s = X_new - X
y = GG(X_new) - GG(X)
S[0:m - 1,:] = S[1:m,:]
Y[0:m - 1,:] = Y[1:m,:]
S[m - 1:m,:] = s
Y[m - 1:m,:] = y
tmp = (s*y).sum()/(y*y).sum()
q = GG(X_new)
for j in range(m - 1,- 1,-1):
alpha[j] = (S[j:j + 1,:]*q).sum()/(S[j:j + 1,:]*Y[j:j + 1,:]).sum()
q = q - alpha[j]*Y[j:j + 1,:]
r = tmp*q
for j in range(0,m,1):
beta = (Y[j:j + 1,:]*r).sum()/(S[j:j + 1,:]*Y[j:j + 1,:]).sum()
r = r + S[j:j + 1,:]*(alpha[j] - beta)
al_k,ite = gold(FF,GG,-r,X_new)
X = X_new.copy()
X_new = X_new + (-r)
left = np.linalg.norm(GG(X))
rig = eps
if left < rig:
break
ela = time.time() - tic
print('iteration:%d,grad_2:%.3e,fun_iter = %d,time = %.2f'%(i + 1,np.linalg.norm(GG(X)),fun_iter,ela))
return X_new
#--------------------------------
bound = np.array([[0,1.0],[0,1.0]])
hx = [0.02,0.05]
prob = 3
fd = FD(bound,hx,prob)
M = fd.nx[0];N = fd.nx[1]
X = fd.X
x = X.reshape([M,N,2])[1:M - 1,1:N -1]
rig = F(prob,x.reshape(-1,2)).reshape(1,-1)
#print(rig.shape)
u_pred = fd.allsolve(rig)
u_acc = UU(X,[0,0],prob).reshape(1,-1)
#print(u_pred.shape,u_acc.shape)
print(error(u_pred,u_acc))
print(max(abs(u_pred[0,:] - u_acc[0,:])))
A = fd.matrix()
b = fd.right(rig).T
u_new = (np.linalg.solve(A,b.T)).T
print(max(abs(u_new[0,:] - u_acc[0,:])))
print(u_new.shape,b.shape)
print(np.linalg.norm(np.dot(A,u_new.T) - b.T),(np.dot(A,u_new.T) - b.T).shape)
print(np.linalg.norm(GG(u_new)))
print(np.linalg.norm(np.dot(u_new,A) - b),(np.dot(u_new,A) - b).shape)
#----------------------------------------
n = fd.size
m = 9
X_init = np.random.rand(1,n)
u_new = BFGS(FF,GG,HH,X_init,m,100)
print(u_new.shape)
print(FF(u_new))
print(max(abs(u_new[0,:] - u_acc[0,:])))
根据这个代码可以直接求解,然而发现求解大规模问题迭代耗时。这里就回到我们函数的定义上来理解。在定义FF,GG,HH函数的时候,里面出现了A矩阵,当规模很大的时候,矩阵A的规模也大,构造函数用时很长,我们需要想办法避免这种矩阵参与运算,尽量使用向量与向量之间的运算来定义函数。