求解非线性模型 $f(x, \beta)=\beta_0+\beta_1 e^{-\beta_2 x^2}$,已知一组观测值 $\left(x_i, y_i\right)$,
利用scipy.optimize.leastsq函数求解最佳最小二乘拟合。
import numpy as np
from scipy.optimize import leastsq
import matplotlib.pyplot as plt
# Ground-truth parameters (b0, b1, b2) used to generate the exact curve
beta = (0.25, 0.75, 0.5)
def f(x, b0, b1, b2):
    """Evaluate the model ``b0 + b1 * exp(-b2 * x**2)``.

    Args:
        x: Scalar or NumPy array of sample points.
        b0: Constant offset.
        b1: Amplitude of the exponential term.
        b2: Coefficient multiplying ``x**2`` inside the exponent.

    Returns:
        The model value(s) at ``x``.
    """
    return b0 + b1 * np.exp(-b2 * x ** 2)
def g(beta):
    """Return the residuals between the observations and the model.

    Reads the module-level ``xdata`` and ``ydata`` and evaluates ``f`` with
    the candidate parameters, so it can be passed directly to ``leastsq``.

    Args:
        beta: Sequence ``(b0, b1, b2)`` of candidate model parameters.

    Returns:
        ``ydata - f(xdata, *beta)``: a 1-D array with one residual per
        observation (shape ``(50,)`` for the 50-point grid used in this
        script).
    """
    return ydata - f(xdata, *beta)
# Noisy observations: sample the exact model on a grid and add Gaussian noise
xdata = np.linspace(0, 5, 50)
y = f(xdata, *beta)
ydata = y + 0.05 * np.random.randn(len(xdata))

# Initial guess for (b0, b1, b2)
beta_start = (1, 1, 1)

# beta_opt holds the fitted parameters.
# Without full_output=True, leastsq returns (x, ier), i.e. the second value
# would be the integer status flag rather than a covariance. Request the full
# output so that beta_cov really is cov_x (it may be None when the Jacobian
# is singular).
beta_opt, beta_cov, _infodict, mesg, ier = leastsq(g, beta_start, full_output=True)
if ier not in (1, 2, 3, 4):
    # leastsq reports success only through these four flag values
    print(f"leastsq did not find a solution: {mesg}")

# Exact curve together with the noisy observations
plt.plot(xdata, y, 'r', label='Exact')
plt.scatter(xdata, ydata, edgecolors='k', label='observations')

# Fitted curve, evaluated on the same grid
y_pred = f(xdata, *beta_opt)
plt.plot(xdata, y_pred, 'b', label='Prediction')
plt.legend()
plt.show()
与上一个问题不同,该方法利用目标函数的梯度信息构建Jacobi矩阵,求解 $\theta_1, \theta_2$ 和 $\phi$。
利用optimize.least_squares()求解。
$\text{all\_phase} \times A \times B \times C=\left[\begin{array}{l}\alpha \\ \beta\end{array}\right]$,求解 $\theta_1, \theta_2, \phi$
其中,
$$A=\frac{1}{\sqrt{2}}\left[\begin{array}{cc}1+i \cos 2 \theta_1 & i \sin 2 \theta_1 \\ i \sin 2 \theta_1 & 1-i \cos 2 \theta_1\end{array}\right]$$
$$B=e^{i \frac{\pi}{2}}\left[\begin{array}{cc}\cos 2 \theta_2 & \sin 2 \theta_2 \\ \sin 2 \theta_2 & -\cos 2 \theta_2\end{array}\right]$$
$$C=\left[\begin{array}{l}1 \\ 0\end{array}\right]$$
$$\text{all\_phase}=e^{i \phi}$$
$$\left[\begin{array}{l}\alpha \\ \beta\end{array}\right]=\left[\begin{array}{c}\cos \psi_1 \\ e^{i \psi_2} \sin \psi_1\end{array}\right]$$
以及,$\theta_1, \theta_2, \phi$ 为未知参数,$\theta_1 \in[0, \pi), \theta_2 \in[0, \pi), \phi \in[0, \pi)$;$\psi_1$ 和 $\psi_2$ 为已知参数,$\psi_1, \psi_2 \in \mathbb{R}$。
步骤:
详细步骤:
# This snippet uses sympy before the file's later import, so bring it into
# scope here to keep the snippet runnable on its own.
import sympy as sp

# Unknown parameters; to be solved for
theta1 = sp.Symbol('theta1', real=True)
theta2 = sp.Symbol('theta2', real=True)
phi = sp.Symbol('phi', real=True)

# Known hyperparameters; to be set by the caller
psi1 = sp.Symbol('psi1', real=True)
psi2 = sp.Symbol('psi2', real=True)
alpha = sp.cos(psi1)  # real number
beta = sp.sin(psi1) * sp.exp(psi2 * 1j)  # complex number

# Construct the matrices
x = theta1 * 2
y = theta2 * 2
# sympy's I is equivalent to Python's built-in 1j.
# NOTE: using 1j and np.sqrt(1/2) makes the simplified expressions carry
# floating-point coefficients (0.7071..., 1.0*I) instead of exact ones.
A = sp.Matrix([[1 + 1j * sp.cos(x), 1j * sp.sin(x)],
               [1j * sp.sin(x), 1 - 1j * sp.cos(x)]]) * np.sqrt(1/2)
B = sp.Matrix([[sp.cos(y), sp.sin(y)], [sp.sin(y), - sp.cos(y)]]) * 1j
C = sp.Matrix([[1], [0]])
all_phase = sp.exp(phi * 1j)

# Objective: all_phase * A * B * C should equal the vector (alpha, beta)
D = A * B * C * all_phase
D1 = sp.simplify(D)

# Split the two complex equations into four real residuals
J1 = sp.simplify(sp.re(D1[0]) - sp.re(alpha))
J2 = sp.simplify(sp.im(D1[0]) - sp.im(alpha))
J3 = sp.simplify(sp.re(D1[1]) - sp.re(beta))
J4 = sp.simplify(sp.im(D1[1]) - sp.im(beta))
$$D_1=\left[\begin{array}{c}0.707106781186548\left(i \cos \left(2 \theta_2\right)-\cos \left(2 \theta_1-2 \theta_2\right)\right) e^{1.0 i \phi} \\ 0.707106781186548\left(i \sin \left(2 \theta_2\right)-\sin \left(2 \theta_1-2 \theta_2\right)\right) e^{1.0 i \phi}\end{array}\right]$$
$$\alpha,\beta=\left(\cos \left(\psi_1\right), \quad e^{1.0 i \psi_2} \sin \left(\psi_1\right)\right)$$
四个方程 $J_{1}$, $J_{2}$, $J_{3}$, $J_{4}$ 分别为
$$J_{1}=-0.707106781186548 \sin (1.0 \phi) \cos \left(2 \theta_2\right)-0.707106781186548 \cos (1.0 \phi) \cos \left(2\left(\theta_1-\theta_2\right)\right)-\cos \left(\psi_1\right)$$
$$J_{2}=-0.707106781186548 \sin (1.0 \phi) \cos \left(2\left(\theta_1-\theta_2\right)\right)+0.707106781186548 \cos (1.0 \phi) \cos \left(2 \theta_2\right)$$
$$J_{3}=-0.707106781186548 \sin (1.0 \phi) \sin \left(2 \theta_2\right)-\sin \left(\psi_1\right) \cos \left(1.0 \psi_2\right)-0.707106781186548 \sin \left(2\left(\theta_1-\theta_2\right)\right) \cos (1.0 \phi)$$
$$J_{4}=-0.707106781186548 \sin (1.0 \phi) \sin \left(2 \theta_1-2 \theta_2\right)-\sin \left(\psi_1\right) \sin \left(1.0 \psi_2\right)+0.707106781186548 \sin \left(2 \theta_2\right) \cos (1.0 \phi)$$
# Simplified partial derivatives of the residuals J1, J3 and J4 with respect
# to each unknown, generated row by row (one row per residual) and unpacked
# into the nine individual names. J2 is not differentiated here.
(dJ1_theta1, dJ1_theta2, dJ1_phi,
 dJ3_theta1, dJ3_theta2, dJ3_phi,
 dJ4_theta1, dJ4_theta2, dJ4_phi) = (
    sp.simplify(sp.diff(residual, unknown))
    for residual in (J1, J3, J4)
    for unknown in (theta1, theta2, phi)
)
雅可比矩阵的各个元素(按行排列):dJ1_theta1,dJ1_theta2,dJ1_phi,dJ3_theta1,dJ3_theta2,dJ3_phi,dJ4_theta1,dJ4_theta2,dJ4_phi
$$\mathbf{J}=\left[\begin{array}{ccc}\frac{\partial \mathbf{f}}{\partial x_1} & \cdots & \frac{\partial \mathbf{f}}{\partial x_n}\end{array}\right]=\left[\begin{array}{ccc}\frac{\partial f_1}{\partial x_1} & \cdots & \frac{\partial f_1}{\partial x_n} \\ \vdots & \ddots & \vdots \\ \frac{\partial f_m}{\partial x_1} & \cdots & \frac{\partial f_m}{\partial x_n}\end{array}\right]$$
import sympy as sp
import numpy as np
from scipy import optimize
# Objective function
def fun_tf_ls(x, psi1, psi2):
    """Return the residuals [J1, J3, J4] for the unknown angles.

    The residuals are the real part of the first component and the real and
    imaginary parts of the second component of
    ``all_phase * A * B * C - (alpha, beta)``. J2 is left out; because the
    left-hand side has unit norm, it also vanishes once these three are
    exactly zero.

    Args:
        x: Sequence ``[theta1, theta2, phi]`` of the unknown parameters.
        psi1: Known angle defining ``alpha = cos(psi1)`` and ``|beta|``.
        psi2: Known phase of ``beta = exp(1j * psi2) * sin(psi1)``.

    Returns:
        A list with the three residual values.
    """
    theta1, theta2, phi = x[0], x[1], x[2]
    scale = np.sqrt(1 / 2)
    delta = 2 * (theta1 - theta2)

    j1 = (- scale * np.sin(phi) * np.cos(2 * theta2)
          - scale * np.cos(phi) * np.cos(delta) - np.cos(psi1))
    j3 = (- scale * np.sin(phi) * np.sin(2 * theta2)
          - scale * np.sin(delta) * np.cos(phi) - np.sin(psi1) * np.cos(psi2))
    j4 = (- scale * np.sin(phi) * np.sin(delta)
          + scale * np.sin(2 * theta2) * np.cos(phi) - np.sin(psi1) * np.sin(psi2))
    return [j1, j3, j4]  # three equations
# Jacobian matrix
def deri_tf_ls(x, psi1, psi2):
    """Return the analytic 3 x 3 Jacobian of the residuals [J1, J3, J4].

    Row ``i`` holds the partial derivatives of the ``i``-th residual with
    respect to ``theta1``, ``theta2`` and ``phi`` (in that column order).

    Args:
        x: Sequence ``[theta1, theta2, phi]`` of the unknown parameters.
        psi1: Unused; the known parameters only add constants to the
            residuals. Kept so the signature accepts the same ``args`` as
            the objective function.
        psi2: Unused, for the same reason as ``psi1``.

    Returns:
        ``numpy.ndarray`` of shape ``(3, 3)``.
    """
    theta1, theta2, phi = x[0], x[1], x[2]
    scale = np.sqrt(1 / 2)
    sin_phi, cos_phi = np.sin(phi), np.cos(phi)
    sin_t2, cos_t2 = np.sin(2 * theta2), np.cos(2 * theta2)
    sin_d, cos_d = np.sin(2 * (theta1 - theta2)), np.cos(2 * (theta1 - theta2))

    return np.array([
        # d J1 / d(theta1, theta2, phi)
        [2 * scale * sin_d * cos_phi,
         2 * scale * (sin_phi * sin_t2 - sin_d * cos_phi),
         scale * (sin_phi * cos_d - cos_phi * cos_t2)],
        # d J3 / d(theta1, theta2, phi)
        [-2 * scale * cos_phi * cos_d,
         -2 * scale * (sin_phi * cos_t2 - cos_phi * cos_d),
         scale * (sin_phi * sin_d - sin_t2 * cos_phi)],
        # d J4 / d(theta1, theta2, phi)
        [-2 * scale * sin_phi * cos_d,
         2 * scale * (sin_phi * cos_d + cos_phi * cos_t2),
         -scale * (sin_phi * sin_t2 + cos_phi * sin_d)],
    ])  # 3 x 3
# Known parameters psi1 and psi2 of the target vector
psi1_0 = np.pi / 4
psi2_0 = np.pi / 2

# Starting point for [theta1, theta2, phi]
x0 = np.array([1, 1, 1])

# Restrict [theta1, theta2, phi] to the interval [0, pi].
# bounds=(lower_bound, upper_bound); each bound may be a finite number or
# +/- np.inf. Per-variable form: bounds=([0, 0, 0], [np.pi, np.pi, np.pi]);
# a scalar pair such as (0, np.pi) applies the same range to every variable.
sol_tf = optimize.least_squares(
    fun_tf_ls,
    x0,
    jac=deri_tf_ls,
    bounds=(0, np.pi),
    args=(psi1_0, psi2_0),
)
print(sol_tf)
# sol_tf.x is the optimised parameter vector, sol_tf.cost the value of the
# cost function at the solution, and sol_tf.fun the residual vector there.
结果为
此外需要特别注意:必须先把Jacobi矩阵的解析表达式计算出来。如果在deri_tf_ls(x, psi1, psi2)函数内部直接对自变量x做符号求导,如下
# Counter-example (do not use as the ``jac`` callback): the Jacobian is built
# by symbolic differentiation with respect to the entries of ``c`` inside the
# callback itself.
def deri_tf_ls(c):
# NOTE(review): ``target`` is not defined in this snippet; presumably the
# symbolic residual vector -- confirm against the full script.
f_sym = target
J = [J_ for J_ in f_sym]
J_obj = J
# Differentiate every residual with respect to every entry of c, then
# transpose so that rows correspond to residuals.
J_dc = np.array([[sp.diff(J_, c_) for J_ in J] for c_ in c]).T # Jacobian matrix
其中,diff需要对符号变量c求导,但least_squares调用该函数时传入的c是具体的数值,因此会报错。
参考
https://blog.csdn.net/sinat_21591675/article/details/85936621
https://zhuanlan.zhihu.com/p/101645294