插值与拟合较为相似,同样是给出了数据点,要求求出一个函数,但是插值要求插值数据必须100%正确,即求出来的函数必须都过这些点,而拟合则不一定,因为拟合的数据点本身就不一定正确,比如拿尺子测量某物体的形变趋势,在测量的过程中,本身就存在测量误差,拟合函数强行经过这些点毫无意义,并且这个测量过程中会产生大量的测量数据,使用插值的方法也不适合。因此我们可以得出使用插值的条件:
使用拟合的条件:
拟合中最常见的方法就是最小二乘法,其基本思想是:
假如采用直线拟合,则设拟合直线为: y ^ ( x ) = a x + b \hat{y}(x)=ax+b y^(x)=ax+b,
则其相对于数据点的误差为 s = [ y 0 − ( a x 0 + b ) ] 2 + [ y 1 − ( a x 1 + b ) ] 2 + ⋯ [ y n − ( a x n + b ) ] 2 s=[y_0-(ax_0+b)]^2+[y_1-(ax_1+b)]^2+\cdots [y_n-(ax_n+b)]^2 s=[y0−(ax0+b)]2+[y1−(ax1+b)]2+⋯[yn−(axn+b)]2,对其求偏导可以得到
∂ s ∂ a = 2 [ x 0 [ y 0 − ( a x 0 + b ) ] + x 1 [ y 1 − ( a x 1 + b ) ] + ⋯ x n [ y n − ( a x n + b ) ] ] = 2 ( ∑ x i y i − a ∑ x i 2 − b ∑ x i ) ∂ s ∂ b = 2 ( ∑ y i − a ∑ x i − b ∗ n ) \begin{aligned} \frac{\partial s}{\partial a}&=2[x_0[y_0-(ax_0+b)]+x_1[y_1-(ax_1+b)]+\cdots x_n[y_n-(ax_n+b)]]\\ &=2(\sum x_iy_i-a\sum x_i^2-b\sum x_i)\\ \frac{\partial s}{\partial b}&=2(\sum y_i-a\sum x_i-b*n) \end{aligned} ∂a∂s∂b∂s=2[x0[y0−(ax0+b)]+x1[y1−(ax1+b)]+⋯xn[yn−(axn+b)]]=2(∑xiyi−a∑xi2−b∑xi)=2(∑yi−a∑xi−b∗n)
显然当s最小时,两个偏导都为0,所以整理可以得到
a = ∑ x i y i − ∑ x i ∑ y i n ∑ x i 2 − ( ∑ x i ) 2 n = ∑ x i y i − n ∗ x ‾ y ‾ ∑ x i 2 − n ∗ x ‾ 2 \begin{aligned} a&=\frac{\sum x_iy_i-\frac{\sum x_i \sum y_i}{n}}{\sum x_i^2-\frac{(\sum x_i)^2}{n}}\\ &=\frac{\sum x_iy_i-n*\overline{x}\overline{y}}{\sum x_i^2-n*\overline{x}^2}\\ \end{aligned} a=∑xi2−n(∑xi)2∑xiyi−n∑xi∑yi=∑xi2−n∗x2∑xiyi−n∗xy
将此式子代入 ∂ s ∂ b = 0 \frac{\partial s}{\partial b}=0 ∂b∂s=0可以得到
b = y ‾ − a ∗ x ‾ b=\overline{y}-a*\overline{x} b=y−a∗x
当然除了可以使用直线去拟合,也可以使用更高次的函数去拟合,比如抛物线拟合:
s 2 = [ y 0 − ( a x 0 2 + b x 0 + c ) ] 2 + [ y 1 − ( a x 1 2 + b x 1 + c ) ] 2 + ⋯ [ y n − ( a x n 2 + b x n + c ) ] 2 s_2=[y_0-(ax_0^2+bx_0+c)]^2+[y_1-(ax_1^2+bx_1+c)]^2+\cdots [y_n-(ax_n^2+bx_n+c)]^2 s2=[y0−(ax02+bx0+c)]2+[y1−(ax12+bx1+c)]2+⋯[yn−(axn2+bxn+c)]2
一样地求偏导
∂ s 2 ∂ a = 2 [ ∑ x i 2 y i − a ∑ x i 4 − b ∑ x i 3 − c ∑ x i 2 ] ∂ s 2 ∂ b = 2 [ ∑ x i y i − a ∑ x i 3 − b ∑ x i 2 − c ∑ x i ] ∂ s 2 ∂ c = 2 [ ∑ y i − a ∑ x i 2 − b ∑ x i − c ∗ n ] \begin{aligned} \frac{\partial s_2}{\partial a}&=2[\sum{x_i^2y_i}-a\sum x_i^4-b\sum x_i^3-c\sum x_i^2]\\ \frac{\partial s_2}{\partial b}&=2[\sum x_iy_i-a\sum x_i^3-b\sum x_i^2-c\sum x_i]\\ \frac{\partial s_2}{\partial c}&=2[\sum y_i-a\sum x_i^2-b\sum x_i-c*n]\\ \end{aligned} ∂a∂s2∂b∂s2∂c∂s2=2[∑xi2yi−a∑xi4−b∑xi3−c∑xi2]=2[∑xiyi−a∑xi3−b∑xi2−c∑xi]=2[∑yi−a∑xi2−b∑xi−c∗n]
令偏导都为0,化简可以得到
a = ( ∑ x i 2 y i − ∑ x i 2 ∑ y i n + ( ∑ x i y i − ∑ x i ∑ y i n ) ( ∑ x i 2 ∑ x i n − ∑ x i 3 ) / ( ∑ x i 2 − ∑ x i ∑ x i / n ) ) ( ∑ x i 4 − ∑ x i 2 ∑ x i 2 / n + ( ∑ x i 2 ∑ x i / n − ∑ x i 3 ) ( ∑ x i 3 − ∑ x i 2 ∑ x i / n ) / ( ∑ x i 2 − ∑ x i ∑ x i / n ) ) b = ∑ x i y i − a ∗ ∑ x i 3 − ∑ x i ∑ y i / n + a ∗ ∑ x i 2 ∑ x i / n ∑ x i 2 − ∑ x i ∑ x i / n c = y ‾ − b ∑ x i − a ∑ x i 2 n \begin{aligned} a&=\frac{(\sum x_i^2y_i-\frac{\sum x_i^2\sum y_i}{n}+(\sum x_iy_i-\frac{\sum x_i\sum y_i}{n})(\frac{\sum x_i^2\sum x_i}{n}-\sum x_i^3)/(\sum x_i^2-\sum x_i\sum x_i/n))}{(\sum x_i^4-\sum x_i^2\sum x_i^2/n+(\sum x_i^2\sum x_i/n-\sum x_i^3)(\sum x_i^3-\sum x_i^2\sum x_i/n)/(\sum x_i^2-\sum x_i\sum x_i/n))}\\ b&=\frac{\sum x_iy_i-a*\sum x_i^3-\sum x_i\sum y_i/n+a*\sum x_i^2\sum x_i/n}{\sum x_i^2-\sum x_i\sum x_i/n}\\ c&=\frac{\overline{y}-b\sum x_i-a\sum x_i^2}{n} \end{aligned} abc=(∑xi4−∑xi2∑xi2/n+(∑xi2∑xi/n−∑xi3)(∑xi3−∑xi2∑xi/n)/(∑xi2−∑xi∑xi/n))(∑xi2yi−n∑xi2∑yi+(∑xiyi−n∑xi∑yi)(n∑xi2∑xi−∑xi3)/(∑xi2−∑xi∑xi/n))=∑xi2−∑xi∑xi/n∑xiyi−a∗∑xi3−∑xi∑yi/n+a∗∑xi2∑xi/n=ny−b∑xi−a∑xi2
#include
using namespace std;
#define N 5//散点数量
//直线拟合,y=kx+b
void Least_Squares_1(float x[], float y[], float n, float &k, float &b)
{
//分别为x、y、xy和x的平方的和
float sum_x = 0, sum_y = 0, sum_xy = 0, sum_x2 = 0;
for (int i = 0; i < n; i++)
{
sum_x += x[i];//x的和
sum_y += y[i];//y的和
sum_xy += x[i] * y[i];//xy的和
sum_x2 += x[i] * x[i];//x平方和
}
k=(sum_xy-sum_x*sum_y/n)/(sum_x2-sum_x*sum_x/n);
b=(sum_y-k*sum_x)/n;
cout<<"拟合出来的函数为:"<<"y="<<k<<"x"<<(b>=0?"+":"")<<b<<endl;
}
//二次曲线拟合,y=ax^2+bx+c
void Least_Squares_2(float x[], float y[], float n, float &a, float &b,float &c)
{
float sum_x = 0, sum_y = 0, sum_xy = 0, sum_x2 = 0, sum_x3 = 0,sum_x4=0,\
sum_x2y=0;//分别为x、y、xy和x的平方的和
for (int i = 0; i < n; i++)
{
sum_x += x[i];//x的和
sum_y += y[i];//y的和
sum_xy += x[i] * y[i];//xy的和
sum_x2 += x[i] * x[i];//x的平方和
sum_x3 += x[i] * x[i] * x[i];//x的立方和
sum_x4 += x[i] * x[i] * x[i] * x[i];//x的四次方和
sum_x2y += x[i] * x[i] * y[i];//(x^2)(y)的和
}
a = (sum_x2y - sum_x2 * sum_y/n + (sum_xy - sum_x * sum_y/n) *
(sum_x2 * sum_x/n - sum_x3) / (sum_x2 - sum_x * sum_x/n ))/
(sum_x4-sum_x2*sum_x2/n+(sum_x2*sum_x/n-sum_x3)*
(sum_x3-sum_x2*sum_x/n)/(sum_x2-sum_x*sum_x/n));
b = (sum_xy - a * sum_x3 - sum_x*sum_y/n+a*sum_x2*sum_x/n) /
(sum_x2 - sum_x * sum_x/n);
c=(sum_y-b*sum_x-a*sum_x2)/n;
cout<<"拟合出来的函数为:"<<"y="<<a<<"x^2"<<
(b>=0?"+":"")<<b<<"x"<<(c>=0?"+":"")<<c<<endl;
}
int main()
{
float k, b;//拟合出来的系数,y=kx+b
float a, c;
float x1[N] = {1, 1.25, 1.5,1.75,2};//课件例题,使用Least_Squares_1进行拟合
float y1[N] = {5.1,5.79,6.53,7.45,8.46};
Least_Squares_1(x1, y1, N, k, b);
float x2[N] = {1, 2, 3,4,5};//取函数y=0.3x^2+0.7x-7的点验证二次拟合
float y2[N] = {-6, -4.4, -2.2,0.6,4};
Least_Squares_2(x2, y2, N, a, b, c);
return 0;
}
可以发现两者几乎一样。