C++ Implementation of the BFGS Optimization Algorithm

Header file:

/*
 * Copyright (c) 2008-2011 Zhang Ming (M. Zhang), [email protected]
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 2 or any later version.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details. A copy of the GNU General Public License is available at:
 * http://www.fsf.org/licensing/licenses
 */


/*****************************************************************************
 *                                 bfgs.h
 *
 * BFGS quasi-Newton method.
 *
 * This class is designed for finding the minimum of an objective function
 * in one or more dimensions. An inexact line search algorithm is used to
 * compute the step size in each iteration, and the BFGS (Broyden-Fletcher-
 * Goldfarb-Shanno) update formula is used to maintain an approximation to
 * the inverse of the Hessian matrix.
 *
 * Zhang Ming, 2010-03, Xi'an Jiaotong University.
 *****************************************************************************/


#ifndef BFGS_H
#define BFGS_H


#include <matrix.h>
#include <linesearch.h>


namespace splab
{

    template <typename Dtype, typename Ftype>
    class BFGS : public LineSearch<Dtype, Ftype>
    {

    public:

        BFGS();
        ~BFGS();

        void optimize( Ftype &func, Vector<Dtype> &x0, Dtype tol=Dtype(1.0e-6),
                       int maxItr=100 );

        Vector<Dtype> getOptValue() const;
        Vector<Dtype> getGradNorm() const;
        Dtype getFuncMin() const;
        int getItrNum() const;

    private:

        // iteration number
        int itrNum;

        // minimum value of objective function
        Dtype fMin;

        // optimal solution
        Vector<Dtype> xOpt;

        // gradient norm for each iteration
        Vector<Dtype> gradNorm;

    };
    // class BFGS


    #include <bfgs-impl.h>

}
// namespace splab


#endif
// BFGS_H
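
For reference, the optimize() routine in the implementation below maintains an approximation H to the inverse of the Hessian. In the notation of the code, where s = alpha*d is the accepted step and y = g - gPrev is the change in the gradient, the update it applies is the BFGS formula written as the DFP update plus a rank-one correction:

$$
H_{k+1} = H_k + \frac{s\,s^{T}}{y^{T}s}
        - \frac{(H_k y)(H_k y)^{T}}{y^{T}H_k y}
        + v\,v^{T},
\qquad
v = \sqrt{y^{T}H_k y}\,\left(\frac{s}{y^{T}s} - \frac{H_k y}{y^{T}H_k y}\right).
$$

When y^T s or y^T H_k y falls below EPS, the code skips this update and resets H to the identity matrix.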

Implementation file:

/*
 * Copyright (c) 2008-2011 Zhang Ming (M. Zhang), [email protected]
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 2 or any later version.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details. A copy of the GNU General Public License is available at:
 * http://www.fsf.org/licensing/licenses
 */


/*****************************************************************************
 *                               bfgs-impl.h
 *
 * Implementation for BFGS class.
 *
 * Zhang Ming, 2010-03, Xi'an Jiaotong University.
 *****************************************************************************/


/**
 * constructors and destructor
 */
template <typename Dtype, typename Ftype>
BFGS<Dtype, Ftype>::BFGS() : LineSearch<Dtype, Ftype>()
{
}

template <typename Dtype, typename Ftype>
BFGS<Dtype, Ftype>::~BFGS()
{
}


/**
 * Find the optimal solution. The default tolerance and maximum iteration
 * number are "tol=1.0e-6" and "maxItr=100", respectively.
 */
template <typename Dtype, typename Ftype>
void BFGS<Dtype, Ftype>::optimize( Ftype &func, Vector<Dtype> &x0,
                                   Dtype tol, int maxItr )
{
    // initialize parameters.
    int k = 0,
        cnt = 0,
        N = x0.dim();

    Dtype ys,
          yHy,
          alpha;
    Vector<Dtype> d(N),
                  s(N),
                  y(N),
                  v(N),
                  Hy(N),
                  gPrev(N);
    Matrix<Dtype> H = eye( N, Dtype(1.0) );

    Vector<Dtype> x(x0);
    Dtype fx = func(x);
    this->funcNum++;
    Vector<Dtype> gnorm(maxItr);
    Vector<Dtype> g = func.grad(x);
    gnorm[k++]= norm(g);
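    // Note: operator[] is 0-based while operator() is assumed to be 1-based
    // for this Vector class, so gnorm(k) in the loop condition below reads
    // the norm that was just stored at gnorm[k-1].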

    while( ( gnorm(k) > tol ) && ( k < maxItr ) )
    {
        // descent direction
        d = - H * g;

        // one-dimensional (inexact) line search for the step size
        alpha = this->getStep( func, x, d );

        // check flag for restart
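        // If the line search failed and H is already (numerically) the
        // identity, give up; otherwise reset H to the identity, count the
        // restart, and stop after too many restarts.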
        if( !this->success )
        {
            if( norm(H-eye(N,Dtype(1.0))) < EPS )
                break;
            else
            {
                H = eye( N, Dtype(1.0) );
                cnt++;
                if( cnt == maxItr )
                    break;
            }
        }
        else
        {
            // update
            s = alpha * d;
            x += s;
            fx = func(x);
            this->funcNum++;
            gPrev = g;
            g = func.grad(x);
            y = g - gPrev;

            Hy = H * y;
            ys = dotProd( y, s );
            yHy = dotProd( y, Hy );
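            // Curvature safeguard: if y's or y'Hy is (nearly) nonpositive,
            // the BFGS update would be ill-conditioned, so H is reset to
            // the identity instead of being updated.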
            if( (ys < EPS) || (yHy < EPS) )
                H = eye( N, Dtype(1.0) );
            else
            {
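                // Apply the BFGS update to the inverse Hessian approximation:
                //   H <- H + s*s'/(y's) - (H*y)*(H*y)'/(y'*H*y) + v*v'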
                v = sqrt(yHy) * ( s/ys - Hy/yHy );
                H = H + multTr(s,s)/ys - multTr(Hy,Hy)/yHy + multTr(v,v);
            }
            gnorm[k++] = norm(g);
        }
    }

    xOpt = x;
    fMin = fx;
    gradNorm.resize(k);
    for( int i=0; i<k; ++i )
        gradNorm[i] = gnorm[i];

    if( gradNorm[k-1] > tol )
        this->success = false;
}


/**
 * Get the optimum point.
 */
template <typename Dtype, typename Ftype>
inline Vector<Dtype> BFGS<Dtype, Ftype>::getOptValue() const
{
    return xOpt;
}


/**
 * Get the norm of gradient in each iteration.
 */
template <typename Dtype, typename Ftype>
inline Vector<Dtype> BFGS<Dtype, Ftype>::getGradNorm() const
{
    return gradNorm;
}


/**
 * Get the minimum value of objective function.
 */
template <typename Dtype, typename Ftype>
inline Dtype BFGS<Dtype, Ftype>::getFuncMin() const
{
    return fMin;
}


/**
 * Get the iteration number.
 */
template <typename Dtype, typename Ftype>
inline int BFGS<Dtype, Ftype>::getItrNum() const
{
    return gradNorm.dim()-1;
}

Run results:

The iterative number is:   7

The number of function calculation is:   16

The optimal value of x is:   size: 2 by 1
-0.7071
0.0000

The minimum value of f(x) is:   -0.4289

The gradient's norm at x is:   0.0000


Process returned 0 (0x0)   execution time : 0.078 s
Press any key to continue.
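
The test driver that produced this output is not shown in the post. The numbers are consistent with the two-variable objective f(x) = x1 * exp( -(x1^2 + x2^2) ), whose minimum of about -0.4289 is attained at x = (-0.7071, 0). The following is therefore only a minimal sketch of how the class might be driven, assuming that objective; the functor name TestFunc, the starting point, and the include path are illustrative. The only interface optimize() relies on is operator() for the function value and grad() for the gradient:

#include <iostream>
#include <cmath>
#include <bfgs.h>

using namespace std;
using namespace splab;

// Illustrative objective functor: f(x) = x1 * exp( -(x1^2 + x2^2) ).
template <typename Type>
class TestFunc
{
public:
    // function value, called as func(x) in BFGS::optimize()
    Type operator()( Vector<Type> &x )
    {
        return x[0] * exp( -(x[0]*x[0] + x[1]*x[1]) );
    }

    // analytic gradient, called as func.grad(x)
    Vector<Type> grad( Vector<Type> &x )
    {
        Vector<Type> g(2);
        Type e = exp( -(x[0]*x[0] + x[1]*x[1]) );
        g[0] = ( 1 - 2*x[0]*x[0] ) * e;
        g[1] = -2 * x[0] * x[1] * e;
        return g;
    }
};

int main()
{
    Vector<double> x0(2);
    x0[0] = 0.0;            // starting point chosen for illustration
    x0[1] = 1.0;

    TestFunc<double> f;
    BFGS< double, TestFunc<double> > bfgs;
    bfgs.optimize( f, x0 );             // default tol=1.0e-6, maxItr=100

    Vector<double> xOpt = bfgs.getOptValue();
    cout << "iterations      : " << bfgs.getItrNum() << endl;
    cout << "optimal x       : " << xOpt[0] << "  " << xOpt[1] << endl;
    cout << "minimum of f(x) : " << bfgs.getFuncMin() << endl;

    return 0;
}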
