AMG工作阶段性总结1

这一段时间研究了一下 AMG on GPUs。一开始比较乱,因为amgcl用到的开源库还是挺多的,最近把这些知识稍微理了一下,以备下一阶段的学习。

首先,一切都是从这篇文章开始的:http://blog.csdn.net/caiye917015406/article/details/8955928

一、用到的相关开源库

   1. Boost库:这个库是amgcl必须的(唯一必须的)。对于boost库,使用时需要自己编译,这里是相关教程:http://blog.csdn.net/caiye917015406/article/details/8957131

   2.OpenCL:如果你需要用GPU运算,这个是必须的(因为我电脑装的是A卡,所以只能用OpenCL;当然N卡的话可以用CUDA)。在AMD官网上下载就行了,网上有很多教程。

   3.vexcl:一个方便opencl开发人员使用的库,封装了cl的一些初始化和向量操作。详细的教程:http://blog.csdn.net/caiye917015406/article/details/8976689(这个翻译了一部分)

   4.eigen:这是一个C++的矩阵开源库,包含了许多矩阵操作和数值算法。见:http://blog.csdn.net/caiye917015406/article/category/1417227

   5.viennacl:这是一个支持OpenCL、OpenMP和CUDA的数值开源库,包含了许多已实现的数值算法和矩阵操作。这个还没有怎么看,官网在这里:http://viennacl.sourceforge.net/

   当然在上面只有boost库是必须的,其它根据需要来添加。我把所有的都配置好了,以备以后学习用。

二、生成数据

  为了进行测试,我们还需要相关数据,amgcl里提供了生成测试数据的程序。贴出来吧,我把2D和3D的整理到一块了。

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

/*
 * Generates problem file for poisson equation in a unit square.
 */
using namespace std;

/*
 * Generates the problem file for the Poisson equation in a unit square,
 * discretised with the 5-point stencil on an N x N grid.
 *
 * The binary file layout is:
 *   int    n2            number of unknowns (N * N)
 *   int    row[n2 + 1]   CRS row pointers
 *   int    col[nnz]      column indices
 *   double val[nnz]      matrix values
 *   double rhs[n2]       right-hand side
 *
 * N        - number of grid points per side (must be >= 1).
 * filename - path of the file to write.
 *
 * Returns 0 on success and a non-zero value when N is not representable in
 * the file format or the file could not be written.
 */
int gen2D(int N, std::string filename) {
    // The format stores the unknown count and the row pointers as int, so
    // the worst-case number of non-zeros (5 per row) has to fit into an int.
    const long long points = static_cast<long long>(N) * N;
    if (N < 1 || points > std::numeric_limits<int>::max() / 5) {
        std::cerr << "gen2D: unsupported grid size " << N << std::endl;
        return 1;
    }

    const int    n   = N;
    const int    n2  = static_cast<int>(points);
    const double h   = n > 1 ? 1.0 / (n - 1) : 0.0;
    // Computed in double so that the product cannot overflow an int.
    const double h2i = static_cast<double>(n - 1) * (n - 1);

    std::vector<int>    row;
    std::vector<int>    col;
    std::vector<double> val;
    std::vector<double> rhs;

    row.reserve(static_cast<std::size_t>(n2) + 1);
    col.reserve(5 * static_cast<std::size_t>(n2));
    val.reserve(5 * static_cast<std::size_t>(n2));
    rhs.reserve(static_cast<std::size_t>(n2));

    row.push_back(0);

    for (int i = 0, idx = 0; i < n; ++i) {
        const double x = i * h;
        for (int j = 0; j < n; ++j, ++idx) {
            const double y = j * h;
            const bool on_boundary =
                i == 0 || i == n - 1 ||
                j == 0 || j == n - 1;

            if (on_boundary) {
                // Boundary rows are identity rows with a zero right-hand side.
                col.push_back(idx);
                val.push_back(1);

                rhs.push_back(0);
            } else {
                col.push_back(idx - n);
                val.push_back(-h2i);

                col.push_back(idx - 1);
                val.push_back(-h2i);

                col.push_back(idx);
                val.push_back(4 * h2i);

                col.push_back(idx + 1);
                val.push_back(-h2i);

                col.push_back(idx + n);
                val.push_back(-h2i);

                rhs.push_back( 2 * (x - x * x + y - y * y) );
            }

            row.push_back(static_cast<int>(col.size()));
        }
    }

    std::ofstream f(filename.c_str(), std::ios::binary);
    if (!f) {
        std::cerr << "gen2D: failed to open \"" << filename << "\" for writing" << std::endl;
        return 1;
    }

    f.write(reinterpret_cast<const char*>(&n2), sizeof(n2));
    f.write(reinterpret_cast<const char*>(row.data()), row.size() * sizeof(row[0]));
    f.write(reinterpret_cast<const char*>(col.data()), col.size() * sizeof(col[0]));
    f.write(reinterpret_cast<const char*>(val.data()), val.size() * sizeof(val[0]));
    f.write(reinterpret_cast<const char*>(rhs.data()), rhs.size() * sizeof(rhs[0]));
    f.flush();

    if (!f) {
        std::cerr << "gen2D: failed to write \"" << filename << "\"" << std::endl;
        return 1;
    }

    // Report the file that was actually written, not a fixed name.
    std::cout << "Wrote \"" << filename << "\"" << std::endl;
    return 0;
}


/*
 * Generates the problem file for the Poisson equation in a unit cube,
 * discretised with the 7-point stencil on an N x N x N grid.
 *
 * The binary file layout is:
 *   int    n3            number of unknowns (N * N * N)
 *   int    row[n3 + 1]   CRS row pointers
 *   int    col[nnz]      column indices
 *   double val[nnz]      matrix values
 *   double rhs[n3]       right-hand side
 *
 * N        - number of grid points per side (must be >= 1).
 * filename - path of the file to write.
 *
 * Returns 0 on success and a non-zero value when N is not representable in
 * the file format or the file could not be written.
 */
int gen3D(int N, std::string filename) {
    // The format stores the unknown count and the row pointers as int, so
    // the worst-case number of non-zeros (7 per row) has to fit into an int.
    // The plane size is checked first so that plane * N cannot overflow.
    const long long limit = std::numeric_limits<int>::max() / 7;
    const long long plane = static_cast<long long>(N) * N;
    if (N < 1 || plane > limit || plane * N > limit) {
        std::cerr << "gen3D: unsupported grid size " << N << std::endl;
        return 1;
    }

    const int    n   = N;
    const int    nn  = static_cast<int>(plane);
    const int    n3  = static_cast<int>(plane * N);
    // Computed in double so that the product cannot overflow an int.
    const double h2i = static_cast<double>(n - 1) * (n - 1);

    std::vector<int>    row;
    std::vector<int>    col;
    std::vector<double> val;
    std::vector<double> rhs;

    row.reserve(static_cast<std::size_t>(n3) + 1);
    col.reserve(7 * static_cast<std::size_t>(n3));
    val.reserve(7 * static_cast<std::size_t>(n3));
    rhs.reserve(static_cast<std::size_t>(n3));

    row.push_back(0);

    for (int k = 0, idx = 0; k < n; ++k) {
        for (int j = 0; j < n; ++j) {
            for (int i = 0; i < n; ++i, ++idx) {
                const bool on_boundary =
                    i == 0 || i == n - 1 ||
                    j == 0 || j == n - 1 ||
                    k == 0 || k == n - 1;

                if (on_boundary) {
                    // Boundary rows are identity rows with a zero right-hand side.
                    col.push_back(idx);
                    val.push_back(1);

                    rhs.push_back(0);
                } else {
                    col.push_back(idx - nn);
                    val.push_back(-h2i);

                    col.push_back(idx - n);
                    val.push_back(-h2i);

                    col.push_back(idx - 1);
                    val.push_back(-h2i);

                    col.push_back(idx);
                    val.push_back(6 * h2i);

                    col.push_back(idx + 1);
                    val.push_back(-h2i);

                    col.push_back(idx + n);
                    val.push_back(-h2i);

                    col.push_back(idx + nn);
                    val.push_back(-h2i);

                    rhs.push_back(1);
                }

                row.push_back(static_cast<int>(col.size()));
            }
        }
    }

    std::ofstream f(filename.c_str(), std::ios::binary);
    if (!f) {
        std::cerr << "gen3D: failed to open \"" << filename << "\" for writing" << std::endl;
        return 1;
    }

    f.write(reinterpret_cast<const char*>(&n3), sizeof(n3));
    f.write(reinterpret_cast<const char*>(row.data()), row.size() * sizeof(row[0]));
    f.write(reinterpret_cast<const char*>(col.data()), col.size() * sizeof(col[0]));
    f.write(reinterpret_cast<const char*>(val.data()), val.size() * sizeof(val[0]));
    f.write(reinterpret_cast<const char*>(rhs.data()), rhs.size() * sizeof(rhs[0]));
    f.flush();

    if (!f) {
        std::cerr << "gen3D: failed to write \"" << filename << "\"" << std::endl;
        return 1;
    }

    // Report the file that was actually written, not a fixed name.
    std::cout << "Wrote \"" << filename << "\"" << std::endl;
    return 0;
}

/*
 * Entry point of the generator: writes the 2D Poisson problem on a
 * 1024 x 1024 grid and forwards the generator's status as the exit code.
 */
int main()
{
	const int n = 1024;
	// NOTE(review): hard-coded, machine-specific output location; adjust it
	// (or take it from the command line) before running elsewhere.
	const std::string filename = "E:\\D\\项目\\GPU\\数据\\problem.dat";
	// Forward the generator's result instead of unconditionally returning 0.
	return gen2D(n, filename);
}


三、读取文件

     在这里生成的是CRS(压缩行存储)格式的系数矩阵(http://blog.csdn.net/caiye917015406/article/details/8983590),所以读取的也是相应格式的矩阵。

#ifndef READ_H
#define READ_H

#include <iostream>
#include <fstream>
#include <vector>
#include <stdexcept>

/**
 * Reads a linear system stored in the binary CRS layout
 *   int n; int row[n + 1]; int col[nnz]; double val[nnz]; double rhs[n]
 * where nnz is the last row pointer.
 *
 * \param fname path of the problem file.
 * \param row   receives the n + 1 row pointers.
 * \param col   receives the nnz column indices.
 * \param val   receives the nnz matrix values.
 * \param rhs   receives the n right-hand side values; it must provide
 *              resize(n) and contiguous double storage reachable through
 *              &rhs[0].
 * \return the number of unknowns n.
 * \throws std::invalid_argument if the file cannot be opened.
 * \throws std::runtime_error    if the file is truncated or its sizes are
 *                               negative.
 */
template <class RHS>
inline int read_problem(const std::string &fname,
        std::vector<int>    &row,
        std::vector<int>    &col,
        std::vector<double> &val,
        RHS &rhs
        )
{
    std::cout << "Reading \"" << fname << "\"..." << std::endl;
    std::ifstream f(fname.c_str(), std::ios::binary);
    if (!f) throw std::invalid_argument("Failed to open problem file");

    int n = 0;

    f.read(reinterpret_cast<char*>(&n), sizeof(int));
    if (!f || n < 0)
        throw std::runtime_error("Problem file is truncated or has an invalid size");

    // row always holds at least one element, so &row[0] is valid.
    row.resize(static_cast<std::size_t>(n) + 1);
    f.read(reinterpret_cast<char*>(&row[0]), row.size() * sizeof(int));
    if (!f || row.back() < 0)
        throw std::runtime_error("Problem file is truncated or has invalid row pointers");

    const std::size_t nnz = static_cast<std::size_t>(row.back());

    col.resize(nnz);
    val.resize(nnz);
    rhs.resize(n);

    // Taking the address of the first element is only valid for non-empty
    // containers, hence the guards.
    if (nnz > 0) {
        f.read(reinterpret_cast<char*>(&col[0]), nnz * sizeof(int));
        f.read(reinterpret_cast<char*>(&val[0]), nnz * sizeof(double));
    }
    if (n > 0)
        f.read(reinterpret_cast<char*>(&rhs[0]), static_cast<std::size_t>(n) * sizeof(double));

    if (!f)
        throw std::runtime_error("Problem file is truncated");

    std::cout << "Done\n" << std::endl;

    return n;
}

/**
 * Reads a linear system stored in the binary CRS layout
 *   int n; int row[n + 1]; int col[nnz]; double val[nnz]; double rhs[n]
 * where nnz is the last row pointer, converting the double values stored
 * in the file to the caller's value types.
 *
 * \param fname path of the problem file.
 * \param row   receives the n + 1 row pointers.
 * \param col   receives the nnz column indices.
 * \param val   receives the nnz matrix values converted to value_type.
 * \param rhs   receives the n right-hand side values; it must provide
 *              resize(n) and element assignment from double via rhs[i].
 * \return the number of unknowns n.
 * \throws std::invalid_argument if the file cannot be opened.
 * \throws std::runtime_error    if the file is truncated or its sizes are
 *                               negative.
 */
template <class RHS, typename value_type>
inline int read_problem(const std::string &fname,
        std::vector<int>        &row,
        std::vector<int>        &col,
        std::vector<value_type> &val,
        RHS &rhs
        )
{
    std::cout << "Reading \"" << fname << "\"..." << std::endl;
    std::ifstream f(fname.c_str(), std::ios::binary);
    if (!f) throw std::invalid_argument("Failed to open problem file");

    int n = 0;

    f.read(reinterpret_cast<char*>(&n), sizeof(int));
    if (!f || n < 0)
        throw std::runtime_error("Problem file is truncated or has an invalid size");

    // row always holds at least one element, so &row[0] is valid.
    row.resize(static_cast<std::size_t>(n) + 1);
    f.read(reinterpret_cast<char*>(&row[0]), row.size() * sizeof(int));
    if (!f || row.back() < 0)
        throw std::runtime_error("Problem file is truncated or has invalid row pointers");

    const std::size_t nnz = static_cast<std::size_t>(row.back());

    col.resize(nnz);
    val.resize(nnz);
    rhs.resize(n);

    // The file always stores doubles: read each array in one go into a
    // staging buffer and convert afterwards instead of issuing one stream
    // read per element.
    std::vector<double> buf(nnz);

    if (nnz > 0) {
        f.read(reinterpret_cast<char*>(&col[0]), nnz * sizeof(int));
        f.read(reinterpret_cast<char*>(&buf[0]), nnz * sizeof(double));
    }
    if (!f)
        throw std::runtime_error("Problem file is truncated");

    for (std::size_t i = 0; i < nnz; ++i)
        val[i] = static_cast<value_type>(buf[i]);

    buf.resize(static_cast<std::size_t>(n));
    if (n > 0)
        f.read(reinterpret_cast<char*>(&buf[0]), buf.size() * sizeof(double));
    if (!f)
        throw std::runtime_error("Problem file is truncated");

    for (int i = 0; i < n; ++i)
        rhs[i] = buf[i];

    std::cout << "Done\n" << std::endl;

    return n;
}

#endif


 

四 测试实例

    这里测试实例的时候,在编译utest时遇到了编译不通过的情况,主要是命名冲突的问题,有可能是因为作者只在Linux上跑过吧,只要稍微修改一下就可以。

#ifndef AMGCL_LEVEL_VIENNACL_HPP
#define AMGCL_LEVEL_VIENNACL_HPP

/*
The MIT License

Copyright (c) 2012-2013 Denis Demidov <[email protected]>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/**
 * \file   level_viennacl.hpp
 * \author Denis Demidov <[email protected]>
 * \brief  Level of an AMG hierarchy for use with ViennaCL vectors.
 */


#include <boost/smart_ptr/scoped_ptr.hpp>

#include <viennacl/vector.hpp>
#include <viennacl/compressed_matrix.hpp>
#include <viennacl/ell_matrix.hpp>
#include <viennacl/hyb_matrix.hpp>
#include <viennacl/linalg/inner_prod.hpp>
#include <viennacl/linalg/prod.hpp>

#include <amgcl/common.hpp>
#include <amgcl/level_params.hpp>
#include <amgcl/spmat.hpp>
#include <amgcl/spai.hpp>
#include <amgcl/operations_viennacl.hpp>
#include <amgcl/gmres.hpp>



namespace amgcl {
namespace level {

	
/// Maps a gpu_matrix_format tag to the ViennaCL matrix type used to store
/// the level matrices. Only the specializations below are defined.
template <gpu_matrix_format Format, typename value_type>
struct matrix_format;

/// GPU_MATRIX_CRS selects viennacl::compressed_matrix.
template <typename value_type>
struct matrix_format<GPU_MATRIX_CRS, value_type> {
    typedef viennacl::compressed_matrix<value_type> type;
	
};

/// GPU_MATRIX_ELL selects viennacl::ell_matrix.
template <typename value_type>
struct matrix_format<GPU_MATRIX_ELL, value_type> {
    typedef viennacl::ell_matrix<value_type> type;
};

/// GPU_MATRIX_HYB selects viennacl::hyb_matrix.
template <typename value_type>
struct matrix_format<GPU_MATRIX_HYB, value_type> {
    typedef viennacl::hyb_matrix<value_type> type;
};

/// Damped Jacobi relaxation operating on ViennaCL vectors.
struct viennacl_damped_jacobi {
    /// Relaxation parameters.
    struct params {
        /// Damping factor applied to every correction (0.72 by default).
        float damping;
        params(float w = 0.72) : damping(w) {}
    };

    /// Per-level state: a device vector filled from the matrix diagonal.
    template <typename value_t, typename index_t>
    struct instance {
        /// Leaves the diagonal vector empty.
        instance() {}

        /// Copies sparse::diagonal(A) into the device vector `dia`, which is
        /// sized to the number of rows of A.
        template <class spmat>
        instance(const spmat &A) : dia(sparse::matrix_rows(A)) {
            ::viennacl::fast_copy(sparse::diagonal(A), dia);
        }

        /// One relaxation sweep: x += damping * (rhs - A * x) / dia
        /// (element-wise division). `tmp` is scratch storage and is
        /// overwritten with the residual.
        template <class spmat, class vector>
        void apply(const spmat &A, const vector &rhs, vector &x, vector &tmp, const params &prm) const {
            // The residual is built in two statements: first A * x, then rhs - tmp.
            tmp = ::viennacl::linalg::prod(A, x);
            tmp = rhs - tmp;
            x += prm.damping * (::viennacl::linalg::element_div(tmp, dia));
        }

        /// Values obtained from sparse::diagonal(A).
        ::viennacl::vector<value_t> dia;
    };
};

/// SPAI-0 relaxation operating on ViennaCL vectors.
struct viennacl_spai0 {
    /// This scheme has no tunable parameters.
    struct params { };

    /// Per-level state: a device vector filled from spai::level0(A).
    template <typename value_t, typename index_t>
    struct instance {
        /// Leaves the vector M empty.
        instance() {}

        /// Copies spai::level0(A) into the device vector `M`, which is sized
        /// to the number of rows of A.
        template <class spmat>
        instance(const spmat &A) : M(sparse::matrix_rows(A)) {
            ::viennacl::fast_copy(spai::level0(A), M);
        }

        /// One relaxation sweep: x += M .* (rhs - A * x) (element-wise
        /// product). `tmp` is scratch storage and is overwritten with the
        /// residual.
        template <class spmat, class vector>
        void apply(const spmat &A, const vector &rhs, vector &x, vector &tmp, const params&) const {
            // The residual is built in two statements: first A * x, then rhs - tmp.
            tmp = ::viennacl::linalg::prod(A, x);
            tmp = rhs - tmp;
            // The element-wise product is converted to a vector before it is added.
            x += (vector)::viennacl::linalg::element_prod(M, tmp);
        }

        /// Values obtained from spai::level0(A).
        ::viennacl::vector<value_t> M;
    };
};

template <relax::scheme Relaxation>
struct viennacl_relax_scheme;

AMGCL_REGISTER_RELAX_SCHEME(viennacl, damped_jacobi);
AMGCL_REGISTER_RELAX_SCHEME(viennacl, spai0);

/// ViennaCL-based AMG hierarchy.
/**
 * Level of an AMG hierarchy for use with ViennaCL vectors. ViennaCL provides
 * several backends (OpenCL, CUDA, OpenMP) and is thus able to run on various
 * hardware.
 * \ingroup levels
 *
 * \param Format Matrix storage \ref gpu_matrix_format "format" to use on each
 *               level.
 * \param Relaxation Relaxation \ref relax::scheme "scheme" (smoother) to use
 *               inside V-cycles.
 */
template <
    gpu_matrix_format Format = GPU_MATRIX_HYB,
    relax::scheme Relaxation = relax::spai0
    >
struct Viennacl { // Locally renamed from `viennacl` (2013-06-02): the lower-case name conflicted with the viennacl library's own name.

/// Parameters for the ViennaCL-based level storage scheme.
struct params : public amgcl::level::params
{
    /// Parameters of the relaxation scheme selected by \p Relaxation.
    typename viennacl_relax_scheme<Relaxation>::type::params relax;
};

/// One level of the hierarchy: matrices, work vectors and smoother state.
template <typename value_t, typename index_t = long long>
class instance {
    public:
        typedef sparse::matrix<value_t, index_t>              cpu_matrix;
        typedef typename matrix_format<Format, value_t>::type matrix;
        typedef ::viennacl::vector<value_t>                   vector;

        // Construct complete multigrid level from system matrix (a),
        // prolongation (p) and restriction (r) operators.
        // The matrices are copied into the local ViennaCL members and the
        // CPU-side originals are cleared afterwards.
        // The work vectors u and f are only allocated when nlevel is
        // non-zero; GMRES data is only allocated for levels selected by
        // prm.kcycle.
        instance(cpu_matrix &a, cpu_matrix &p, cpu_matrix &r, const params &prm, unsigned nlevel)
            : t(a.rows), relax(a), nnz(sparse::matrix_nonzeros(a))
        {
            ::viennacl::copy(sparse::viennacl_map(a), A);
            ::viennacl::copy(sparse::viennacl_map(p), P);
            ::viennacl::copy(sparse::viennacl_map(r), R);

            if (nlevel) {
                u.resize(a.rows);
                f.resize(a.rows);

                if (prm.kcycle && nlevel % prm.kcycle == 0)
                    gmres.reset(new gmres_data<vector>(prm.kcycle_iterations, a.rows));
            }

            a.clear();
            p.clear();
            r.clear();
        }

        // Construct the coarsest hierarchy level from system matrix (a) and
        // its inverse (ai). Both CPU-side matrices are cleared afterwards.
        instance(cpu_matrix &a, cpu_matrix &ai, const params&, unsigned /*nlevel*/)
            : u(a.rows), f(a.rows), t(a.rows),
              nnz(sparse::matrix_nonzeros(a))
        {
            ::viennacl::copy(sparse::viennacl_map(a),  A);
            ::viennacl::copy(sparse::viennacl_map(ai), Ainv);

            a.clear();
            ai.clear();
        }

        // Returns reference to the system matrix
        const matrix& get_matrix() const {
            return A;
        }

        // Compute residual value: the 2-norm of (rhs - A * x). Uses the
        // mutable scratch vector t.
        value_t resid(const vector &rhs, vector &x) const {
            t = ::viennacl::linalg::prod(A, x);
            t = rhs - t;

            return sqrt(::viennacl::linalg::inner_prod(t, t));
        }

        // Perform one V-cycle. Coarser levels are cycled recursively. The
        // coarsest level is solved directly.
        // [plvl, end) is the range of levels, plvl being the current one.
        template <class Iterator>
        static void cycle(Iterator plvl, Iterator end, const params &prm,
                const vector &rhs, vector &x)
        {
            Iterator pnxt = plvl; ++pnxt;

            instance *lvl = plvl->get();

            if (pnxt != end) {
                // The next level is looked up only after it is known to
                // exist: on the coarsest level pnxt equals end, and
                // dereferencing a past-the-end iterator is undefined
                // behaviour (checked iterators abort on it).
                instance *nxt = pnxt->get();

                for(unsigned j = 0; j < prm.ncycle; ++j) {
                    // Pre-smoothing.
                    for(unsigned i = 0; i < prm.npre; ++i)
                        lvl->relax.apply(lvl->A, rhs, x, lvl->t, prm.relax);

                    // Restrict the residual to the next level and start
                    // there from a zero initial guess.
                    lvl->t = ::viennacl::linalg::prod(lvl->A, x);
                    lvl->t = rhs - lvl->t;
                    nxt->f = ::viennacl::linalg::prod(lvl->R, lvl->t);
                    nxt->u.clear();

                    // Levels that own GMRES data are treated with a K-cycle.
                    if (nxt->gmres)
                        kcycle(pnxt, end, prm, nxt->f, nxt->u);
                    else
                        cycle(pnxt, end, prm, nxt->f, nxt->u);

                    // Prolongate the coarse correction and apply it.
                    lvl->t = ::viennacl::linalg::prod(lvl->P, nxt->u);
                    x += lvl->t;

                    // Post-smoothing.
                    for(unsigned i = 0; i < prm.npost; ++i)
                        lvl->relax.apply(lvl->A, rhs, x, lvl->t, prm.relax);
                }
            } else {
                x = ::viennacl::linalg::prod(lvl->Ainv, rhs);
            }
        }

        // Perform one K-cycle: run the level's GMRES data for M iterations,
        // preconditioned with cycle() on the same range of levels. The
        // coarsest level is solved directly.
        template <class Iterator>
        static void kcycle(Iterator plvl, Iterator end, const params &prm,
                const vector &rhs, vector &x)
        {
            Iterator pnxt = plvl; ++pnxt;

            instance *lvl = plvl->get();

            if (pnxt != end) {
                cycle_precond<Iterator> p(plvl, end, prm);

                lvl->gmres->restart(lvl->A, rhs, p, x);

                for(int i = 0; i < lvl->gmres->M; ++i)
                    lvl->gmres->iteration(lvl->A, p, i);

                lvl->gmres->update(x, lvl->gmres->M - 1);
            } else {
                x = ::viennacl::linalg::prod(lvl->Ainv, rhs);
            }
        }

        // Number of rows of the system matrix.
        index_t size() const {
            return A.size1();
        }

        // Number of non-zeros the system matrix had at construction time.
        index_t nonzeros() const {
            return nnz;
        }
    private:
        matrix A;     // System matrix.
        matrix P;     // Prolongation operator.
        matrix R;     // Restriction operator.
        matrix Ainv;  // Inverse of the system matrix (coarsest level only).

        mutable vector u;  // Solution on this level.
        mutable vector f;  // Right-hand side on this level.
        mutable vector t;  // Scratch vector (residuals, corrections).

        typename viennacl_relax_scheme<Relaxation>::type::template instance<value_t, index_t> relax;

        // Set only on levels that are treated with a K-cycle.
        mutable boost::scoped_ptr< gmres_data<vector> > gmres;

        index_t nnz;

        // Adapter that exposes cycle() through the apply(r, x) interface
        // passed to the GMRES data as preconditioner.
        template <class Iterator>
        struct cycle_precond {
            cycle_precond(Iterator lvl, Iterator end, const params &prm)
                : lvl(lvl), end(end), prm(prm) {}

            void apply(const vector &r, vector &x) const {
                cycle(lvl, end, prm, r, x);
            }

            Iterator lvl, end;
            const params &prm;
        };
};

};

} // namespace level

} // namespace amgcl

#endif


 

以上在struct Viennacl { //////////自己修改。2013/6/2-V 这一行,把原来的类名viennacl改掉就可以了,因为它与viennacl库的命名冲突。当然在utest里也要做一下小的调整。

#include <iostream>
#include <cstdlib>

#include <boost/program_options.hpp>

#include <vexcl/vexcl.hpp>

#include <Eigen/Dense>
#include <Eigen/SparseCore>

#define VIENNACL_WITH_OPENCL

#include <amgcl/amgcl.hpp>
#include <amgcl/aggr_plain.hpp>
#include <amgcl/interp_aggr.hpp>
#include <amgcl/interp_smoothed_aggr.hpp>
#include <amgcl/interp_sa_emin.hpp>
#include <amgcl/interp_classic.hpp>
#include <amgcl/operations_vexcl.hpp>
#include <amgcl/operations_eigen.hpp>
#include <amgcl/operations_viennacl.hpp>
#include <amgcl/level_cpu.hpp>
#include <amgcl/level_vexcl.hpp>
#include <amgcl/level_viennacl.hpp>
#include <amgcl/cg.hpp>
#include <amgcl/bicgstab.hpp>
#include <amgcl/gmres.hpp>
#include <amgcl/profiler.hpp>

#include "read.hpp"

typedef double real;
typedef Eigen::Matrix<real, Eigen::Dynamic, 1> EigenVector;

namespace po = boost::program_options;

namespace amgcl {
    profiler<> prof("utest");
}
using amgcl::prof;

/// Interpolation scheme ids; the values are the numbers accepted by the
/// --interp command-line option.
enum interp_t {
    classic              = 1,
    aggregation          = 2,
    smoothed_aggregation = 3,
    sa_emin              = 4
};

/// Backend ids; the values are the numbers accepted by the --level
/// command-line option.
enum level_t {
    cpu_lvl      = 1,
    vexcl_lvl    = 2,
    viennacl_lvl = 3
};

/// Iterative solver ids; the values are the numbers accepted by the
/// --solver command-line option.
enum solver_t {
    cg         = 1,
    bicg       = 2,
    gmres      = 3,
    standalone = 4
};

/// First argument passed to amgcl::gmres_tag when the gmres solver is used.
#define GMRES_M 30

/// Relaxation scheme ids; the values are the numbers accepted by the
/// --relax command-line option.
enum relax_t {
    damped_jacobi = 1,
    spai0         = 2,
    gauss_seidel  = 3,
    ilu0          = 4
};

/// Run-time settings collected from the command line.
struct options {
    int         solver;  ///< Iterative solver id (a solver_t value).
    std::string pfile;   ///< Path of the problem file to read.

    unsigned coarse_enough;   ///< Copied into the AMG parameter of the same name.
    amgcl::level::params lp;  ///< Level parameters (npre, npost, ncycle, kcycle, tol, maxiter).
};

//---------------------------------------------------------------------------
template <class AMG, class spmat, class vector>
void solve(
        const AMG          &amg,
        const spmat        &A,
        const vector       &rhs,
        vector             &x,
        const options      &op
        )
{
    std::pair<int,real> cnv;
    prof.tic("solve");
    switch (static_cast<solver_t>(op.solver)) {
        case cg:
            cnv = amgcl::solve(A, rhs, amg, x, amgcl::cg_tag(op.lp.maxiter, op.lp.tol));
            break;
        case bicg:
            cnv = amgcl::solve(A, rhs, amg, x, amgcl::bicg_tag(op.lp.maxiter, op.lp.tol));
            break;
        case gmres:
            cnv = amgcl::solve(A, rhs, amg, x, amgcl::gmres_tag(GMRES_M, op.lp.maxiter, op.lp.tol));
            break;
        case standalone:
            cnv = amg.solve(rhs, x);
            break;
        default:
            throw std::invalid_argument("Unsupported iterative solver");
    }
    prof.toc("solve");

    std::cout << "Iterations: " << std::get<0>(cnv) << std::endl
              << "Error:      " << std::get<1>(cnv) << std::endl
              << std::endl;
}

//---------------------------------------------------------------------------
template <class Interp, amgcl::relax::scheme Relax, class spmat, class vector>
void run_cpu_test(const spmat &A, const vector &rhs, const options &op) {
    typedef amgcl::solver<real, int, Interp, amgcl::level::cpu<Relax> > AMG;

    typename AMG::params prm;

    prm.coarse_enough = op.coarse_enough;

    prm.level.npre   = op.lp.npre;
    prm.level.npost  = op.lp.npost;
    prm.level.ncycle = op.lp.ncycle;
    prm.level.kcycle = op.lp.kcycle;
    prm.level.tol    = op.lp.tol;
    prm.level.maxiter= op.lp.maxiter;

    EigenVector x = EigenVector::Zero(rhs.size());

    prof.tic("setup");
    AMG amg(A, prm);
    prof.toc("setup");

    std::cout << amg << std::endl;

    Eigen::MappedSparseMatrix<real, Eigen::RowMajor, int> Amap(
            amgcl::sparse::matrix_rows(A),
            amgcl::sparse::matrix_cols(A),
            amgcl::sparse::matrix_nonzeros(A),
            const_cast<int* >(amgcl::sparse::matrix_outer_index(A)),
            const_cast<int* >(amgcl::sparse::matrix_inner_index(A)),
            const_cast<real*>(amgcl::sparse::matrix_values(A))
            );

    solve(amg, Amap, rhs, x, op);
}

//---------------------------------------------------------------------------
template <class Interp, amgcl::relax::scheme Relax, class spmat, class vector>
void run_vexcl_test(const spmat &A, const vector &rhs, const options &op) {
    typedef amgcl::solver<real, int, Interp, amgcl::level::vexcl<Relax> > AMG;

    prof.tic("OpenCL initialization");
    vex::Context ctx( vex::Filter::Env && vex::Filter::DoublePrecision );
    prof.toc("OpenCL initialization");

    if (!ctx.size()) throw std::runtime_error("No available compute devices");
    std::cout << ctx << std::endl;

    typename AMG::params prm;

    prm.coarse_enough = op.coarse_enough;

    prm.level.ctx     = &ctx;
    prm.level.npre    = op.lp.npre;
    prm.level.npost   = op.lp.npost;
    prm.level.ncycle  = op.lp.ncycle;
    prm.level.kcycle  = op.lp.kcycle;
    prm.level.tol     = op.lp.tol;
    prm.level.maxiter = op.lp.maxiter;


    prof.tic("setup");
    AMG amg(A, prm);
    prof.toc("setup");

    std::cout << amg << std::endl;

    vex::vector<real> f(ctx.queue(), rhs.size(), rhs.data());
    vex::vector<real> x(ctx.queue(), rhs.size());
    x = 0;

    solve(amg, amg.top_matrix(), f, x, op);
}

//---------------------------------------------------------------------------
/// Builds an AMG hierarchy on the ViennaCL backend (HYB matrix format) for
/// the given interpolation and relaxation schemes, prints it, and solves
/// A * x = rhs starting from a cleared initial guess. Context creation is
/// timed under "OpenCL initialization" and hierarchy construction under
/// "setup". Throws std::runtime_error when the context contains no device.
template <class Interp, amgcl::relax::scheme Relax, class spmat, class vector>
void run_viennacl_test(const spmat &A, const vector &rhs, const options &op) {
    typedef amgcl::solver<real, int, Interp, amgcl::level::Viennacl<amgcl::GPU_MATRIX_HYB, Relax> > AMG;

    // Use vexcl for simple OpenCL context setup.
    prof.tic("OpenCL initialization");
    vex::Context ctx( vex::Filter::Env && vex::Filter::DoublePrecision && vex::Filter::Count(1));
    prof.toc("OpenCL initialization");

    if (!ctx.size()) throw std::runtime_error("No available compute devices");
    std::cout << ctx << std::endl;
    // Hand the VexCL-created context, device and queue over to ViennaCL
    // (registered as ViennaCL context 0).
    std::vector<cl_device_id> dev_id(1, ctx.queue(0).getInfo<CL_QUEUE_DEVICE>()());
    std::vector<cl_command_queue> queue_id(1, ctx.queue(0)());
    viennacl::ocl::setup_context(0, ctx.context(0)(), dev_id, queue_id);

    // Prevent double free from ViennaCL: take an extra reference on the
    // command queue that is now shared between the two libraries.
    cl::detail::ReferenceHandler<cl_command_queue>::retain(ctx.queue(0)());

    typename AMG::params prm;

    prm.coarse_enough = op.coarse_enough;

    // Forward the command-line settings to the level parameters.
    prm.level.npre    = op.lp.npre;
    prm.level.npost   = op.lp.npost;
    prm.level.ncycle  = op.lp.ncycle;
    prm.level.kcycle  = op.lp.kcycle;
    prm.level.tol     = op.lp.tol;
    prm.level.maxiter = op.lp.maxiter;


    prof.tic("setup");
    AMG amg(A, prm);
    prof.toc("setup");

    std::cout << amg << std::endl;

    viennacl::vector<real> f(rhs.size());
    viennacl::vector<real> x(rhs.size());

    // Copy the host right-hand side to the device and clear the initial guess.
    // NOTE(review): &rhs[0] assumes rhs is non-empty and contiguous — confirm for callers.
    viennacl::fast_copy(&rhs[0], &rhs[0] + rhs.size(), f.begin());
    viennacl::traits::clear(x);

    solve(amg, amg.top_matrix(), f, x, op);
}

//---------------------------------------------------------------------------
template <class interp, class spmat, class vector>
void run_vexcl_test(int relax, const spmat &A, const vector &rhs, const options &op) {
    switch (static_cast<relax_t>(relax)) {
        case damped_jacobi:
            run_vexcl_test<interp, amgcl::relax::damped_jacobi>(A, rhs, op);
            break;
        case spai0:
            run_vexcl_test<interp, amgcl::relax::spai0>(A, rhs, op);
            break;
        default:
            throw std::invalid_argument("Unsupported relaxation scheme for vexcl level");
    }
}

//---------------------------------------------------------------------------
template <class interp, class spmat, class vector>
void run_viennacl_test(int relax, const spmat &A, const vector &rhs, const options &op) {
    switch (static_cast<relax_t>(relax)) {
        case damped_jacobi:
            run_viennacl_test<interp, amgcl::relax::damped_jacobi>(A, rhs, op);
            break;
        case spai0:
            run_viennacl_test<interp, amgcl::relax::spai0>(A, rhs, op);
            break;
        default:
            throw std::invalid_argument("Unsupported relaxation scheme for viennacl level");
    }
}

//---------------------------------------------------------------------------
template <class interp, class spmat, class vector>
void run_cpu_test(int relax, const spmat &A, const vector &rhs, const options &op) {
    switch (static_cast<relax_t>(relax)) {
        case damped_jacobi:
            run_cpu_test<interp, amgcl::relax::damped_jacobi>(A, rhs, op);
            break;
        case spai0:
            run_cpu_test<interp, amgcl::relax::spai0>(A, rhs, op);
            break;
        case gauss_seidel:
            run_cpu_test<interp, amgcl::relax::gauss_seidel>(A, rhs, op);
            break;
        case ilu0:
            run_cpu_test<interp, amgcl::relax::ilu0>(A, rhs, op);
            break;
        default:
            throw std::invalid_argument("Unsupported relaxation scheme");
    }
}

//---------------------------------------------------------------------------
template <class interp, class spmat, class vector>
void run_test(int level, int relax, const spmat &A, const vector &rhs, const options &op) {
    switch(static_cast<level_t>(level)) {
        case cpu_lvl:
            run_cpu_test<interp>(relax, A, rhs, op);
            break;
        case vexcl_lvl:
            run_vexcl_test<interp>(relax, A, rhs, op);
            break;
        case viennacl_lvl:
            run_viennacl_test<interp>(relax, A, rhs, op);
            break;
        default:
            throw std::invalid_argument("Unsupported backend");
    }
}

//---------------------------------------------------------------------------
template <class spmat, class vector>
void run_test(int interp, int level, int relax, const spmat &A, const vector &rhs, const options &op) {
    switch(static_cast<interp_t>(interp)) {
        case classic:
            run_test< amgcl::interp::classic >(level, relax, A, rhs, op);
            break;
        case aggregation:
            run_test< amgcl::interp::aggregation< amgcl::aggr::plain > >(level, relax, A, rhs, op);
            break;
        case smoothed_aggregation:
            run_test< amgcl::interp::smoothed_aggregation< amgcl::aggr::plain > >(level, relax, A, rhs, op);
            break;
        case sa_emin:
            run_test< amgcl::interp::sa_emin< amgcl::aggr::plain > >(level, relax, A, rhs, op);
            break;
        default:
            throw std::invalid_argument("Unsupported interpolation scheme");
    }
}

//---------------------------------------------------------------------------
/// Test driver: parses the command line, reads the problem file, builds
/// the requested AMG configuration, solves the system and prints the
/// profiling report.
///
/// The problem file may be given either with --problem or as a positional
/// argument. Returns 0 after printing the help text or after a completed
/// run.
int main(int argc, char *argv[]) {
    int interp;
    int level;
    int relax;

    options op;

    po::options_description desc("Possible options");

    desc.add_options()
        ("help", "Show help")
        ("interp", po::value<int>(&interp)->default_value(smoothed_aggregation),
            "Interpolation: classic(1), aggregation(2), "
            "smoothed_aggregation (3), smoothed aggregation with energy minimization (4)"
            )
        ("level", po::value<int>(&level)->default_value(vexcl_lvl),
            "Backend: cpu(1), vexcl(2), viennacl(3)"
            )
        ("solver", po::value<int>(&op.solver)->default_value(cg),
            "Iterative solver: cg(1), bicgstab(2), gmres(3), standalone(4)")
        // This option selects the smoother, not the iterative solver, so the
        // help text is labelled accordingly.
        ("relax", po::value<int>(&relax)->default_value(spai0),
            "Relaxation scheme: damped jacobi(1), spai0(2), gauss-seidel (3), ilu0(4)")

        ("problem",
            po::value<std::string>(&op.pfile)->default_value("problem.dat"),
            "Problem file"
            )

        ("coarse_enough", po::value<unsigned>(&op.coarse_enough)->default_value(300))

        // The level parameters default to the values op.lp was constructed with.
        ("npre",   po::value<unsigned>(&op.lp.npre  )->default_value(op.lp.npre))
        ("npost",  po::value<unsigned>(&op.lp.npost )->default_value(op.lp.npost))
        ("ncycle", po::value<unsigned>(&op.lp.ncycle)->default_value(op.lp.ncycle))
        ("kcycle", po::value<unsigned>(&op.lp.kcycle)->default_value(op.lp.kcycle))
        ("tol",    po::value<double  >(&op.lp.tol   )->default_value(op.lp.tol))
        ("maxiter",po::value<unsigned>(&op.lp.maxiter)->default_value(op.lp.maxiter))
        ;

    // Positional arguments are stored under the "problem" option.
    po::positional_options_description pdesc;
    pdesc.add("problem", -1);


    po::variables_map vm;
    po::store(
            po::command_line_parser(argc, argv).
                options(desc).
                positional(pdesc).
                run(),
            vm);
    po::notify(vm);

    if (vm.count("help")) {
        std::cout << desc << std::endl;
        return 0;
    }

    prof.tic("Read problem");
    std::vector<int>  row;
    std::vector<int>  col;
    std::vector<real> val;
    EigenVector       rhs;
    // Disabled local override of the problem path, kept for reference:
//	op.pfile = "E:\\D\\项目\\GPU\\数据\\problem.dat";
    int n = read_problem(op.pfile, row, col, val, rhs);
    prof.toc("Read problem");

    // Non-owning CRS view over the arrays that were just read.
    auto A = amgcl::sparse::map(n, n, row.data(), col.data(), val.data());

    run_test(interp, level, relax, A, rhs, op);

    std::cout << prof << std::endl;

    return 0;
}



    typedef amgcl::solver<real, int, Interp, amgcl::level::Viennacl<amgcl::GPU_MATRIX_HYB, Relax> > AMG;,这里做一下修改就行。当然这是输入命令的,做一个.bat

utest.exe --level 1

具体对应的命令可以看程序。到此应该就可以运行了。

其它的没有什么了,当然对viennacl的测试程序如上面utest的一样。

 

你可能感兴趣的:(AMG工作阶段性总结1)