#ifndef VIENNACL_LINALG_AMG_HPP_ #define VIENNACL_LINALG_AMG_HPP_ /* ========================================================================= Copyright (c) 2010-2011, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/linalg/amg.hpp @brief Main include file for algebraic multigrid (AMG) preconditioners. Experimental in 1.2.x. Implementation contributed by Markus Wagner */ #include #include #include #include #include #include #include #include #include #include "viennacl/forwards.h" #include "viennacl/tools/tools.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/detail/amg/amg_base.hpp" #include "viennacl/linalg/detail/amg/amg_coarse.hpp" #include "viennacl/linalg/detail/amg/amg_interpol.hpp" #include #ifdef _OPENMP #include #endif #include "viennacl/linalg/detail/amg/amg_debug.hpp" #define VIENNACL_AMG_COARSE_LIMIT 50 #define VIENNACL_AMG_MAX_LEVELS 100 namespace viennacl { namespace linalg { typedef detail::amg::amg_tag amg_tag; /** @brief Setup AMG preconditioner * * @param A Operator matrices on all levels * @param P Prolongation/Interpolation operators on all levels * @param Pointvector Vector of points on all levels * @param tag AMG preconditioner tag */ template void amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag) { typedef typename InternalType1::value_type SparseMatrixType; typedef typename InternalType2::value_type PointVectorType; typedef typename SparseMatrixType::value_type ScalarType; typedef typename SparseMatrixType::iterator1 InternalRowIterator; typedef typename SparseMatrixType::iterator2 InternalColIterator; unsigned int i, iterations, c_points, f_points; detail::amg::amg_slicing Slicing; // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process. iterations = tag.get_coarselevels(); if (iterations == 0) iterations = VIENNACL_AMG_MAX_LEVELS; // For parallel coarsenings build data structures (number of threads set automatically). if (tag.get_coarse() == VIENNACL_AMG_COARSE_RS0 || tag.get_coarse() == VIENNACL_AMG_COARSE_RS3) Slicing.init(iterations); for (i=0; i void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag) { typedef typename MatrixType::value_type ScalarType; typedef typename InternalType1::value_type SparseMatrixType; if (tag.get_coarselevels() > 0) { A.resize(tag.get_coarselevels()+1); P.resize(tag.get_coarselevels()); Pointvector.resize(tag.get_coarselevels()); } else { A.resize(VIENNACL_AMG_MAX_LEVELS+1); P.resize(VIENNACL_AMG_MAX_LEVELS); Pointvector.resize(VIENNACL_AMG_MAX_LEVELS); } // Insert operator matrix as operator for finest level. SparseMatrixType A0 (mat); A.insert_element (0, A0); } /** @brief Save operators after setup phase for CPU computation. * * @param A Operator matrices on all levels on the CPU * @param P Prolongation/Interpolation operators on all levels on the CPU * @param R Restriction operators on all levels on the CPU * @param A_setup Operators matrices on all levels from setup phase * @param P_setup Prolongation/Interpolation operators on all levels from setup phase * @param tag AMG preconditioner tag */ template void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag) { typedef typename InternalType1::value_type MatrixType; // Resize internal data structures to actual size. A.resize(tag.get_coarselevels()+1); P.resize(tag.get_coarselevels()); R.resize(tag.get_coarselevels()); // Transform into matrix type. for (unsigned int i=0; i void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag) { typedef typename InternalType1::value_type MatrixType; typedef typename InternalType2::value_type::value_type ScalarType; // Resize internal data structures to actual size. A.resize(tag.get_coarselevels()+1); P.resize(tag.get_coarselevels()); R.resize(tag.get_coarselevels()); // Copy to GPU using the internal sparse matrix structure: std::vector. for (unsigned int i=0; i)P_setup[i],P[i]); } for (unsigned int i=0; i void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag) { typedef typename InternalVectorType::value_type VectorType; result.resize(tag.get_coarselevels()+1); rhs.resize(tag.get_coarselevels()+1); residual.resize(tag.get_coarselevels()); for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level) { result[level] = VectorType(A[level].size1()); result[level].clear(); rhs[level] = VectorType(A[level].size1()); rhs[level].clear(); } for (unsigned int level=0; level < tag.get_coarselevels(); ++level) { residual[level] = VectorType(A[level].size1()); residual[level].clear(); } } /** @brief Pre-compute LU factorization for direct solve (ublas library). * @brief Speeds up precondition phase as this is computed only once overall instead of once per iteration. * * @param op Operator matrix for direct solve * @param Permutation Permutation matrix which saves the factorization result * @param A Operator matrix on coarsest level */ template void amg_lu(boost::numeric::ublas::compressed_matrix & op, boost::numeric::ublas::permutation_matrix & Permutation, SparseMatrixType const & A) { typedef typename SparseMatrixType::const_iterator1 ConstRowIterator; typedef typename SparseMatrixType::const_iterator2 ConstColIterator; // Copy to operator matrix. Needed op.resize(A.size1(),A.size2(),false); for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter) for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) op (col_iter.index1(), col_iter.index2()) = *col_iter; // Permutation matrix has to be reinitialized with actual size. Do not clear() or resize()! Permutation = boost::numeric::ublas::permutation_matrix (op.size1()); boost::numeric::ublas::lu_factorize(op,Permutation); } /** @brief AMG preconditioner class, can be supplied to solve()-routines */ template class amg_precond { typedef typename MatrixType::value_type ScalarType; typedef boost::numeric::ublas::vector VectorType; typedef detail::amg::amg_sparsematrix SparseMatrixType; typedef detail::amg::amg_pointvector PointVectorType; typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator; typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator; typedef typename SparseMatrixType::iterator1 InternalRowIterator; typedef typename SparseMatrixType::iterator2 InternalColIterator; boost::numeric::ublas::vector A_setup; boost::numeric::ublas::vector P_setup; boost::numeric::ublas::vector A; boost::numeric::ublas::vector P; boost::numeric::ublas::vector R; boost::numeric::ublas::vector Pointvector; mutable boost::numeric::ublas::compressed_matrix op; mutable boost::numeric::ublas::permutation_matrix Permutation; mutable boost::numeric::ublas::vector result; mutable boost::numeric::ublas::vector rhs; mutable boost::numeric::ublas::vector residual; mutable bool done_init_apply; amg_tag _tag; public: amg_precond(): Permutation(0) {} /** @brief The constructor. Saves system matrix, tag and builds data structures for setup. * * @param mat System matrix * @param tag The AMG tag */ amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0) { _tag = tag; // Initialize data structures. amg_init (mat,A_setup,P_setup,Pointvector,_tag); done_init_apply = false; } /** @brief Start setup phase for this class and copy data structures. */ void setup() { // Start setup phase. amg_setup(A_setup,P_setup,Pointvector,_tag); // Transform to CPU-Matrixtype for precondition phase. amg_transform_cpu(A,P,R,A_setup,P_setup,_tag); done_init_apply = false; } /** @brief Prepare data structures for preconditioning: * Build data structures for precondition phase. * Do LU factorization on coarsest level. */ void init_apply() const { // Setup precondition phase (Data structures). amg_setup_apply(result,rhs,residual,A_setup,_tag); // Do LU factorization for direct solve. amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]); done_init_apply = true; } /** @brief Returns complexity measures. * * @param avgstencil Average stencil sizes on all levels * @return Operator complexity of AMG method */ template ScalarType calc_complexity(VectorType & avgstencil) { avgstencil = VectorType (_tag.get_coarselevels()+1); unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0; for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level) { level_coefficients = 0; for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter) { for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) { if (level == 0) systemmat_nonzero++; nonzero++; level_coefficients++; } } avgstencil[level] = level_coefficients/(double)A_setup[level].size1(); } return nonzero/static_cast(systemmat_nonzero); } /** @brief Precondition Operation * * @param vec The vector to which preconditioning is applied to (ublas version) */ template void apply(VectorType & vec) const { // Build data structures and do lu factorization before first iteration step. if (!done_init_apply) init_apply(); int level; // Precondition operation (Yang, p.3) rhs[0] = vec; for (level=0; level <(signed)_tag.get_coarselevels(); level++) { result[level].clear(); // Apply Smoother _presmooth times. smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]); #ifdef DEBUG std::cout << "After presmooth:" << std::endl; printvector(result[level]); #endif // Compute residual. residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]); #ifdef DEBUG std::cout << "Residual:" << std::endl; printvector(residual[level]); #endif // Restrict to coarse level. Restricted residual is RHS of coarse level. rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]); #ifdef DEBUG std::cout << "Restricted Residual: " << std::endl; printvector(rhs[level+1]); #endif } // On highest level use direct solve to solve equation. result[level] = rhs[level]; boost::numeric::ublas::lu_substitute(op,Permutation,result[level]); #ifdef DEBUG std::cout << "After direct solve: " << std::endl; printvector (result[level]); #endif for (level=_tag.get_coarselevels()-1; level >= 0; level--) { #ifdef DEBUG std::cout << "Coarse Error: " << std::endl; printvector(result[level+1]); #endif // Interpolate error to fine level. Correct solution by adding error. result[level] += boost::numeric::ublas::prod (P[level], result[level+1]); #ifdef DEBUG std::cout << "Corrected Result: " << std::endl; printvector (result[level]); #endif // Apply Smoother _postsmooth times. smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]); #ifdef DEBUG std::cout << "After postsmooth: " << std::endl; printvector (result[level]); #endif } vec = result[0]; } /** @brief (Weighted) Jacobi Smoother (CPU version) * @param level Coarse level to which smoother is applied to * @param iterations Number of smoother iterations * @param x The vector smoothing is applied to * @param rhs The right hand side of the equation for the smoother */ template void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const { VectorType old_result (x.size()); unsigned int index; ScalarType sum = 0, diag = 1; for (int i=0; i class amg_precond< compressed_matrix > { typedef viennacl::compressed_matrix MatrixType; typedef viennacl::vector VectorType; typedef detail::amg::amg_sparsematrix SparseMatrixType; typedef detail::amg::amg_pointvector PointVectorType; typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator; typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator; typedef typename SparseMatrixType::iterator1 InternalRowIterator; typedef typename SparseMatrixType::iterator2 InternalColIterator; boost::numeric::ublas::vector A_setup; boost::numeric::ublas::vector P_setup; boost::numeric::ublas::vector A; boost::numeric::ublas::vector P; boost::numeric::ublas::vector R; boost::numeric::ublas::vector Pointvector; mutable boost::numeric::ublas::compressed_matrix op; mutable boost::numeric::ublas::permutation_matrix Permutation; mutable boost::numeric::ublas::vector result; mutable boost::numeric::ublas::vector rhs; mutable boost::numeric::ublas::vector residual; mutable bool done_init_apply; amg_tag _tag; public: amg_precond(): Permutation(0) {} /** @brief The constructor. Builds data structures. * * @param mat System matrix * @param tag The AMG tag */ amg_precond(compressed_matrix const & mat, amg_tag const & tag): Permutation(0) { _tag = tag; // Copy to CPU. Internal structure of sparse matrix is used for copy operation. std::vector > mat2 = std::vector >(mat.size1()); viennacl::copy(mat, mat2); // Initialize data structures. amg_init (mat2,A_setup,P_setup,Pointvector,_tag); done_init_apply = false; } /** @brief Start setup phase for this class and copy data structures. */ void setup() { // Start setup phase. amg_setup(A_setup,P_setup,Pointvector,_tag); // Transform to GPU-Matrixtype for precondition phase. amg_transform_gpu(A,P,R,A_setup,P_setup,_tag); done_init_apply = false; } /** @brief Prepare data structures for preconditioning: * Build data structures for precondition phase. * Do LU factorization on coarsest level. */ void init_apply() const { // Setup precondition phase (Data structures). amg_setup_apply(result,rhs,residual,A_setup,_tag); // Do LU factorization for direct solve. amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]); done_init_apply = true; } /** @brief Returns complexity measures * * @param avgstencil Average stencil sizes on all levels * @return Operator complexity of AMG method */ template ScalarType calc_complexity(VectorType & avgstencil) { avgstencil = VectorType (_tag.get_coarselevels()+1); unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0; for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level) { level_coefficients = 0; for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter) { for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter) { if (level == 0) systemmat_nonzero++; nonzero++; level_coefficients++; } } avgstencil[level] = level_coefficients/(double)A[level].size1(); } return nonzero/static_cast(systemmat_nonzero); } /** @brief Precondition Operation * * @param vec The vector to which preconditioning is applied to */ template void apply(VectorType & vec) const { if (!done_init_apply) init_apply(); int level; // Precondition operation (Yang, p.3). rhs[0] = vec; for (level=0; level <(signed)_tag.get_coarselevels(); level++) { result[level].clear(); // Apply Smoother _presmooth times. smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]); #ifdef DEBUG std::cout << "After presmooth: " << std::endl; printvector(result[level]); #endif // Compute residual. residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]); #ifdef DEBUG std::cout << "Residual: " << std::endl; printvector(residual[level]); #endif // Restrict to coarse level. Result is RHS of coarse level equation. //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]); rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]); #ifdef DEBUG std::cout << "Restricted Residual: " << std::endl; printvector(rhs[level+1]); #endif } // On highest level use direct solve to solve equation (on the CPU) //TODO: Use GPU direct solve! result[level] = rhs[level]; boost::numeric::ublas::vector result_cpu (result[level].size()); copy (result[level],result_cpu); boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu); copy (result_cpu, result[level]); #ifdef DEBUG std::cout << "After direct solve: " << std::endl; printvector (result[level]); #endif for (level=_tag.get_coarselevels()-1; level >= 0; level--) { #ifdef DEBUG std::cout << "Coarse Error: " << std::endl; printvector(result[level+1]); #endif // Interpolate error to fine level and correct solution. result[level] += viennacl::linalg::prod(P[level],result[level+1]); #ifdef DEBUG std::cout << "Corrected Result: " << std::endl; printvector (result[level]); #endif // Apply Smoother _postsmooth times. smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]); #ifdef DEBUG std::cout << "After postsmooth: " << std::endl; printvector (result[level]); #endif } vec = result[0]; } /** @brief Jacobi Smoother (GPU version) * @param level Coarse level to which smoother is applied to * @param iterations Number of smoother iterations * @param x The vector smoothing is applied to * @param rhs The right hand side of the equation for the smoother */ template void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const { VectorType old_result (x.size()); //viennacl::ocl::program & p = viennacl::ocl::current_context().add_program // (viennacl::tools::make_double_kernel(jacobi_kernel,viennacl::ocl::current_device().info()), "jacobi_kernel"); //viennacl::ocl::kernel & k = p.add_kernel("jacobi"); viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix::program_name(), "jacobi"); for (unsigned int i=0; i(_tag.get_jacobiweight()), old_result, x, rhs, static_cast(rhs.size()))); } } amg_tag & tag() { return _tag; } }; } } #endif