ViennaCL - The Vienna Computing Library  1.5.1
spai.hpp
#ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP
#define VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/linalg/detail/spai/spai.hpp
    @brief Main implementation of SPAI (not FSPAI). Experimental.
*/
#include <utility>
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <vector>
#include <math.h>
#include <map>
//local includes
#include "viennacl/linalg/detail/spai/spai_tag.hpp"
#include "viennacl/linalg/qr.hpp"
#include "viennacl/linalg/detail/spai/spai-dynamic.hpp"
#include "viennacl/linalg/detail/spai/spai-static.hpp"
#include "viennacl/linalg/detail/spai/sparse_vector.hpp"
#include "viennacl/linalg/detail/spai/block_matrix.hpp"
#include "viennacl/linalg/detail/spai/block_vector.hpp"

//boost includes
#include "boost/numeric/ublas/vector.hpp"
#include "boost/numeric/ublas/matrix.hpp"
#include "boost/numeric/ublas/matrix_proxy.hpp"
#include "boost/numeric/ublas/vector_proxy.hpp"
#include "boost/numeric/ublas/storage.hpp"
#include "boost/numeric/ublas/io.hpp"
#include "boost/numeric/ublas/lu.hpp"
#include "boost/numeric/ublas/triangular.hpp"
#include "boost/numeric/ublas/matrix_expression.hpp"

// ViennaCL includes
#include "viennacl/linalg/prod.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/linalg/sparse_matrix_operations.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/linalg/inner_prod.hpp"
#include "viennacl/linalg/ilu.hpp"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/linalg/opencl/kernels/spai.hpp"

#define VIENNACL_SPAI_K_b 20

namespace viennacl
{
  namespace linalg
  {
    namespace detail
    {
      namespace spai
      {

        //debug helper: prints a sparse vector as [index]:value pairs
        template<typename SparseVectorType>
        void print_sparse_vector(const SparseVectorType& v){
          for(typename SparseVectorType::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it){
            std::cout<<"[ "<<vec_it->first<<" ]:"<<vec_it->second<<std::endl;
          }
        }

        //debug helper: prints a dense matrix (note: each output row is a column of m)
        template<typename DenseMatrixType>
        void print_matrix(DenseMatrixType & m){
          for(int i = 0; i < static_cast<int>(m.size2()); ++i){
            for(int j = 0; j < static_cast<int>(m.size1()); ++j){
              std::cout<<m(j, i)<<" ";
            }
            std::cout<<std::endl;
          }
        }

        /** @brief Adds a scaled sparse vector to a sparse vector: res_v += b*v
         * @param v input sparse vector
         * @param b scaling factor
         * @param res_v output sparse vector
         */
        template<typename SparseVectorType, typename ScalarType>
        void add_sparse_vectors(const SparseVectorType& v, const ScalarType b, SparseVectorType& res_v){
          for(typename SparseVectorType::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it){
            res_v[v_it->first] += b*v_it->second;
          }
        }
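
        // Illustrative sketch (not part of the library): with the sparse
        // vector modeled as std::map<unsigned int, double>,
        //   sparse_vector<double> v, res;
        //   v[0] = 1.0; v[3] = 2.0;
        //   add_sparse_vectors(v, 0.5, res);  // res = {0: 0.5, 3: 1.0}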

        //sparse-matrix - vector product
        /** @brief Computation of the residual res = A*v - e_ind
         * @param A_v_c input matrix A, stored as a vector of sparse columns
         * @param v sparse vector (a column of the preconditioner M)
         * @param ind index of the unit vector e
         * @param res output residual
         */
        template<typename SparseVectorType, typename ScalarType>
        void compute_spai_residual(const std::vector<SparseVectorType>& A_v_c, const SparseVectorType& v,
                                   const unsigned int ind, SparseVectorType& res){
          for(typename SparseVectorType::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it){
            add_sparse_vectors(A_v_c[v_it->first], v_it->second, res);
          }
          res[ind] -= static_cast<ScalarType>(1);
        }
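
        // The loop accumulates A*v column by column: each nonzero v(j)
        // contributes v(j) * A(:,j) to res; subtracting 1 at position ind
        // then yields res = A*v - e_ind, whose 2-norm drives the stopping
        // criterion of the pattern update.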

        /** @brief Setting up the index sets of columns and rows for a certain column
         * @param A_v_c input matrix A, stored as a vector of sparse columns
         * @param v current column of the preconditioner M
         * @param J output set of column indices
         * @param I output set of row indices
         */
        template<typename SparseVectorType>
        void build_index_set(const std::vector<SparseVectorType>& A_v_c, const SparseVectorType& v, std::vector<unsigned int>& J,
                             std::vector<unsigned int>& I){
          buildColumnIndexSet(v, J);
          projectRows(A_v_c, J, I);
        }
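
        // J is the nonzero pattern of the current column of M
        // (buildColumnIndexSet); I then collects every row in which A(:,J)
        // has a nonzero entry (projectRows), so A(I,J) is the smallest dense
        // block containing the least-squares problem for this column.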

        /** @brief Initializes a dense matrix from a sparse one: A_out = A_in(I, J)
         * @param A_in input sparse matrix
         * @param J set of column indices
         * @param I set of row indices
         * @param A_out output dense matrix
         */
        template<typename SparseMatrixType, typename DenseMatrixType>
        void initProjectSubMatrix(const SparseMatrixType& A_in, const std::vector<unsigned int>& J, std::vector<unsigned int>& I,
                                  DenseMatrixType& A_out)
        {
          A_out.resize(I.size(), J.size(), false);
          for(vcl_size_t j = 0; j < J.size(); ++j)
          {
            for(vcl_size_t i = 0; i < I.size(); ++i)
              A_out(i,j) = A_in(I[i],J[j]);
          }
        }
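
        // Sketch (hypothetical indices): for I = {0,2} and J = {1}, A_out
        // becomes the 2x1 block [A(0,1); A(2,1)], which the QR step below
        // factorizes in place.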


        /************************************************** CPU BLOCK SET UP ***************************************/

        /** @brief Sets up the blocks A(I_k, J_k) and QR-factorizes them on the CPU
         * @param A input sparse matrix
         * @param A_v_c matrix A, stored as a vector of sparse columns
         * @param M_v preconditioner M, stored as a vector of sparse columns
         * @param g_I container of row index sets
         * @param g_J container of column index sets
         * @param g_A_I_J container of the dense blocks (QR factors on output)
         * @param g_b_v container of the betas from the Householder reflections
         */
        template<typename SparseMatrixType, typename DenseMatrixType, typename SparseVectorType, typename VectorType>
        void block_set_up(const SparseMatrixType& A,
                          const std::vector<SparseVectorType>& A_v_c,
                          const std::vector<SparseVectorType>& M_v,
                          std::vector<std::vector<unsigned int> >& g_I,
                          std::vector<std::vector<unsigned int> >& g_J,
                          std::vector<DenseMatrixType>& g_A_I_J,
                          std::vector<VectorType>& g_b_v){
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(M_v.size()); ++i){
            build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
            initProjectSubMatrix(A, g_J[i], g_I[i], g_A_I_J[i]);
            single_qr(g_A_I_J[i], g_b_v[i]);
          }
        }
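
        // Each column block (I_k, J_k, A(I_k,J_k)) is independent of the
        // others, which is why the loop above can be distributed over the
        // columns with a plain OpenMP parallel-for.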

        /** @brief Sets up the index sets of columns and rows for all columns
         * @param A_v_c matrix A, stored as a vector of sparse columns
         * @param M_v preconditioner M, stored as a vector of sparse columns
         * @param g_J container of column index sets
         * @param g_I container of row index sets
         */
        template<typename SparseVectorType>
        void index_set_up(const std::vector<SparseVectorType>& A_v_c,
                          const std::vector<SparseVectorType>& M_v,
                          std::vector<std::vector<unsigned int> >& g_J,
                          std::vector<std::vector<unsigned int> >& g_I)
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(M_v.size()); ++i){
            build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
          }
        }

        /************************************************** GPU BLOCK SET UP ***************************************/

        /** @brief Sets up the blocks A(I_k, J_k) and QR-factorizes them on the GPU
         * @param A input sparse matrix
         * @param A_v_c matrix A, stored as a vector of sparse columns
         * @param M_v preconditioner M, stored as a vector of sparse columns
         * @param g_is_update container that indicates which blocks are still updated
         * @param g_I container of row index sets
         * @param g_J container of column index sets
         * @param g_A_I_J contiguous GPU storage of all blocks
         * @param g_bv contiguous GPU storage of the betas
         */
        template<typename ScalarType, unsigned int MAT_ALIGNMENT, typename SparseVectorType>
        void block_set_up(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& A,
                          const std::vector<SparseVectorType>& A_v_c,
                          const std::vector<SparseVectorType>& M_v,
                          std::vector<cl_uint> g_is_update,
                          std::vector<std::vector<unsigned int> >& g_I,
                          std::vector<std::vector<unsigned int> >& g_J,
                          block_matrix & g_A_I_J,
                          block_vector & g_bv)
        {
          viennacl::context ctx = viennacl::traits::context(A);
          bool is_empty_block;
          //build index set
          index_set_up(A_v_c, M_v, g_J, g_I);
          block_assembly(A, g_J, g_I, g_A_I_J, g_is_update, is_empty_block);
          block_qr<ScalarType>(g_I, g_J, g_A_I_J, g_bv, g_is_update, ctx);
        }


        /***************************************************************************************************/
        /******************************** SOLVING LS PROBLEMS ON GPU **************************************/
        /***************************************************************************************************/

        /** @brief Extracts the sparse vector m of a particular column from m_in,
         *         the contiguous vector holding the solutions for all columns
         * @param m_in contiguous solution vector for all columns
         * @param start_m_ind start index of this column inside m_in
         * @param J column index set of this column
         * @param m output sparse vector
         */
        template<typename ScalarType, typename SparseVectorType>
        void custom_fan_out(const std::vector<ScalarType> & m_in,
                            unsigned int start_m_ind,
                            const std::vector<unsigned int> & J,
                            SparseVectorType & m)
        {
          unsigned int cnt = 0;
          for (vcl_size_t i = 0; i < J.size(); ++i) {
            m[J[i]] = m_in[start_m_ind + cnt++];
          }
        }
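
        // Sketch (hypothetical values): with J = {1,4,7} and start index s,
        // the dense entries m_in[s], m_in[s+1], m_in[s+2] are scattered to
        // the sparse positions m[1], m[4], m[7].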


        //GPU based least square problem
        /** @brief Solution of the least squares problems on the GPU
         * @param A_v_c matrix A, stored as a vector of sparse columns
         * @param M_v preconditioner M, stored as a vector of sparse columns
         * @param g_I container of row index sets
         * @param g_J container of column index sets
         * @param g_A_I_J_vcl contiguous GPU storage of the QR-factorized blocks
         * @param g_bv_vcl contiguous GPU storage of the betas
         * @param g_res container of residuals
         * @param g_is_update container that indicates which columns are still updated
         * @param tag SPAI configuration tag
         * @param ctx context of the matrix
         */
        template<typename SparseVectorType, typename ScalarType>
        void least_square_solve(std::vector<SparseVectorType> & A_v_c,
                                std::vector<SparseVectorType> & M_v,
                                std::vector<std::vector<unsigned int> >& g_I,
                                std::vector<std::vector<unsigned int> > & g_J,
                                block_matrix & g_A_I_J_vcl,
                                block_vector & g_bv_vcl,
                                std::vector<SparseVectorType> & g_res,
                                std::vector<cl_uint> & g_is_update,
                                const spai_tag & tag,
                                viennacl::context ctx)
        {
          viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
          unsigned int y_sz, m_sz;
          std::vector<cl_uint> y_inds(M_v.size() + 1, static_cast<cl_uint>(0));
          std::vector<cl_uint> m_inds(M_v.size() + 1, static_cast<cl_uint>(0));
          get_size(g_I, y_sz);
          init_start_inds(g_I, y_inds);
          init_start_inds(g_J, m_inds);
          //create y_v: concatenation of the right-hand sides e_i(I_i)
          std::vector<ScalarType> y_v(y_sz, static_cast<ScalarType>(0));
          for(vcl_size_t i = 0; i < M_v.size(); ++i)
          {
            for(vcl_size_t j = 0; j < g_I[i].size(); ++j)
            {
              if(g_I[i][j] == i)
                y_v[y_inds[i] + j] = static_cast<ScalarType>(1.0);
            }
          }
          //compute m_v
          get_size(g_J, m_sz);
          std::vector<ScalarType> m_v(m_sz, static_cast<ScalarType>(0));

          block_vector y_v_vcl;
          block_vector m_v_vcl;
          //preparing memory for the least squares problems on the GPU
          y_v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                      static_cast<unsigned int>(sizeof(ScalarType)*y_v.size()),
                                                      &(y_v[0]));
          m_v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                      static_cast<unsigned int>(sizeof(ScalarType)*m_v.size()),
                                                      &(m_v[0]));
          y_v_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                       static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                       &(y_inds[0]));
          viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                                   static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
                                                                                   &(g_is_update[0]));
          viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
          viennacl::ocl::kernel & ls_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_least_squares");
          ls_kernel.local_work_size(0, 1);
          ls_kernel.global_work_size(0, 256);
          viennacl::ocl::enqueue(ls_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_bv_vcl.handle(), g_bv_vcl.handle1(), m_v_vcl.handle(),
                                           y_v_vcl.handle(), y_v_vcl.handle1(),
                                           g_A_I_J_vcl.handle1(), g_is_update_vcl,
                                           static_cast<unsigned int>(M_v.size())));
          //copy vector m_v back from GPU to CPU
          cl_int vcl_err = clEnqueueReadBuffer(opencl_ctx.get_queue().handle().get(),
                                               m_v_vcl.handle().get(), CL_TRUE, 0,
                                               sizeof(ScalarType)*(m_v.size()),
                                               &(m_v[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(vcl_err);
          //fan the solution out to the sparse columns and recompute the residuals
          for(long i = 0; i < static_cast<long>(M_v.size()); ++i)
          {
            if(g_is_update[i])
            {
              //fan out to sparse vector
              custom_fan_out(m_v, m_inds[i], g_J[i], M_v[i]);
              g_res[i].clear();
              compute_spai_residual<SparseVectorType, ScalarType>(A_v_c, M_v[i], static_cast<unsigned int>(i), g_res[i]);
              ScalarType res_norm = 0;
              //compute the residual norm to check whether this column needs further updates
              sparse_norm_2(g_res[i], res_norm);
              g_is_update[i] = ((res_norm > tag.getResidualNormThreshold()) && !tag.getIsStatic()) ? 1 : 0;
            }
          }
        }

        //CPU based least square problems
        /** @brief Solution of the least squares problems on the CPU
         * @param A_v_c matrix A, stored as a vector of sparse columns
         * @param g_R container of QR-factorized blocks
         * @param g_b_v container of the betas from the Householder reflections
         * @param g_I container of row index sets
         * @param g_J container of column index sets
         * @param g_res container of residuals
         * @param g_is_update container that indicates which columns are still updated
         * @param M_v preconditioner M, stored as a vector of sparse columns
         * @param tag SPAI configuration tag
         */
        template<typename SparseVectorType, typename DenseMatrixType, typename VectorType>
        void least_square_solve(const std::vector<SparseVectorType>& A_v_c,
                                std::vector<DenseMatrixType>& g_R,
                                std::vector<VectorType>& g_b_v,
                                std::vector<std::vector<unsigned int> >& g_I,
                                std::vector<std::vector<unsigned int> >& g_J,
                                std::vector<SparseVectorType>& g_res,
                                std::vector<bool>& g_is_update,
                                std::vector<SparseVectorType>& M_v,
                                const spai_tag& tag){
          typedef typename DenseMatrixType::value_type ScalarType;
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(M_v.size()); ++i){
            if(g_is_update[i]){
              VectorType y = boost::numeric::ublas::zero_vector<ScalarType>(g_I[i].size());
              projectI<VectorType, ScalarType>(g_I[i], y, static_cast<unsigned int>(tag.getBegInd() + i));
              apply_q_trans_vec(g_R[i], g_b_v[i], y);
              VectorType m_new = boost::numeric::ublas::zero_vector<ScalarType>(g_R[i].size2());
              backwardSolve(g_R[i], y, m_new);
              fanOutVector(m_new, g_J[i], M_v[i]);
              g_res[i].clear();
              compute_spai_residual<SparseVectorType, ScalarType>(A_v_c, M_v[i], static_cast<unsigned int>(tag.getBegInd() + i), g_res[i]);
              ScalarType res_norm = 0;
              sparse_norm_2(g_res[i], res_norm);
              g_is_update[i] = (res_norm > tag.getResidualNormThreshold()) && !tag.getIsStatic();
            }
          }
        }
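
        // For each column k this solves min_m || A(I,J)*m - e_k(I) ||_2 with
        // the stored QR factors: y = Q^T * e_k(I) via apply_q_trans_vec,
        // then R*m = y by back substitution in backwardSolve; fanOutVector
        // scatters the dense solution back into the sparse column of M.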

        //************************************ UPDATE CHECK ***************************************************//
        //returns true as long as at least one column is still marked for update
        template<typename VectorType>
        bool is_all_update(VectorType& parallel_is_update){
          for(unsigned int i = 0; i < parallel_is_update.size(); ++i){
            if(parallel_is_update[i])
              return true;
          }
          return false;
        }

        //********************************** MATRIX VECTORIZATION ***********************************************//
        //Matrix vectorization, column based approach
        /** @brief Stores a sparse matrix as a vector of sparse columns
         * @param M_in input sparse matrix
         * @param M_v output vector of sparse columns
         */
        template<typename SparseMatrixType, typename SparseVectorType>
        void vectorize_column_matrix(const SparseMatrixType& M_in, std::vector<SparseVectorType>& M_v){
          for(typename SparseMatrixType::const_iterator1 row_it = M_in.begin1(); row_it != M_in.end1(); ++row_it){
            for(typename SparseMatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it){
              M_v[static_cast<unsigned int>(col_it.index2())][static_cast<unsigned int>(col_it.index1())] = *col_it;
            }
          }
        }
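
        // Sketch: the 2x2 matrix [[a, b], [0, c]] becomes
        // M_v[0] = {0: a} and M_v[1] = {0: b, 1: c},
        // i.e. one sparse map per column, keyed by row index.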

        //Matrix vectorization, row based approach
        template<typename SparseMatrixType, typename SparseVectorType>
        void vectorize_row_matrix(const SparseMatrixType& M_in, std::vector<SparseVectorType>& M_v){
          for(typename SparseMatrixType::const_iterator1 row_it = M_in.begin1(); row_it != M_in.end1(); ++row_it){
            for(typename SparseMatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it){
              M_v[static_cast<unsigned int>(col_it.index1())][static_cast<unsigned int>(col_it.index2())] = *col_it;
            }
          }
        }

        //************************************* BLOCK ASSEMBLY CODE *********************************************//

        template <typename SizeType>
        void write_set_to_array(const std::vector<std::vector<SizeType> >& ind_set, std::vector<cl_uint>& a){
          vcl_size_t cnt = 0;
          for(vcl_size_t i = 0; i < ind_set.size(); ++i){
            for(vcl_size_t j = 0; j < ind_set[i].size(); ++j){
              a[cnt++] = static_cast<cl_uint>(ind_set[i][j]);
            }
          }
        }


        //assembling blocks on GPU
        /** @brief Assembly of blocks on the GPU from a given set of row index sets g_I and column index sets g_J
         * @param A input sparse matrix
         * @param g_J container of column index sets
         * @param g_I container of row index sets
         * @param g_A_I_J_vcl contiguous GPU storage of all assembled blocks
         * @param g_is_update container that indicates which blocks are still updated
         * @param is_empty_block set to true if there is no block to assemble
         */
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_assembly(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& A, const std::vector<std::vector<unsigned int> >& g_J,
                            const std::vector<std::vector<unsigned int> >& g_I,
                            block_matrix& g_A_I_J_vcl,
                            std::vector<cl_uint>& g_is_update,
                            bool& is_empty_block)
        {
          //computing start indices for the index sets and for the block matrices
          unsigned int sz_I, sz_J, sz_blocks;
          std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
          std::vector<cl_uint> i_ind(g_I.size() + 1, static_cast<cl_uint>(0));
          std::vector<cl_uint> j_ind(g_I.size() + 1, static_cast<cl_uint>(0));
          std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
          init_start_inds(g_J, j_ind);
          init_start_inds(g_I, i_ind);
          get_size(g_J, sz_J);
          get_size(g_I, sz_I);
          std::vector<cl_uint> I_set(sz_I, static_cast<cl_uint>(0));
          std::vector<cl_uint> J_set(sz_J, static_cast<cl_uint>(0));
          //writing the index sets to flat arrays
          write_set_to_array(g_I, I_set);
          write_set_to_array(g_J, J_set);
          //proceed only if there is at least one block to assemble
          if (I_set.size() > 0 && J_set.size() > 0)
          {
            viennacl::context ctx = viennacl::traits::context(A);
            viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
            compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);
            std::vector<ScalarType> con_A_I_J(sz_blocks, static_cast<ScalarType>(0));

            block_vector set_I_vcl, set_J_vcl;
            //init memory on GPU
            //contiguous g_A_I_J
            g_A_I_J_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                            static_cast<unsigned int>(sizeof(ScalarType)*(sz_blocks)),
                                                            &(con_A_I_J[0]));
            g_A_I_J_vcl.handle().context(opencl_ctx);

            //matrix dimensions
            g_A_I_J_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                             static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<cl_uint>(g_I.size())),
                                                             &(matrix_dims[0]));
            g_A_I_J_vcl.handle1().context(opencl_ctx);

            //start indices of the blocks
            g_A_I_J_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                             static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                             &(blocks_ind[0]));
            g_A_I_J_vcl.handle2().context(opencl_ctx);

            //set_I
            set_I_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                          static_cast<unsigned int>(sizeof(cl_uint)*sz_I),
                                                          &(I_set[0]));
            set_I_vcl.handle().context(opencl_ctx);

            //set_J
            set_J_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                          static_cast<unsigned int>(sizeof(cl_uint)*sz_J),
                                                          &(J_set[0]));
            set_J_vcl.handle().context(opencl_ctx);

            //i_ind
            set_I_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                           static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                           &(i_ind[0]));
            set_I_vcl.handle1().context(opencl_ctx);

            //j_ind
            set_J_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                           static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                           &(j_ind[0]));
            set_J_vcl.handle1().context(opencl_ctx);

            viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                                     static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
                                                                                     &(g_is_update[0]));

            viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
            viennacl::ocl::kernel & assembly_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "assemble_blocks");
            assembly_kernel.local_work_size(0, 1);
            assembly_kernel.global_work_size(0, 256);
            viennacl::ocl::enqueue(assembly_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
                                                   set_I_vcl.handle(), set_J_vcl.handle(), set_I_vcl.handle1(),
                                                   set_J_vcl.handle1(),
                                                   g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(), g_A_I_J_vcl.handle(),
                                                   g_is_update_vcl,
                                                   static_cast<unsigned int>(g_I.size())));
            is_empty_block = false;
          }
          else
            is_empty_block = true;
        }
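
        // All blocks A(I_k, J_k) live in one contiguous buffer: blocks_ind
        // gives the offset of each block and matrix_dims its row and column
        // counts, so a single kernel launch can assemble every block.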

        /************************************************************************************************************************/

        /** @brief Insertion of the vectorized preconditioner columns into the original sparse matrix
         * @param M_v vector of sparse columns
         * @param M output sparse matrix
         * @param is_right true for a right preconditioner (columns inserted as columns), false for the transposed fill
         */
        template<typename SparseMatrixType, typename SparseVectorType>
        void insert_sparse_columns(const std::vector<SparseVectorType>& M_v,
                                   SparseMatrixType& M,
                                   bool is_right){
          if (is_right)
          {
            for(unsigned int i = 0; i < M_v.size(); ++i){
              for(typename SparseVectorType::const_iterator vec_it = M_v[i].begin(); vec_it != M_v[i].end(); ++vec_it){
                M(vec_it->first, i) = vec_it->second;
              }
            }
          }
          else //transposed fill of M
          {
            for(unsigned int i = 0; i < M_v.size(); ++i){
              for(typename SparseVectorType::const_iterator vec_it = M_v[i].begin(); vec_it != M_v[i].end(); ++vec_it){
                M(i, vec_it->first) = vec_it->second;
              }
            }
          }
        }

        /** @brief Transposition of a sparse matrix
         * @param A_in input sparse matrix
         * @param A output: the transpose of A_in
         */
        template<typename MatrixType>
        void sparse_transpose(const MatrixType& A_in, MatrixType& A){
          typedef typename MatrixType::value_type ScalarType;
          std::vector<std::map<vcl_size_t, ScalarType> > temp_A(A_in.size2());
          A.resize(A_in.size2(), A_in.size1(), false);

          for (typename MatrixType::const_iterator1 row_it = A_in.begin1();
               row_it != A_in.end1();
               ++row_it)
          {
            for (typename MatrixType::const_iterator2 col_it = row_it.begin();
                 col_it != row_it.end();
                 ++col_it)
            {
              temp_A[col_it.index2()][col_it.index1()] = *col_it;
            }
          }

          for (vcl_size_t i = 0; i < temp_A.size(); ++i)
          {
            for (typename std::map<vcl_size_t, ScalarType>::const_iterator it = temp_A[i].begin();
                 it != temp_A[i].end();
                 ++it)
              A(i, it->first) = it->second;
          }
        }




        //CPU version
        /** @brief Construction of the SPAI preconditioner on the CPU
         * @param A input sparse matrix
         * @param M output preconditioner; must carry the initial nonzero pattern
         * @param tag SPAI configuration tag
         */
        template <typename MatrixType>
        void computeSPAI(const MatrixType & A, MatrixType & M, spai_tag & tag){
          typedef typename MatrixType::value_type ScalarType;
          typedef typename boost::numeric::ublas::vector<ScalarType> VectorType;
          typedef typename viennacl::linalg::detail::spai::sparse_vector<ScalarType> SparseVectorType;
          typedef typename boost::numeric::ublas::matrix<ScalarType> DenseMatrixType;
          unsigned int cur_iter = 0;
          //process the columns in blocks of size VIENNACL_SPAI_K_b: window [beg, end)
          tag.setBegInd(0);
          tag.setEndInd(VIENNACL_SPAI_K_b);
          bool go_on = true;
          std::vector<SparseVectorType> A_v_c(M.size2());
          std::vector<SparseVectorType> M_v(M.size2());
          vectorize_column_matrix(A, A_v_c);
          vectorize_column_matrix(M, M_v);

          while(go_on){
            go_on = (tag.getEndInd() < static_cast<long>(M.size2()));
            cur_iter = 0;
            unsigned int l_sz = tag.getEndInd() - tag.getBegInd();
            std::vector<bool> g_is_update(l_sz, true);
            //local copy of the columns of M in the current window [beg, end)
            std::vector<SparseVectorType> l_M_v(l_sz);
            std::copy(M_v.begin() + tag.getBegInd(), M_v.begin() + tag.getEndInd(), l_M_v.begin());
            //working blocks
            std::vector<DenseMatrixType> g_A_I_J(l_sz);
            std::vector<VectorType> g_b_v(l_sz);
            std::vector<SparseVectorType> g_res(l_sz);
            std::vector<std::vector<unsigned int> > g_I(l_sz);
            std::vector<std::vector<unsigned int> > g_J(l_sz);
            while((cur_iter < tag.getIterationLimit()) && is_all_update(g_is_update)){
              //PHASE ONE: set up the blocks
              if(cur_iter == 0) block_set_up(A, A_v_c, l_M_v, g_I, g_J, g_A_I_J, g_b_v);
              else block_update(A, A_v_c, g_res, g_is_update, g_I, g_J, g_b_v, g_A_I_J, tag);

              //PHASE TWO: least squares solution
              least_square_solve(A_v_c, g_A_I_J, g_b_v, g_I, g_J, g_res, g_is_update, l_M_v, tag);

              if(tag.getIsStatic()) break;
              cur_iter++;
            }
            std::copy(l_M_v.begin(), l_M_v.end(), M_v.begin() + tag.getBegInd());
            //advance the window
            tag.setBegInd(tag.getEndInd());
            tag.setEndInd(std::min(static_cast<long>(tag.getBegInd() + VIENNACL_SPAI_K_b), static_cast<long>(M.size2())));
          }
          M.resize(M.size1(), M.size2(), false);
          insert_sparse_columns(M_v, M, tag.getIsRight());
        }
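
        // Usage sketch (illustrative only; in the library this routine is
        // normally invoked through the spai_precond wrapper, and M is
        // expected to carry an initial pattern, e.g. the identity):
        //   boost::numeric::ublas::compressed_matrix<double> A(n, n), M(n, n);
        //   /* ... fill A, set M(i,i) = 1 for all i ... */
        //   viennacl::linalg::detail::spai::spai_tag tag;
        //   viennacl::linalg::detail::spai::computeSPAI(A, M, tag);
        //   // M now holds a sparse approximation of inv(A)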


        //GPU - based version
        /** @brief Construction of the SPAI preconditioner on the GPU
         * @param A input sparse matrix on the device
         * @param cpu_A copy of A on the host
         * @param cpu_M output preconditioner on the host; must carry the initial nonzero pattern
         * @param M output preconditioner on the device
         * @param tag SPAI configuration tag
         */
        template <typename ScalarType, unsigned int MAT_ALIGNMENT>
        void computeSPAI(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& A, //input
                         const boost::numeric::ublas::compressed_matrix<ScalarType>& cpu_A,
                         boost::numeric::ublas::compressed_matrix<ScalarType>& cpu_M, //output
                         viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& M,
                         const spai_tag& tag){
          typedef typename viennacl::linalg::detail::spai::sparse_vector<ScalarType> SparseVectorType;
          unsigned int cur_iter = 0;
          std::vector<cl_uint> g_is_update(cpu_M.size2(), static_cast<cl_uint>(1));
          std::vector<SparseVectorType> A_v_c(cpu_M.size2());
          std::vector<SparseVectorType> M_v(cpu_M.size2());
          vectorize_column_matrix(cpu_A, A_v_c);
          vectorize_column_matrix(cpu_M, M_v);
          std::vector<SparseVectorType> g_res(cpu_M.size2());
          std::vector<std::vector<unsigned int> > g_I(cpu_M.size2());
          std::vector<std::vector<unsigned int> > g_J(cpu_M.size2());

          //OpenCL variables
          block_matrix g_A_I_J_vcl;
          block_vector g_bv_vcl;
          while((cur_iter < tag.getIterationLimit()) && is_all_update(g_is_update)){
            //PHASE ONE: set up the blocks (index sets are computed on the CPU)
            if(cur_iter == 0)
              block_set_up(A, A_v_c, M_v, g_is_update, g_I, g_J, g_A_I_J_vcl, g_bv_vcl);
            else
              block_update(A, A_v_c, g_is_update, g_res, g_J, g_I, g_A_I_J_vcl, g_bv_vcl, tag);

            //PHASE TWO: solve the least squares problems
            least_square_solve<SparseVectorType, ScalarType>(A_v_c, M_v, g_I, g_J, g_A_I_J_vcl, g_bv_vcl, g_res, g_is_update, tag, viennacl::traits::context(A));
            if(tag.getIsStatic()) break;
            cur_iter++;
          }
          cpu_M.resize(cpu_M.size1(), cpu_M.size2(), false);
          insert_sparse_columns(M_v, cpu_M, tag.getIsRight());
          //copy the result back to the GPU
          M.resize(static_cast<unsigned int>(cpu_M.size1()), static_cast<unsigned int>(cpu_M.size2()));
          viennacl::copy(cpu_M, M);
        }
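
        // Division of labor in the GPU variant: index sets, the sparse
        // bookkeeping, and the final fan-out stay on the CPU, while block
        // assembly, the QR factorizations, and the least squares solves run
        // as OpenCL kernels; the result is assembled in cpu_M and then
        // copied into the device matrix M.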

      }
    }
  }
}
#endif