ViennaCL - The Vienna Computing Library  1.5.1
spai-dynamic.hpp
#ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
#define VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/linalg/detail/spai/spai-dynamic.hpp
    @brief Implementation of a dynamic SPAI. Provides the routines for automatic pattern updates. Experimental.
*/
#include <utility>
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <vector>
#include <cmath>
#include <map>
//#include "block_matrix.hpp"
//#include "block_vector.hpp"
//#include "benchmark-utils.hpp"
#include "boost/numeric/ublas/vector.hpp"
#include "boost/numeric/ublas/matrix.hpp"
#include "boost/numeric/ublas/matrix_proxy.hpp"
#include "boost/numeric/ublas/vector_proxy.hpp"
#include "boost/numeric/ublas/storage.hpp"
#include "boost/numeric/ublas/io.hpp"
#include "boost/numeric/ublas/lu.hpp"
#include "boost/numeric/ublas/triangular.hpp"
#include "boost/numeric/ublas/matrix_expression.hpp"
// ViennaCL includes
#include "viennacl/linalg/prod.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/linalg/sparse_matrix_operations.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/linalg/cg.hpp"
#include "viennacl/linalg/inner_prod.hpp"
#include "viennacl/linalg/ilu.hpp"
#include "viennacl/ocl/backend.hpp"

#include "viennacl/linalg/detail/spai/block_matrix.hpp"
#include "viennacl/linalg/detail/spai/block_vector.hpp"
#include "viennacl/linalg/detail/spai/qr.hpp"
#include "viennacl/linalg/detail/spai/spai-static.hpp"
#include "viennacl/linalg/detail/spai/spai.hpp"
#include "viennacl/linalg/detail/spai/spai_tag.hpp"
#include "viennacl/linalg/opencl/kernels/spai.hpp"

namespace viennacl
{
  namespace linalg
  {
    namespace detail
    {
      namespace spai
      {

        /** @brief Helper functor for comparing std::pair<> based on the second member */
        struct CompareSecond
        {
          template <typename T1, typename T2>
          bool operator()(std::pair<T1, T2> const & left, std::pair<T1, T2> const & right)
          {
            return static_cast<double>(left.second) > static_cast<double>(right.second);
          }
        };
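
        // Usage sketch (illustrative only, not part of the original header):
        // CompareSecond orders (index, weight) pairs by descending weight, which is
        // how buildAugmentedIndexSet() below ranks candidate column indices.
        //
        //   std::vector<std::pair<unsigned int, double> > p;
        //   p.push_back(std::make_pair(3u, 0.25));
        //   p.push_back(std::make_pair(7u, 0.75));
        //   std::sort(p.begin(), p.end(), CompareSecond());
        //   // p[0].first == 7, since 0.75 > 0.25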

        /** @brief Composition of the new matrix R that is used in the least-squares solve
         *
         * @param A matrix Q^T * A(I, \tilde J), where \tilde J is the set of new column indices
         * @param R_n the R factor of the new QR block
         * @param R previously composed matrix R; holds the composite on exit
         */
        template<typename MatrixType>
        void composeNewR(const MatrixType & A, const MatrixType & R_n, MatrixType & R)
        {
          typedef typename MatrixType::value_type ScalarType;

          vcl_size_t row_n = R_n.size1() - (A.size1() - R.size2());
          MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(R.size1() + row_n, R.size2() + A.size2());

          // write the original R to the new composite R
          boost::numeric::ublas::project(C, boost::numeric::ublas::range(0, R.size1()),
                                            boost::numeric::ublas::range(0, R.size2())) += R;
          // write the upper part of Q^T * A(I, \tilde J): all columns, and as many rows as R has columns
          boost::numeric::ublas::project(C, boost::numeric::ublas::range(0, R.size2()),
                                            boost::numeric::ublas::range(R.size2(), R.size2() + A.size2())) +=
            boost::numeric::ublas::project(A, boost::numeric::ublas::range(0, R.size2()),
                                              boost::numeric::ublas::range(0, A.size2()));
          // append the QR-decomposed block to the composite R
          if (R_n.size1() > 0 && R_n.size2() > 0)
            boost::numeric::ublas::project(C, boost::numeric::ublas::range(R.size2(), R.size1() + row_n),
                                              boost::numeric::ublas::range(R.size2(), R.size2() + A.size2())) += R_n;
          R = C;
        }
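
        // Shape sketch (illustrative only): if R is r x r and A = Q^T * A(I, \tilde J)
        // brings c new columns, the composite C has block structure
        //
        //   C = [ R    A_upper ]     // A_upper: first r rows of A
        //       [ 0    R_n     ]     // R_n: the R factor of the new QR block
        //
        // so R stays upper triangular while the sparsity pattern grows.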

        /** @brief Composition of the new vector of coefficients beta from the QR factorizations (necessary for Q recovery)
         *
         * @param v_n new vector from the last QR factorization
         * @param v composition of the previous vectors; holds the composite on exit
         */
        template<typename VectorType>
        void composeNewVector(const VectorType & v_n, VectorType & v)
        {
          typedef typename VectorType::value_type ScalarType;

          VectorType w = boost::numeric::ublas::zero_vector<ScalarType>(v.size() + v_n.size());
          boost::numeric::ublas::project(w, boost::numeric::ublas::range(0, v.size())) += v;
          boost::numeric::ublas::project(w, boost::numeric::ublas::range(v.size(), v.size() + v_n.size())) += v_n;
          v = w;
        }
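
        // Example (illustrative only): if v = (b1, b2) and v_n = (b3), then after the call
        // v = (b1, b2, b3) -- the beta coefficients of all Householder reflections so far,
        // in the order in which the QR factorizations were performed.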

        /** @brief Computation of the Euclidean norm of a sparse vector
         *
         * @param v sparse vector, stored as (index, value) pairs
         * @param norm scalar that holds the result; must be zero-initialized by the caller, since the squares are accumulated into it
         */
        template<typename SparseVectorType, typename ScalarType>
        void sparse_norm_2(const SparseVectorType & v, ScalarType & norm)
        {
          for (typename SparseVectorType::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it)
            norm += (vec_it->second) * (vec_it->second);

          norm = std::sqrt(norm);
        }
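
        // Usage sketch (illustrative only, assuming the map-like (index, value)
        // sparse-vector representation used throughout the SPAI code):
        //
        //   std::map<unsigned int, double> v;
        //   v[0] = 3.0; v[5] = 4.0;
        //   double norm = 0;                // must start at zero
        //   sparse_norm_2(v, norm);         // norm == 5.0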

        /** @brief Dot product of two sparse vectors
         *
         * @param v1 sparse vector, stored as (index, value) pairs sorted by index
         * @param v2 sparse vector, stored in the same fashion
         * @param res_v scalar that holds the result; must be zero-initialized by the caller
         */
        template<typename SparseVectorType, typename ScalarType>
        void sparse_inner_prod(const SparseVectorType & v1, const SparseVectorType & v2, ScalarType & res_v)
        {
          typename SparseVectorType::const_iterator v_it1 = v1.begin();
          typename SparseVectorType::const_iterator v_it2 = v2.begin();

          // merge-style traversal over the two sorted index sets
          while ((v_it1 != v1.end()) && (v_it2 != v2.end()))
          {
            if (v_it1->first == v_it2->first)
            {
              res_v += (v_it1->second) * (v_it2->second);
              ++v_it1;
              ++v_it2;
            }
            else if (v_it1->first < v_it2->first)
              ++v_it1;
            else
              ++v_it2;
          }
        }
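
        // Usage sketch (illustrative only, same assumed representation):
        //
        //   std::map<unsigned int, double> a, b;
        //   a[1] = 2.0; a[4] = 1.0;
        //   b[1] = 3.0; b[5] = 7.0;
        //   double dot = 0;                 // must start at zero
        //   sparse_inner_prod(a, b, dot);   // dot == 6.0 -- only index 1 overlaps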

        /** @brief Building a new set of column indices J_u, cf. Kallischko dissertation p.31
         *
         * @param A_v_c vectorized columns of the system matrix
         * @param res residual vector
         * @param J current set of column indices
         * @param J_u set of new column indices
         * @param tag SPAI tag holding the parameters of the algorithm
         * @return true if at least one new index was found
         */
        template <typename SparseVectorType, typename ScalarType>
        bool buildAugmentedIndexSet(const std::vector<SparseVectorType> & A_v_c,
                                    const SparseVectorType & res,
                                    std::vector<unsigned int> & J,
                                    std::vector<unsigned int> & J_u,
                                    const spai_tag & tag)
        {
          std::vector<std::pair<unsigned int, ScalarType> > p;
          vcl_size_t cur_size = 0;
          ScalarType inprod, norm2;

          // collect candidates: residual entries not yet in J that exceed the threshold,
          // weighted by (res, A_k)^2 / ||A_k||^2
          for (typename SparseVectorType::const_iterator res_it = res.begin(); res_it != res.end(); ++res_it)
          {
            if (!isInIndexSet(J, res_it->first) && (std::fabs(res_it->second) > tag.getResidualThreshold()))
            {
              inprod = norm2 = 0;
              sparse_inner_prod(res, A_v_c[res_it->first], inprod);
              sparse_norm_2(A_v_c[res_it->first], norm2);
              p.push_back(std::pair<unsigned int, ScalarType>(res_it->first, (inprod * inprod) / (norm2 * norm2)));
            }
          }

          // keep the most promising candidates (at most J.size() of them), in descending order of weight
          std::sort(p.begin(), p.end(), CompareSecond());
          while ((cur_size < J.size()) && (p.size() > 0))
          {
            J_u.push_back(p[0].first);
            p.erase(p.begin());
            cur_size++;
          }
          p.clear();
          return (cur_size > 0);
        }
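
        // Worked example (illustrative only): with J = {0, 2}, a residual with entries above
        // the threshold at indices 1 and 3, and weights (res, A_1)^2/||A_1||^2 = 0.9 and
        // (res, A_3)^2/||A_3||^2 = 0.4, the candidates sort to {1, 3}; since |J| = 2, both
        // fit and J_u = {1, 3}. If no candidate passes the threshold, the function returns
        // false, which block_update() uses to freeze the block.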

        /** @brief Building a set of new row indices I_n for the current set of row indices, cf. Kallischko dissertation
         *
         * @param A_v_c vectorized columns of the system matrix
         * @param I current set of row indices
         * @param J_n set of new column indices
         * @param I_n set of new row indices
         */
        template<typename SparseVectorType>
        void buildNewRowSet(const std::vector<SparseVectorType> & A_v_c, const std::vector<unsigned int> & I,
                            const std::vector<unsigned int> & J_n, std::vector<unsigned int> & I_n)
        {
          for (vcl_size_t i = 0; i < J_n.size(); ++i)
          {
            // every nonzero row of a newly added column that is covered by neither I nor I_n
            // becomes a new row index
            for (typename SparseVectorType::const_iterator col_it = A_v_c[J_n[i]].begin(); col_it != A_v_c[J_n[i]].end(); ++col_it)
            {
              if (!isInIndexSet(I, col_it->first) && !isInIndexSet(I_n, col_it->first))
                I_n.push_back(col_it->first);
            }
          }
        }
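
        // Worked example (illustrative only): if I = {0, 1} and the new column J_n = {4}
        // has nonzeros in rows {1, 2, 5}, then rows 2 and 5 are not yet covered and
        // I_n = {2, 5}; row 1 is skipped because it is already in I.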

        /** @brief Composition of a new block for QR factorization, cf. Kallischko dissertation p.82
         *
         * @param A_I_J previously composed block (the R factor)
         * @param A_I_J_u matrix Q^T * A(I, \tilde J)
         * @param A_I_u_J_u matrix A(\tilde I, \tilde J); holds the composed block on exit
         */
        template<typename MatrixType>
        void QRBlockComposition(const MatrixType & A_I_J, const MatrixType & A_I_J_u, MatrixType & A_I_u_J_u)
        {
          typedef typename MatrixType::value_type ScalarType;

          vcl_size_t row_n1 = A_I_J_u.size1() - A_I_J.size2();
          vcl_size_t row_n2 = A_I_u_J_u.size1();
          vcl_size_t row_n = row_n1 + row_n2;
          vcl_size_t col_n = A_I_J_u.size2();

          MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(row_n, col_n);
          // the lower part of Q^T * A(I, \tilde J): the rows below the R factor
          boost::numeric::ublas::project(C, boost::numeric::ublas::range(0, row_n1),
                                            boost::numeric::ublas::range(0, col_n)) +=
            boost::numeric::ublas::project(A_I_J_u, boost::numeric::ublas::range(A_I_J.size2(), A_I_J_u.size1()),
                                                    boost::numeric::ublas::range(0, col_n));
          // stack A(\tilde I, \tilde J) below it
          boost::numeric::ublas::project(C, boost::numeric::ublas::range(row_n1, row_n1 + row_n2),
                                            boost::numeric::ublas::range(0, col_n)) += A_I_u_J_u;
          A_I_u_J_u = C;
        }
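
        // Shape sketch (illustrative only): with A_I_J an r x r factor, A_I_J_u of size
        // |I| x c, and A_I_u_J_u of size |I_u| x c, the stacked block is
        //
        //   C = [ A_I_J_u(r:|I|, :) ]     // rows of Q^T A(I, \tilde J) below the R factor
        //       [ A_I_u_J_u         ]     // rows contributed by the new row indices
        //
        // i.e. (|I| - r + |I_u|) x c; single_qr() is then run on C.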

        /** @brief CPU-based dynamic update for the SPAI preconditioner
         *
         * @param A initial sparse matrix
         * @param A_v_c vectorized columns of A
         * @param g_res container of residual vectors
         * @param g_is_update container of update flags, one per block
         * @param g_I container of row index sets
         * @param g_J container of column index sets
         * @param g_b_v container of vectors of beta coefficients from the QR factorizations
         * @param g_A_I_J container of the dense blocks (the R factors)
         * @param tag SPAI tag holding the parameters of the algorithm
         */
        template<typename SparseMatrixType, typename SparseVectorType, typename DenseMatrixType, typename VectorType>
        void block_update(const SparseMatrixType & A, const std::vector<SparseVectorType> & A_v_c,
                          std::vector<SparseVectorType> & g_res,
                          std::vector<bool> & g_is_update,
                          std::vector<std::vector<unsigned int> > & g_I,
                          std::vector<std::vector<unsigned int> > & g_J,
                          std::vector<VectorType> & g_b_v,
                          std::vector<DenseMatrixType> & g_A_I_J,
                          spai_tag const & tag)
        {
          typedef typename DenseMatrixType::value_type ScalarType;

          // set of new column indices
          std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
          // set of new row indices
          std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
          // matrix A(I, \tilde J), cf. Kallischko pp. 31-32
          std::vector<DenseMatrixType> g_A_I_J_u(g_J.size());
          // matrix A(\tilde I, \tilde J), cf. Kallischko
          std::vector<DenseMatrixType> g_A_I_u_J_u(g_J.size());
          // new vectors of beta coefficients from the QR factorizations
          std::vector<VectorType> g_b_v_u(g_J.size());

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(g_J.size()); ++i)
          {
            if (g_is_update[i])
            {
              if (buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag))
              {
                // initialize the matrix A(I, \tilde J)
                initProjectSubMatrix(A, g_J_u[i], g_I[i], g_A_I_J_u[i]);
                // multiplication Q^T * A(I, \tilde J)
                apply_q_trans_mat(g_A_I_J[i], g_b_v[i], g_A_I_J_u[i]);
                // build the new row index set \tilde I
                buildNewRowSet(A_v_c, g_I[i], g_J_u[i], g_I_u[i]);
                initProjectSubMatrix(A, g_J_u[i], g_I_u[i], g_A_I_u_J_u[i]);
                // compose the block for the new QR factorization
                QRBlockComposition(g_A_I_J[i], g_A_I_J_u[i], g_A_I_u_J_u[i]);
                // QR factorization
                single_qr(g_A_I_u_J_u[i], g_b_v_u[i]);
                // compose the new R and the new vector b_v
                composeNewR(g_A_I_J_u[i], g_A_I_u_J_u[i], g_A_I_J[i]);
                composeNewVector(g_b_v_u[i], g_b_v[i]);
                // compose the new index sets I and J
                g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
                g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
              }
              else
              {
                // no new indices found: this block is converged, no further updates
                g_is_update[i] = false;
              }
            }
          }
        }
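
        // Call sketch (illustrative only; the containers are prepared by the SPAI driver
        // in spai.hpp, and all_converged() is a hypothetical helper that checks whether
        // every entry of g_is_update is false):
        //
        //   std::vector<bool> g_is_update(n_blocks, true);
        //   while (!all_converged(g_is_update))
        //   {
        //     // ... recompute the residuals g_res for the current approximate inverse ...
        //     block_update(A, A_v_c, g_res, g_is_update, g_I, g_J, g_b_v, g_A_I_J, tag);
        //   }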

        /**************************************************** GPU SPAI update ****************************************************/

        /** @brief Performs the multiplication Q^T * A(I, \tilde J) on the GPU
         *
         * @param g_J_u container of new column index sets
         * @param g_I container of row index sets
         * @param g_A_I_J_vcl block matrix holding the implicit Q factors
         * @param g_bv_vcl block vector of beta coefficients
         * @param g_A_I_J_u_vcl block matrix A(I, \tilde J); holds the products on exit
         * @param g_is_update container of update flags, one per block
         * @param ctx context the blocks reside in
         */
        template<typename ScalarType>
        void block_q_multiplication(const std::vector<std::vector<unsigned int> > & g_J_u,
                                    const std::vector<std::vector<unsigned int> > & g_I,
                                    block_matrix & g_A_I_J_vcl,
                                    block_vector & g_bv_vcl,
                                    block_matrix & g_A_I_J_u_vcl,
                                    std::vector<cl_uint> & g_is_update,
                                    viennacl::context ctx)
        {
          viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
          unsigned int local_r_n = 0;
          unsigned int local_c_n = 0;
          unsigned int sz_blocks = 0;
          get_max_block_size(g_I, local_r_n);
          get_max_block_size(g_J_u, local_c_n);

          std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
          std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
          compute_blocks_size(g_I, g_J_u, sz_blocks, blocks_ind, matrix_dims);

          viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                                   static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
                                                                                   &(g_is_update[0]));
          viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
          viennacl::ocl::kernel & block_q_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_q_mult");
          block_q_kernel.local_work_size(0, local_c_n);
          block_q_kernel.global_work_size(0, 128*local_c_n);
          viennacl::ocl::enqueue(block_q_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(),
                                                g_bv_vcl.handle(),
                                                g_bv_vcl.handle1(), g_A_I_J_vcl.handle1(), g_A_I_J_u_vcl.handle1(), g_is_update_vcl,
                                                viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(ScalarType)*(local_r_n*local_c_n))),
                                                static_cast<cl_uint>(g_I.size())));
        }

        /** @brief Assembly of the container of row index sets I_q: the row indices for the new "QR blocks"
         *
         * @param g_I container of row index sets
         * @param g_J container of column index sets
         * @param g_I_u container of new row index sets
         * @param g_I_q container of row index sets for the new QR blocks
         */
        template <typename SizeType>
        void assemble_qr_row_inds(const std::vector<std::vector<SizeType> > & g_I, const std::vector<std::vector<SizeType> > g_J,
                                  const std::vector<std::vector<SizeType> > & g_I_u,
                                  std::vector<std::vector<SizeType> > & g_I_q)
        {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(g_I.size()); ++i)
          {
            // rows of I past the first |J| entries (the part below the R factor) ...
            for (vcl_size_t j = g_J[i].size(); j < g_I[i].size(); ++j)
              g_I_q[i].push_back(g_I[i][j]);

            // ... followed by the new row indices
            for (vcl_size_t j = 0; j < g_I_u[i].size(); ++j)
              g_I_q[i].push_back(g_I_u[i][j]);
          }
        }
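
        // Worked example (illustrative only): for one block with I = {0, 3, 5, 8},
        // J = {0, 3} and I_u = {9}, the rows below the R factor are {5, 8}
        // (everything past the first |J| entries of I), so I_q = {5, 8, 9}.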

        /** @brief Performs assembly for a new QR block
         *
         * @param g_J container of column index sets
         * @param g_I container of row index sets
         * @param g_J_u container of new column index sets
         * @param g_I_u container of new row index sets
         * @param g_I_q container of row index sets for the new QR blocks
         * @param g_A_I_J_u_vcl blocks of Q^T * A(I, \tilde J)
         * @param matrix_dimensions array with the dimensions of the existing blocks
         * @param g_A_I_u_J_u_vcl blocks A(\tilde I, \tilde J); holds the assembled QR blocks on exit
         * @param g_is_update container of update flags, one per block
         * @param is_empty_block true if no new row indices were found at all
         * @param ctx context the blocks reside in
         */
        template<typename ScalarType>
        void assemble_qr_block(const std::vector<std::vector<unsigned int> > & g_J,
                               const std::vector<std::vector<unsigned int> > & g_I,
                               const std::vector<std::vector<unsigned int> > & g_J_u,
                               const std::vector<std::vector<unsigned int> > & g_I_u,
                               std::vector<std::vector<unsigned int> > & g_I_q,
                               block_matrix & g_A_I_J_u_vcl,
                               viennacl::ocl::handle<cl_mem> & matrix_dimensions,
                               block_matrix & g_A_I_u_J_u_vcl,
                               std::vector<cl_uint> & g_is_update,
                               const bool is_empty_block,
                               viennacl::context ctx)
        {
          viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());

          assemble_qr_row_inds(g_I, g_J, g_I_u, g_I_q);
          unsigned int sz_blocks;
          std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
          std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
          compute_blocks_size(g_I_q, g_J_u, sz_blocks, blocks_ind, matrix_dims);
          std::vector<ScalarType> con_A_I_J_q(sz_blocks, static_cast<ScalarType>(0));

          // allocate memory for the QR blocks
          block_matrix g_A_I_J_q_vcl;
          g_A_I_J_q_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                            static_cast<unsigned int>(sizeof(ScalarType)*sz_blocks),
                                                            &(con_A_I_J_q[0]));
          g_A_I_J_q_vcl.handle().context(opencl_ctx);

          g_A_I_J_q_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                             static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
                                                             &(matrix_dims[0]));
          g_A_I_J_q_vcl.handle1().context(opencl_ctx);

          g_A_I_J_q_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                             static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
                                                             &(blocks_ind[0]));
          g_A_I_J_q_vcl.handle2().context(opencl_ctx);

          viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                                   static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
                                                                                   &(g_is_update[0]));

          viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
          if (!is_empty_block)
          {
            viennacl::ocl::kernel & qr_assembly_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_qr_assembly");
            qr_assembly_kernel.local_work_size(0, 1);
            qr_assembly_kernel.global_work_size(0, 256);
            viennacl::ocl::enqueue(qr_assembly_kernel(matrix_dimensions,
                                                      g_A_I_J_u_vcl.handle(),
                                                      g_A_I_J_u_vcl.handle2(),
                                                      g_A_I_J_u_vcl.handle1(),
                                                      g_A_I_u_J_u_vcl.handle(),
                                                      g_A_I_u_J_u_vcl.handle2(),
                                                      g_A_I_u_J_u_vcl.handle1(),
                                                      g_A_I_J_q_vcl.handle(),
                                                      g_A_I_J_q_vcl.handle2(),
                                                      g_A_I_J_q_vcl.handle1(),
                                                      g_is_update_vcl,
                                                      static_cast<unsigned int>(g_I.size())));
          }
          else
          {
            viennacl::ocl::kernel & qr_assembly_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_qr_assembly_1");
            qr_assembly_kernel.local_work_size(0, 1);
            qr_assembly_kernel.global_work_size(0, 256);
            viennacl::ocl::enqueue(qr_assembly_kernel(matrix_dimensions, g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(),
                                                      g_A_I_J_u_vcl.handle1(),
                                                      g_A_I_J_q_vcl.handle(),
                                                      g_A_I_J_q_vcl.handle2(), g_A_I_J_q_vcl.handle1(),
                                                      g_is_update_vcl,
                                                      static_cast<unsigned int>(g_I.size())));
          }
          // the assembled QR blocks replace A(\tilde I, \tilde J)
          g_A_I_u_J_u_vcl.handle() = g_A_I_J_q_vcl.handle();
          g_A_I_u_J_u_vcl.handle1() = g_A_I_J_q_vcl.handle1();
          g_A_I_u_J_u_vcl.handle2() = g_A_I_J_q_vcl.handle2();
        }

        /** @brief Performs assembly of the new R matrix on the GPU
         *
         * @param g_I container of row index sets
         * @param g_J container of column index sets
         * @param g_A_I_J_vcl block matrix of the previous R factors
         * @param g_A_I_J_u_vcl blocks of Q^T * A(I, \tilde J)
         * @param g_A_I_u_J_u_vcl blocks of the new QR factorizations
         * @param g_bv_vcl block vector of beta coefficients
         * @param g_bv_vcl_u block vector of new beta coefficients
         * @param g_is_update container of update flags, one per block
         * @param ctx context the blocks reside in
         */
        template<typename ScalarType>
        void assemble_r(std::vector<std::vector<unsigned int> > & g_I, std::vector<std::vector<unsigned int> > & g_J,
                        block_matrix & g_A_I_J_vcl,
                        block_matrix & g_A_I_J_u_vcl,
                        block_matrix & g_A_I_u_J_u_vcl,
                        block_vector & g_bv_vcl,
                        block_vector & g_bv_vcl_u,
                        std::vector<cl_uint> & g_is_update,
                        viennacl::context ctx)
        {
          viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
          std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
          std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
          std::vector<cl_uint> start_bv_r_inds(g_I.size() + 1, 0);
          unsigned int sz_blocks, bv_size;

          compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);
          get_size(g_J, bv_size);
          init_start_inds(g_J, start_bv_r_inds);
          std::vector<ScalarType> con_A_I_J_r(sz_blocks, static_cast<ScalarType>(0));
          std::vector<ScalarType> b_v_r(bv_size, static_cast<ScalarType>(0));

          block_matrix g_A_I_J_r_vcl;
          block_vector g_bv_r_vcl;
          g_A_I_J_r_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                            static_cast<unsigned int>(sizeof(ScalarType)*sz_blocks),
                                                            &(con_A_I_J_r[0]));
          g_A_I_J_r_vcl.handle().context(opencl_ctx);

          g_A_I_J_r_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                             static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
                                                             &(matrix_dims[0]));
          g_A_I_J_r_vcl.handle1().context(opencl_ctx);

          g_A_I_J_r_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                             static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
                                                             &(blocks_ind[0]));
          g_A_I_J_r_vcl.handle2().context(opencl_ctx);

          g_bv_r_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                         static_cast<unsigned int>(sizeof(ScalarType)*bv_size),
                                                         &(b_v_r[0]));
          g_bv_r_vcl.handle().context(opencl_ctx);

          g_bv_r_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                          static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
                                                          &(start_bv_r_inds[0]));
          g_bv_r_vcl.handle1().context(opencl_ctx);   // note: the original set the context on handle() here a second time, which looks like a copy-paste slip

          viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
                                                                                   static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
                                                                                   &(g_is_update[0]));

          viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
          viennacl::ocl::kernel & r_assembly_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_r_assembly");
          r_assembly_kernel.local_work_size(0, 1);
          r_assembly_kernel.global_work_size(0, 256);

          viennacl::ocl::enqueue(r_assembly_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(),
                                                   g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(), g_A_I_J_u_vcl.handle1(),
                                                   g_A_I_u_J_u_vcl.handle(), g_A_I_u_J_u_vcl.handle2(), g_A_I_u_J_u_vcl.handle1(),
                                                   g_A_I_J_r_vcl.handle(), g_A_I_J_r_vcl.handle2(), g_A_I_J_r_vcl.handle1(),
                                                   g_is_update_vcl, static_cast<cl_uint>(g_I.size())));

          viennacl::ocl::kernel & bv_assembly_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_bv_assembly");
          bv_assembly_kernel.local_work_size(0, 1);
          bv_assembly_kernel.global_work_size(0, 256);
          viennacl::ocl::enqueue(bv_assembly_kernel(g_bv_vcl.handle(), g_bv_vcl.handle1(), g_A_I_J_vcl.handle1(), g_bv_vcl_u.handle(),
                                                    g_bv_vcl_u.handle1(), g_A_I_J_u_vcl.handle1(),
                                                    g_bv_r_vcl.handle(), g_bv_r_vcl.handle1(), g_A_I_J_r_vcl.handle1(), g_is_update_vcl,
                                                    static_cast<cl_uint>(g_I.size())));

          // the freshly assembled R factors and beta vectors replace the old ones
          g_bv_vcl.handle() = g_bv_r_vcl.handle();
          g_bv_vcl.handle1() = g_bv_r_vcl.handle1();

          g_A_I_J_vcl.handle() = g_A_I_J_r_vcl.handle();
          g_A_I_J_vcl.handle2() = g_A_I_J_r_vcl.handle2();
          g_A_I_J_vcl.handle1() = g_A_I_J_r_vcl.handle1();
        }

        /** @brief GPU-based dynamic update for the SPAI preconditioner
         *
         * @param A initial sparse matrix
         * @param A_v_c vectorized columns of A
         * @param g_is_update container of update flags, one per block
         * @param g_res container of residual vectors
         * @param g_J container of column index sets
         * @param g_I container of row index sets
         * @param g_A_I_J_vcl block matrix of the R factors
         * @param g_bv_vcl block vector of beta coefficients
         * @param tag SPAI tag holding the parameters of the algorithm
         */
        template<typename ScalarType, unsigned int MAT_ALIGNMENT, typename SparseVectorType>
        void block_update(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> & A, const std::vector<SparseVectorType> & A_v_c,
                          std::vector<cl_uint> & g_is_update,
                          std::vector<SparseVectorType> & g_res,
                          std::vector<std::vector<unsigned int> > & g_J,
                          std::vector<std::vector<unsigned int> > & g_I,
                          block_matrix & g_A_I_J_vcl,
                          block_vector & g_bv_vcl,
                          spai_tag const & tag)
        {
          viennacl::context ctx = viennacl::traits::context(A);

          // updated index set for columns
          std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
          // updated index set for rows
          std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
          // mixed index set of old and updated row indices
          std::vector<std::vector<unsigned int> > g_I_q(g_J.size());
          // GPU memory for A(I, \tilde J)
          block_matrix g_A_I_J_u_vcl;
          // GPU memory for A(\tilde I, \tilde J)
          block_matrix g_A_I_u_J_u_vcl;
          bool is_empty_block;
          // GPU memory for the new b_v
          block_vector g_bv_u_vcl;

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(g_J.size()); ++i)
          {
            if (g_is_update[i])
            {
              if (buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag))
                buildNewRowSet(A_v_c, g_I[i], g_J_u[i], g_I_u[i]);
            }
          }
          // assemble the new A(I, \tilde J) blocks on the GPU and multiply them with Q^T
          block_assembly(A, g_J_u, g_I, g_A_I_J_u_vcl, g_is_update, is_empty_block);
          block_q_multiplication<ScalarType>(g_J_u, g_I, g_A_I_J_vcl, g_bv_vcl, g_A_I_J_u_vcl, g_is_update, ctx);
          // assemble A(\tilde I, \tilde J)
          block_assembly(A, g_J_u, g_I_u, g_A_I_u_J_u_vcl, g_is_update, is_empty_block);
          assemble_qr_block<ScalarType>(g_J, g_I, g_J_u, g_I_u, g_I_q, g_A_I_J_u_vcl, g_A_I_J_vcl.handle1(),
                                        g_A_I_u_J_u_vcl, g_is_update, is_empty_block, ctx);

          block_qr<ScalarType>(g_I_q, g_J_u, g_A_I_u_J_u_vcl, g_bv_u_vcl, g_is_update, ctx);

          // concatenation of new and old indices
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(g_J.size()); ++i)
          {
            g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
            g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
          }
          assemble_r<ScalarType>(g_I, g_J, g_A_I_J_vcl, g_A_I_J_u_vcl, g_A_I_u_J_u_vcl, g_bv_vcl, g_bv_u_vcl, g_is_update, ctx);
        }

      }  // namespace spai
    }  // namespace detail
  }  // namespace linalg
}  // namespace viennacl
#endif