ViennaCL - The Vienna Computing Library 1.5.1
sparse_matrix_operations.hpp
#ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include <list>

#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/host_based/common.hpp"  // detail::extract_raw_pointer(), detail::matrix_array_wrapper

namespace viennacl
{
  namespace linalg
  {
    namespace host_based
    {
      //
      // Compressed matrix
      //

      namespace detail
      {
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void row_info(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat,
                      vector_base<ScalarType> & vec,
                      viennacl::linalg::detail::row_info_types info_selector)
        {
          ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
          ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(mat.handle());
          unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
          unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

          for (vcl_size_t row = 0; row < mat.size1(); ++row)
          {
            ScalarType value = 0;
            unsigned int row_end = row_buffer[row+1];

            switch (info_selector)
            {
              case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value = std::max<ScalarType>(value, std::fabs(elements[i]));
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value += std::fabs(elements[i]);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                  value += elements[i] * elements[i];
                value = std::sqrt(value);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
                for (unsigned int i = row_buffer[row]; i < row_end; ++i)
                {
                  if (col_buffer[i] == row)
                  {
                    value = elements[i];
                    break;
                  }
                }
                break;

              default:
                break;
            }
            result_buf[row] = value;
          }
        }
      } //namespace detail
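
      // Quick CSR recap for the kernels below: a compressed_matrix stores its
      // nonzeros row-wise in the triple (row_buffer, col_buffer, elements).
      // For example, the 3x3 matrix
      //
      //   [ 4 0 1 ]
      //   [ 0 2 0 ]
      //   [ 3 0 5 ]
      //
      // yields row_buffer = {0, 2, 3, 5} (row i occupies the index range
      // [row_buffer[i], row_buffer[i+1])), col_buffer = {0, 2, 1, 0, 2},
      // elements = {4, 1, 2, 3, 5}.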

      /** @brief Carries out matrix-vector multiplication with a compressed_matrix, i.e. result = prod(mat, vec) */
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                     viennacl::vector_base<ScalarType> & result)
      {
        ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
        for (long row = 0; row < static_cast<long>(mat.size1()); ++row)
        {
          ScalarType dot_prod = 0;
          vcl_size_t row_end = row_buffer[row+1];
          for (vcl_size_t i = row_buffer[row]; i < row_end; ++i)
            dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.stride() + vec.start()];
          result_buf[row * result.stride() + result.start()] = dot_prod;
        }
      }
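
      // Usage sketch, assuming only the public ViennaCL 1.5 API (A, x, y, n are
      // hypothetical objects): the kernel above is reached when the host backend
      // is active, e.g.
      //
      //   viennacl::compressed_matrix<double> A(n, n);
      //   viennacl::vector<double> x(n), y(n);
      //   y = viennacl::linalg::prod(A, x);   // dispatches to prod_impl() above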

      /** @brief Carries out sparse-matrix-dense-matrix multiplication, i.e. result = prod(sp_mat, d_mat) */
      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
        unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
            vcl_size_t row_start = sp_mat_row_buffer[row];
            vcl_size_t row_end = sp_mat_row_buffer[row+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }
        else {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
            for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
              vcl_size_t row_start = sp_mat_row_buffer[row];
              vcl_size_t row_end = sp_mat_row_buffer[row+1];
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }

      }

      /** @brief Carries out matrix-matrix multiplication with a transposed dense factor, i.e. result = prod(sp_mat, trans(d_mat)) */
      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::compressed_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
        unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
            vcl_size_t row_start = sp_mat_row_buffer[row];
            vcl_size_t row_end = sp_mat_row_buffer[row+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }
        else {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {
              vcl_size_t row_start = sp_mat_row_buffer[row];
              vcl_size_t row_end = sp_mat_row_buffer[row+1];
              NumericT temp = 0;
              for (vcl_size_t k = row_start; k < row_end; ++k) {
                temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
              }
              result_wrapper(row, col) = temp;
            }
          }
        }

      }
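
      // Usage sketch, assuming only the public ViennaCL 1.5 API (A, B, C and the
      // dimensions are hypothetical): both kernels above are reached through
      // viennacl::linalg::prod(), e.g.
      //
      //   viennacl::compressed_matrix<double> A(m, k);
      //   viennacl::matrix<double> B(k, n), C(m, n);
      //   C = viennacl::linalg::prod(A, B);                   // dense operand
      //   C = viennacl::linalg::prod(A, viennacl::trans(B));  // transposed operand (B is n x k in this case)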


      //
      // Triangular solve for compressed_matrix, A \ b
      //
      namespace detail
      {
        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_lower_tag)
        {
          vcl_size_t row_begin = row_buffer[1];
          for (vcl_size_t row = 1; row < num_cols; ++row)
          {
            NumericT vec_entry = vec_buffer[row];
            vcl_size_t row_end = row_buffer[row+1];
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index < row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
            }
            vec_buffer[row] = vec_entry;
            row_begin = row_end;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::lower_tag)
        {
          vcl_size_t row_begin = row_buffer[0];
          for (vcl_size_t row = 0; row < num_cols; ++row)
          {
            NumericT vec_entry = vec_buffer[row];

            // substitute and remember diagonal entry
            vcl_size_t row_end = row_buffer[row+1];
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index < row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
              else if (col_index == row)
                diagonal_entry = element_buffer[i];
            }

            vec_buffer[row] = vec_entry / diagonal_entry;
            row_begin = row_end;
          }
        }


        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_upper_tag)
        {
          for (vcl_size_t row2 = 1; row2 < num_cols; ++row2)
          {
            vcl_size_t row = (num_cols - row2) - 1;
            NumericT vec_entry = vec_buffer[row];
            vcl_size_t row_begin = row_buffer[row];
            vcl_size_t row_end = row_buffer[row+1];
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index > row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
            }
            vec_buffer[row] = vec_entry;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_inplace_solve(SizeTypeArray const & row_buffer,
                               SizeTypeArray const & col_buffer,
                               ConstScalarTypeArray const & element_buffer,
                               ScalarTypeArray & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::upper_tag)
        {
          for (vcl_size_t row2 = 0; row2 < num_cols; ++row2)
          {
            vcl_size_t row = (num_cols - row2) - 1;
            NumericT vec_entry = vec_buffer[row];

            // substitute and remember diagonal entry
            vcl_size_t row_begin = row_buffer[row];
            vcl_size_t row_end = row_buffer[row+1];
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = row_begin; i < row_end; ++i)
            {
              vcl_size_t col_index = col_buffer[i];
              if (col_index > row)
                vec_entry -= vec_buffer[col_index] * element_buffer[i];
              else if (col_index == row)
                diagonal_entry = element_buffer[i];
            }

            vec_buffer[row] = vec_entry / diagonal_entry;
          }
        }

      } //namespace detail


      /** @brief Inplace solution of vec = L^{-1} vec for a lower triangular compressed_matrix with unit diagonal */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & L,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_lower_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(L.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
      }

      /** @brief Inplace solution of vec = L^{-1} vec for a lower triangular compressed_matrix */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & L,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::lower_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(L.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
      }


      /** @brief Inplace solution of vec = U^{-1} vec for an upper triangular compressed_matrix with unit diagonal */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & U,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_upper_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(U.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
      }

      /** @brief Inplace solution of vec = U^{-1} vec for an upper triangular compressed_matrix */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & U,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::upper_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(U.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

        detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
      }
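
      // Usage sketch, assuming only the public ViennaCL 1.5 API (A, b, n are
      // hypothetical): these overloads implement viennacl::linalg::inplace_solve()
      // for the four triangular tags, e.g.
      //
      //   viennacl::compressed_matrix<double> A(n, n);  // assumed triangular
      //   viennacl::vector<double> b(n);
      //   viennacl::linalg::inplace_solve(A, b, viennacl::linalg::lower_tag());       // b <- A^{-1} b
      //   viennacl::linalg::inplace_solve(A, b, viennacl::linalg::unit_upper_tag());  // unit-diagonal variant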




      //
      // Triangular solve for compressed_matrix, A^T \ b
      //

      namespace detail
      {
        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::unit_lower_tag)
        {
          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < num_cols; ++col)
          {
            NumericT vec_entry = vec_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              unsigned int row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
            col_begin = col_end;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::lower_tag)
        {
          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < num_cols; ++col)
          {
            vcl_size_t col_end = row_buffer[col+1];

            // Stage 1: Find diagonal entry:
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index == col)
              {
                diagonal_entry = element_buffer[i];
                break;
              }
            }

            // Stage 2: Substitute
            NumericT vec_entry = vec_buffer[col] / diagonal_entry;
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
            col_begin = col_end;
          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::unit_upper_tag)
        {
          for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
          {
            vcl_size_t col = (num_cols - col2) - 1;

            NumericT vec_entry = vec_buffer[col];
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }

          }
        }

        template <typename NumericT, typename ConstScalarTypeArray, typename ScalarTypeArray, typename SizeTypeArray>
        void csr_trans_inplace_solve(SizeTypeArray const & row_buffer,
                                     SizeTypeArray const & col_buffer,
                                     ConstScalarTypeArray const & element_buffer,
                                     ScalarTypeArray & vec_buffer,
                                     vcl_size_t num_cols,
                                     viennacl::linalg::upper_tag)
        {
          for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
          {
            vcl_size_t col = (num_cols - col2) - 1;
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];

            // Stage 1: Find diagonal entry:
            NumericT diagonal_entry = 0;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index == col)
              {
                diagonal_entry = element_buffer[i];
                break;
              }
            }

            // Stage 2: Substitute
            NumericT vec_entry = vec_buffer[col] / diagonal_entry;
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * element_buffer[i];
            }
          }
        }

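        // Note on the transposed solves above: the CSR arrays describe the rows of
        // A, which are exactly the columns of A^T. These kernels therefore run a
        // column-oriented substitution: once vec_buffer[col] is final, its
        // contribution vec_entry * element_buffer[i] is scattered into all entries
        // strictly below (or above) the diagonal instead of being gathered per row.
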
        //
        // block solves
        //
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & L,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & /* L_diagonal */,  //ignored
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::unit_lower_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
          ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
          ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
          {
            ScalarType vec_entry = vec_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              unsigned int row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
            col_begin = col_end;
          }
        }

        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & L,
                                 viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & L_diagonal,
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::lower_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
          ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
          ScalarType const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(L_diagonal.handle());
          ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          vcl_size_t col_begin = row_buffer[0];
          for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
          {
            vcl_size_t col_end = row_buffer[col+1];

            ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index > col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
            col_begin = col_end;
          }
        }



        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & U,
                                 viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & /* U_diagonal */,  //ignored
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::unit_upper_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
          ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
          ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
          {
            vcl_size_t col = (U.lhs().size1() - col2) - 1;

            ScalarType vec_entry = vec_buffer[col];
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }

          }
        }

        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & U,
                                 viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                                 vector_base<ScalarType> const & U_diagonal,
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::upper_tag)
        {
          // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

          unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
          unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
          ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
          ScalarType const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(U_diagonal.handle());
          ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.handle());

          for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
          {
            vcl_size_t col = (U.lhs().size1() - col2) - 1;
            vcl_size_t col_begin = row_buffer[col];
            vcl_size_t col_end = row_buffer[col+1];

            // substitute using the externally provided diagonal
            ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
            vec_buffer[col] = vec_entry;
            for (vcl_size_t i = col_begin; i < col_end; ++i)
            {
              vcl_size_t row_index = col_buffer[i];
              if (row_index < col)
                vec_buffer[row_index] -= vec_entry * elements[i];
            }
          }
        }


      } //namespace detail

      /** @brief Inplace solution of the transposed system trans(proxy) \ vec for a lower triangular factor with unit diagonal */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_lower_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }

      /** @brief Inplace solution of the transposed system trans(proxy) \ vec for a lower triangular factor */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::lower_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }


      /** @brief Inplace solution of the transposed system trans(proxy) \ vec for an upper triangular factor with unit diagonal */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::unit_upper_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }


      /** @brief Inplace solution of the transposed system trans(proxy) \ vec for an upper triangular factor */
      template<typename ScalarType, unsigned int MAT_ALIGNMENT>
      void inplace_solve(matrix_expression< const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                            op_trans> const & proxy,
                         vector_base<ScalarType> & vec,
                         viennacl::linalg::upper_tag tag)
      {
        ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

        detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
      }

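      // Usage sketch, assuming only the public ViennaCL 1.5 API (A, b are
      // hypothetical): the transposed overloads are selected when the system
      // matrix is wrapped in viennacl::trans(), e.g.
      //
      //   viennacl::linalg::inplace_solve(viennacl::trans(A), b, viennacl::linalg::upper_tag());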


      //
      // Compressed Compressed Matrix
      //

      /** @brief Carries out matrix-vector multiplication with a compressed_compressed_matrix, i.e. result = prod(mat, vec) */
      template<class ScalarType>
      void prod_impl(const viennacl::compressed_compressed_matrix<ScalarType> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                     viennacl::vector_base<ScalarType> & result)
      {
        ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
        unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

        vector_assign(result, ScalarType(0));

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
        for (long i = 0; i < static_cast<long>(mat.nnz1()); ++i)
        {
          ScalarType dot_prod = 0;
          vcl_size_t row_end = row_buffer[i+1];
          for (vcl_size_t j = row_buffer[i]; j < row_end; ++j)
            dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.stride() + vec.start()];
          result_buf[row_indices[i] * result.stride() + result.start()] = dot_prod;
        }

      }
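
      // Note: a compressed_compressed_matrix stores CSR data only for the rows
      // that actually carry nonzeros. row_buffer/col_buffer enumerate these
      // nnz1() rows, and handle3() maps them back to global row indices. This is
      // why result is zeroed up front and written through row_indices[i] above.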



      //
      // Coordinate Matrix
      //

      namespace detail
      {
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void row_info(coordinate_matrix<ScalarType, MAT_ALIGNMENT> const & mat,
                      vector_base<ScalarType> & vec,
                      viennacl::linalg::detail::row_info_types info_selector)
        {
          ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
          ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(mat.handle());
          unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

          ScalarType value = 0;
          unsigned int last_row = 0;

          for (vcl_size_t i = 0; i < mat.nnz(); ++i)
          {
            unsigned int current_row = coord_buffer[2*i];

            if (current_row != last_row)
            {
              if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
                value = std::sqrt(value);

              result_buf[last_row] = value;
              value = 0;
              last_row = current_row;
            }

            switch (info_selector)
            {
              case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
                value = std::max<ScalarType>(value, std::fabs(elements[i]));
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
                value += std::fabs(elements[i]);
                break;

              case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
                value += elements[i] * elements[i];
                break;

              case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
                if (coord_buffer[2*i+1] == current_row)
                  value = elements[i];
                break;

              default:
                break;
            }
          }

          if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
            value = std::sqrt(value);

          result_buf[last_row] = value;
        }
      } //namespace detail

      /** @brief Carries out matrix-vector multiplication with a coordinate_matrix, i.e. result = prod(mat, vec) */
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                     viennacl::vector_base<ScalarType> & result)
      {
        ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

        for (vcl_size_t i = 0; i < result.size(); ++i)
          result_buf[i * result.stride() + result.start()] = 0;

        for (vcl_size_t i = 0; i < mat.nnz(); ++i)
          result_buf[coord_buffer[2*i] * result.stride() + result.start()]
            += elements[i] * vec_buf[coord_buffer[2*i+1] * vec.stride() + vec.start()];
      }
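
      // Usage sketch, assuming only the public ViennaCL 1.5 API (A, x, y, n are
      // hypothetical): the surface syntax is the same as for compressed_matrix,
      // only the storage format differs. coordinate_matrix keeps interleaved
      // (row, col) pairs in handle12() plus the values in handle(), e.g.
      //
      //   viennacl::coordinate_matrix<double> A(n, n);
      //   viennacl::vector<double> x(n), y(n);
      //   y = viennacl::linalg::prod(A, x);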

      /** @brief Carries out sparse-matrix-dense-matrix multiplication with a coordinate_matrix, i.e. result = prod(sp_mat, d_mat) */
      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {
            NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
            unsigned int r = sp_mat_coords[2*i];
            unsigned int c = sp_mat_coords[2*i+1];
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
              NumericT y = d_mat_wrapper(c, col);
              result_wrapper(r, col) += x * y;
            }
          }
        }

        else {

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

            for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i) {

              NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
              unsigned int r = sp_mat_coords[2*i];
              unsigned int c = sp_mat_coords[2*i+1];
              NumericT y = d_mat_wrapper(c, col);

              result_wrapper(r, col) += x*y;
            }

          }
        }

      }


      /** @brief Carries out matrix-matrix multiplication of a coordinate_matrix with a transposed dense factor, i.e. result = prod(sp_mat, trans(d_mat)) */
      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>
      void prod_impl(const viennacl::coordinate_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
        }
        else {

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
        }

#ifdef VIENNACL_WITH_OPENMP
        #pragma omp parallel for
#endif
        for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {
          NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
          unsigned int r = sp_mat_coords[2*i];
          unsigned int c = sp_mat_coords[2*i+1];
          for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
            NumericT y = d_mat_wrapper(col, c);
            result_wrapper(r, col) += x * y;
          }
        }

      }
      //
      // ELL Matrix
      //

      /** @brief Carries out matrix-vector multiplication with an ell_matrix, i.e. result = prod(mat, vec) */
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                     viennacl::vector_base<ScalarType> & result)
      {
        ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());

        for (vcl_size_t row = 0; row < mat.size1(); ++row)
        {
          ScalarType sum = 0;

          for (unsigned int item_id = 0; item_id < mat.internal_maxnnz(); ++item_id)
          {
            vcl_size_t offset = row + item_id * mat.internal_size1();
            ScalarType val = elements[offset];

            if (val != 0)
            {
              unsigned int col = coords[offset];
              sum += (vec_buf[col * vec.stride() + vec.start()] * val);
            }
          }

          result_buf[row * result.stride() + result.start()] = sum;
        }
      }
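
      // ELL layout recap: every row is padded to the same number of entries and
      // the padded rows are stored column-major over internal_size1() rows, so
      // the item_id-th nonzero of 'row' sits at offset = row + item_id *
      // internal_size1(). Padding slots hold the value 0, which is why entries
      // with val == 0 are skipped in the kernels of this section.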

      /** @brief Carries out sparse-matrix-dense-matrix multiplication with an ell_matrix, i.e. result = prod(sp_mat, d_mat) */
      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result)
      {
        ScalarType const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          {
            for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id)
            {
              vcl_size_t offset = static_cast<vcl_size_t>(row) + item_id * sp_mat.internal_size1();
              NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
              unsigned int sp_mat_col = sp_mat_coords[offset];

              if (sp_mat_val != 0)
              {
                for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
                  result_wrapper(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper(sp_mat_col, col);
              }
            }
          }
        }
        else {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

            for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

              for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {

                vcl_size_t offset = row + item_id * sp_mat.internal_size1();
                NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
                unsigned int sp_mat_col = sp_mat_coords[offset];

                if (sp_mat_val != 0)
                  result_wrapper(row, col) += sp_mat_val * d_mat_wrapper(sp_mat_col, col);
              }
            }
          }
        }

      }

      /** @brief Carries out matrix-matrix multiplication of an ell_matrix with a transposed dense factor, i.e. result = prod(sp_mat, trans(d_mat)) */
      template<class ScalarType, typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result) {

        ScalarType const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.handle());
        unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        if ( detail::is_row_major(typename F1::orientation_category()) ) {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

          for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {

            for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

              for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {

                vcl_size_t offset = row + item_id * sp_mat.internal_size1();
                NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
                unsigned int sp_mat_col = sp_mat_coords[offset];

                if (sp_mat_val != 0)
                  result_wrapper(row, col) += sp_mat_val * d_mat_wrapper(col, sp_mat_col);
              }
            }
          }
        }
        else {
#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
              result_wrapper(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */

#ifdef VIENNACL_WITH_OPENMP
          #pragma omp parallel for
#endif
          for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id) {

            for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {

              vcl_size_t offset = row + item_id * sp_mat.internal_size1();
              NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
              unsigned int sp_mat_col = sp_mat_coords[offset];

              if (sp_mat_val != 0)
              {
                for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
                  result_wrapper(row, col) += sp_mat_val * d_mat_wrapper(col, sp_mat_col);
              }
            }
          }
        }

      }

      //
      // Hybrid Matrix
      //

      /** @brief Carries out matrix-vector multiplication with a hyb_matrix, i.e. result = prod(mat, vec) */
      template<class ScalarType, unsigned int ALIGNMENT>
      void prod_impl(const viennacl::hyb_matrix<ScalarType, ALIGNMENT> & mat,
                     const viennacl::vector_base<ScalarType> & vec,
                     viennacl::vector_base<ScalarType> & result)
      {
        ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.handle());
        ScalarType const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.handle());
        ScalarType const * elements = detail::extract_raw_pointer<ScalarType>(mat.handle());
        unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
        ScalarType const * csr_elements = detail::extract_raw_pointer<ScalarType>(mat.handle5());
        unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());


        for (vcl_size_t row = 0; row < mat.size1(); ++row)
        {
          ScalarType sum = 0;

          //
          // Part 1: Process ELL part
          //
          for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
          {
            vcl_size_t offset = row + item_id * mat.internal_size1();
            ScalarType val = elements[offset];

            if (val != 0)
            {
              unsigned int col = coords[offset];
              sum += (vec_buf[col * vec.stride() + vec.start()] * val);
            }
          }

          //
          // Part 2: Process CSR part
          //
          vcl_size_t col_begin = csr_row_buffer[row];
          vcl_size_t col_end = csr_row_buffer[row + 1];

          for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          {
            sum += (vec_buf[csr_col_buffer[item_id] * vec.stride() + vec.start()] * csr_elements[item_id]);
          }

          result_buf[row * result.stride() + result.start()] = sum;
        }

      }
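
      // Usage sketch, assuming only the public ViennaCL 1.5 API (A, x, y and
      // host_matrix are hypothetical): hyb_matrix splits the nonzeros into a
      // regular ELL part plus a CSR remainder for overlong rows, e.g.
      //
      //   viennacl::hyb_matrix<double> A;
      //   viennacl::copy(host_matrix, A);   // e.g. host_matrix: std::vector<std::map<unsigned int, double> >
      //   y = viennacl::linalg::prod(A, x);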

      //
      // Hybrid Matrix
      //

      /** @brief Carries out sparse-matrix-dense-matrix multiplication with a hyb_matrix, i.e. result = prod(mat, d_mat) */
      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::hyb_matrix<NumericT, ALIGNMENT> & mat,
                     const viennacl::matrix_base<NumericT, F1> & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result)
      {
        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
        unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
        NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
        unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());


        for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
        {
          for (vcl_size_t row = 0; row < mat.size1(); ++row)
          {
            NumericT sum = 0;

            //
            // Part 1: Process ELL part
            //
            for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
            {
              vcl_size_t offset = row + item_id * mat.internal_size1();
              NumericT val = elements[offset];

              if (val != 0)
              {
                unsigned int col = coords[offset];
                sum += d_mat_wrapper(col, result_col) * val;
              }
            }

            //
            // Part 2: Process HYB/CSR part
            //
            vcl_size_t col_begin = csr_row_buffer[row];
            vcl_size_t col_end = csr_row_buffer[row + 1];

            for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
              sum += d_mat_wrapper(csr_col_buffer[item_id], result_col) * csr_elements[item_id];

            result_wrapper(row, result_col) = sum;
          }
        } // for result_col
      }


      /** @brief Carries out matrix-matrix multiplication of a hyb_matrix with a transposed dense factor, i.e. result = prod(mat, trans(d_mat)) */
      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      void prod_impl(const viennacl::hyb_matrix<NumericT, ALIGNMENT> & mat,
                     const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>,
                                                        const viennacl::matrix_base<NumericT, F1>,
                                                        viennacl::op_trans > & d_mat,
                     viennacl::matrix_base<NumericT, F2> & result)
      {
        NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
        NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

        vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
        vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
        vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
        vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
        vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
        vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

        vcl_size_t result_start1 = viennacl::traits::start1(result);
        vcl_size_t result_start2 = viennacl::traits::start2(result);
        vcl_size_t result_inc1 = viennacl::traits::stride1(result);
        vcl_size_t result_inc2 = viennacl::traits::stride2(result);
        vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
        vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

        detail::matrix_array_wrapper<NumericT const, typename F1::orientation_category, false>
            d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
        detail::matrix_array_wrapper<NumericT, typename F2::orientation_category, false>
            result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

        NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
        unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
        NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
        unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
        unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());


        for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
        {
          for (vcl_size_t row = 0; row < mat.size1(); ++row)
          {
            NumericT sum = 0;

            //
            // Part 1: Process ELL part
            //
            for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
            {
              vcl_size_t offset = row + item_id * mat.internal_size1();
              NumericT val = elements[offset];

              if (val != 0)
              {
                unsigned int col = coords[offset];
                sum += d_mat_wrapper(result_col, col) * val;
              }
            }

            //
            // Part 2: Process HYB/CSR part
            //
            vcl_size_t col_begin = csr_row_buffer[row];
            vcl_size_t col_end = csr_row_buffer[row + 1];

            for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
              sum += d_mat_wrapper(result_col, csr_col_buffer[item_id]) * csr_elements[item_id];

            result_wrapper(row, result_col) = sum;
          }
        } // for result_col
      }


    } // namespace host_based
  } //namespace linalg
} //namespace viennacl


#endif