ViennaCL - The Vienna Computing Library  1.5.1
matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/ocl/device.hpp"
27 #include "viennacl/ocl/handle.hpp"
28 #include "viennacl/ocl/kernel.hpp"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/vector.hpp"
32 #include "viennacl/tools/tools.hpp"
36 
38 
40 
41 #include "viennacl/traits/size.hpp"
45 
47 
50 
52 
53 
54 namespace viennacl
55 {
56  namespace linalg
57  {
58  namespace opencl
59  {
60  //
61  // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
62  //
63 
// Enqueues the OpenCL kernel "am_cpu" or "am_gpu" on the context that owns mat1.
//
// NOTE(review): this listing skips source lines 66, 72, 83 and 90 (the embedded numbering jumps),
// so the function name, the declaration of mat1 and the definition of KernelClass are not shown.
// Judging by the kernel names this is presumably the 'am' routine (mat1 = alpha-scaled mat2) - confirm.
//
// Visible parameters:
//   mat2             - matrix whose handle and start/stride values are forwarded as the kernel's second operand
//   alpha            - scalar factor, passed through tools::promote_if_host_scalar<NumericT>()
//   len_alpha, reciprocal_alpha, flip_sign_alpha
//                    - combined into one cl_uint by detail::make_options()
64  template <typename NumericT, typename F,
65  typename ScalarType1>
67  matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
68  {
69  typedef NumericT value_type;
70 
// The context is taken from mat1's handle; const_cast because context() yields a const reference here.
71  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat1).context());
73  KernelClass::init(ctx);
74 
75  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
76 
// "am_cpu" is selected when ScalarType1 is a host (CPU) scalar type, "am_gpu" otherwise.
77  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(),
78  (viennacl::is_cpu_scalar<ScalarType1>::value ? "am_cpu" : "am_gpu"));
// Kernel arguments: mat1 handle + layout, alpha, options, mat2 handle + layout.
79  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat1),
80  cl_uint(viennacl::traits::start1(mat1)), cl_uint(viennacl::traits::start2(mat1)),
81  cl_uint(viennacl::traits::stride1(mat1)), cl_uint(viennacl::traits::stride2(mat1)),
82  cl_uint(viennacl::traits::size1(mat1)), cl_uint(viennacl::traits::size2(mat1)),
84 
85  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha)),
86  options_alpha,
87  viennacl::traits::opencl_handle(mat2),
88  cl_uint(viennacl::traits::start1(mat2)), cl_uint(viennacl::traits::start2(mat2)),
89  cl_uint(viennacl::traits::stride1(mat2)), cl_uint(viennacl::traits::stride2(mat2)),
91  )
92  );
93  }
94 
95 
// Enqueues one of the kernels "ambm_cpu_cpu", "ambm_cpu_gpu", "ambm_gpu_cpu" or "ambm_gpu_gpu"
// on the context that owns mat1, forwarding mat1, (alpha, mat2) and (beta, mat3).
//
// NOTE(review): this listing skips source lines 98, 105, 109, 111, 113, 126, 133 and 140, so the
// function name, the mat1 parameter, the KernelClass definition and the if/else-if conditions that
// pick the kernel name are not shown. Presumably this is 'ambm' (mat1 = alpha*mat2 + beta*mat3) and
// the conditions test whether alpha/beta are host scalars - confirm against the full source.
//
// Visible parameters:
//   mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha - first operand and its scaling options
//   mat3, beta,  len_beta,  reciprocal_beta,  flip_sign_beta  - second operand and its scaling options
96  template <typename NumericT, typename F,
97  typename ScalarType1, typename ScalarType2>
99  matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
100  matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
101  {
102  typedef NumericT value_type;
103 
104  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat1).context());
106  KernelClass::init(ctx);
107 
// Kernel name selection; the guarding conditions are missing from this listing (see note above).
108  std::string kernel_name;
110  kernel_name = "ambm_cpu_cpu";
112  kernel_name = "ambm_cpu_gpu";
114  kernel_name = "ambm_gpu_cpu";
115  else
116  kernel_name = "ambm_gpu_gpu";
117 
// Each (len, reciprocal, flip_sign) triple is packed into a single cl_uint for the kernel.
118  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
119  cl_uint options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
120 
121  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), kernel_name);
122  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat1),
123  cl_uint(viennacl::traits::start1(mat1)), cl_uint(viennacl::traits::start2(mat1)),
124  cl_uint(viennacl::traits::stride1(mat1)), cl_uint(viennacl::traits::stride2(mat1)),
125  cl_uint(viennacl::traits::size1(mat1)), cl_uint(viennacl::traits::size2(mat1)),
127 
128  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha)),
129  options_alpha,
130  viennacl::traits::opencl_handle(mat2),
131  cl_uint(viennacl::traits::start1(mat2)), cl_uint(viennacl::traits::start2(mat2)),
132  cl_uint(viennacl::traits::stride1(mat2)), cl_uint(viennacl::traits::stride2(mat2)),
134 
135  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(beta)),
136  options_beta,
137  viennacl::traits::opencl_handle(mat3),
138  cl_uint(viennacl::traits::start1(mat3)), cl_uint(viennacl::traits::start2(mat3)),
139  cl_uint(viennacl::traits::stride1(mat3)), cl_uint(viennacl::traits::stride2(mat3)),
141  )
142  );
143  }
144 
145 
// Enqueues one of the kernels "ambm_m_cpu_cpu", "ambm_m_cpu_gpu", "ambm_m_gpu_cpu" or
// "ambm_m_gpu_gpu" on the context that owns mat1, forwarding mat1, (alpha, mat2) and (beta, mat3).
//
// NOTE(review): this listing skips source lines 148, 155, 159, 161, 163, 176, 183 and 190, so the
// function name, the mat1 parameter, the KernelClass definition and the conditions that pick the
// kernel name are not shown. Presumably this is 'ambm_m' (mat1 += alpha*mat2 + beta*mat3) - confirm.
//
// Visible parameters:
//   mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha - first operand and its scaling options
//   mat3, beta,  len_beta,  reciprocal_beta,  flip_sign_beta  - second operand and its scaling options
146  template <typename NumericT, typename F,
147  typename ScalarType1, typename ScalarType2>
149  matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
150  matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
151  {
152  typedef NumericT value_type;
153 
154  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat1).context());
156  KernelClass::init(ctx);
157 
// Kernel name selection; the guarding conditions are missing from this listing (see note above).
158  std::string kernel_name;
160  kernel_name = "ambm_m_cpu_cpu";
162  kernel_name = "ambm_m_cpu_gpu";
164  kernel_name = "ambm_m_gpu_cpu";
165  else
166  kernel_name = "ambm_m_gpu_gpu";
167 
// Each (len, reciprocal, flip_sign) triple is packed into a single cl_uint for the kernel.
168  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
169  cl_uint options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
170 
171  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), kernel_name);
172  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat1),
173  cl_uint(viennacl::traits::start1(mat1)), cl_uint(viennacl::traits::start2(mat1)),
174  cl_uint(viennacl::traits::stride1(mat1)), cl_uint(viennacl::traits::stride2(mat1)),
175  cl_uint(viennacl::traits::size1(mat1)), cl_uint(viennacl::traits::size2(mat1)),
177 
178  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha)),
179  options_alpha,
180  viennacl::traits::opencl_handle(mat2),
181  cl_uint(viennacl::traits::start1(mat2)), cl_uint(viennacl::traits::start2(mat2)),
182  cl_uint(viennacl::traits::stride1(mat2)), cl_uint(viennacl::traits::stride2(mat2)),
184 
185  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(beta)),
186  options_beta,
187  viennacl::traits::opencl_handle(mat3),
188  cl_uint(viennacl::traits::start1(mat3)), cl_uint(viennacl::traits::start2(mat3)),
189  cl_uint(viennacl::traits::stride1(mat3)), cl_uint(viennacl::traits::stride2(mat3)),
191  )
192  );
193  }
194 
195 
196 
// Assigns the scalar s to the entries of mat by enqueueing the "assign_cpu" kernel.
//
//   mat   - matrix to be written
//   s     - value to assign (converted to NumericT)
//   clear - if true, the kernel is given internal_size1/internal_size2 of mat instead of
//           size1/size2 (presumably so that padding entries are overwritten too - confirm)
//
// NOTE(review): this listing skips source lines 203 and 216 (the embedded numbering jumps), so the
// definition of KernelClass and one row of kernel arguments are not shown.
197  template <typename NumericT, typename F>
198  void matrix_assign(matrix_base<NumericT, F> & mat, NumericT s, bool clear = false)
199  {
200  typedef NumericT value_type;
201 
202  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
204  KernelClass::init(ctx);
205 
206  value_type alpha = static_cast<value_type>(s);
207 
// Extent handed to the kernel: full internal dimensions when clearing, logical dimensions otherwise.
208  cl_uint s1 = clear ? cl_uint(viennacl::traits::internal_size1(mat)) : cl_uint(viennacl::traits::size1(mat));
209  cl_uint s2 = clear ? cl_uint(viennacl::traits::internal_size2(mat)) : cl_uint(viennacl::traits::size2(mat));
210 
211  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), "assign_cpu");
212  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat),
213  cl_uint(viennacl::traits::start1(mat)), cl_uint(viennacl::traits::start2(mat)),
214  cl_uint(viennacl::traits::stride1(mat)), cl_uint(viennacl::traits::stride2(mat)),
215  s1, s2,
217  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha))
218  )
219  );
220  }
221 
// Enqueues the "diagonal_assign_cpu" kernel for mat with the scalar s (converted to NumericT).
//
// NOTE(review): this listing skips source lines 223, 228 and 238 (the embedded numbering jumps),
// so the function signature, the definition of KernelClass and one row of kernel arguments are
// not shown. Presumably this is matrix_diagonal_assign(mat, s), which writes s to the diagonal
// of mat - confirm against the full source.
222  template <typename NumericT, typename F>
224  {
225  typedef NumericT value_type;
226 
227  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
229  KernelClass::init(ctx);
230 
231  value_type alpha = static_cast<value_type>(s);
232 
233  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), "diagonal_assign_cpu");
234  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat),
235  cl_uint(viennacl::traits::start1(mat)), cl_uint(viennacl::traits::start2(mat)),
236  cl_uint(viennacl::traits::stride1(mat)), cl_uint(viennacl::traits::stride2(mat)),
237  cl_uint(viennacl::traits::size1(mat)), cl_uint(viennacl::traits::size2(mat)),
239  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha))
240  )
241  );
242  }
243 
// Zeroes mat via matrix_assign() and then enqueues the "av_cpu" kernel with mat as destination
// (described by size_mat) and vec as source (described by size_vec), using the factor NumericT(1).
// The start offset in size_mat depends on k: a negative k shifts the first row by -k, a
// non-negative k shifts the first column by k.
//
// NOTE(review): this listing skips source lines 245, 254, 258-259, 269, 282-283 and 288 (the
// embedded numbering jumps). Missing are the function signature, the KernelClass definition, the
// declarations of size_mat/size_vec, the condition of the if/else below, the size_mat.stride
// assignments and the tail of the second size_mat.start expression. Presumably this is
// matrix_diag_from_vector(vec, k, mat) and the if/else distinguishes row-major from column-major
// storage - confirm against the full source.
244  template <typename NumericT, typename F>
246  {
247  // Step 1: set everything to zero
248  matrix_assign(mat, NumericT(0));
249 
250  // Step 2: set the diagonal:
251 
252  // reuse the "av_cpu" kernel (fetched below) for assigning the elements:
253  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
255  KernelClass::init(ctx);
256 
257  cl_uint options_alpha = 0;
260  {
261  vcl_size_t first_row_index = 0;
262  vcl_size_t first_col_index = 0;
263  if (k < 0)
264  first_row_index = vcl_size_t(-k);
265  else
266  first_col_index = vcl_size_t(k);
// Linear offset of the first diagonal element when consecutive rows are internal_size2(mat) apart.
267  size_mat.start = cl_uint( (viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)) * viennacl::traits::internal_size2(mat)
268  + viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat));
270  size_mat.size = cl_uint(viennacl::traits::size(vec));
271  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
272  }
273  else
274  {
275  vcl_size_t first_row_index = 0;
276  vcl_size_t first_col_index = 0;
277  if (k < 0)
278  first_row_index = vcl_size_t(-k);
279  else
280  first_col_index = vcl_size_t(k);
// NOTE(review): the continuation of this expression (source lines 282-283) is missing from the listing.
281  size_mat.start = cl_uint( viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)
284  size_mat.size = cl_uint(viennacl::traits::size(vec));
285  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
286  }
287 
// Layout descriptor of the source vector.
289  size_vec.start = cl_uint(viennacl::traits::start(vec));
290  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
291  size_vec.size = cl_uint(viennacl::traits::size(vec));
292  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
293 
294  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
295  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(mat),
296  size_mat,
297 
298  viennacl::traits::opencl_handle(NumericT(1)),
299  options_alpha,
300  viennacl::traits::opencl_handle(vec),
301  size_vec)
302  );
303  }
304 
// Enqueues the "av_cpu" kernel with vec as destination (described by size_vec) and mat as source
// (described by size_mat), using the factor NumericT(1). The start offset in size_mat depends on
// k: a negative k shifts the first row by -k, a non-negative k shifts the first column by k.
//
// NOTE(review): this listing skips source lines 306, 310, 314-315, 325, 338-339 and 344 (the
// embedded numbering jumps). Missing are the function signature, the KernelClass definition, the
// declarations of size_mat/size_vec, the condition of the if/else below, the size_mat.stride
// assignments and the tail of the second size_mat.start expression. Presumably this is
// matrix_diag_to_vector(mat, k, vec) and the if/else distinguishes row-major from column-major
// storage - confirm against the full source.
305  template <typename NumericT, typename F>
307  {
308  // reuse the "av_cpu" kernel (fetched below) for assigning the elements:
309  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
311  KernelClass::init(ctx);
312 
313  cl_uint options_alpha = 0;
316  {
317  vcl_size_t first_row_index = 0;
318  vcl_size_t first_col_index = 0;
319  if (k < 0)
320  first_row_index = vcl_size_t(-k);
321  else
322  first_col_index = vcl_size_t(k);
// Linear offset of the first diagonal element when consecutive rows are internal_size2(mat) apart.
323  size_mat.start = cl_uint( (viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)) * viennacl::traits::internal_size2(mat)
324  + viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat));
326  size_mat.size = cl_uint(viennacl::traits::size(vec));
327  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
328  }
329  else
330  {
331  vcl_size_t first_row_index = 0;
332  vcl_size_t first_col_index = 0;
333  if (k < 0)
334  first_row_index = vcl_size_t(-k);
335  else
336  first_col_index = vcl_size_t(k);
// NOTE(review): the continuation of this expression (source lines 338-339) is missing from the listing.
337  size_mat.start = cl_uint( viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)
340  size_mat.size = cl_uint(viennacl::traits::size(vec));
341  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
342  }
343 
// Layout descriptor of the destination vector.
345  size_vec.start = cl_uint(viennacl::traits::start(vec));
346  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
347  size_vec.size = cl_uint(viennacl::traits::size(vec));
348  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
349 
350 
351  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
352  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(vec),
353  size_vec,
354 
355  viennacl::traits::opencl_handle(NumericT(1)),
356  options_alpha,
357  viennacl::traits::opencl_handle(mat),
358  size_mat)
359  );
360  }
361 
// Copies one row of mat into vec by enqueueing the "av_cpu" kernel with vec as destination
// (size_vec), mat as source (size_mat) and the factor NumericT(1).
//
//   mat - source matrix
//   i   - row index (its use is in the size_mat.start lines, which are missing here)
//   vec - destination vector; its size determines how many elements are transferred
//
// NOTE(review): this listing skips source lines 367, 371-372, 374, 381 and 387 (the embedded
// numbering jumps). Missing are the KernelClass definition, the declarations of
// size_mat/size_vec, the condition of the if/else and both size_mat.start assignments.
// Presumably the first branch handles row-major and the second column-major storage - confirm.
362  template <typename NumericT, typename F>
363  void matrix_row(const matrix_base<NumericT, F> & mat, unsigned int i, vector_base<NumericT> & vec)
364  {
365  // reuse the "av_cpu" kernel (fetched below) for assigning the elements:
366  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
368  KernelClass::init(ctx);
369 
370  cl_uint options_alpha = 0;
373  {
// Neighbouring row elements are stride2(mat) entries apart in this branch.
375  size_mat.stride = cl_uint(viennacl::traits::stride2(mat));
376  size_mat.size = cl_uint(viennacl::traits::size(vec));
377  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
378  }
379  else
380  {
// Neighbouring row elements are a whole internal column (internal_size1) apart, times stride2.
382  size_mat.stride = cl_uint(viennacl::traits::stride2(mat) * viennacl::traits::internal_size1(mat));
383  size_mat.size = cl_uint(viennacl::traits::size(vec));
384  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
385  }
386 
// Layout descriptor of the destination vector.
388  size_vec.start = cl_uint(viennacl::traits::start(vec));
389  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
390  size_vec.size = cl_uint(viennacl::traits::size(vec));
391  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
392 
393 
394  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
395  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(vec),
396  size_vec,
397 
398  viennacl::traits::opencl_handle(NumericT(1)),
399  options_alpha,
400  viennacl::traits::opencl_handle(mat),
401  size_mat)
402  );
403  }
404 
// Copies one column of mat into vec by enqueueing the "av_cpu" kernel with vec as destination
// (size_vec), mat as source (size_mat) and the factor NumericT(1).
//
//   mat - source matrix
//   j   - column index (its use is in the size_mat.start lines, which are missing here)
//   vec - destination vector; its size determines how many elements are transferred
//
// NOTE(review): this listing skips source lines 410, 414-415, 417, 424 and 430 (the embedded
// numbering jumps). Missing are the KernelClass definition, the declarations of
// size_mat/size_vec, the condition of the if/else and both size_mat.start assignments.
// Presumably the first branch handles row-major and the second column-major storage - confirm.
//
// NOTE(review): walking down a column advances the row index, so one would expect the row
// stride (stride1) in both size_mat.stride expressions below; the code uses stride2. The two
// only coincide when the row and column strides of the view are equal - verify with a matrix
// slice that has stride1 != stride2.
405  template <typename NumericT, typename F>
406  void matrix_column(const matrix_base<NumericT, F> & mat, unsigned int j, vector_base<NumericT> & vec)
407  {
408  // reuse the "av_cpu" kernel (fetched below) for assigning the elements:
409  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
411  KernelClass::init(ctx);
412 
413  cl_uint options_alpha = 0;
416  {
// Neighbouring column elements are a whole internal row (internal_size2) apart, times stride2.
418  size_mat.stride = cl_uint(viennacl::traits::stride2(mat) * viennacl::traits::internal_size2(mat));
419  size_mat.size = cl_uint(viennacl::traits::size(vec));
420  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
421  }
422  else
423  {
425  size_mat.stride = cl_uint(viennacl::traits::stride2(mat));
426  size_mat.size = cl_uint(viennacl::traits::size(vec));
427  size_mat.internal_size = cl_uint(viennacl::traits::internal_size(vec));
428  }
429 
// Layout descriptor of the destination vector.
431  size_vec.start = cl_uint(viennacl::traits::start(vec));
432  size_vec.stride = cl_uint(viennacl::traits::stride(vec));
433  size_vec.size = cl_uint(viennacl::traits::size(vec));
434  size_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));
435 
436 
437  viennacl::ocl::kernel & kern = ctx.get_kernel(KernelClass::program_name(), "av_cpu");
438  viennacl::ocl::enqueue(kern(viennacl::traits::opencl_handle(vec),
439  size_vec,
440 
441  viennacl::traits::opencl_handle(NumericT(1)),
442  options_alpha,
443  viennacl::traits::opencl_handle(mat),
444  size_mat)
445  );
446  }
447 
448 
449  //
451  //
452 
453  // Binary operations A = B .* C and A = B ./ C
// Element-wise binary operation: enqueues the "element_op" kernel with destination A and the two
// operands proxy.lhs() and proxy.rhs(), plus an op_type code (0: product, 1: division, 2: power).
// In debug builds it is asserted that A and both operands live in the same OpenCL context.
//
// NOTE(review): this listing skips source lines 460-461, 467, 473, 475 and 482 (the embedded
// numbering jumps). Missing are the function signature, the KernelClass definition, the two
// conditions that switch op_type to 1 or 0, and one row of kernel arguments for A.
// NOTE(review): the assert messages say "Vectors" although the operands here are matrices.
459  template <typename T, typename F, typename OP>
462  {
463  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
464  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
465 
466  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
468  KernelClass::init(ctx);
469 
470  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), "element_op");
471 
// Default is 'power'; the (missing) conditions below presumably inspect OP to select division or product.
472  cl_uint op_type = 2; //0: product, 1: division, 2: power
474  op_type = 1;
476  op_type = 0;
477 
478  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
479  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
480  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
481  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
483 
484  viennacl::traits::opencl_handle(proxy.lhs()),
485  cl_uint(viennacl::traits::start1(proxy.lhs())), cl_uint(viennacl::traits::start2(proxy.lhs())),
486  cl_uint(viennacl::traits::stride1(proxy.lhs())), cl_uint(viennacl::traits::stride2(proxy.lhs())),
487  cl_uint(viennacl::traits::internal_size1(proxy.lhs())), cl_uint(viennacl::traits::internal_size2(proxy.lhs())),
488 
489  viennacl::traits::opencl_handle(proxy.rhs()),
490  cl_uint(viennacl::traits::start1(proxy.rhs())), cl_uint(viennacl::traits::start2(proxy.rhs())),
491  cl_uint(viennacl::traits::stride1(proxy.rhs())), cl_uint(viennacl::traits::stride2(proxy.rhs())),
492  cl_uint(viennacl::traits::internal_size1(proxy.rhs())), cl_uint(viennacl::traits::internal_size2(proxy.rhs())),
493 
494  op_type)
495  );
496  }
497 
498 
499  // Unary operations
500 
// Element-wise unary operation: enqueues a kernel k with destination A and the single operand
// proxy.lhs(). In debug builds it is asserted that A, proxy.lhs() and proxy.rhs() live in the
// same OpenCL context.
//
// NOTE(review): this listing skips source lines 507-508, 515-516 and 522 (the embedded numbering
// jumps). Missing are the function signature, the statements that initialise the kernel program
// and obtain the kernel 'k', and one row of kernel arguments for A. Which kernel is launched
// therefore cannot be determined from this listing.
// NOTE(review): the assert messages say "Vectors" although the operands here are matrices.
506  template <typename T, typename F, typename OP>
509  {
510  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
511  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
512 
513  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
514 
517 
518  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
519  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
520  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
521  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
523 
524  viennacl::traits::opencl_handle(proxy.lhs()),
525  cl_uint(viennacl::traits::start1(proxy.lhs())), cl_uint(viennacl::traits::start2(proxy.lhs())),
526  cl_uint(viennacl::traits::stride1(proxy.lhs())), cl_uint(viennacl::traits::stride2(proxy.lhs())),
527  cl_uint(viennacl::traits::internal_size1(proxy.lhs())), cl_uint(viennacl::traits::internal_size2(proxy.lhs())))
528  );
529  }
530 
531 
532  //
534  //
535 
536  // A * x
537 
// Matrix-vector product result = mat * vec: enqueues the "vec_mul" kernel on the context of mat.
//
//   vec    - input vector; must have mat.size2() entries (asserted)
//   result - output vector; must not share its memory handle with vec (asserted), i.e. in-place
//            products x = prod(A, x) are rejected in debug builds
//
// NOTE(review): this listing skips source lines 547, 554 and 567 (the embedded numbering jumps),
// so the function name, the 'mat' parameter declaration, the KernelClass definition and one row
// of kernel arguments are not shown. Presumably this is prod_impl(mat, vec, result) - confirm.
546  template <typename NumericT, typename F>
548  const vector_base<NumericT> & vec,
549  vector_base<NumericT> & result)
550  {
551  typedef NumericT value_type;
552 
553  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
555  KernelClass::init(ctx);
556 
557  assert(mat.size2() == vec.size());
558  // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
559  assert(viennacl::traits::handle(vec) != viennacl::traits::handle(result) && bool("No direct inplace matrix-vector product possible. Introduce a temporary!"));
560  //result.resize(mat.size1());
561 
562  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), "vec_mul");
563  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat),
564  cl_uint(viennacl::traits::start1(mat)), cl_uint(viennacl::traits::start2(mat)),
565  cl_uint(viennacl::traits::stride1(mat)), cl_uint(viennacl::traits::stride2(mat)),
566  cl_uint(viennacl::traits::size1(mat)), cl_uint(viennacl::traits::size2(mat)),
568 
569  viennacl::traits::opencl_handle(vec),
570  cl_uint(viennacl::traits::start(vec)),
571  cl_uint(viennacl::traits::stride(vec)),
572  cl_uint(viennacl::traits::size(vec)),
573 
574  viennacl::traits::opencl_handle(result),
575  cl_uint(viennacl::traits::start(result)),
576  cl_uint(viennacl::traits::stride(result)),
577  cl_uint(viennacl::traits::size(result)),
578 
// Local scratch buffer: one value_type slot per work item of a work group.
579  viennacl::ocl::local_mem(sizeof(value_type) * k.local_work_size())
580  ) );
581  }
582 
583 
584  // trans(A) * x
585 
// Transposed matrix-vector product result = trans(A) * vec: enqueues the "trans_vec_mul" kernel.
// The kernel receives the untransposed matrix mat_trans.lhs(); the context is taken from vec.
//
//   mat_trans - transposition proxy; size1(mat_trans) must equal size(result) and
//               size2(mat_trans) must equal size(vec) (both asserted)
//   vec       - input vector
//   result    - output vector; must not share its memory handle with vec (asserted)
//
// NOTE(review): this listing skips source lines 595 and 604 (the embedded numbering jumps), so
// the function name, the mat_trans parameter declaration and the KernelClass definition are not
// shown. Presumably this is the prod_impl overload for matrix_expression<..., op_trans> - confirm.
594  template <typename NumericT, typename F>
596  const vector_base<NumericT> & vec,
597  vector_base<NumericT> & result)
598  {
599  assert( (viennacl::traits::size1(mat_trans) == viennacl::traits::size(result)) && bool("Size check failed for transposed matrix-vector product: size1(A^T) == size(result)"));
600  assert( (viennacl::traits::size2(mat_trans) == viennacl::traits::size(vec)) && bool("Size check failed for transposed matrix-vector product: size2(A^T) == size(x)")); //remember: mat is transposed!
601 
602 
603  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec).context());
605  KernelClass::init(ctx);
606 
607 
608  // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
609  assert(viennacl::traits::handle(vec) != viennacl::traits::handle(result) && bool("No direct inplace transposed matrix-vector product possible. Introduce a temporary!"));
610 
611  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), "trans_vec_mul");
612 
613  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat_trans.lhs()),
614  cl_uint(viennacl::traits::start1(mat_trans.lhs())), cl_uint(viennacl::traits::start2(mat_trans.lhs())),
615  cl_uint(viennacl::traits::stride1(mat_trans.lhs())), cl_uint(viennacl::traits::stride2(mat_trans.lhs())),
616  cl_uint(viennacl::traits::size1(mat_trans.lhs())), cl_uint(viennacl::traits::size2(mat_trans.lhs())),
617  cl_uint(viennacl::traits::internal_size1(mat_trans.lhs())), cl_uint(viennacl::traits::internal_size2(mat_trans.lhs())),
618 
619  viennacl::traits::opencl_handle(vec),
620  cl_uint(viennacl::traits::start(vec)),
621  cl_uint(viennacl::traits::stride(vec)),
622  cl_uint(viennacl::traits::size(vec)),
623 
624  viennacl::traits::opencl_handle(result),
625  cl_uint(viennacl::traits::start(result)),
626  cl_uint(viennacl::traits::stride(result)),
627  cl_uint(viennacl::traits::size(result)),
628 
// Local scratch buffer: one NumericT slot per work item of a work group.
629  viennacl::ocl::local_mem(sizeof(NumericT) * k.local_work_size())
630  ) );
631  }
632 
633 
634  //
636  //
637 
638  namespace detail
639  {
640  // C = A * B and possibly transposed variants
// Launches the general-purpose matrix-matrix product kernel 'kernel_name' with the arguments
// (alpha, A, B, beta, C). The launch uses 16x16 work groups; the global work sizes are
// size1(C) and size2(C) passed through tools::align_to_multiple<unsigned int>(..., 16).
//
//   A, B        - input matrices
//   C           - output matrix
//   alpha, beta - scalars, converted to cpu_value_type before being passed to the kernel
//   kernel_name - name of the kernel to fetch from KernelClass::program_name()
//
// NOTE(review): this listing skips source lines 649, 656, 675, 681 and 688 (the embedded
// numbering jumps), so the definitions of cpu_value_type and KernelClass as well as one row of
// kernel arguments per matrix are not shown. The orientation_A/B/C typedefs are presumably
// consumed by the missing KernelClass typedef - confirm.
641  template <typename T1, typename T2, typename T3, typename ScalarType >
642  void prod_slow_kernel(const T1 & A,
643  const T2 & B,
644  T3 & C,
645  ScalarType alpha,
646  ScalarType beta,
647  std::string kernel_name)
648  {
650  typedef typename viennacl::result_of::orientation_functor<T1>::type orientation_A;
651  typedef typename viennacl::result_of::orientation_functor<T2>::type orientation_B;
652  typedef typename viennacl::result_of::orientation_functor<T3>::type orientation_C;
653 
654  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
655 
657  KernelClass::init(ctx);
658 
659  //std::cout << "KernelClass::program_name() : " << KernelClass::program_name() << std::endl;
660  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), kernel_name);
661 
// Two-dimensional launch over the entries of C, in 16x16 work groups.
662  k.global_work_size(0, viennacl::tools::align_to_multiple<unsigned int>(static_cast<unsigned int>(viennacl::traits::size1(C)), 16));
663  k.global_work_size(1, viennacl::tools::align_to_multiple<unsigned int>(static_cast<unsigned int>(viennacl::traits::size2(C)), 16));
664  k.local_work_size(0, 16);
665  k.local_work_size(1, 16);
666 
667  cpu_value_type cl_alpha = static_cast<cpu_value_type>(alpha);
668  cpu_value_type cl_beta = static_cast<cpu_value_type>(beta);
669 
670  viennacl::ocl::enqueue(k(cl_alpha,
671  viennacl::traits::opencl_handle(A),
672  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
673  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
674  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
676 
677  viennacl::traits::opencl_handle(B),
678  cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)),
679  cl_uint(viennacl::traits::stride1(B)), cl_uint(viennacl::traits::stride2(B)),
680  cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)),
682 
683  cl_beta,
684  viennacl::traits::opencl_handle(C),
685  cl_uint(viennacl::traits::start1(C)), cl_uint(viennacl::traits::start2(C)),
686  cl_uint(viennacl::traits::stride1(C)), cl_uint(viennacl::traits::stride2(C)),
687  cl_uint(viennacl::traits::size1(C)), cl_uint(viennacl::traits::size2(C)),
689  )
690  );
691  }
692 
693  // C = A * B, using fast kernel for NVIDIA
// Launches the blocked matrix-matrix product kernel 'kernel_name' with the arguments
// (alpha, A, B, beta, C). The global work sizes are size2(C)/4 (dimension 0, column blocks) and
// size1(C)/4 (dimension 1, row blocks); work groups are 16 x 4.
//
//   A, B        - input matrices
//   C           - output matrix; the integer divisions by 4 truncate, so dimensions that are not
//                 a multiple of 4 would leave entries uncovered (the dispatcher detail::prod only
//                 selects this kernel when all dimensions of A and B are multiples of 64)
//   alpha, beta - scalars, converted to cpu_value_type before being passed to the kernel
//   kernel_name - name of the kernel to fetch from KernelClass::program_name()
//
// NOTE(review): this listing skips source lines 702, 709, 728, 734 and 741 (the embedded
// numbering jumps), so the definitions of cpu_value_type and KernelClass as well as one row of
// kernel arguments per matrix are not shown. The orientation_A/B/C typedefs are presumably
// consumed by the missing KernelClass typedef - confirm.
694  template <typename T1, typename T2, typename T3, typename ScalarType >
695  void prod_fast_kernel(const T1 & A,
696  const T2 & B,
697  T3 & C,
698  ScalarType alpha,
699  ScalarType beta,
700  std::string kernel_name)
701  {
703  typedef typename viennacl::result_of::orientation_functor<T1>::type orientation_A;
704  typedef typename viennacl::result_of::orientation_functor<T2>::type orientation_B;
705  typedef typename viennacl::result_of::orientation_functor<T3>::type orientation_C;
706 
707  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
708 
710  KernelClass::init(ctx);
711 
712  //std::cout << "KernelClass::program_name() : " << KernelClass::program_name() << std::endl;
713  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), kernel_name);
714 
715  k.global_work_size(0, viennacl::traits::size2(C) / 4); //column blocks
716  k.global_work_size(1, viennacl::traits::size1(C) / 4); //row blocks
717  k.local_work_size(0, 16); //columns
718  k.local_work_size(1, 4); //rows
719 
720  cpu_value_type cl_alpha = static_cast<cpu_value_type>(alpha);
721  cpu_value_type cl_beta = static_cast<cpu_value_type>(beta);
722 
723  viennacl::ocl::enqueue(k(cl_alpha,
724  viennacl::traits::opencl_handle(A),
725  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
726  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
727  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
729 
730  viennacl::traits::opencl_handle(B),
731  cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)),
732  cl_uint(viennacl::traits::stride1(B)), cl_uint(viennacl::traits::stride2(B)),
733  cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)),
735 
736  cl_beta,
737  viennacl::traits::opencl_handle(C),
738  cl_uint(viennacl::traits::start1(C)), cl_uint(viennacl::traits::start2(C)),
739  cl_uint(viennacl::traits::stride1(C)), cl_uint(viennacl::traits::stride2(C)),
740  cl_uint(viennacl::traits::size1(C)), cl_uint(viennacl::traits::size2(C)),
742  )
743  );
744  }
745 
746  template <typename T1, typename T2, typename T3, typename ScalarType >
747  void prod(const T1 & A,
748  const T2 & B,
749  T3 & C,
750  ScalarType alpha,
751  ScalarType beta,
752  std::string fast_kernel_name,
753  std::string slow_kernel_name)
754  {
755  if ( (viennacl::traits::size1(A) < 64)
756  || (viennacl::traits::size2(A) < 64)
757  || (viennacl::traits::size1(B) < 64)
758  || (viennacl::traits::size2(B) < 64) ) //there is most likely not enough to compute, rendering kernel launch overhead considerable
759  {
760  prod_slow_kernel(A, B, C, alpha, beta, slow_kernel_name);
761  }
762  else if ( (viennacl::traits::size1(A) % 64 == 0)
763  && (viennacl::traits::size2(A) % 64 == 0)
764  && (viennacl::traits::size1(B) % 64 == 0)
765  && (viennacl::traits::size2(B) % 64 == 0) ) // allows the use of the fast NVIDIA kernel
766  {
767  prod_fast_kernel(A, B, C, alpha, beta, fast_kernel_name);
768  //prod_slow_kernel(A, B, C, slow_kernel_name);
769  }
770  else //TODO: use four kernels
771  {
772  prod_slow_kernel(A, B, C, alpha, beta, slow_kernel_name);
773  }
774 
775  }
776  } // namespace detail
777 
778 
// Matrix-matrix product C = prod(A, B) with scalars alpha and beta forwarded to the kernel.
// Debug builds assert size1(A) == size1(C), size2(A) == size1(B) and size2(B) == size2(C).
// If any operand is flagged as not aligned, or has a non-zero start offset or a stride larger
// than 1, the product is delegated to detail::prod() with the kernel names "prod16_AA" (fast)
// and "prod_AA" (general).
//
// NOTE(review): this listing skips source lines 785, 787, 795-797 and 808-809 (the embedded
// numbering jumps). Missing are the function name, the declarations of parameters A and C, the
// definitions of A_not_aligned/B_not_aligned/C_not_aligned and the body of the else branch, so
// what happens for aligned, unstrided operands cannot be determined from this listing.
784  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
786  const matrix_base<NumericT, F2> & B,
788  ScalarType alpha,
789  ScalarType beta)
790  {
791  assert( (viennacl::traits::size1(A) == viennacl::traits::size1(C)) && bool("Size mismatch in C = prod(A, B): size1(A) != size1(C)"));
792  assert( (viennacl::traits::size2(A) == viennacl::traits::size1(B)) && bool("Size mismatch in C = prod(A, B): size2(A) != size1(B)"));
793  assert( (viennacl::traits::size2(B) == viennacl::traits::size2(C)) && bool("Size mismatch in C = prod(A, B): size2(B) != size2(C)"));
794 
798  // Inplace matrix-matrix products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
799  /*assert( (viennacl::traits::handle(C) != viennacl::traits::handle(A))
800  && (viennacl::traits::handle(C) != viennacl::traits::handle(B))
801  && bool("No direct inplace matrix-matrix product possible. Introduce a temporary!"));*/
802 
803  if(A_not_aligned || A.start1() > 0 || A.start2() > 0 || A.stride1() > 1 || A.stride2() > 1
804  ||B_not_aligned || B.start1() > 0 || B.start2() > 0 || B.stride1() > 1 || B.stride2() > 1
805  ||C_not_aligned || C.start1() > 0 || C.start2() > 0 || C.stride1() > 1 || C.stride2() > 1)
806  detail::prod(A, B, C, alpha, beta, "prod16_AA", "prod_AA");
807  else{
810  }
811  }
812 
813 
814 
// Matrix-matrix product C = prod(trans(A), B) with scalars alpha and beta forwarded to the kernel.
// A is a transposition proxy; the underlying matrix is A.lhs(). Debug builds assert
// size2(A.lhs()) == size1(C), size1(A.lhs()) == size1(B) and size2(B) == size2(C).
// If any operand is flagged as not aligned, or has a non-zero start offset or a stride larger
// than 1, the product is delegated to detail::prod() with the kernel names "prod16_TA" (fast)
// and "prod_TA" (general).
//
// NOTE(review): this listing skips source lines 821-822, 825, 841-842 and 850-852 (the embedded
// numbering jumps). Missing are the function name, part of the declaration of A, the declaration
// of C, the definitions of B_not_aligned/C_not_aligned and the body of the else branch, so what
// happens for aligned, unstrided operands cannot be determined from this listing.
820  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
823  op_trans> & A,
824  const matrix_base<NumericT, F2> & B,
826  ScalarType alpha,
827  ScalarType beta)
828  {
829  //std::cout << "size2(A): " << viennacl::traits::size2(A.lhs()) << std::endl;
830  //std::cout << "size1(C): " << viennacl::traits::size1(C) << std::endl;
831  assert( (viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C)) && bool("Size mismatch in C = prod(trans(A), B): size2(A) != size1(C)"));
832  assert( (viennacl::traits::size1(A.lhs()) == viennacl::traits::size1(B)) && bool("Size mismatch in C = prod(trans(A), B): size1(A) != size1(B)"));
833  assert( (viennacl::traits::size2(B) == viennacl::traits::size2(C)) && bool("Size mismatch in C = prod(trans(A), B): size2(B) != size2(C)"));
834 
835  // Inplace matrix-matrix products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
836  /*assert( (viennacl::traits::handle(C) != viennacl::traits::handle(A.lhs()))
837  && (viennacl::traits::handle(C) != viennacl::traits::handle(B))
838  && bool("No direct inplace matrix-matrix product possible. Introduce a temporary!"));*/
839 
// A counts as not aligned if either internal dimension is not a multiple of matrix_base::alignment.
840  bool A_not_aligned = (A.lhs().internal_size1()%matrix_base<NumericT, F1>::alignment>0) ||(A.lhs().internal_size2()%matrix_base<NumericT, F1>::alignment>0);
843 
844 
845  if(A_not_aligned || A.lhs().start1() > 0 || A.lhs().start2() > 0 || A.lhs().stride1() > 1 || A.lhs().stride2() > 1
846  ||B_not_aligned || B.start1() > 0 || B.start2() > 0 || B.stride1() > 1 || B.stride2() > 1
847  ||C_not_aligned || C.start1() > 0 || C.start2() > 0 || C.stride1() > 1 || C.stride2() > 1)
848  detail::prod(A.lhs(), B, C, alpha, beta, "prod16_TA", "prod_TA");
849  else{
853  }
854  }
855 
856 
857 
858 
// Matrix-matrix product C = prod(A, trans(B)) with scalars alpha and beta forwarded to the kernel.
// B is a transposition proxy; the underlying matrix is B.lhs(). Debug builds assert
// size1(A) == size1(C), size2(A) == size2(B.lhs()) and size1(B.lhs()) == size2(C).
// If any operand is flagged as not aligned, or has a non-zero start offset or a stride larger
// than 1, the product is delegated to detail::prod() with the kernel names "prod16_AT" (fast)
// and "prod_AT" (general).
//
// NOTE(review): this listing skips source lines 865-867, 875, 877 and 889-891 (the embedded
// numbering jumps). Missing are the function name, the declarations of A, B and C, the
// definitions of A_not_aligned/C_not_aligned and the body of the else branch, so what happens
// for aligned, unstrided operands cannot be determined from this listing.
864  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
868  ScalarType alpha,
869  ScalarType beta)
870  {
871  assert( (viennacl::traits::size1(A) == viennacl::traits::size1(C)) && bool("Size mismatch in C = prod(A, trans(B)): size1(A) != size1(C)"));
872  assert( (viennacl::traits::size2(A) == viennacl::traits::size2(B.lhs())) && bool("Size mismatch in C = prod(A, trans(B)): size2(A) != size2(B)"));
873  assert( (viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C)) && bool("Size mismatch in C = prod(A, trans(B)): size1(B) != size2(C)"));
874 
// B counts as not aligned if either internal dimension is not a multiple of matrix_base::alignment.
876  bool B_not_aligned = (B.lhs().internal_size1()%matrix_base<NumericT, F2>::alignment>0) ||(B.lhs().internal_size2()%matrix_base<NumericT, F2>::alignment>0);
878 
879  // Inplace matrix-matrix products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
880  /*assert( (viennacl::traits::handle(C) != viennacl::traits::handle(A))
881  && (viennacl::traits::handle(C) != viennacl::traits::handle(B.lhs()))
882  && bool("No direct inplace matrix-matrix product possible. Introduce a temporary!"));*/
883 
884  if(A_not_aligned || A.start1() > 0 || A.start2() > 0 || A.stride1() > 1 || A.stride2() > 1
885  ||B_not_aligned || B.lhs().start1() > 0 || B.lhs().start2() > 0 || B.lhs().stride1() > 1 || B.lhs().stride2() > 1
886  ||C_not_aligned || C.start1() > 0 || C.start2() > 0 || C.stride1() > 1 || C.stride2() > 1)
887  detail::prod(A, B.lhs(), C, alpha, beta, "prod16_AT", "prod_AT");
888  else{
892  }
893  }
894 
895 
896 
// Matrix-matrix product with both operands transposed; the assertion
// messages below describe the operation as C = prod(trans(A), trans(B)).
//
// A, B        - transposition expressions; the underlying matrices are accessed via A.lhs() and B.lhs()
// C           - result; size1(C) must equal size2(A.lhs()) and size2(C) must equal size1(B.lhs())
// alpha, beta - forwarded unchanged to detail::prod(); presumably GEMM-style
//               scaling factors (TODO confirm against detail::prod)
902  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
906  ScalarType alpha,
907  ScalarType beta)
908  {
     // Dimension checks are expressed on the untransposed matrices A.lhs() and B.lhs().
909  assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size mismatch in C = prod(trans(A), trans(B)): size2(A) != size1(C)"));
910  assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size2(B.lhs()) && bool("Size mismatch in C = prod(trans(A), trans(B)): size1(A) != size2(B)"));
911  assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C) && bool("Size mismatch in C = prod(trans(A), trans(B)): size1(B) != size2(C)"));
912 
     // An operand counts as "not aligned" if either internal dimension is not a multiple of the matrix alignment.
913  bool A_not_aligned = (A.lhs().internal_size1()%matrix_base<NumericT, F1>::alignment>0) ||(A.lhs().internal_size2()%matrix_base<NumericT, F1>::alignment>0);
914  bool B_not_aligned = (B.lhs().internal_size1()%matrix_base<NumericT, F2>::alignment>0) ||(B.lhs().internal_size2()%matrix_base<NumericT, F2>::alignment>0);
916 
917  // In-place matrix-matrix products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
     // NOTE: the assertion that would enforce this is commented out, so aliasing is not checked here.
918  /*assert( (viennacl::traits::handle(C) != viennacl::traits::handle(A.lhs()))
919  && (viennacl::traits::handle(C) != viennacl::traits::handle(B.lhs()))
920  && bool("No direct inplace matrix-matrix product possible. Introduce a temporary!"));*/
921 
     // Use detail::prod() (fast kernel name "prod16_TT", slow kernel name "prod_TT") whenever any
     // operand is not aligned, starts at a nonzero offset, or has a non-unit stride.
922  if(A_not_aligned || A.lhs().start1() > 0 || A.lhs().start2() > 0 || A.lhs().stride1() > 1 || A.lhs().stride2() > 1
923  ||B_not_aligned || B.lhs().start1() > 0 || B.lhs().start2() > 0 || B.lhs().stride1() > 1 || B.lhs().stride2() > 1
924  ||C_not_aligned || C.start1() > 0 || C.start2() > 0 || C.stride1() > 1 || C.stride2() > 1)
925  detail::prod(A.lhs(), B.lhs(), C, alpha, beta, "prod16_TT", "prod_TT");
926  else{
931  }
932  }
933 
934 
935 
936 
937  //
939  //
940 
941 
// Scaled rank-1 update of a dense matrix: mat1 += alpha * vec1 * vec2^T.
//
// mat1            - matrix updated in place; size1(mat1) must equal size(vec1), size2(mat1) must equal size(vec2)
// alpha           - scaling factor, either a host scalar or a device scalar (selects the kernel variant below)
// len_alpha, reciprocal_alpha, flip_sign_alpha
//                 - packed into a single cl_uint by detail::make_options() and passed to the kernel
// vec1, vec2      - the two vectors forming the outer product
954  template <typename NumericT, typename F, typename S1>
956  S1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
957  const vector_base<NumericT> & vec1,
958  const vector_base<NumericT> & vec2)
959  {
960  assert( (viennacl::traits::size1(mat1) == viennacl::traits::size(vec1)) && bool("Size mismatch in scaled_rank_1_update: size1(A) != size(v1)"));
961  assert( (viennacl::traits::size2(mat1) == viennacl::traits::size(vec2)) && bool("Size mismatch in scaled_rank_1_update: size2(A) != size(v2)"));
962 
     // The OpenCL context is taken from mat1's handle; const is cast away because init() and get_kernel() need a mutable context.
963  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat1).context());
965  KernelClass::init(ctx);
966 
967  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
968 
     // Host scalars use the "_cpu" kernel, all other scalar types the "_gpu" kernel.
969  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), viennacl::is_cpu_scalar<S1>::value ? "scaled_rank1_update_cpu" : "scaled_rank1_update_gpu");
970 
     // Kernel arguments: matrix handle and layout, then alpha and its options, then handle/start/stride/size of vec1 and vec2.
971  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat1),
972  cl_uint(viennacl::traits::start1(mat1)), cl_uint(viennacl::traits::start2(mat1)),
973  cl_uint(viennacl::traits::stride1(mat1)), cl_uint(viennacl::traits::stride2(mat1)),
974  cl_uint(viennacl::traits::size1(mat1)), cl_uint(viennacl::traits::size2(mat1)),
976 
977  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
978  options_alpha,
979 
980  viennacl::traits::opencl_handle(vec1),
981  cl_uint(viennacl::traits::start(vec1)),
982  cl_uint(viennacl::traits::stride(vec1)),
983  cl_uint(viennacl::traits::size(vec1)),
984 
985  viennacl::traits::opencl_handle(vec2),
986  cl_uint(viennacl::traits::start(vec2)),
987  cl_uint(viennacl::traits::stride(vec2)),
988  cl_uint(viennacl::traits::size(vec2))
989  )
990  );
991  }
992 
993  } // namespace opencl
994  } //namespace linalg
995 } //namespace viennacl
996 
997 
998 #endif
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
std::size_t vcl_size_t
Definition: forwards.h:58
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:46
void ambm_m(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:148
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector.hpp:859
result_of::size_type< matrix_base< NumericT, F > >::type stride2(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:68
Represents an OpenCL device within ViennaCL.
Common implementations shared by OpenCL-based operations.
Generic size and resize functionality for different vector and matrix types.
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:399
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
Runtime generation of OpenCL kernels for dense matrix-matrix products.
void matrix_column(const matrix_base< NumericT, F > &mat, unsigned int j, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:406
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:647
A tag class representing assignment.
Definition: forwards.h:63
A dense matrix class.
Definition: forwards.h:290
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:283
size_type stride2() const
Returns the stride (index increment) in the column dimension.
Definition: matrix.hpp:637
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:49
void prod_slow_kernel(const T1 &A, const T2 &B, T3 &C, ScalarType alpha, ScalarType beta, std::string kernel_name)
Definition: matrix_operations.hpp:642
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:64
Determines row and column increments for matrices and matrix proxies.
Main kernel class for the generation of matrix-matrix product kernels C = A * B.
Definition: matrix_prod.hpp:431
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
the user interface for the code generator
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
result_of::size_type< matrix_base< NumericT, F > >::type stride1(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:57
Main kernel class for generating OpenCL kernels for elementwise-operations such as element_sin() on/w...
Definition: matrix_element.hpp:77
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT, F > &mat)
Definition: matrix_operations.hpp:245
void am(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: matrix_operations.hpp:66
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:83
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:363
size_type stride1() const
Returns the stride (index increment) in the row dimension.
Definition: matrix.hpp:635
void generate_enqueue_statement(viennacl::scheduler::statement const &s, scheduler::statement_node const &root_node)
Generate and enqueue a statement plus root_node into the current queue.
Definition: generate.hpp:395
size_type start1() const
Returns the start index (offset) in the row dimension.
Definition: matrix.hpp:630
size_type start2() const
Returns the start index (offset) in the column dimension.
Definition: matrix.hpp:632
void element_op(matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
Definition: matrix_operations.hpp:460
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:39
void matrix_assign(matrix_base< NumericT, F > &mat, NumericT s, bool clear=false)
Definition: matrix_operations.hpp:198
void matrix_row(const matrix_base< NumericT, F > &mat, unsigned int i, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:363
Common base class for dense vectors, vector ranges, and vector slices.
Definition: forwards.h:205
void prod(const T1 &A, const T2 &B, T3 &C, ScalarType alpha, ScalarType beta, std::string fast_kernel_name, std::string slow_kernel_name)
Definition: matrix_operations.hpp:747
void matrix_diagonal_assign(matrix_base< NumericT, F > &mat, NumericT s)
Definition: matrix_operations.hpp:223
A tag class representing matrix-matrix products.
Definition: forwards.h:78
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
Definition: predicate.hpp:448
vcl_size_t internal_size2(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
Proxy classes for vectors.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
static void init(viennacl::ocl::context &ctx)
Definition: matrix_element.hpp:84
T::ERROR_ARGUMENT_PROVIDED_IS_NOT_A_MATRIX type
Definition: result_of.hpp:137
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
Definition: matrix.hpp:877
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
Representation of an OpenCL kernel in ViennaCL.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void scaled_rank_1_update(matrix_base< NumericT, F > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
Definition: matrix_operations.hpp:955
cl_uint stride
Increment between integers.
Definition: kernel.hpp:51
A tag class representing transposed matrices.
Definition: forwards.h:165
void prod_impl(const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:547
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:86
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:447
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
vcl_size_t internal_size1(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
Definition: predicate.hpp:418
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
std::string op_to_string(op_abs)
Definition: common.hpp:71
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:53
OpenCL kernel file for element-wise matrix operations.
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:90
Implementation of the ViennaCL scalar class.
void prod_fast_kernel(const T1 &A, const T2 &B, T3 &C, ScalarType alpha, ScalarType beta, std::string kernel_name)
Definition: matrix_operations.hpp:695
A collection of compile time type deductions.
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:649
void ambm(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:98
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:600
Simple enable-if variant that uses the SFINAE pattern.
Runtime generation of OpenCL kernels for matrix operations.
void matrix_diag_to_vector(const matrix_base< NumericT, F > &mat, int k, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:306
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:55