ViennaCL - The Vienna Computing Library  1.5.1
scalar_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/tools/tools.hpp"
29 #include "viennacl/traits/size.hpp"
33 
34 // includes CUDA
35 #include <cuda_runtime.h>
36 
37 
38 namespace viennacl
39 {
40  namespace linalg
41  {
42  namespace cuda
43  {
44 
45  namespace detail
46  {
47 
48  }
49 
51 
52  template <typename T>
53  __global__ void as_kernel(T * s1, const T * fac2, unsigned int options2, const T * s2)
54  {
55  T alpha = *fac2;
56  if (options2 & (1 << 0))
57  alpha = -alpha;
58  if (options2 & (1 << 1))
59  alpha = ((T)(1)) / alpha;
60 
61  *s1 = *s2 * alpha;
62  }
63 
64  template <typename T>
65  __global__ void as_kernel(T * s1, T fac2, unsigned int options2, const T * s2)
66  {
67  T alpha = fac2;
68  if (options2 & (1 << 0))
69  alpha = -alpha;
70  if (options2 & (1 << 1))
71  alpha = ((T)(1)) / alpha;
72 
73  *s1 = *s2 * alpha;
74  }
75 
76  template <typename S1,
77  typename S2, typename ScalarType1>
81  >::type
82  as(S1 & s1,
83  S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
84  {
85  typedef typename viennacl::result_of::cpu_value_type<S1>::type value_type;
86 
87  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
88 
89  value_type temporary_alpha = 0;
91  temporary_alpha = alpha;
92 
93  as_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),
94  detail::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
95  options_alpha,
96  detail::cuda_arg<value_type>(s2));
97  VIENNACL_CUDA_LAST_ERROR_CHECK("as_kernel");
98  }
99 
101 
102  // alpha and beta on GPU
103  template <typename T>
104  __global__ void asbs_kernel(T * s1,
105  const T * fac2, unsigned int options2, const T * s2,
106  const T * fac3, unsigned int options3, const T * s3)
107  {
108  T alpha = *fac2;
109  if (options2 & (1 << 0))
110  alpha = -alpha;
111  if (options2 & (1 << 1))
112  alpha = ((T)(1)) / alpha;
113 
114  T beta = *fac3;
115  if (options3 & (1 << 0))
116  beta = -beta;
117  if (options3 & (1 << 1))
118  beta = ((T)(1)) / beta;
119 
120  *s1 = *s2 * alpha + *s3 * beta;
121  }
122 
123  // alpha on CPU, beta on GPU
124  template <typename T>
125  __global__ void asbs_kernel(T * s1,
126  T fac2, unsigned int options2, const T * s2,
127  const T * fac3, unsigned int options3, const T * s3)
128  {
129  T alpha = fac2;
130  if (options2 & (1 << 0))
131  alpha = -alpha;
132  if (options2 & (1 << 1))
133  alpha = ((T)(1)) / alpha;
134 
135  T beta = *fac3;
136  if (options3 & (1 << 0))
137  beta = -beta;
138  if (options3 & (1 << 1))
139  beta = ((T)(1)) / beta;
140 
141  *s1 = *s2 * alpha + *s3 * beta;
142  }
143 
144  // alpha on GPU, beta on CPU
145  template <typename T>
146  __global__ void asbs_kernel(T * s1,
147  const T * fac2, unsigned int options2, const T * s2,
148  T fac3, unsigned int options3, const T * s3)
149  {
150  T alpha = *fac2;
151  if (options2 & (1 << 0))
152  alpha = -alpha;
153  if (options2 & (1 << 1))
154  alpha = ((T)(1)) / alpha;
155 
156  T beta = fac3;
157  if (options3 & (1 << 0))
158  beta = -beta;
159  if (options3 & (1 << 1))
160  beta = ((T)(1)) / beta;
161 
162  *s1 = *s2 * alpha + *s3 * beta;
163  }
164 
165  // alpha and beta on CPU
166  template <typename T>
167  __global__ void asbs_kernel(T * s1,
168  T fac2, unsigned int options2, const T * s2,
169  T fac3, unsigned int options3, const T * s3)
170  {
171  T alpha = fac2;
172  if (options2 & (1 << 0))
173  alpha = -alpha;
174  if (options2 & (1 << 1))
175  alpha = ((T)(1)) / alpha;
176 
177  T beta = fac3;
178  if (options3 & (1 << 0))
179  beta = -beta;
180  if (options3 & (1 << 1))
181  beta = ((T)(1)) / beta;
182 
183  *s1 = *s2 * alpha + *s3 * beta;
184  }
185 
186 
187  template <typename S1,
188  typename S2, typename ScalarType1,
189  typename S3, typename ScalarType2>
195  >::type
196  asbs(S1 & s1,
197  S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
198  S3 const & s3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
199  {
200  typedef typename viennacl::result_of::cpu_value_type<S1>::type value_type;
201 
202  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
203  unsigned int options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
204 
205  value_type temporary_alpha = 0;
207  temporary_alpha = alpha;
208 
209  value_type temporary_beta = 0;
211  temporary_beta = beta;
212 
213  asbs_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),
214  detail::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
215  options_alpha,
216  detail::cuda_arg<value_type>(s2),
217  detail::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)),
218  options_beta,
219  detail::cuda_arg<value_type>(s3) );
220  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_kernel");
221  }
222 
224 
225  // alpha and beta on GPU
226  template <typename T>
227  __global__ void asbs_s_kernel(T * s1,
228  const T * fac2, unsigned int options2, const T * s2,
229  const T * fac3, unsigned int options3, const T * s3)
230  {
231  T alpha = *fac2;
232  if (options2 & (1 << 0))
233  alpha = -alpha;
234  if (options2 & (1 << 1))
235  alpha = ((T)(1)) / alpha;
236 
237  T beta = *fac3;
238  if (options3 & (1 << 0))
239  beta = -beta;
240  if (options3 & (1 << 1))
241  beta = ((T)(1)) / beta;
242 
243  *s1 += *s2 * alpha + *s3 * beta;
244  }
245 
246  // alpha on CPU, beta on GPU
247  template <typename T>
248  __global__ void asbs_s_kernel(T * s1,
249  T fac2, unsigned int options2, const T * s2,
250  const T * fac3, unsigned int options3, const T * s3)
251  {
252  T alpha = fac2;
253  if (options2 & (1 << 0))
254  alpha = -alpha;
255  if (options2 & (1 << 1))
256  alpha = ((T)(1)) / alpha;
257 
258  T beta = *fac3;
259  if (options3 & (1 << 0))
260  beta = -beta;
261  if (options3 & (1 << 1))
262  beta = ((T)(1)) / beta;
263 
264  *s1 += *s2 * alpha + *s3 * beta;
265  }
266 
267  // alpha on GPU, beta on CPU
268  template <typename T>
269  __global__ void asbs_s_kernel(T * s1,
270  const T * fac2, unsigned int options2, const T * s2,
271  T fac3, unsigned int options3, const T * s3)
272  {
273  T alpha = *fac2;
274  if (options2 & (1 << 0))
275  alpha = -alpha;
276  if (options2 & (1 << 1))
277  alpha = ((T)(1)) / alpha;
278 
279  T beta = fac3;
280  if (options3 & (1 << 0))
281  beta = -beta;
282  if (options3 & (1 << 1))
283  beta = ((T)(1)) / beta;
284 
285  *s1 += *s2 * alpha + *s3 * beta;
286  }
287 
288  // alpha and beta on CPU
289  template <typename T>
290  __global__ void asbs_s_kernel(T * s1,
291  T fac2, unsigned int options2, const T * s2,
292  T fac3, unsigned int options3, const T * s3)
293  {
294  T alpha = fac2;
295  if (options2 & (1 << 0))
296  alpha = -alpha;
297  if (options2 & (1 << 1))
298  alpha = ((T)(1)) / alpha;
299 
300  T beta = fac3;
301  if (options3 & (1 << 0))
302  beta = -beta;
303  if (options3 & (1 << 1))
304  beta = ((T)(1)) / beta;
305 
306  *s1 += *s2 * alpha + *s3 * beta;
307  }
308 
309 
310  template <typename S1,
311  typename S2, typename ScalarType1,
312  typename S3, typename ScalarType2>
318  >::type
319  asbs_s(S1 & s1,
320  S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
321  S3 const & s3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
322  {
323  typedef typename viennacl::result_of::cpu_value_type<S1>::type value_type;
324 
325  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
326  unsigned int options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
327 
328  value_type temporary_alpha = 0;
330  temporary_alpha = alpha;
331 
332  value_type temporary_beta = 0;
334  temporary_beta = beta;
335 
336  std::cout << "Launching asbs_s_kernel..." << std::endl;
337  asbs_s_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),
338  detail::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
339  options_alpha,
340  detail::cuda_arg<value_type>(s2),
341  detail::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)),
342  options_beta,
343  detail::cuda_arg<value_type>(s3) );
344  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_s_kernel");
345  }
346 
348 
349  template <typename T>
350  __global__ void scalar_swap_kernel(T * s1, T * s2)
351  {
352  T tmp = *s2;
353  *s2 = *s1;
354  *s1 = tmp;
355  }
356 
362  template <typename S1, typename S2>
365  >::type
366  swap(S1 & s1, S2 & s2)
367  {
368  typedef typename viennacl::result_of::cpu_value_type<S1>::type value_type;
369 
370  scalar_swap_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),detail::cuda_arg<value_type>(s2));
371  }
372 
373 
374 
375  } //namespace single_threaded
376  } //namespace linalg
377 } //namespace viennacl
378 
379 
380 #endif
__global__ void as_kernel(T *s1, const T *fac2, unsigned int options2, const T *s2)
Definition: scalar_operations.hpp:53
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:29
std::size_t vcl_size_t
Definition: forwards.h:58
unsigned int make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:37
Generic size and resize functionality for different vector and matrix types.
Common routines for CUDA execution.
viennacl::backend::mem_handle::cuda_handle_type & arg_reference(viennacl::scalar< T > &s, U)
Definition: common.hpp:127
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
Definition: common.hpp:27
__global__ void asbs_s_kernel(T *s1, const T *fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3)
Definition: scalar_operations.hpp:227
__global__ void scalar_swap_kernel(T *s1, T *s2)
Definition: scalar_operations.hpp:350
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:363
viennacl::enable_if< viennacl::is_scalar< S1 >::value &&viennacl::is_scalar< S2 >::value &&viennacl::is_scalar< S3 >::value &&viennacl::is_any_scalar< ScalarType1 >::value &&viennacl::is_any_scalar< ScalarType2 >::value >::type asbs(S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, S3 const &s3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: scalar_operations.hpp:196
Helper struct for checking whether the provided type represents a scalar (either host, from ViennaCL, or a flip-sign proxy)
Definition: forwards.h:384
viennacl::enable_if< viennacl::is_scalar< S1 >::value &&viennacl::is_scalar< S2 >::value &&viennacl::is_any_scalar< ScalarType1 >::value >::type as(S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: scalar_operations.hpp:82
__global__ void asbs_kernel(T *s1, const T *fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3)
Definition: scalar_operations.hpp:104
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
Helper struct for checking whether a type is a viennacl::scalar<>
Definition: forwards.h:370
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
viennacl::enable_if< viennacl::is_scalar< S1 >::value &&viennacl::is_scalar< S2 >::value >::type swap(S1 &s1, S2 &s2)
Swaps the contents of two scalars, data is copied.
Definition: scalar_operations.hpp:366
viennacl::enable_if< viennacl::is_scalar< S1 >::value &&viennacl::is_scalar< S2 >::value &&viennacl::is_scalar< S3 >::value &&viennacl::is_any_scalar< ScalarType1 >::value &&viennacl::is_any_scalar< ScalarType2 >::value >::type asbs_s(S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, S3 const &s3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: scalar_operations.hpp:319
Simple enable-if variant that uses the SFINAE pattern.