1 #ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
41 #ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE
42 #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 5000
53 template <
typename NumericT>
55 inline unsigned long flip_sign(
unsigned long val) {
return val; }
56 inline unsigned int flip_sign(
unsigned int val) {
return val; }
57 inline unsigned short flip_sign(
unsigned short val) {
return val; }
58 inline unsigned char flip_sign(
unsigned char val) {
return val; }
65 template <
typename T,
typename ScalarType1>
71 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
72 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
74 value_type data_alpha = alpha;
87 #ifdef VIENNACL_WITH_OPENMP
88 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
90 for (
long i = 0; i < static_cast<long>(
size1); ++i)
91 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] / data_alpha;
95 #ifdef VIENNACL_WITH_OPENMP
96 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
98 for (
long i = 0; i < static_cast<long>(
size1); ++i)
99 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] * data_alpha;
104 template <
typename T,
typename ScalarType1,
typename ScalarType2>
109 typedef T value_type;
111 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
112 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
113 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
115 value_type data_alpha = alpha;
119 value_type data_beta = beta;
133 if (reciprocal_alpha)
137 #ifdef VIENNACL_WITH_OPENMP
138 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
140 for (
long i = 0; i < static_cast<long>(
size1); ++i)
141 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] / data_beta;
145 #ifdef VIENNACL_WITH_OPENMP
146 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
148 for (
long i = 0; i < static_cast<long>(
size1); ++i)
149 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] * data_beta;
156 #ifdef VIENNACL_WITH_OPENMP
157 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
159 for (
long i = 0; i < static_cast<long>(
size1); ++i)
160 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] * data_alpha + data_vec3[i*inc3+start3] / data_beta;
164 #ifdef VIENNACL_WITH_OPENMP
165 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
167 for (
long i = 0; i < static_cast<long>(
size1); ++i)
168 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] * data_alpha + data_vec3[i*inc3+start3] * data_beta;
174 template <
typename T,
typename ScalarType1,
typename ScalarType2>
179 typedef T value_type;
181 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
182 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
183 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
185 value_type data_alpha = alpha;
189 value_type data_beta = beta;
203 if (reciprocal_alpha)
207 #ifdef VIENNACL_WITH_OPENMP
208 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
210 for (
long i = 0; i < static_cast<long>(
size1); ++i)
211 data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] / data_beta;
215 #ifdef VIENNACL_WITH_OPENMP
216 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
218 for (
long i = 0; i < static_cast<long>(
size1); ++i)
219 data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] * data_beta;
226 #ifdef VIENNACL_WITH_OPENMP
227 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
229 for (
long i = 0; i < static_cast<long>(
size1); ++i)
230 data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] * data_alpha + data_vec3[i*inc3+start3] / data_beta;
234 #ifdef VIENNACL_WITH_OPENMP
235 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
237 for (
long i = 0; i < static_cast<long>(
size1); ++i)
238 data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] * data_alpha + data_vec3[i*inc3+start3] * data_beta;
252 template <
typename T>
255 typedef T value_type;
257 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
264 value_type data_alpha =
static_cast<value_type
>(alpha);
266 #ifdef VIENNACL_WITH_OPENMP
267 #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
269 for (
long i = 0; i < static_cast<long>(loop_bound); ++i)
270 data_vec1[i*inc1+start1] = data_alpha;
279 template <
typename T>
282 typedef T value_type;
284 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
285 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
294 #ifdef VIENNACL_WITH_OPENMP
295 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
297 for (
long i = 0; i < static_cast<long>(
size1); ++i)
299 value_type temp = data_vec2[i*inc2+
start2];
301 data_vec1[i*inc1+
start1] = temp;
313 template <
typename T,
typename OP>
317 typedef T value_type;
320 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
321 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
322 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());
334 #ifdef VIENNACL_WITH_OPENMP
335 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
337 for (
long i = 0; i < static_cast<long>(
size1); ++i)
338 OpFunctor::apply(data_vec1[i*inc1+start1], data_vec2[i*inc2+start2], data_vec3[i*inc3+start3]);
346 template <
typename T,
typename OP>
350 typedef T value_type;
353 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
354 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
363 #ifdef VIENNACL_WITH_OPENMP
364 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
366 for (
long i = 0; i < static_cast<long>(
size1); ++i)
367 OpFunctor::apply(data_vec1[i*inc1+start1], data_vec2[i*inc2+start2]);
382 template <
typename T,
typename S3>
387 typedef T value_type;
389 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
390 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
401 #ifdef VIENNACL_WITH_OPENMP
402 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
404 for (
long i = 0; i < static_cast<long>(
size1); ++i)
405 temp += data_vec1[i*inc1+start1] * data_vec2[i*inc2+start2];
410 template <
typename T>
415 typedef T value_type;
417 value_type
const * data_x = detail::extract_raw_pointer<value_type>(x);
423 std::vector<value_type> temp(vec_tuple.
const_size());
424 std::vector<value_type const *> data_y(vec_tuple.
const_size());
425 std::vector<vcl_size_t> start_y(vec_tuple.
const_size());
426 std::vector<vcl_size_t> stride_y(vec_tuple.
const_size());
430 data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.
const_at(j));
438 value_type entry_x = data_x[i*inc_x+start_x];
440 temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
453 template <
typename T,
typename S2>
457 typedef T value_type;
459 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
467 #ifdef VIENNACL_WITH_OPENMP
468 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
470 for (
long i = 0; i < static_cast<long>(
size1); ++i)
471 temp += static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])));
481 template <
typename T,
typename S2>
485 typedef T value_type;
487 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
496 #ifdef VIENNACL_WITH_OPENMP
497 #pragma omp parallel for reduction(+: temp) private(data) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
499 for (
long i = 0; i < static_cast<long>(
size1); ++i)
501 data = data_vec1[i*inc1+
start1];
505 result = std::sqrt(temp);
513 template <
typename T,
typename S2>
517 typedef T value_type;
519 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
529 temp = std::max<value_type>(temp, static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))));
542 template <
typename T>
545 typedef T value_type;
547 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
560 data =
static_cast<value_type
>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])));
581 template <
typename T>
586 typedef T value_type;
588 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
589 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
598 value_type temp1 = 0;
599 value_type temp2 = 0;
600 value_type data_alpha = alpha;
601 value_type data_beta = beta;
603 #ifdef VIENNACL_WITH_OPENMP
604 #pragma omp parallel for private(temp1, temp2) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
606 for (
long i = 0; i < static_cast<long>(
size1); ++i)
608 temp1 = data_vec1[i*inc1+
start1];
609 temp2 = data_vec2[i*inc2+
start2];
611 data_vec1[i*inc1+
start1] = data_alpha * temp1 + data_beta * temp2;
612 data_vec2[i*inc2+
start2] = data_alpha * temp2 - data_beta * temp1;
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Definition: vector_operations.hpp:253
VectorType const & const_at(vcl_size_t i) const
Definition: vector.hpp:1196
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Definition: vector_operations.hpp:175
std::size_t vcl_size_t
Definition: forwards.h:58
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: vector_operations.hpp:66
Generic size and resize functionality for different vector and matrix types.
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, S3 &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
Definition: vector_operations.hpp:383
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
void element_op(matrix_base< NumericT, F > &A, matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_element_binary< OP > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
Definition: matrix_operations.hpp:604
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
Worker class for decomposing expression templates.
Definition: op_applier.hpp:43
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:64
Determines row and column increments for matrices and matrix proxies.
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:181
vcl_size_t index_norm_inf(vector_base< T > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
Definition: vector_operations.hpp:543
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:83
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
Definition: forwards.h:211
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
Definition: vector_operations.hpp:582
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
NumericT flip_sign(NumericT val)
Definition: vector_operations.hpp:54
Common base class for dense vectors, vector ranges, and vector slices.
Definition: forwards.h:205
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied.
Definition: vector_operations.hpp:280
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Definition: vector_operations.hpp:105
Common routines for single-threaded or OpenMP-enabled execution on CPU.
void norm_1_impl(vector_base< T > const &vec1, S2 &result)
Computes the l^1-norm of a vector.
Definition: vector_operations.hpp:454
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:86
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:90
Implementation of the ViennaCL scalar class.
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Definition: vector.hpp:863
Simple enable-if variant that uses the SFINAE pattern.
void norm_2_impl(vector_base< T > const &vec1, S2 &result)
Computes the l^2-norm of a vector - implementation.
Definition: vector_operations.hpp:482
void norm_inf_impl(vector_base< T > const &vec1, S2 &result)
Computes the supremum-norm of a vector.
Definition: vector_operations.hpp:514
vcl_size_t const_size() const
Definition: vector.hpp:1193