ViennaCL - The Vienna Computing Library  1.5.1
Namespaces | Data Structures | Functions
viennacl::linalg::cuda Namespace Reference

Holds all CUDA compute kernels used by ViennaCL. More...

Namespaces

 detail
 Helper functions for the CUDA linear algebra backend.
 

Data Structures

struct  mat_mult_matrix_index
 Helper struct for accessing an element of a row- or column-major matrix. More...
 

Functions

template<typename T >
__global__ void matrix_matrix_upper_solve_kernel (const T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal)
 
template<typename T >
__global__ void matrix_matrix_lower_solve_kernel (const T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal)
 
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG >
void inplace_solve (const matrix_base< NumericT, F1 > &A, matrix_base< NumericT, F2 > &B, SOLVERTAG tag)
 Direct in-place solver for triangular systems with multiple right-hand sides, i.e. A \ B (MATLAB notation). More...
 
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG >
void inplace_solve (const matrix_base< NumericT, F1 > &A, matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > proxy_B, SOLVERTAG tag)
 Direct in-place solver for triangular systems with multiple transposed right-hand sides, i.e. A \ B^T (MATLAB notation). More...
 
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG >
void inplace_solve (const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &proxy_A, matrix_base< NumericT, F2 > &B, SOLVERTAG tag)
 Direct in-place solver for transposed triangular systems with multiple right-hand sides, i.e. A^T \ B (MATLAB notation). More...
 
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG >
void inplace_solve (const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &proxy_A, matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > proxy_B, SOLVERTAG tag)
 Direct in-place solver for transposed triangular systems with multiple transposed right-hand sides, i.e. A^T \ B^T (MATLAB notation). More...
 
template<typename T >
__global__ void triangular_substitute_inplace_row_kernel (T const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename T >
__global__ void triangular_substitute_inplace_col_kernel (T const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename NumericT , typename F , typename SOLVERTAG >
void inplace_solve (const matrix_base< NumericT, F > &mat, vector_base< NumericT > &vec, SOLVERTAG)
 Direct in-place solver for dense triangular systems (non-transposed version). More...
 
template<typename NumericT , typename F , typename SOLVERTAG >
void inplace_solve (const matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > &proxy, vector_base< NumericT > &vec, SOLVERTAG)
 Direct in-place solver for dense triangular systems (transposed version). More...
 
template<typename NumericT , typename F , typename ScalarType1 >
void am (matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT , typename F , typename ScalarType1 , typename ScalarType2 >
void ambm (matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT , typename F , typename ScalarType1 , typename ScalarType2 >
void ambm_m (matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT , typename F >
void matrix_assign (matrix_base< NumericT, F > &mat, NumericT s, bool clear=false)
 
template<typename NumericT , typename F >
void matrix_diagonal_assign (matrix_base< NumericT, F > &mat, NumericT s)
 
template<typename NumericT , typename F >
void matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT, F > &mat)
 
template<typename NumericT , typename F >
void matrix_diag_to_vector (const matrix_base< NumericT, F > &mat, int k, vector_base< NumericT > &vec)
 
template<typename NumericT , typename F >
void matrix_row (const matrix_base< NumericT, F > &mat, unsigned int i, vector_base< NumericT > &vec)
 
template<typename NumericT , typename F >
void matrix_column (const matrix_base< NumericT, F > &mat, unsigned int j, vector_base< NumericT > &vec)
 
template<typename T , typename F , typename OP >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_binary< OP > > const &proxy)
 
template<typename F , typename OP >
void element_op (matrix_base< float, F > &A, matrix_expression< const matrix_base< float, F >, const matrix_base< float, F >, op_element_binary< OP > > const &proxy)
 
template<typename F , typename OP >
void element_op (matrix_base< double, F > &A, matrix_expression< const matrix_base< double, F >, const matrix_base< double, F >, op_element_binary< OP > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_abs > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_acos > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_asin > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_atan > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_ceil > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cos > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cosh > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_exp > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_fabs > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_floor > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log10 > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sin > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sinh > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tan > > const &proxy)
 
template<typename T , typename F >
void element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT , typename F >
void prod_impl (const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
 Carries out matrix-vector multiplication. More...
 
template<typename NumericT , typename F >
void prod_impl (const viennacl::matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > &mat_trans, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a transposed matrix. More...
 
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType >
void prod_impl (const matrix_base< NumericT, F1 > &A, const matrix_base< NumericT, F2 > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta)
 Carries out matrix-matrix multiplication. More...
 
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType >
void prod_impl (const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &A, const matrix_base< NumericT, F2 > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta)
 Carries out matrix-matrix multiplication with the first factor transposed (A^T * B). More...
 
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType >
void prod_impl (const matrix_base< NumericT, F1 > &A, const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta)
 Carries out matrix-matrix multiplication with the second factor transposed (A * B^T). More...
 
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType >
void prod_impl (const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &A, const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta)
 Carries out matrix-matrix multiplication with both factors transposed (A^T * B^T). More...
 
template<typename NumericT , typename F , typename S1 >
void scaled_rank_1_update (matrix_base< NumericT, F > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
 The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. More...
 
template<typename T >
__global__ void am_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void am_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void matrix_col_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha)
 
template<typename T >
__global__ void matrix_col_diagonal_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha)
 
template<typename T >
__global__ void element_op_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename T >
__global__ void element_op_int_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename T >
__global__ void matrix_col_element_abs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_acos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_asin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_atan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_ceil_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_cos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_cosh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_exp_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_fabs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_floor_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_log_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_log10_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_sin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_sinh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_sqrt_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_tan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_col_element_tanh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void vec_mul_col_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename T >
__global__ void trans_vec_mul_col_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename T >
__global__ void scaled_rank1_update_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename T >
__global__ void scaled_rank1_update_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename T >
__global__ void matrix_matrix_col_col_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_col_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_col_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_col_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_col_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_col_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_col_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_col_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_col_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_col_row_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void matrix_matrix_row_row_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename T >
__global__ void am_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void am_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename T >
__global__ void matrix_row_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha)
 
template<typename T >
__global__ void matrix_row_diagonal_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha)
 
template<typename T >
__global__ void element_op_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename T >
__global__ void element_op_int_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename T >
__global__ void matrix_row_element_abs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_acos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_asin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_atan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_ceil_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_cos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_cosh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_exp_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_fabs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_floor_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_log_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_log10_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_sin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_sinh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_sqrt_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_tan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void matrix_row_element_tanh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename T >
__global__ void vec_mul_row_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename T >
__global__ void trans_vec_mul_row_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename T >
__global__ void scaled_rank1_update_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename T >
__global__ void scaled_rank1_update_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename T >
__global__ void as_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2)
 
template<typename T >
__global__ void as_kernel (T *s1, T fac2, unsigned int options2, const T *s2)
 
template<typename S1 , typename S2 , typename ScalarType1 >
viennacl::enable_if
< viennacl::is_scalar< S1 >
::value &&viennacl::is_scalar
< S2 >::value
&&viennacl::is_any_scalar
< ScalarType1 >::value >::type 
as (S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename T >
__global__ void asbs_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3)
 
template<typename T >
__global__ void asbs_kernel (T *s1, T fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3)
 
template<typename T >
__global__ void asbs_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3)
 
template<typename T >
__global__ void asbs_kernel (T *s1, T fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3)
 
template<typename S1 , typename S2 , typename ScalarType1 , typename S3 , typename ScalarType2 >
viennacl::enable_if
< viennacl::is_scalar< S1 >
::value &&viennacl::is_scalar
< S2 >::value
&&viennacl::is_scalar< S3 >
::value
&&viennacl::is_any_scalar
< ScalarType1 >::value
&&viennacl::is_any_scalar
< ScalarType2 >::value >::type 
asbs (S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, S3 const &s3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename T >
__global__ void asbs_s_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3)
 
template<typename T >
__global__ void asbs_s_kernel (T *s1, T fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3)
 
template<typename T >
__global__ void asbs_s_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3)
 
template<typename T >
__global__ void asbs_s_kernel (T *s1, T fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3)
 
template<typename S1 , typename S2 , typename ScalarType1 , typename S3 , typename ScalarType2 >
viennacl::enable_if
< viennacl::is_scalar< S1 >
::value &&viennacl::is_scalar
< S2 >::value
&&viennacl::is_scalar< S3 >
::value
&&viennacl::is_any_scalar
< ScalarType1 >::value
&&viennacl::is_any_scalar
< ScalarType2 >::value >::type 
asbs_s (S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, S3 const &s3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename T >
__global__ void scalar_swap_kernel (T *s1, T *s2)
 
template<typename S1 , typename S2 >
viennacl::enable_if
< viennacl::is_scalar< S1 >
::value &&viennacl::is_scalar
< S2 >::value >::type 
swap (S1 &s1, S2 &s2)
 Swaps the contents of two scalars, data is copied. More...
 
template<typename T >
__global__ void compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<class ScalarType , unsigned int ALIGNMENT>
void prod_impl (const viennacl::compressed_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result)
 Carries out matrix-vector multiplication with a compressed_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename T >
__global__ void compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const T *sp_mat_elements, const T *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, T *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename TYPE , unsigned int ALIGNMENT, typename F1 , typename F2 >
void prod_impl (const viennacl::compressed_matrix< TYPE, ALIGNMENT > &sp_mat, const viennacl::matrix_base< TYPE, F1 > &d_mat, viennacl::matrix_base< TYPE, F2 > &result)
 Carries out sparse-matrix-dense-matrix multiplication, the first matrix being a compressed_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename T >
__global__ void compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const T *sp_mat_elements, const T *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, T *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename TYPE , unsigned int ALIGNMENT, typename F1 , typename F2 >
void prod_impl (const viennacl::compressed_matrix< TYPE, ALIGNMENT > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< TYPE, F1 >, const viennacl::matrix_base< TYPE, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< TYPE, F2 > &result)
 Carries out matrix-trans(matrix) multiplication, the first matrix being a compressed_matrix and the second a transposed dense matrix. More...
 
template<typename T >
__global__ void compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *result, unsigned int size)
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixType , class ScalarType >
viennacl::enable_if
< viennacl::is_any_sparse_matrix
< SparseMatrixType >::value >
::type 
inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename T >
__global__ void compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, unsigned int nonzero_rows, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<class ScalarType >
void prod_impl (const viennacl::compressed_compressed_matrix< ScalarType > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result)
 Carries out matrix-vector multiplication with a compressed_compressed_matrix. More...
 
template<typename T >
__global__ void coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const T *elements, const unsigned int *group_boundaries, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result)
 
template<class ScalarType , unsigned int ALIGNMENT>
void prod_impl (const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result)
 Carries out matrix-vector multiplication with a coordinate_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT >
__global__ void coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const ScalarType *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int ALIGNMENT, typename F1 , typename F2 >
void prod_impl (const viennacl::coordinate_matrix< NumericT, ALIGNMENT > &sp_mat, const viennacl::matrix_base< NumericT, F1 > &d_mat, viennacl::matrix_base< NumericT, F2 > &result)
 Carries out Sparse Matrix(COO)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT >
__global__ void coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const ScalarType *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<class ScalarType , unsigned int ALIGNMENT, class NumericT , typename F1 , typename F2 >
void prod_impl (const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT, F2 > &result)
 Carries out Sparse Matrix(COO)-Dense Transposed Matrix multiplication. More...
 
template<typename T >
__global__ void ell_matrix_vec_mul_kernel (const unsigned int *coords, const T *elements, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<class ScalarType , unsigned int ALIGNMENT>
void prod_impl (const viennacl::ell_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result)
 Carries out matrix-vector multiplication with an ell_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT >
__global__ void ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const ScalarType *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<class ScalarType , unsigned int ALIGNMENT, class NumericT , typename F1 , typename F2 >
void prod_impl (const viennacl::ell_matrix< ScalarType, ALIGNMENT > &sp_mat, const viennacl::matrix_base< NumericT, F1 > &d_mat, viennacl::matrix_base< NumericT, F2 > &result)
 Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT >
__global__ void ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const ScalarType *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<class ScalarType , unsigned int ALIGNMENT, class NumericT , typename F1 , typename F2 >
void prod_impl (const viennacl::ell_matrix< ScalarType, ALIGNMENT > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT, F2 > &result)
 Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. More...
 
template<typename T >
__global__ void hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const T *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const T *csr_elements, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<class ScalarType , unsigned int ALIGNMENT>
void prod_impl (const viennacl::hyb_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result)
 Carries out matrix-vector multiplication with a hyb_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int ALIGNMENT, typename F1 , typename F2 >
void prod_impl (const viennacl::hyb_matrix< NumericT, ALIGNMENT > &mat, const viennacl::matrix_base< NumericT, F1 > &d_mat, viennacl::matrix_base< NumericT, F2 > &result)
 Carries out Sparse Matrix(HYB)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int ALIGNMENT, typename F1 , typename F2 >
void prod_impl (const viennacl::hyb_matrix< NumericT, ALIGNMENT > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT, F2 > &result)
 Carries out Sparse Matrix(HYB)-Dense Transposed Matrix multiplication. More...
 
template<typename T >
__global__ void csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *diagonal_entries, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *diagonal_entries, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *diagonal_entries, T *vector, unsigned int size)
 
template<typename T >
__global__ void csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const T *elements_L, const unsigned int *block_offsets, T *result, unsigned int size)
 
template<typename T >
__global__ void csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const T *elements_U, const T *diagonal_U, const unsigned int *block_offsets, T *result, unsigned int size)
 
template<typename T >
__global__ void av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
__global__ void av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T , typename ScalarType1 >
void av (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T >
__global__ void avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T , typename ScalarType1 , typename ScalarType2 >
void avbv (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T >
__global__ void avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3)
 
template<typename T , typename ScalarType1 , typename ScalarType2 >
void avbv_v (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename T >
__global__ void vector_assign_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, T alpha)
 
template<typename T , typename S1 >
void vector_assign (vector_base< T > &vec1, const S1 &alpha, bool up_to_internal_size=false)
 Assign a constant value to a vector (-range/-slice) More...
 
template<typename T >
__global__ void vector_swap_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void vector_swap (vector_base< T > &vec1, vector_base< T > &vec2)
 Swaps the contents of two vectors, data is copied. More...
 
template<typename T >
__global__ void element_op_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename T >
__global__ void element_op_int_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename T , typename OP >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< OP > > const &proxy)
 Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More...
 
template<typename OP >
void element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OP > > const &proxy)
 
template<typename OP >
void element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OP > > const &proxy)
 
template<typename T >
__global__ void vec_element_acos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_acos > > const &proxy)
 
template<typename T >
__global__ void vec_element_asin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_asin > > const &proxy)
 
template<typename T >
__global__ void vec_element_atan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_atan > > const &proxy)
 
template<typename T >
__global__ void vec_element_ceil_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_ceil > > const &proxy)
 
template<typename T >
__global__ void vec_element_cos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cos > > const &proxy)
 
template<typename T >
__global__ void vec_element_cosh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cosh > > const &proxy)
 
template<typename T >
__global__ void vec_element_exp_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_exp > > const &proxy)
 
template<typename T >
__global__ void vec_element_fabs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_fabs > > const &proxy)
 
template<typename T >
__global__ void vec_element_abs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_abs > > const &proxy)
 
template<typename T >
__global__ void vec_element_floor_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_floor > > const &proxy)
 
template<typename T >
__global__ void vec_element_log_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log > > const &proxy)
 
template<typename T >
__global__ void vec_element_log10_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log10 > > const &proxy)
 
template<typename T >
__global__ void vec_element_sin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sin > > const &proxy)
 
template<typename T >
__global__ void vec_element_sinh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sinh > > const &proxy)
 
template<typename T >
__global__ void vec_element_sqrt_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename T >
__global__ void vec_element_tan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tan > > const &proxy)
 
template<typename T >
__global__ void vec_element_tanh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename T >
void element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tanh > > const &proxy)
 
template<typename T >
__global__ void inner_prod_kernel (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T *group_buffer)
 
template<typename T >
__global__ void vector_sum_kernel_floats (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result)
 
template<typename T >
__global__ void vector_sum_kernel_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result)
 
template<typename T >
__global__ void vector_sum_kernel_unsigned_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result)
 
template<typename T , typename S3 >
void inner_prod_impl (vector_base< T > const &vec1, vector_base< T > const &vec2, S3 &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename T >
void inner_prod_cpu (vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
__global__ void inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results)
 
template<typename T >
__global__ void vector_multi_sum_kernel (T const *vec1, T *result, unsigned int start_result, unsigned int inc_result)
 
template<typename T >
void inner_prod_impl (vector_base< T > const &x, vector_tuple< T > const &vec_tuple, vector_base< T > &result)
 
template<typename T >
__global__ void norm_kernel_floats (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer)
 
template<typename T >
__global__ void norm_kernel_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer)
 
template<typename T >
__global__ void norm_kernel_unsigned_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer)
 
template<typename T >
void norm_1_impl (vector_base< T > const &vec1, scalar< T > &result)
 Computes the l^1-norm of a vector. More...
 
template<typename T >
void norm_1_cpu (vector_base< T > const &vec1, T &result)
 Computes the l^1-norm of a vector. More...
 
template<typename T >
void norm_2_impl (vector_base< T > const &vec1, scalar< T > &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename T >
void norm_2_cpu (vector_base< T > const &vec1, T &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename T >
void norm_inf_impl (vector_base< T > const &vec1, scalar< T > &result)
 Computes the supremum-norm of a vector. More...
 
template<typename T >
void norm_inf_cpu (vector_base< T > const &vec1, T &result)
 Computes the supremum-norm of a vector. More...
 
template<typename T >
__device__ T cuda_abs (T val)
 
__device__ unsigned long cuda_abs (unsigned long val)
 
__device__ unsigned int cuda_abs (unsigned int val)
 
__device__ unsigned short cuda_abs (unsigned short val)
 
__device__ unsigned char cuda_abs (unsigned char val)
 
template<typename T >
__global__ void index_norm_inf_kernel (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result)
 
template<typename T >
vcl_size_t index_norm_inf (vector_base< T > const &vec1)
 Computes the index of the first entry that is equal to the supremum-norm in modulus. More...
 
template<typename T >
__global__ void plane_rotation_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T alpha, T beta)
 
template<typename T >
void plane_rotation (vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
 Computes a plane rotation of two vectors. More...
 

Detailed Description

Holds all CUDA compute kernels used by ViennaCL.

Function Documentation

void viennacl::linalg::cuda::am ( matrix_base< NumericT, F > &  mat1,
matrix_base< NumericT, F > const &  mat2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)
__global__ void viennacl::linalg::cuda::am_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::am_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::am_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::am_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
void viennacl::linalg::cuda::ambm ( matrix_base< NumericT, F > &  mat1,
matrix_base< NumericT, F > const &  mat2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT, F > const &  mat3,
ScalarType2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
void viennacl::linalg::cuda::ambm_m ( matrix_base< NumericT, F > &  mat1,
matrix_base< NumericT, F > const &  mat2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT, F > const &  mat3,
ScalarType2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
T  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  fac2,
unsigned int  options2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  fac3,
unsigned int  options3,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value && viennacl::is_any_scalar<ScalarType1>::value >::type viennacl::linalg::cuda::as ( S1 &  s1,
S2 const &  s2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)
__global__ void viennacl::linalg::cuda::as_kernel ( T *  s1,
const T *  fac2,
unsigned int  options2,
const T *  s2 
)
__global__ void viennacl::linalg::cuda::as_kernel ( T *  s1,
T  fac2,
unsigned int  options2,
const T *  s2 
)
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value && viennacl::is_scalar<S3>::value && viennacl::is_any_scalar<ScalarType1>::value && viennacl::is_any_scalar<ScalarType2>::value >::type viennacl::linalg::cuda::asbs ( S1 &  s1,
S2 const &  s2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
S3 const &  s3,
ScalarType2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)
__global__ void viennacl::linalg::cuda::asbs_kernel ( T *  s1,
const T *  fac2,
unsigned int  options2,
const T *  s2,
const T *  fac3,
unsigned int  options3,
const T *  s3 
)
__global__ void viennacl::linalg::cuda::asbs_kernel ( T *  s1,
T  fac2,
unsigned int  options2,
const T *  s2,
const T *  fac3,
unsigned int  options3,
const T *  s3 
)
__global__ void viennacl::linalg::cuda::asbs_kernel ( T *  s1,
const T *  fac2,
unsigned int  options2,
const T *  s2,
T  fac3,
unsigned int  options3,
const T *  s3 
)
__global__ void viennacl::linalg::cuda::asbs_kernel ( T *  s1,
T  fac2,
unsigned int  options2,
const T *  s2,
T  fac3,
unsigned int  options3,
const T *  s3 
)
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value && viennacl::is_scalar<S3>::value && viennacl::is_any_scalar<ScalarType1>::value && viennacl::is_any_scalar<ScalarType2>::value >::type viennacl::linalg::cuda::asbs_s ( S1 &  s1,
S2 const &  s2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
S3 const &  s3,
ScalarType2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( T *  s1,
const T *  fac2,
unsigned int  options2,
const T *  s2,
const T *  fac3,
unsigned int  options3,
const T *  s3 
)
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( T *  s1,
T  fac2,
unsigned int  options2,
const T *  s2,
const T *  fac3,
unsigned int  options3,
const T *  s3 
)
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( T *  s1,
const T *  fac2,
unsigned int  options2,
const T *  s2,
T  fac3,
unsigned int  options3,
const T *  s3 
)
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( T *  s1,
T  fac2,
unsigned int  options2,
const T *  s2,
T  fac3,
unsigned int  options3,
const T *  s3 
)
void viennacl::linalg::cuda::av ( vector_base< T > &  vec1,
vector_base< T > const &  vec2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)
__global__ void viennacl::linalg::cuda::av_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::av_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
void viennacl::linalg::cuda::avbv ( vector_base< T > &  vec1,
vector_base< T > const &  vec2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< T > const &  vec3,
ScalarType2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)
__global__ void viennacl::linalg::cuda::avbv_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
const T *  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
__global__ void viennacl::linalg::cuda::avbv_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
const T *  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
__global__ void viennacl::linalg::cuda::avbv_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
T  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
__global__ void viennacl::linalg::cuda::avbv_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
T  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
void viennacl::linalg::cuda::avbv_v ( vector_base< T > &  vec1,
vector_base< T > const &  vec2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< T > const &  vec3,
ScalarType2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
const T *  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
const T *  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
T  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T  fac2,
unsigned int  options2,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
T  fac3,
unsigned int  options3,
const T *  vec3,
unsigned int  start3,
unsigned int  inc3 
)
__global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel ( const unsigned int *  row_jumper,
const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
unsigned int  nonzero_rows,
const T *  x,
unsigned int  start_x,
unsigned int  inc_x,
T *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)
__global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const T *  sp_mat_elements,
const T *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
T *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const T *  sp_mat_elements,
const T *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
T *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  result,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
const T *  x,
unsigned int  start_x,
unsigned int  inc_x,
T *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel ( const unsigned int *  coords,
const ScalarType *  elements,
const unsigned int *  group_boundaries,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel ( const unsigned int *  coords,
const ScalarType *  elements,
const unsigned int *  group_boundaries,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel ( const unsigned int *  coords,
const T *  elements,
const unsigned int *  group_boundaries,
const T *  x,
unsigned int  start_x,
unsigned int  inc_x,
T *  result,
unsigned int  start_result,
unsigned int  inc_result 
)
__global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward ( const unsigned int *  row_jumper_U,
const unsigned int *  column_indices_U,
const T *  elements_U,
const T *  diagonal_U,
const unsigned int *  block_offsets,
T *  result,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward ( const unsigned int *  row_jumper_L,
const unsigned int *  column_indices_L,
const T *  elements_L,
const unsigned int *  block_offsets,
T *  result,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
const T *  diagonal_entries,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
const T *  diagonal_entries,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
const T *  diagonal_entries,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  vector,
unsigned int  size 
)
__global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const T *  elements,
T *  vector,
unsigned int  size 
)
__device__ T viennacl::linalg::cuda::cuda_abs ( T  val)
__device__ unsigned long viennacl::linalg::cuda::cuda_abs ( unsigned long  val)
inline
__device__ unsigned int viennacl::linalg::cuda::cuda_abs ( unsigned int  val)
inline
__device__ unsigned short viennacl::linalg::cuda::cuda_abs ( unsigned short  val)
inline
__device__ unsigned char viennacl::linalg::cuda::cuda_abs ( unsigned char  val)
inline
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_binary< OP > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< float, F > &  A,
matrix_expression< const matrix_base< float, F >, const matrix_base< float, F >, op_element_binary< OP > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< double, F > &  A,
matrix_expression< const matrix_base< double, F >, const matrix_base< double, F >, op_element_binary< OP > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_abs > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_acos > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_asin > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_atan > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_ceil > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cos > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cosh > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< OP > > const &  proxy 
)

Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)

Parameters
vec1: The result vector (or vector range, or vector slice)
proxy: The proxy object holding v2, v3 and the operation
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_exp > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< float > &  vec1,
vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OP > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_fabs > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< double > &  vec1,
vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OP > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_floor > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_acos > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_asin > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_atan > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log10 > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_ceil > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cos > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sin > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cosh > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sinh > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_exp > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_fabs > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sqrt > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_abs > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tan > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_floor > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( matrix_base< T, F > &  A,
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tanh > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log10 > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sin > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sinh > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sqrt > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tan > > const &  proxy 
)
void viennacl::linalg::cuda::element_op ( vector_base< T > &  vec1,
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tanh > > const &  proxy 
)
__global__ void viennacl::linalg::cuda::element_op_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)
__global__ void viennacl::linalg::cuda::element_op_int_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)
__global__ void viennacl::linalg::cuda::element_op_int_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2,
T const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)
__global__ void viennacl::linalg::cuda::element_op_int_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)
__global__ void viennacl::linalg::cuda::element_op_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2,
T const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)
__global__ void viennacl::linalg::cuda::element_op_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const T *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)
__global__ void viennacl::linalg::cuda::ell_matrix_d_mat_mul_kernel ( const unsigned int *  sp_mat_coords,
const ScalarType *  sp_mat_elements,
unsigned int  sp_mat_row_num,
unsigned int  sp_mat_col_num,
unsigned int  sp_mat_internal_row_num,
unsigned int  sp_mat_items_per_row,
unsigned int  sp_mat_aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::ell_matrix_d_tr_mat_mul_kernel ( const unsigned int *  sp_mat_coords,
const ScalarType *  sp_mat_elements,
unsigned int  sp_mat_row_num,
unsigned int  sp_mat_col_num,
unsigned int  sp_mat_internal_row_num,
unsigned int  sp_mat_items_per_row,
unsigned int  sp_mat_aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::ell_matrix_vec_mul_kernel ( const unsigned int *  coords,
const T *  elements,
const T *  x,
unsigned int  start_x,
unsigned int  inc_x,
T *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  row_num,
unsigned int  col_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row 
)
__global__ void viennacl::linalg::cuda::hyb_matrix_d_mat_mul_kernel ( const unsigned int *  ell_coords,
const NumericT *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT *  csr_elements,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::hyb_matrix_d_tr_mat_mul_kernel ( const unsigned int *  ell_coords,
const NumericT *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT *  csr_elements,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)
__global__ void viennacl::linalg::cuda::hyb_matrix_vec_mul_kernel ( const unsigned int *  ell_coords,
const T *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const T *  csr_elements,
const T *  x,
unsigned int  start_x,
unsigned int  inc_x,
T *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row 
)
vcl_size_t viennacl::linalg::cuda::index_norm_inf ( vector_base< T > const &  vec1)

Computes the index of the first entry that is equal to the supremum-norm in modulus.

Parameters
vec1: The vector
Returns
The result. Note that the result must be a CPU scalar (unsigned int), since GPU scalars are floating-point types.
__global__ void viennacl::linalg::cuda::index_norm_inf_kernel ( const T *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int *  result 
)
__global__ void viennacl::linalg::cuda::inner_prod_2_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
NumericT *  group_results 
)
__global__ void viennacl::linalg::cuda::inner_prod_3_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT *  y2,
unsigned int  start2,
unsigned int  stride2,
NumericT *  group_results 
)
__global__ void viennacl::linalg::cuda::inner_prod_4_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT *  y2,
unsigned int  start2,
unsigned int  stride2,
const NumericT *  y3,
unsigned int  start3,
unsigned int  stride3,
NumericT *  group_results 
)
__global__ void viennacl::linalg::cuda::inner_prod_8_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT *  y2,
unsigned int  start2,
unsigned int  stride2,
const NumericT *  y3,
unsigned int  start3,
unsigned int  stride3,
const NumericT *  y4,
unsigned int  start4,
unsigned int  stride4,
const NumericT *  y5,
unsigned int  start5,
unsigned int  stride5,
const NumericT *  y6,
unsigned int  start6,
unsigned int  stride6,
const NumericT *  y7,
unsigned int  start7,
unsigned int  stride7,
NumericT *  group_results 
)
void viennacl::linalg::cuda::inner_prod_cpu ( vector_base< T > const &  vec1,
vector_base< T > const &  vec2,
T &  result 
)

Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).

Parameters
vec1: The first vector
vec2: The second vector
result: The result scalar (on the host)
void viennacl::linalg::cuda::inner_prod_impl ( vector_base< T > const &  vec1,
vector_base< T > const &  vec2,
S3 &  result 
)

Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).

Parameters
vec1: The first vector
vec2: The second vector
result: The result scalar (on the GPU)
void viennacl::linalg::cuda::inner_prod_impl ( vector_base< T > const &  x,
vector_tuple< T > const &  vec_tuple,
vector_base< T > &  result 
)
__global__ void viennacl::linalg::cuda::inner_prod_kernel ( const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2,
T *  group_buffer 
)
void viennacl::linalg::cuda::inplace_solve ( const matrix_base< NumericT, F1 > &  A,
matrix_base< NumericT, F2 > &  B,
SOLVERTAG  tag 
)

Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation)

Parameters
A: The system matrix
B: The matrix of row vectors, to which the solution is written directly
tag: Solver tag for identifying the respective triangular solver
void viennacl::linalg::cuda::inplace_solve ( const matrix_base< NumericT, F1 > &  A,
matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans >  proxy_B,
SOLVERTAG  tag 
)

Direct inplace solver for triangular systems with multiple transposed right hand sides, i.e. A \ B^T (MATLAB notation)

Parameters
A: The system matrix
proxy_B: The proxy for the transposed matrix of row vectors, to which the solution is written directly
tag: Solver tag for identifying the respective triangular solver
void viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &  proxy_A,
matrix_base< NumericT, F2 > &  B,
SOLVERTAG  tag 
)

Direct inplace solver for transposed triangular systems with multiple right hand sides, i.e. A^T \ B (MATLAB notation)

Parameters
proxy_A: The transposed system matrix proxy
B: The matrix holding the load vectors, to which the solution is written directly
tag: Solver tag for identifying the respective triangular solver
void viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &  proxy_A,
matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans >  proxy_B,
SOLVERTAG  tag 
)

Direct inplace solver for transposed triangular systems with multiple transposed right hand sides, i.e. A^T \ B^T (MATLAB notation)

Parameters
proxy_A: The transposed system matrix proxy
proxy_B: The transposed matrix holding the load vectors, to which the solution is written directly
tag: Solver tag for identifying the respective triangular solver
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixType &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::unit_lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixType &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixType &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::unit_upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixType &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
void viennacl::linalg::cuda::inplace_solve ( const matrix_base< NumericT, F > &  mat,
vector_base< NumericT > &  vec,
SOLVERTAG   
)

Direct inplace solver for dense triangular systems (non-transposed version)

Parameters
mat: The system matrix
vec: The load vector, where the solution is directly written to
void viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > &  proxy,
vector_base< NumericT > &  vec,
SOLVERTAG   
)

Direct inplace solver for dense triangular systems (transposed version)

Parameters
proxy: The system matrix proxy
vec: The load vector, where the solution is directly written to
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::unit_lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::unit_upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &  mat,
viennacl::vector_base< ScalarType > &  vec,
viennacl::linalg::upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.
void viennacl::linalg::cuda::matrix_assign ( matrix_base< NumericT, F > &  mat,
NumericT  s,
bool  clear = false 
)
__global__ void viennacl::linalg::cuda::matrix_col_assign_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  alpha 
)
__global__ void viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  alpha 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_abs_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_acos_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_asin_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_atan_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_ceil_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_cos_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_cosh_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_exp_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_fabs_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_floor_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_log10_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_log_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_sin_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_sinh_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_sqrt_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_tan_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_col_element_tanh_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
void viennacl::linalg::cuda::matrix_column ( const matrix_base< NumericT, F > &  mat,
unsigned int  j,
vector_base< NumericT > &  vec 
)
void viennacl::linalg::cuda::matrix_diag_from_vector ( const vector_base< NumericT > &  vec,
int  k,
matrix_base< NumericT, F > &  mat 
)
void viennacl::linalg::cuda::matrix_diag_to_vector ( const matrix_base< NumericT, F > &  mat,
int  k,
vector_base< NumericT > &  vec 
)
void viennacl::linalg::cuda::matrix_diagonal_assign ( matrix_base< NumericT, F > &  mat,
NumericT  s 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_lower_solve_kernel ( const T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
bool  row_major_A,
bool  transpose_A,
T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_size1,
unsigned int  B_size2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
bool  row_major_B,
bool  transpose_B,
bool  unit_diagonal 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TA_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TT_kernel ( T  alpha,
const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
T  beta,
T *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)
__global__ void viennacl::linalg::cuda::matrix_matrix_upper_solve_kernel ( const T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
bool  row_major_A,
bool  transpose_A,
T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_size1,
unsigned int  B_size2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
bool  row_major_B,
bool  transpose_B,
bool  unit_diagonal 
)
void viennacl::linalg::cuda::matrix_row ( const matrix_base< NumericT, F > &  mat,
unsigned int  i,
vector_base< NumericT > &  vec 
)
__global__ void viennacl::linalg::cuda::matrix_row_assign_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  alpha 
)
__global__ void viennacl::linalg::cuda::matrix_row_diagonal_assign_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  alpha 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_abs_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_acos_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_asin_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_atan_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_ceil_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_cos_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_cosh_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_exp_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_fabs_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_floor_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_log10_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_log_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_sin_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_sinh_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_sqrt_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_tan_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
__global__ void viennacl::linalg::cuda::matrix_row_element_tanh_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)
void viennacl::linalg::cuda::norm_1_cpu ( vector_base< T > const &  vec1,
T &  result 
)

Computes the l^1-norm of a vector.

Parameters
vec1: The vector
result: The result scalar
void viennacl::linalg::cuda::norm_1_impl ( vector_base< T > const &  vec1,
scalar< T > &  result 
)

Computes the l^1-norm of a vector.

Parameters
vec1: The vector
result: The result scalar
void viennacl::linalg::cuda::norm_2_cpu ( vector_base< T > const &  vec1,
T &  result 
)

Computes the l^2-norm of a vector - implementation.

Parameters
vec1: The vector
result: The result scalar
void viennacl::linalg::cuda::norm_2_impl ( vector_base< T > const &  vec1,
scalar< T > &  result 
)

Computes the l^2-norm of a vector - implementation.

Parameters
vec1: The vector
result: The result scalar
void viennacl::linalg::cuda::norm_inf_cpu ( vector_base< T > const &  vec1,
T &  result 
)

Computes the supremum-norm of a vector.

Parameters
vec1: The vector
result: The result scalar
void viennacl::linalg::cuda::norm_inf_impl ( vector_base< T > const &  vec1,
scalar< T > &  result 
)

Computes the supremum-norm of a vector.

Parameters
vec1: The vector
result: The result scalar
__global__ void viennacl::linalg::cuda::norm_kernel_floats ( const T *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
T *  group_buffer 
)
__global__ void viennacl::linalg::cuda::norm_kernel_integers ( const T *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
T *  group_buffer 
)
__global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers ( const T *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
T *  group_buffer 
)
void viennacl::linalg::cuda::plane_rotation ( vector_base< T > &  vec1,
vector_base< T > &  vec2,
T  alpha,
T  beta 
)

Computes a plane rotation of two vectors.

Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)

Parameters
vec1: The first vector
vec2: The second vector
alpha: The first transformation coefficient
beta: The second transformation coefficient
__global__ void viennacl::linalg::cuda::plane_rotation_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2,
T  alpha,
T  beta 
)
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< ScalarType, ALIGNMENT > &  mat,
const viennacl::vector_base< ScalarType > &  vec,
viennacl::vector_base< ScalarType > &  result 
)

Carries out matrix-vector multiplication with a compressed_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat: The matrix
vec: The vector
result: The result vector
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< TYPE, ALIGNMENT > &  sp_mat,
const viennacl::matrix_base< TYPE, F1 > &  d_mat,
viennacl::matrix_base< TYPE, F2 > &  result 
)

Carries out sparse matrix-dense matrix multiplication, with the first (sparse) matrix being a compressed_matrix.

Implementation of the convenience expression result = prod(sp_mat, d_mat);

Parameters
sp_mat: The sparse matrix
d_mat: The dense matrix
result: The result matrix
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< TYPE, ALIGNMENT > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< TYPE, F1 >, const viennacl::matrix_base< TYPE, F1 >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< TYPE, F2 > &  result 
)

Carries out sparse matrix-transposed dense matrix multiplication, with the first matrix being a compressed_matrix and the second a transposed dense matrix.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));

Parameters
sp_mat: The sparse matrix
d_mat: The transposed dense matrix proxy
result: The result matrix
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_compressed_matrix< ScalarType > &  mat,
const viennacl::vector_base< ScalarType > &  vec,
viennacl::vector_base< ScalarType > &  result 
)

Carries out matrix-vector multiplication with a compressed_compressed_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat: The matrix
vec: The vector
result: The result vector
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > &  mat,
const viennacl::vector_base< ScalarType > &  vec,
viennacl::vector_base< ScalarType > &  result 
)

Carries out matrix-vector multiplication with a coordinate_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat: The matrix
vec: The vector
result: The result vector
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< NumericT, ALIGNMENT > &  sp_mat,
const viennacl::matrix_base< NumericT, F1 > &  d_mat,
viennacl::matrix_base< NumericT, F2 > &  result 
)

Carries out sparse matrix (COO)-dense matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, d_mat);

Parameters
sp_mat: The Sparse Matrix (Coordinate format)
d_mat: The Dense Matrix
result: The Result Matrix
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT, F2 > &  result 
)

Carries out sparse matrix (COO)-transposed dense matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));

Parameters
sp_mat: The Sparse Matrix (Coordinate format)
d_mat: The Dense Transposed Matrix
result: The Result Matrix
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< ScalarType, ALIGNMENT > &  mat,
const viennacl::vector_base< ScalarType > &  vec,
viennacl::vector_base< ScalarType > &  result 
)

Carries out matrix-vector multiplication with an ell_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat: The matrix
vec: The vector
result: The result vector
void viennacl::linalg::cuda::prod_impl ( const matrix_base< NumericT, F > &  mat,
const vector_base< NumericT > &  vec,
vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat: The matrix
vec: The vector
result: The result vector
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< ScalarType, ALIGNMENT > &  sp_mat,
const viennacl::matrix_base< NumericT, F1 > &  d_mat,
viennacl::matrix_base< NumericT, F2 > &  result 
)

Carries out Sparse Matrix(ELL)-Dense Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, d_mat); sp_mat being in ELL format

Parameters
sp_mat: The sparse matrix (ELL)
d_mat: The dense matrix
result: The result matrix
void viennacl::linalg::cuda::prod_impl ( const viennacl::matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > &  mat_trans,
const vector_base< NumericT > &  vec,
vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a transposed matrix.

Implementation of the convenience expression result = trans(mat) * vec;

Parameters
mat_trans: The transposed matrix proxy
vec: The vector
result: The result vector
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< ScalarType, ALIGNMENT > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT, F2 > &  result 
)

Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat)); sp_mat being in ELL format

Parameters
sp_mat: The sparse matrix (ELL)
d_mat: The transposed dense matrix proxy
result: The result matrix
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< ScalarType, ALIGNMENT > &  mat,
const viennacl::vector_base< ScalarType > &  vec,
viennacl::vector_base< ScalarType > &  result 
)

Carries out matrix-vector multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat: The matrix
vec: The vector
result: The result vector
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, ALIGNMENT > &  mat,
const viennacl::matrix_base< NumericT, F1 > &  d_mat,
viennacl::matrix_base< NumericT, F2 > &  result 
)

Carries out sparse matrix-dense matrix multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, d_mat);

Parameters
mat: The sparse matrix
d_mat: The dense matrix (row- or column-major)
result: The dense result matrix (row- or column-major)
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, ALIGNMENT > &  mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT, F2 > &  result 
)

Carries out sparse matrix-transposed dense matrix multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, trans(d_mat));

Parameters
mat: The sparse matrix
d_mat: Transposed matrix proxy object for the rhs dense matrix (row- or column-major)
result: The dense result matrix (row- or column-major)
void viennacl::linalg::cuda::prod_impl ( const matrix_base< NumericT, F1 > &  A,
const matrix_base< NumericT, F2 > &  B,
matrix_base< NumericT, F3 > &  C,
ScalarType  alpha,
ScalarType  beta 
)

Carries out matrix-matrix multiplication.

Implementation of C = prod(A, B);

void viennacl::linalg::cuda::prod_impl ( const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &  A,
const matrix_base< NumericT, F2 > &  B,
matrix_base< NumericT, F3 > &  C,
ScalarType  alpha,
ScalarType  beta 
)

Carries out matrix-matrix multiplication.

Implementation of C = prod(trans(A), B);

void viennacl::linalg::cuda::prod_impl ( const matrix_base< NumericT, F1 > &  A,
const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > &  B,
matrix_base< NumericT, F3 > &  C,
ScalarType  alpha,
ScalarType  beta 
)

Carries out matrix-matrix multiplication.

Implementation of C = prod(A, trans(B));

void viennacl::linalg::cuda::prod_impl ( const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &  A,
const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > &  B,
matrix_base< NumericT, F3 > &  C,
ScalarType  alpha,
ScalarType  beta 
)

Carries out matrix-matrix multiplication.

Implementation of C = prod(trans(A), trans(B));

__global__ void viennacl::linalg::cuda::scalar_swap_kernel ( T *  s1,
T *  s2 
)
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  val,
unsigned int  options2,
const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  val,
unsigned int  options2,
const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T  val,
unsigned int  options2,
const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel ( T *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const T *  val,
unsigned int  options2,
const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const T *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)
void viennacl::linalg::cuda::scaled_rank_1_update ( matrix_base< NumericT, F > &  mat1,
S1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
const vector_base< NumericT > &  vec1,
const vector_base< NumericT > &  vec2 
)

The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update.

Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);

Parameters
mat1: The matrix to be updated
alpha: The scaling factor (either a viennacl::scalar<>, float, or double)
len_alpha: Length of the buffer for a possible final reduction step (currently always '1')
reciprocal_alpha: Use 1/alpha instead of alpha
flip_sign_alpha: Use -alpha instead of alpha
vec1: The first vector
vec2: The second vector
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value >::type viennacl::linalg::cuda::swap ( S1 &  s1,
S2 &  s2 
)

Swaps the contents of two scalars; the data is copied.

Parameters
s1: The first scalar
s2: The second scalar
__global__ void viennacl::linalg::cuda::trans_vec_mul_col_kernel ( const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
T *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)
__global__ void viennacl::linalg::cuda::trans_vec_mul_row_kernel ( const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
T *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_col_kernel ( T const *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
unsigned int  options 
)
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_row_kernel ( T const *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
T *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
unsigned int  options 
)
__global__ void viennacl::linalg::cuda::vec_element_abs_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_acos_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_asin_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_atan_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_ceil_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_cos_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_cosh_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_exp_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_fabs_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_floor_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_log10_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_log_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_sin_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_sinh_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_tan_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_element_tanh_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)
__global__ void viennacl::linalg::cuda::vec_mul_col_kernel ( const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
T *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)
__global__ void viennacl::linalg::cuda::vec_mul_row_kernel ( const T *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const T *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
T *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)
void viennacl::linalg::cuda::vector_assign ( vector_base< T > &  vec1,
const S1 &  alpha,
bool  up_to_internal_size = false 
)

Assigns a constant value to a vector (or to a vector range or slice).

Parameters
vec1: The vector to which the value should be assigned
alpha: The value to be assigned
up_to_internal_size: Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer).
__global__ void viennacl::linalg::cuda::vector_assign_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  internal_size1,
alpha 
)
__global__ void viennacl::linalg::cuda::vector_multi_sum_kernel ( T const *  vec1,
T *  result,
unsigned int  start_result,
unsigned int  inc_result 
)
__global__ void viennacl::linalg::cuda::vector_sum_kernel_floats ( const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
T *  result 
)
__global__ void viennacl::linalg::cuda::vector_sum_kernel_integers ( const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
T *  result 
)
__global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers ( const T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
T *  result 
)
void viennacl::linalg::cuda::vector_swap ( vector_base< T > &  vec1,
vector_base< T > &  vec2 
)

Swaps the contents of two vectors; the data is copied.

Parameters
vec1: The first vector (or vector range or slice)
vec2: The second vector (or vector range or slice)
__global__ void viennacl::linalg::cuda::vector_swap_kernel ( T *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
T *  vec2,
unsigned int  start2,
unsigned int  inc2 
)