ViennaCL - The Vienna Computing Library  1.5.1
ell_matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_ELL_MATRIX_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_ELL_MATRIX_HPP
3 
7 #include "viennacl/ocl/utils.hpp"
8 
10 
13 namespace viennacl
14 {
15  namespace linalg
16  {
17  namespace opencl
18  {
19  namespace kernels
20  {
21 
23 
/** @brief Appends the OpenCL source text of the kernel 'vec_mul' to the supplied string.
 *
 * The emitted kernel walks over the rows in strides of the global work size. For each row it
 * visits items_per_row entries of 'elements' (stepping by internal_row_num), skips entries that
 * compare equal to zero, and accumulates elements[offset] * x[coords[offset]] into the result.
 *
 * @param source          Object receiving the generated text through its append() member
 * @param numeric_string  Spelling of the scalar type that is inserted into the kernel text
 */
template <typename StringType>
void generate_ell_vec_mul(StringType & source, std::string const & numeric_string)
{
  // Kernel signature. The scalar type is spliced in between the fixed text fragments.
  source.append("__kernel void vec_mul( \n"
                " __global const unsigned int * coords, \n"
                " __global const ");
  source.append(numeric_string);
  source.append(" * elements, \n"
                " __global const ");
  source.append(numeric_string);
  source.append(" * x, \n"
                " uint4 layout_x, \n"
                " __global ");
  source.append(numeric_string);
  source.append(" * result, \n"
                " uint4 layout_result, \n"
                " unsigned int row_num, \n"
                " unsigned int col_num, \n"
                " unsigned int internal_row_num, \n"
                " unsigned int items_per_row, \n"
                " unsigned int aligned_items_per_row) \n"
                // Kernel body: outer loop over rows, one accumulator per row.
                "{ \n"
                " uint glb_id = get_global_id(0); \n"
                " uint glb_sz = get_global_size(0); \n"
                " for(uint row_id = glb_id; row_id < row_num; row_id += glb_sz) { \n"
                " ");
  source.append(numeric_string);
  source.append(" sum = 0; \n"
                " uint offset = row_id; \n"
                " for(uint item_id = 0; item_id < items_per_row; item_id++, offset += internal_row_num) { \n"
                " ");
  source.append(numeric_string);
  // Inner loop body and the final write of the accumulated row value.
  source.append(" val = elements[offset]; \n"
                " if(val != 0.0f) { \n"
                " int col = coords[offset]; \n"
                " sum += (x[col * layout_x.y + layout_x.x] * val); \n"
                " } \n"
                " } \n"
                " result[row_id * layout_result.y + layout_result.x] = sum; \n"
                " } \n"
                "} \n");
}
61 
62  namespace detail
63  {
64  template <typename StringType>
65  void generate_ell_matrix_dense_matrix_mul(StringType & source, std::string const & numeric_string,
66  bool B_transposed, bool B_row_major, bool C_row_major)
67  {
68  source.append("__kernel void ");
69  source.append(viennacl::linalg::opencl::detail::sparse_dense_matmult_kernel_name(B_transposed, B_row_major, C_row_major));
70  source.append("( \n");
71  source.append(" __global const unsigned int * sp_mat_coords, \n");
72  source.append(" __global const "); source.append(numeric_string); source.append(" * sp_mat_elems, \n");
73  source.append(" unsigned int sp_mat_row_num, \n");
74  source.append(" unsigned int sp_mat_col_num, \n");
75  source.append(" unsigned int sp_mat_internal_row_num, \n");
76  source.append(" unsigned int sp_mat_items_per_row, \n");
77  source.append(" unsigned int sp_mat_aligned_items_per_row, \n");
78  source.append(" __global const "); source.append(numeric_string); source.append("* d_mat, \n");
79  source.append(" unsigned int d_mat_row_start, \n");
80  source.append(" unsigned int d_mat_col_start, \n");
81  source.append(" unsigned int d_mat_row_inc, \n");
82  source.append(" unsigned int d_mat_col_inc, \n");
83  source.append(" unsigned int d_mat_row_size, \n");
84  source.append(" unsigned int d_mat_col_size, \n");
85  source.append(" unsigned int d_mat_internal_rows, \n");
86  source.append(" unsigned int d_mat_internal_cols, \n");
87  source.append(" __global "); source.append(numeric_string); source.append(" * result, \n");
88  source.append(" unsigned int result_row_start, \n");
89  source.append(" unsigned int result_col_start, \n");
90  source.append(" unsigned int result_row_inc, \n");
91  source.append(" unsigned int result_col_inc, \n");
92  source.append(" unsigned int result_row_size, \n");
93  source.append(" unsigned int result_col_size, \n");
94  source.append(" unsigned int result_internal_rows, \n");
95  source.append(" unsigned int result_internal_cols) { \n");
96 
97  source.append(" uint glb_id = get_global_id(0); \n");
98  source.append(" uint glb_sz = get_global_size(0); \n");
99 
100  source.append(" for( uint rc = glb_id; rc < (sp_mat_row_num * result_col_size); rc += glb_sz) { \n");
101  source.append(" uint row = rc % sp_mat_row_num; \n");
102  source.append(" uint col = rc / sp_mat_row_num; \n");
103 
104  source.append(" uint offset = row; \n");
105  source.append(" "); source.append(numeric_string); source.append(" r = ("); source.append(numeric_string); source.append(")0; \n");
106 
107  source.append(" for( uint k = 0; k < sp_mat_items_per_row; k++, offset += sp_mat_internal_row_num) { \n");
108 
109  source.append(" uint j = sp_mat_coords[offset]; \n");
110  source.append(" "); source.append(numeric_string); source.append(" x = sp_mat_elems[offset]; \n");
111 
112  source.append(" if(x != ("); source.append(numeric_string); source.append(")0) { \n");
113  source.append(" "); source.append(numeric_string);
114  if (B_transposed && B_row_major)
115  source.append(" y = d_mat[ (d_mat_row_start + col * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + j * d_mat_col_inc ]; \n");
116  else if (B_transposed && !B_row_major)
117  source.append(" y = d_mat[ (d_mat_row_start + col * d_mat_row_inc) + (d_mat_col_start + j * d_mat_col_inc) * d_mat_internal_rows ]; \n");
118  else if (!B_transposed && B_row_major)
119  source.append(" y = d_mat[ (d_mat_row_start + j * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + col * d_mat_col_inc ]; \n");
120  else
121  source.append(" y = d_mat[ (d_mat_row_start + j * d_mat_row_inc) + (d_mat_col_start + col * d_mat_col_inc) * d_mat_internal_rows ]; \n");
122 
123  source.append(" r += x*y; \n");
124  source.append(" } \n");
125  source.append(" } \n");
126 
127  if (C_row_major)
128  source.append(" result[ (result_row_start + row * result_row_inc) * result_internal_cols + result_col_start + col * result_col_inc ] = r; \n");
129  else
130  source.append(" result[ (result_row_start + row * result_row_inc) + (result_col_start + col * result_col_inc) * result_internal_rows ] = r; \n");
131  source.append(" } \n");
132  source.append("} \n");
133 
134  }
135  }
136 
137  template <typename StringType>
138  void generate_ell_matrix_dense_matrix_multiplication(StringType & source, std::string const & numeric_string)
139  {
140  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, false, false);
141  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, false, true);
142  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, true, false);
143  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, false, true, true);
144 
145  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, false, false);
146  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, false, true);
147  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, true, false);
148  detail::generate_ell_matrix_dense_matrix_mul(source, numeric_string, true, true, true);
149  }
150 
152 
153  // main kernel class
155  template <typename NumericT>
156  struct ell_matrix
157  {
158  static std::string program_name()
159  {
160  return viennacl::ocl::type_to_string<NumericT>::apply() + "_ell_matrix";
161  }
162 
163  static void init(viennacl::ocl::context & ctx)
164  {
166  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
167 
168  static std::map<cl_context, bool> init_done;
169  if (!init_done[ctx.handle().get()])
170  {
171  std::string source;
172  source.reserve(1024);
173 
174  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
175 
176  // fully parametrized kernels:
177  generate_ell_vec_mul(source, numeric_string);
178  generate_ell_matrix_dense_matrix_multiplication(source, numeric_string);
179 
180  std::string prog_name = program_name();
181  #ifdef VIENNACL_BUILD_INFO
182  std::cout << "Creating program " << prog_name << std::endl;
183  #endif
184  ctx.add_program(source, prog_name);
185  init_done[ctx.handle().get()] = true;
186  } //if
187  } //init
188  };
189 
190  } // namespace kernels
191  } // namespace opencl
192  } // namespace linalg
193 } // namespace viennacl
194 #endif
195 
Implements an OpenCL platform within ViennaCL.
void generate_ell_matrix_dense_matrix_multiplication(StringType &source, std::string const &numeric_string)
Definition: ell_matrix.hpp:138
Common implementations shared by OpenCL-based operations.
Various little tools used here and there in ViennaCL.
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices...
Definition: common.hpp:46
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
Provides OpenCL-related utilities.
Main kernel class for generating OpenCL kernels for ell_matrix.
Definition: ell_matrix.hpp:156
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:476
static std::string program_name()
Definition: ell_matrix.hpp:158
const OCL_TYPE & get() const
Definition: handle.hpp:189
void generate_ell_vec_mul(StringType &source, std::string const &numeric_string)
Definition: ell_matrix.hpp:25
static void init(viennacl::ocl::context &ctx)
Definition: ell_matrix.hpp:163
void generate_ell_matrix_dense_matrix_mul(StringType &source, std::string const &numeric_string, bool B_transposed, bool B_row_major, bool C_row_major)
Definition: ell_matrix.hpp:65
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
Representation of an OpenCL kernel in ViennaCL.
Helper class for converting a type to its string representation.
Definition: utils.hpp:57