ViennaCL - The Vienna Computing Library  1.5.1
svd.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SVD_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_SVD_HPP
3 
7 #include "viennacl/ocl/utils.hpp"
8 
11 namespace viennacl
12 {
13  namespace linalg
14  {
15  namespace opencl
16  {
17  namespace kernels
18  {
/** @brief Appends the OpenCL source of the 'bidiag_pack' kernel to a program string.
 *
 * The generated kernel reads A with row pitch 'stride' and, for every
 * i < min(size1, size2), stores A[i*stride + i] in D[i] and the element to its
 * right (or 0 if column i+1 does not exist) in S[i + 1]. S[0] is set to 0.
 * S is therefore written at indices 0..min(size1, size2), i.e. it needs
 * min(size1, size2) + 1 entries.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_bidiag_pack(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void bidiag_pack(__global "); source.append(numeric_string); source.append("* A, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* D, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* S, \n");
  source.append(" uint size1, \n");
  source.append(" uint size2, \n");
  source.append(" uint stride \n");
  source.append(") { \n");
  source.append(" uint size = min(size1, size2); \n");

  // a single work-item clears the leading entry of S
  source.append(" if(get_global_id(0) == 0) \n");
  source.append(" S[0] = 0; \n");

  // every work-item handles indices i, i + global_size, ...
  source.append(" for(uint i = get_global_id(0); i < size ; i += get_global_size(0)) { \n");
  source.append(" D[i] = A[i*stride + i]; \n");
  source.append(" S[i + 1] = (i + 1 < size2) ? A[i*stride + (i + 1)] : 0; \n");
  source.append(" } \n");
  source.append("} \n");
}
40 
/** @brief Appends the OpenCL helper function 'col_reduce_lcl_array' to a program string.
 *
 * The generated device function sums the first lcl_sz entries of the __local
 * array 'sums' into sums[0]. All work-items of the work-group must call it,
 * because it executes a barrier in every reduction round.
 *
 * The reduction folds the upper part of the still-active range onto the lower
 * part, rounding the fold point up. For power-of-two sizes this performs
 * exactly the same additions as a plain halving scheme; for any other size no
 * element is dropped (a plain 'lcl_sz >> 1' halving would lose the trailing
 * entries).
 *
 * @param source          String-like object the function text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the function, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_col_reduce_lcl_array(StringType & source, std::string const & numeric_string)
{
  source.append("void col_reduce_lcl_array(__local "); source.append(numeric_string); source.append("* sums, uint lcl_id, uint lcl_sz) { \n");
  // number of entries that still have to be folded into sums[0]
  source.append(" uint remaining = lcl_sz; \n");

  source.append(" while(remaining > 1) { \n");
  // fold point, rounded up so that an odd middle element is kept untouched for the next round
  source.append(" uint offset = (remaining + 1) >> 1; \n");
  // writers [0, remaining - offset) and readers [offset, remaining) never overlap
  source.append(" if(lcl_id + offset < remaining) { \n");
  source.append(" sums[lcl_id] += sums[lcl_id + offset]; \n");
  source.append(" } \n");
  source.append(" remaining = offset; \n");
  // loop condition depends on lcl_sz only, so every work-item reaches this barrier
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");
  source.append(" } \n");
  source.append("} \n");
}
57 
/** @brief Appends the OpenCL source of the 'copy_col' kernel to a program string.
 *
 * The generated kernel copies the entries A[i*stride + col_start] for
 * row_start <= i < size into V[0], V[1], ... ('size' is an exclusive end
 * index, not an element count).
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_copy_col(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void copy_col(__global "); source.append(numeric_string); source.append("* A, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* V, \n");
  source.append(" uint row_start, \n");
  source.append(" uint col_start, \n");
  source.append(" uint size, \n");
  source.append(" uint stride \n");
  source.append(" ) { \n");
  source.append(" uint glb_id = get_global_id(0); \n");
  source.append(" uint glb_sz = get_global_size(0); \n");

  // one strided element of A per iteration, packed densely into V
  source.append(" for(uint i = row_start + glb_id; i < size; i += glb_sz) { \n");
  source.append(" V[i - row_start] = A[i * stride + col_start]; \n");
  source.append(" } \n");
  source.append("} \n");
}
77 
/** @brief Appends the OpenCL source of the 'copy_row' kernel to a program string.
 *
 * The generated kernel copies the entries A[row_start*stride + i] for
 * col_start <= i < size into V[0], V[1], ... ('size' is an exclusive end
 * index, not an element count).
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_copy_row(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void copy_row(__global "); source.append(numeric_string); source.append("* A, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* V, \n");
  source.append(" uint row_start, \n");
  source.append(" uint col_start, \n");
  source.append(" uint size, \n");
  source.append(" uint stride \n");
  source.append(" ) { \n");
  source.append(" uint glb_id = get_global_id(0); \n");
  source.append(" uint glb_sz = get_global_size(0); \n");

  // consecutive elements of one row of A, packed densely into V
  source.append(" for(uint i = col_start + glb_id; i < size; i += glb_sz) { \n");
  source.append(" V[i - col_start] = A[row_start * stride + i]; \n");
  source.append(" } \n");
  source.append("} \n");
}
97 
/** @brief Appends the OpenCL source of the 'final_iter_update' kernel to a program string.
 *
 * For every px < last_n the generated kernel combines the two entries
 * z = A[(n-1)*stride + px] and v_in = A[n*stride + px] with the scalars q, p:
 *   A[(n-1)*stride + px] = q * z    + p * v_in
 *   A[ n   *stride + px] = q * v_in - p * z
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_final_iter_update(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void final_iter_update(__global "); source.append(numeric_string); source.append("* A, \n");
  source.append(" uint stride, \n");
  source.append(" uint n, \n");
  source.append(" uint last_n, \n");
  source.append(" "); source.append(numeric_string); source.append(" q, \n");
  source.append(" "); source.append(numeric_string); source.append(" p \n");
  source.append(" ) \n");
  source.append("{ \n");
  source.append(" uint glb_id = get_global_id(0); \n");
  source.append(" uint glb_sz = get_global_size(0); \n");

  source.append(" for (uint px = glb_id; px < last_n; px += glb_sz) \n");
  source.append(" { \n");
  // both inputs are read before either output is written
  source.append(" "); source.append(numeric_string); source.append(" v_in = A[n * stride + px]; \n");
  source.append(" "); source.append(numeric_string); source.append(" z = A[(n - 1) * stride + px]; \n");
  source.append(" A[(n - 1) * stride + px] = q * z + p * v_in; \n");
  source.append(" A[n * stride + px] = q * v_in - p * z; \n");
  source.append(" } \n");
  source.append("} \n");
}
121 
/** @brief Appends the OpenCL source of the 'givens_next' kernel to a program string.
 *
 * In the generated kernel every work-item owns one index j = get_global_id(0)
 * (active only if j < size) and walks i from end_i down to start_i. Starting
 * with x = matr[(end_i + 1)*stride + j], each step reads z = matr[i*stride + j]
 * and the coefficient pair (cs[i], ss[i]), writes
 *   matr[(i + 1)*stride + j] = x * cs[i] + z * ss[i]
 * and continues with x = -x * ss[i] + z * cs[i]. The final x is stored in
 * matr[start_i*stride + j].
 *
 * The coefficients are staged through two __local arrays of 256 entries in
 * chunks of get_local_size(0); the kernel must therefore be launched with a
 * work-group size of at most 256.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_givens_next(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void givens_next(__global "); source.append(numeric_string); source.append("* matr, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* cs, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* ss, \n");
  source.append(" uint size, \n");
  source.append(" uint stride, \n");
  source.append(" uint start_i, \n");
  source.append(" uint end_i \n");
  source.append(" ) \n");
  source.append("{ \n");
  source.append(" uint glb_id = get_global_id(0); \n");

  source.append(" uint lcl_id = get_local_id(0); \n");
  source.append(" uint lcl_sz = get_local_size(0); \n");

  source.append(" uint j = glb_id; \n");

  // staging buffers for one chunk of coefficients
  source.append(" __local "); source.append(numeric_string); source.append(" cs_lcl[256]; \n");
  source.append(" __local "); source.append(numeric_string); source.append(" ss_lcl[256]; \n");

  source.append(" "); source.append(numeric_string); source.append(" x = (j < size) ? matr[(end_i + 1) * stride + j] : 0; \n");

  source.append(" uint elems_num = end_i - start_i + 1; \n");
  source.append(" uint block_num = (elems_num + lcl_sz - 1) / lcl_sz; \n");

  source.append(" for(uint block_id = 0; block_id < block_num; block_id++) \n");
  source.append(" { \n");
  // number of valid coefficients in this chunk
  source.append(" uint to = min(elems_num - block_id * lcl_sz, lcl_sz); \n");

  source.append(" if(lcl_id < to) \n");
  source.append(" { \n");
  source.append(" cs_lcl[lcl_id] = cs[end_i - (lcl_id + block_id * lcl_sz)]; \n");
  source.append(" ss_lcl[lcl_id] = ss[end_i - (lcl_id + block_id * lcl_sz)]; \n");
  source.append(" } \n");

  // the barriers stay outside the 'j < size' guard so that all work-items reach them
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");

  source.append(" if(j < size) \n");
  source.append(" { \n");
  source.append(" for(uint ind = 0; ind < to; ind++) \n");
  source.append(" { \n");
  source.append(" uint i = end_i - (ind + block_id * lcl_sz); \n");

  source.append(" "); source.append(numeric_string); source.append(" z = matr[i * stride + j]; \n");

  source.append(" "); source.append(numeric_string); source.append(" cs_val = cs_lcl[ind]; \n");
  source.append(" "); source.append(numeric_string); source.append(" ss_val = ss_lcl[ind]; \n");

  source.append(" matr[(i + 1) * stride + j] = x * cs_val + z * ss_val; \n");
  source.append(" x = -x * ss_val + z * cs_val; \n");
  source.append(" } \n");
  source.append(" } \n");
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");
  source.append(" } \n");
  source.append(" if(j < size) \n");
  source.append(" matr[(start_i) * stride + j] = x; \n");
  source.append("} \n");
}
183 
/** @brief Appends the OpenCL source of the 'givens_prev' kernel to a program string.
 *
 * In the generated kernel every work-item owns one index j = get_global_id(0)
 * (active only if j < size) and walks i upwards from start_i to end_i - 1.
 * Starting with x = matr[(start_i - 1)*stride + j], each step reads
 * z = matr[i*stride + j] and the coefficient pair (cs[i], ss[i]), writes
 *   matr[(i - 1)*stride + j] = x * cs[i] + z * ss[i]
 * and continues with x = -x * ss[i] + z * cs[i]. The final x is stored in
 * matr[(end_i - 1)*stride + j].
 *
 * The coefficients are staged through two __local arrays of 256 entries in
 * chunks of get_local_size(0); the kernel must therefore be launched with a
 * work-group size of at most 256.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_givens_prev(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void givens_prev(__global "); source.append(numeric_string); source.append("* matr, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* cs, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* ss, \n");
  source.append(" uint size, \n");
  source.append(" uint stride, \n");
  source.append(" uint start_i, \n");
  source.append(" uint end_i \n");
  source.append(" ) \n");
  source.append("{ \n");
  source.append(" uint glb_id = get_global_id(0); \n");

  source.append(" uint lcl_id = get_local_id(0); \n");
  source.append(" uint lcl_sz = get_local_size(0); \n");

  source.append(" uint j = glb_id; \n");

  // staging buffers for one chunk of coefficients
  source.append(" __local "); source.append(numeric_string); source.append(" cs_lcl[256]; \n");
  source.append(" __local "); source.append(numeric_string); source.append(" ss_lcl[256]; \n");

  source.append(" "); source.append(numeric_string); source.append(" x = (j < size) ? matr[(start_i - 1) * stride + j] : 0; \n");

  source.append(" uint elems_num = end_i - start_i; \n");
  source.append(" uint block_num = (elems_num + lcl_sz - 1) / lcl_sz; \n");

  source.append(" for(uint block_id = 0; block_id < block_num; block_id++) \n");
  source.append(" { \n");
  // number of valid coefficients in this chunk
  source.append(" uint to = min(elems_num - block_id * lcl_sz, lcl_sz); \n");

  source.append(" if(lcl_id < to) \n");
  source.append(" { \n");
  source.append(" cs_lcl[lcl_id] = cs[lcl_id + start_i + block_id * lcl_sz]; \n");
  source.append(" ss_lcl[lcl_id] = ss[lcl_id + start_i + block_id * lcl_sz]; \n");
  source.append(" } \n");

  // the barriers stay outside the 'j < size' guard so that all work-items reach them
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");

  source.append(" if(j < size) \n");
  source.append(" { \n");
  source.append(" for(uint ind = 0; ind < to; ind++) \n");
  source.append(" { \n");
  source.append(" uint i = ind + start_i + block_id * lcl_sz; \n");

  source.append(" "); source.append(numeric_string); source.append(" z = matr[i * stride + j]; \n");

  source.append(" "); source.append(numeric_string); source.append(" cs_val = cs_lcl[ind];//cs[i]; \n");
  source.append(" "); source.append(numeric_string); source.append(" ss_val = ss_lcl[ind];//ss[i]; \n");

  source.append(" matr[(i - 1) * stride + j] = x * cs_val + z * ss_val; \n");
  source.append(" x = -x * ss_val + z * cs_val; \n");
  source.append(" } \n");
  source.append(" } \n");
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");
  source.append(" } \n");
  source.append(" if(j < size) \n");
  source.append(" matr[(end_i - 1) * stride + j] = x; \n");
  source.append("} \n");
}
245 
/** @brief Appends the OpenCL source of the 'house_update_A_left' kernel to a program string.
 *
 * For every column index i in [col_start, size2) the generated kernel computes
 *   ss = sum_{j = row_start}^{size1 - 1} V[j] * A[j*stride + i]
 * and then updates A[j*stride + i] -= 2 * V[j] * ss for the same range of j.
 * Each work-item handles whole columns on its own, so no work-group
 * cooperation is involved. V (the Householder vector) is passed in __constant
 * memory. The __local argument 'sums' is not used by this kernel; it is kept
 * so that the kernel's argument list stays unchanged for callers.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_house_update_A_left(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void house_update_A_left( \n");
  source.append(" __global "); source.append(numeric_string); source.append("* A, \n");
  source.append(" __constant "); source.append(numeric_string); source.append("* V, \n"); // householder vector
  source.append(" uint row_start, \n");
  source.append(" uint col_start, \n");
  source.append(" uint size1, \n");
  source.append(" uint size2, \n");
  source.append(" uint stride, \n");
  source.append(" __local "); source.append(numeric_string); source.append("* sums \n");
  source.append(" ) { \n");
  source.append(" uint glb_id = get_global_id(0); \n");
  source.append(" uint glb_sz = get_global_size(0); \n");

  source.append(" "); source.append(numeric_string); source.append(" ss = 0; \n");

  // doing it in slightly different way to avoid cache misses
  source.append(" for(uint i = glb_id + col_start; i < size2; i += glb_sz) { \n");
  source.append(" ss = 0; \n");
  // dot product of V with column i
  source.append(" for(uint j = row_start; j < size1; j++) ss = ss + (V[j] * A[j * stride + i]); \n");

  // reflect column i
  source.append(" for(uint j = row_start; j < size1; j++) \n");
  source.append(" A[j * stride + i] = A[j * stride + i] - (2 * V[j] * ss); \n");
  source.append(" } \n");
  source.append("} \n");
}
280 
/** @brief Appends the OpenCL source of the 'house_update_A_right' kernel to a program string.
 *
 * In the generated kernel each work-group processes rows
 * i = row_start + group_id, row_start + group_id + num_groups, ... below size1.
 * For a row, the work-items accumulate partial sums of V[j] * A[i*stride + j]
 * over j in [0, size2), combine them in the __local array 'sums' via
 * col_reduce_lcl_array(), and then update
 *   A[i*stride + j] -= 2 * V[j] * sums[0].
 * 'sums' must provide one entry per work-item of the work-group. The kernel
 * calls col_reduce_lcl_array(), which therefore has to be present earlier in
 * the same program source. The parameter col_start is not referenced by the
 * kernel body.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_house_update_A_right(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void house_update_A_right( \n");
  source.append(" __global "); source.append(numeric_string); source.append("* A, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* V, \n"); // householder vector
  source.append(" uint row_start, \n");
  source.append(" uint col_start, \n");
  source.append(" uint size1, \n");
  source.append(" uint size2, \n");
  source.append(" uint stride, \n");
  source.append(" __local "); source.append(numeric_string); source.append("* sums \n");
  source.append(" ) { \n");

  source.append(" uint grp_id = get_group_id(0); \n");
  source.append(" uint grp_nm = get_num_groups(0); \n");

  source.append(" uint lcl_id = get_local_id(0); \n");
  source.append(" uint lcl_sz = get_local_size(0); \n");

  source.append(" "); source.append(numeric_string); source.append(" ss = 0; \n");

  // update of A matrix
  source.append(" for(uint i = grp_id + row_start; i < size1; i += grp_nm) { \n");
  source.append(" ss = 0; \n");

  // per-work-item partial dot product of V with row i
  source.append(" for(uint j = lcl_id; j < size2; j += lcl_sz) ss = ss + (V[j] * A[i * stride + j]); \n");
  source.append(" sums[lcl_id] = ss; \n");

  // work-group wide sum; result ends up in sums[0]
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");
  source.append(" col_reduce_lcl_array(sums, lcl_id, lcl_sz); \n");
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");

  source.append(" "); source.append(numeric_string); source.append(" sum_Av = sums[0]; \n");

  source.append(" for(uint j = lcl_id; j < size2; j += lcl_sz) \n");
  source.append(" A[i * stride + j] = A[i * stride + j] - (2 * V[j] * sum_Av); \n");
  source.append(" } \n");
  source.append("} \n");
}
325 
/** @brief Appends the OpenCL source of the 'house_update_QL' kernel to a program string.
 *
 * In the generated kernel each work-group processes rows
 * i = group_id, group_id + num_groups, ... below size1. For a row, the
 * work-items accumulate partial sums of V[j] * QL[i*strideQ + j] over
 * j in [0, size1), combine them in the __local array 'sums' via
 * col_reduce_lcl_array(), and then update
 *   QL[i*strideQ + j] -= 2 * V[j] * sums[0].
 * V is passed in __constant memory. 'sums' must provide one entry per
 * work-item of the work-group. The kernel calls col_reduce_lcl_array(), which
 * therefore has to be present earlier in the same program source. The
 * parameter size2 is not referenced by the kernel body.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_house_update_QL(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void house_update_QL( \n");
  source.append(" __global "); source.append(numeric_string); source.append("* QL, \n");
  source.append(" __constant "); source.append(numeric_string); source.append("* V, \n"); // householder vector
  source.append(" uint size1, \n");
  source.append(" uint size2, \n");
  source.append(" uint strideQ, \n");
  source.append(" __local "); source.append(numeric_string); source.append("* sums \n");
  source.append(" ) { \n");

  source.append(" uint grp_id = get_group_id(0); \n");
  source.append(" uint grp_nm = get_num_groups(0); \n");

  source.append(" uint lcl_id = get_local_id(0); \n");
  source.append(" uint lcl_sz = get_local_size(0); \n");

  source.append(" "); source.append(numeric_string); source.append(" ss = 0; \n");
  // update of left matrix
  source.append(" for(uint i = grp_id; i < size1; i += grp_nm) { \n");
  source.append(" ss = 0; \n");
  // per-work-item partial dot product of V with row i
  source.append(" for(uint j = lcl_id; j < size1; j += lcl_sz) ss = ss + (V[j] * QL[i * strideQ + j]); \n");
  source.append(" sums[lcl_id] = ss; \n");

  // work-group wide sum; result ends up in sums[0]
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");
  source.append(" col_reduce_lcl_array(sums, lcl_id, lcl_sz); \n");
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");

  source.append(" "); source.append(numeric_string); source.append(" sum_Qv = sums[0]; \n");

  source.append(" for(uint j = lcl_id; j < size1; j += lcl_sz) \n");
  source.append(" QL[i * strideQ + j] = QL[i * strideQ + j] - (2 * V[j] * sum_Qv); \n");
  source.append(" } \n");
  source.append("} \n");
}
365 
/** @brief Appends the OpenCL source of the 'house_update_QR' kernel to a program string.
 *
 * In the generated kernel each work-group processes rows
 * i = group_id, group_id + num_groups, ... below size2. For a row, the
 * work-items accumulate partial sums of V[j] * QR[i*strideQ + j] over
 * j in [0, size2), combine them in the __local array 'sums' via
 * col_reduce_lcl_array(), and then update
 *   QR[i*strideQ + j] -= 2 * V[j] * sums[0].
 * 'sums' must provide one entry per work-item of the work-group. The kernel
 * calls col_reduce_lcl_array(), which therefore has to be present earlier in
 * the same program source. The parameter size1 is not referenced by the
 * kernel body.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_house_update_QR(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void house_update_QR( \n");
  source.append(" __global "); source.append(numeric_string); source.append("* QR, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* V, \n"); // householder vector
  source.append(" uint size1, \n");
  source.append(" uint size2, \n");
  source.append(" uint strideQ, \n");
  source.append(" __local "); source.append(numeric_string); source.append("* sums \n");
  source.append(" ) { \n");

  source.append(" uint grp_id = get_group_id(0); \n");
  source.append(" uint grp_nm = get_num_groups(0); \n");

  source.append(" uint lcl_id = get_local_id(0); \n");
  source.append(" uint lcl_sz = get_local_size(0); \n");

  source.append(" "); source.append(numeric_string); source.append(" ss = 0; \n");

  // update of QR matrix
  // Actually, we are calculating a transpose of the right matrix. This allows us to avoid
  // cache misses.
  source.append(" for(uint i = grp_id; i < size2; i += grp_nm) { \n");
  source.append(" ss = 0; \n");
  // per-work-item partial dot product of V with row i
  source.append(" for(uint j = lcl_id; j < size2; j += lcl_sz) ss = ss + (V[j] * QR[i * strideQ + j]); \n");
  source.append(" sums[lcl_id] = ss; \n");

  // work-group wide sum; result ends up in sums[0]
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");
  source.append(" col_reduce_lcl_array(sums, lcl_id, lcl_sz); \n");
  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");

  source.append(" "); source.append(numeric_string); source.append(" sum_Qv = sums[0]; \n");
  source.append(" for(uint j = lcl_id; j < size2; j += lcl_sz) \n");
  source.append(" QR[i * strideQ + j] = QR[i * strideQ + j] - (2 * V[j] * sum_Qv); \n");
  source.append(" } \n");
  source.append("} \n");
}
406 
/** @brief Appends the OpenCL source of the 'inverse_signs' kernel to a program string.
 *
 * The generated kernel uses a two-dimensional index space. The work-item with
 * global ids (x, y), both below 'size', multiplies v[x*stride + y] by signs[x];
 * work-items outside that range do nothing.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_inverse_signs(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void inverse_signs(__global "); source.append(numeric_string); source.append("* v, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* signs, \n");
  source.append(" uint size, \n");
  source.append(" uint stride \n");
  source.append(" ) \n");
  source.append("{ \n");
  source.append(" uint glb_id_x = get_global_id(0); \n");
  source.append(" uint glb_id_y = get_global_id(1); \n");

  // one element per work-item, guarded against an oversized index space
  source.append(" if((glb_id_x < size) && (glb_id_y < size)) \n");
  source.append(" v[glb_id_x * stride + glb_id_y] *= signs[glb_id_x]; \n");
  source.append("} \n");
}
424 
/** @brief Appends the OpenCL source of the 'transpose_inplace' kernel to a program string.
 *
 * The generated kernel treats 'input' as a dense row_num x col_num array
 * without padding. For each linear index i = row*col_num + col it computes
 * new_pos = col*row_num + row and swaps input[i] with input[new_pos] when
 * i < new_pos, so that each pair is exchanged by exactly one work-item.
 *
 * Note: the mapping i -> new_pos is its own inverse only for
 * row_num == col_num, so the pairwise swap yields a transpose for square
 * inputs only.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_transpose_inplace(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void transpose_inplace(__global "); source.append(numeric_string); source.append("* input, \n");
  source.append(" unsigned int row_num, \n");
  source.append(" unsigned int col_num) { \n");
  source.append(" unsigned int size = row_num * col_num; \n");
  source.append(" for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n");
  // split the linear index into (row, col)
  source.append(" unsigned int row = i / col_num; \n");
  source.append(" unsigned int col = i - row*col_num; \n");

  // linear index of the mirrored element
  source.append(" unsigned int new_pos = col * row_num + row; \n");

  // only the lower index of a pair performs the swap
  source.append(" if(i < new_pos) { \n");
  source.append(" "); source.append(numeric_string); source.append(" val = input[i]; \n");
  source.append(" input[i] = input[new_pos]; \n");
  source.append(" input[new_pos] = val; \n");
  source.append(" } \n");
  source.append(" } \n");
  source.append("} \n");
}
451 
/** @brief Appends the OpenCL source of the 'update_qr_column' kernel to a program string.
 *
 * In the generated kernel each work-item processes indices
 * i = global_id, global_id + global_size, ... below last_n. For one i it
 * sweeps k from m to n - 1 over the entries A[k*stride + i], keeping a
 * sliding window (a_ik, a_ik_1, a_ik_2) of the values at k, k + 1, k + 2.
 * Step k reads five coefficients buf[5*k .. 5*k + 4], forms
 *   p = buf[5k]*a_ik + buf[5k+1]*a_ik_1 (+ buf[5k+2]*a_ik_2 unless k == n - 1),
 * stores A[k*stride + i] = a_ik - p and updates the remaining window entries
 * with buf[5k+3] and buf[5k+4]. After the sweep the remaining a_ik is stored
 * in A[n*stride + i].
 *
 * a_ik_2 is initialised to 0 in the kernel: on the last step (k == n - 1) it
 * is copied into a_ik_1 without having been loaded when m == n - 1, and
 * although that copy is never used afterwards, reading an uninitialised
 * private variable is avoided this way.
 *
 * @param source          String-like object the kernel text is appended to (only append() is used)
 * @param numeric_string  OpenCL scalar type name spliced into the kernel, e.g. "float" or "double"
 */
template <typename StringType>
void generate_svd_update_qr_column(StringType & source, std::string const & numeric_string)
{
  // kernel signature
  source.append("__kernel void update_qr_column(__global "); source.append(numeric_string); source.append("* A, \n");
  source.append(" uint stride, \n");
  source.append(" __global "); source.append(numeric_string); source.append("* buf, \n");
  source.append(" int m, \n");
  source.append(" int n, \n");
  source.append(" int last_n) \n");
  source.append("{ \n");
  source.append(" uint glb_id = get_global_id(0); \n");
  source.append(" uint glb_sz = get_global_size(0); \n");

  source.append(" for (int i = glb_id; i < last_n; i += glb_sz) \n");
  source.append(" { \n");
  // sliding window over rows k, k+1, k+2
  source.append(" "); source.append(numeric_string); source.append(" a_ik = A[m * stride + i], a_ik_1, a_ik_2 = 0; \n");

  source.append(" a_ik_1 = A[(m + 1) * stride + i]; \n");

  source.append(" for(int k = m; k < n; k++) \n");
  source.append(" { \n");
  source.append(" bool notlast = (k != n - 1); \n");

  source.append(" "); source.append(numeric_string); source.append(" p = buf[5 * k] * a_ik + buf[5 * k + 1] * a_ik_1; \n");

  // the third window entry only exists while k + 2 <= n
  source.append(" if (notlast) \n");
  source.append(" { \n");
  source.append(" a_ik_2 = A[(k + 2) * stride + i]; \n");
  source.append(" p = p + buf[5 * k + 2] * a_ik_2; \n");
  source.append(" a_ik_2 = a_ik_2 - p * buf[5 * k + 4]; \n");
  source.append(" } \n");

  source.append(" A[k * stride + i] = a_ik - p; \n");
  source.append(" a_ik_1 = a_ik_1 - p * buf[5 * k + 3]; \n");

  // shift the window by one row
  source.append(" a_ik = a_ik_1; \n");
  source.append(" a_ik_1 = a_ik_2; \n");
  source.append(" } \n");

  source.append(" A[n * stride + i] = a_ik; \n");
  source.append(" } \n");

  source.append("} \n");
}
496 
497 
498 
499 
500  // main kernel class
502  template <class NumericT>
503  struct svd
504  {
505  static std::string program_name()
506  {
508  }
509 
510  static void init(viennacl::ocl::context & ctx)
511  {
513  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
514 
515  static std::map<cl_context, bool> init_done;
516  if (!init_done[ctx.handle().get()])
517  {
518  std::string source;
519  source.reserve(1024);
520 
521  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
522 
523  // only generate for floating points (forces error for integers)
524  if (numeric_string == "float" || numeric_string == "double")
525  {
526  //helper function used by multiple kernels:
527  generate_svd_col_reduce_lcl_array(source, numeric_string);
528 
529  //kernels:
530  generate_svd_bidiag_pack(source, numeric_string);
531  generate_svd_copy_col(source, numeric_string);
532  generate_svd_copy_row(source, numeric_string);
533  generate_svd_final_iter_update(source, numeric_string);
534  generate_svd_givens_next(source, numeric_string);
535  generate_svd_givens_prev(source, numeric_string);
536  generate_svd_house_update_A_left(source, numeric_string);
537  generate_svd_house_update_A_right(source, numeric_string);
538  generate_svd_house_update_QL(source, numeric_string);
539  generate_svd_house_update_QR(source, numeric_string);
540  generate_svd_inverse_signs(source, numeric_string);
541  generate_svd_transpose_inplace(source, numeric_string);
542  generate_svd_update_qr_column(source, numeric_string);
543  }
544 
545  std::string prog_name = program_name();
546  #ifdef VIENNACL_BUILD_INFO
547  std::cout << "Creating program " << prog_name << std::endl;
548  #endif
549  ctx.add_program(source, prog_name);
550  init_done[ctx.handle().get()] = true;
551  } //if
552  } //init
553  };
554 
555  } // namespace kernels
556  } // namespace opencl
557  } // namespace linalg
558 } // namespace viennacl
559 #endif
560 
void generate_svd_givens_next(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:123
Implements an OpenCL platform within ViennaCL.
static std::string program_name()
Definition: svd.hpp:505
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
void generate_svd_copy_col(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:59
Provides OpenCL-related utilities.
void generate_svd_copy_row(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:79
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:476
static void init(viennacl::ocl::context &ctx)
Definition: svd.hpp:510
void generate_svd_bidiag_pack(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:20
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:503
const OCL_TYPE & get() const
Definition: handle.hpp:189
void generate_svd_house_update_A_right(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:282
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
void generate_svd_update_qr_column(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:453
void generate_svd_inverse_signs(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:408
void generate_svd_house_update_QR(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:367
void generate_svd_col_reduce_lcl_array(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:42
Representation of an OpenCL kernel in ViennaCL.
void generate_svd_final_iter_update(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:99
void generate_svd_givens_prev(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:185
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
void generate_svd_house_update_QL(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:327
void generate_svd_house_update_A_left(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:247
void generate_svd_transpose_inplace(StringType &source, std::string const &numeric_string)
Definition: svd.hpp:426