ViennaCL - The Vienna Computing Library  1.5.1
profile_base.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE
2 #define VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
27 #include <list>
28 #include <set>
29 
30 #include "viennacl/ocl/backend.hpp"
31 #include "viennacl/ocl/kernel.hpp"
32 #include "viennacl/ocl/device.hpp"
34 #include "viennacl/ocl/infos.hpp"
35 
37 
40 
41 namespace viennacl{
42 
43  namespace generator{
44 
45 
/** @brief Base class for an operation profile.
 *
 *  Stores the vectorization width, two local work-group sizes and a kernel count, and
 *  drives kernel source generation through operator(), which emits one kernel function
 *  per kernel id and delegates each kernel body to the pure virtual core().
 */
47  class profile_base{
48  public:
    /** @brief List of (scheduler::statement, scheduler::statement_node) pairs handed to the generation routines. */
49  typedef std::list< std::pair<scheduler::statement, scheduler::statement_node> > statements_type;
50 
51  protected:
    // Declared as a friend so the streaming operator can call the protected print().
52  friend std::ostream & operator<<(std::ostream &, profile_base const &);
53 
    /** @brief Extra validity check consulted by is_invalid(); this base version never rejects (returns false). */
54  virtual bool invalid_impl(viennacl::ocl::device const & /*dev*/, vcl_size_t /*scalartype_size*/) const { return false; }
    /** @brief Extra slowness check consulted by is_slow(); this base version returns false. */
55  virtual bool is_slow_impl(viennacl::ocl::device const &) const { return false; }
56 
    /** @brief Local memory requirement that is_invalid() compares against dev.local_mem_size(); this base version reports 0. */
57  virtual vcl_size_t lmem_used(vcl_size_t /*scalartype_size*/) const { return 0; }
58 
    // NOTE(review): the body is empty as shown; this listing jumps from line 59 to line 62,
    // so statements of the original file may be missing from this view - confirm against the header.
59  void configure_local_sizes(viennacl::ocl::kernel & k, vcl_size_t /*kernel_id*/) const {
62  }
63 
    /** @brief Writes csv_representation() to the given stream. */
64  virtual void print(std::ostream & s) const{
65  s << csv_representation();
66  }
67 
    /** @brief Generates the body of the associated kernel function.
     *
     *  Called by operator() between the opening and closing brace of each generated kernel.
     *
     *  @param kernel_id   index of the kernel being generated (operator() passes 0 .. num_kernels()-1)
     *  @param stream      stream the kernel source is written to
     *  @param statements  the statements the kernel is generated for
     *  @param mapping     one entry per statement, filled by operator() before this call
     */
75  virtual void core(vcl_size_t kernel_id, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const = 0;
76 
77  public:
    /** @brief The constructor. Stores the four arguments in the corresponding members.
     *
     *  @param vectorization  value later returned by vector_size()
     *  @param local_size_1   first local size (first reqd_work_group_size entry emitted by operator())
     *  @param local_size_2   second local size (second reqd_work_group_size entry emitted by operator())
     *  @param num_kernels    number of kernel functions operator() emits
     */
79  profile_base(unsigned int vectorization, vcl_size_t local_size_1, vcl_size_t local_size_2, vcl_size_t num_kernels) : vector_size_(vectorization), local_size_1_(local_size_1), local_size_2_(local_size_2), num_kernels_(num_kernels){ }
80 
    /** @brief The destructor (virtual, as the class is used polymorphically). */
82  virtual ~profile_base(){ }
83 
    /** @brief Configures the range and enqueues the arguments associated with the profile. */
85  virtual void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const = 0;
86 
    /** @brief Writes the profile-specific kernel arguments into arguments_string.
     *
     *  operator() calls this first when it assembles the kernel prototype and later strips the last character
     *  of the assembled string as a trailing comma.
     */
87  virtual void kernel_arguments(statements_type const & statements, std::string & arguments_string) const = 0;
88 
    /** @brief Get the vector size of the kernel. */
90  unsigned int vector_size() const { return vector_size_; }
91 
    /** @brief csv representation of an operation; print() streams this string. */
95  virtual std::string csv_representation() const = 0;
96 
    /** @brief Returns whether or not the profile is likely to be slow on a particular device.
     *
     *  For GPU devices the profile is flagged when local_size_1_*local_size_2_ is not a multiple of the
     *  assumed warp size (32 by default, 64 when vendor_id() is 4098). The result is OR-ed with is_slow_impl().
     *
     *  @param dev  device to evaluate the profile for
     */
99  bool is_slow(viennacl::ocl::device const & dev) const{
100  bool res = false;
101  if(dev.type()==CL_DEVICE_TYPE_GPU){
102  vcl_size_t warp_size = 32;
     // NOTE(review): 4098 == 0x1002, presumably the AMD vendor id - confirm.
103  if(dev.vendor_id()==4098)
104  warp_size = 64;
105  res = static_cast<bool>(((local_size_1_*local_size_2_)%warp_size)>0);
106  }
107  return res || is_slow_impl(dev);
108  }
109 
    /** @brief Returns whether or not the profile leads to undefined behavior on a particular device.
     *
     *  The profile is invalid when the work-group size exceeds the device limits, when lmem_used() exceeds
     *  the device's local memory size, or when invalid_impl() rejects it.
     *
     *  @param dev              device whose limits are queried
     *  @param scalartype_size  forwarded to lmem_used() and invalid_impl()
     */
114  bool is_invalid(viennacl::ocl::device const & dev, vcl_size_t scalartype_size) const{
115  //Query device information
116  vcl_size_t lmem_available = static_cast<vcl_size_t>(dev.local_mem_size());
117  vcl_size_t max_workgroup_size = dev.max_work_group_size();
118 
     // NOTE(review): entries [0] and [1] are read without checking the size of the returned vector.
119  std::vector<vcl_size_t> max_work_item_sizes = dev.max_work_item_sizes();
120  bool invalid_work_group_sizes = local_size_1_*local_size_2_ > max_workgroup_size
121  || local_size_1_ > max_work_item_sizes[0]
122  || local_size_2_ > max_work_item_sizes[1]; // uses too many resources
123 
124  return invalid_work_group_sizes
125  || lmem_used(scalartype_size)>lmem_available
126  || invalid_impl(dev, scalartype_size);
127  }
128 
    // NOTE(review): num_kernels() is called in operator() below, but its definition (line 130 of the
    // original file) does not appear in this listing.
131 
    /** @brief Generates the code associated with this profile onto the provided stream.
     *
     *  Builds the kernel prototype once, then emits num_kernels() kernel functions named
     *  "kernel_<device_offset>_<n>", each with a reqd_work_group_size attribute and a body produced by core().
     *
     *  @param stream         stream the kernel sources are written to
     *  @param device_offset  number embedded in the generated kernel names
     *  @param statements     the statements to generate code for
     */
138  virtual void operator()(utils::kernel_generation_stream & stream, vcl_size_t device_offset, statements_type const & statements) const {
     // One mapping per statement, indexed in list order.
139  std::vector<detail::mapping_type> mapping(statements.size());
140 
142  std::string prototype;
143  std::set<std::string> already_generated;
144  kernel_arguments(statements, prototype);
145 
     // First pass: fill mapping[i] for every statement. 'memory' and 'current_arg' are shared across
     // all statements and only live for the duration of this pass.
146  {
147  std::map<void *, vcl_size_t> memory;
148  unsigned int current_arg = 0;
149  vcl_size_t i = 0;
150  for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it)
151  detail::traverse(it->first, it->second, detail::map_functor(memory,current_arg,mapping[i++]));
152  }
153 
     // Second pass: extend the prototype string per statement; 'already_generated' is shared across statements.
     // std::distance on a std::list iterator is linear, so this index lookup walks the list on every iteration.
154  for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
155  detail::traverse(it->first, it->second, detail::prototype_generation_traversal(already_generated, prototype, vector_size(), mapping[std::distance(statements.begin(), it)]));
156  }
157 
     // NOTE(review): if prototype is empty at this point, size()-1 wraps around and std::string::erase
     // throws std::out_of_range - confirm that an empty prototype cannot occur.
158  prototype.erase(prototype.size()-1); //Last comma pruned
159 
160  //Generate
161  for(vcl_size_t n = 0 ; n < num_kernels() ; ++n){
162  //stream << "__attribute__((vec_type_hint()))" << std::endl;
163  stream << " __attribute__((reqd_work_group_size(" << local_size_1_ << "," << local_size_2_ << "," << 1 << ")))" << std::endl;
164  stream << "__kernel " << "void " << "kernel_" << device_offset << "_" << n << "(" << std::endl;
165  stream << prototype << std::endl;
166  stream << ")" << std::endl;
167 
168  //core:
169  stream << "{" << std::endl;
170  stream.inc_tab();
171  core(n, stream, statements, mapping);
172  stream.dec_tab();
173  stream << "}" << std::endl;
174  }
175  }
176 
177  protected:
     // Value returned by vector_size(); set from the constructor's 'vectorization' argument.
178  unsigned int vector_size_;
     // NOTE(review): local_size_1_, local_size_2_ and num_kernels_ are initialised by the constructor and used
     // above, but their declarations (lines 179-181 of the original file) do not appear in this listing.
182  };
183 
184 
/** @brief Streams a profile by forwarding to its print() method and returns the stream for chaining. */
185  inline std::ostream & operator<<(std::ostream & os, profile_base const & profile){
186  profile.print(os);
187  return os;
188  }
189 
190  }
191 
192 }
193 
194 #endif
friend std::ostream & operator<<(std::ostream &, profile_base const &)
Definition: profile_base.hpp:185
A stream class where the kernel sources are streamed to. Takes care of indentation of the sources.
Definition: utils.hpp:233
cl_ulong local_mem_size() const
Size of local memory arena in bytes. The minimum value is 32 KB.
Definition: device.hpp:358
virtual void operator()(utils::kernel_generation_stream &stream, vcl_size_t device_offset, statements_type const &statements) const
Generates the code associated with this profile onto the provided stream. Redirects to the virtual core() method.
Definition: profile_base.hpp:138
std::size_t vcl_size_t
Definition: forwards.h:58
vcl_size_t num_kernels_
Definition: profile_base.hpp:181
Represents an OpenCL device within ViennaCL.
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution model.
Definition: device.hpp:481
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Base class for an operation profile.
Definition: profile_base.hpp:47
virtual ~profile_base()
The destructor.
Definition: profile_base.hpp:82
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
virtual bool is_slow_impl(viennacl::ocl::device const &) const
Definition: profile_base.hpp:55
Functor to map the statements to the types defined in mapped_objects.hpp.
virtual std::string csv_representation() const =0
csv representation of an operation
std::list< std::pair< scheduler::statement, scheduler::statement_node > > statements_type
Definition: profile_base.hpp:49
several code generation helpers
Implementation of convenience functions to get infos.
std::vector< size_t > max_work_item_sizes() const
Maximum number of work-items that can be specified in each dimension of the work-group.
Definition: device.hpp:508
virtual void core(vcl_size_t kernel_id, utils::kernel_generation_stream &stream, statements_type const &statements, std::vector< detail::mapping_type > const &mapping) const =0
Generates the body of the associated kernel function.
functor for generating the prototype of a statement
Definition: helpers.hpp:152
virtual void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const &statements, viennacl::ocl::kernel &k, unsigned int &n_arg) const =0
Configures the range and enqueues the arguments associated with the profile.
Functor to map the statements to the types defined in mapped_objects.hpp.
Definition: map_functor.hpp:47
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID.
Definition: device.hpp:897
Various utility implementations for dispatching with respect to the different devices available on the market.
void configure_local_sizes(viennacl::ocl::kernel &k, vcl_size_t) const
Definition: profile_base.hpp:59
std::ostream & operator<<(std::ostream &os, profile_base const &profile)
Definition: profile_base.hpp:185
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
Implementations of the OpenCL backend, where all contexts are stored in.
virtual void kernel_arguments(statements_type const &statements, std::string &arguments_string) const =0
unsigned int vector_size_
Definition: profile_base.hpp:178
unsigned int vector_size() const
Get the vector size of the kernel.
Definition: profile_base.hpp:90
virtual void print(std::ostream &s) const
Definition: profile_base.hpp:64
Representation of an OpenCL kernel in ViennaCL.
bool is_slow(viennacl::ocl::device const &dev) const
returns whether or not the profile is likely to be slow on a particular device
Definition: profile_base.hpp:99
vcl_size_t num_kernels() const
Returns the number of kernels needed by this operation.
Definition: profile_base.hpp:130
virtual bool invalid_impl(viennacl::ocl::device const &, vcl_size_t) const
Definition: profile_base.hpp:54
vcl_size_t local_size_2_
Definition: profile_base.hpp:180
virtual vcl_size_t lmem_used(vcl_size_t) const
Definition: profile_base.hpp:57
profile_base(unsigned int vectorization, vcl_size_t local_size_1, vcl_size_t local_size_2, vcl_size_t num_kernels)
The constructor.
Definition: profile_base.hpp:79
cl_device_type type() const
The OpenCL device type.
Definition: device.hpp:873
vcl_size_t local_size_1_
Definition: profile_base.hpp:179
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
bool is_invalid(viennacl::ocl::device const &dev, vcl_size_t scalartype_size) const
Returns whether or not the profile leads to undefined behavior on a particular device.
Definition: profile_base.hpp:114