ViennaCL - The Vienna Computing Library  1.5.1
autotune.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GENERATOR_AUTOTUNE_HPP
2 #define VIENNACL_GENERATOR_AUTOTUNE_HPP
3 
4 
5 /* =========================================================================
6  Copyright (c) 2010-2014, Institute for Microelectronics,
7  Institute for Analysis and Scientific Computing,
8  TU Wien.
9  Portions of this software are copyright by UChicago Argonne, LLC.
10 
11  -----------------
12  ViennaCL - The Vienna Computing Library
13  -----------------
14 
15  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
16 
17  (A list of authors and contributors can be found in the PDF manual)
18 
19  License: MIT (X11), see file LICENSE in the base directory
20 ============================================================================= */
21 
22 
28 #include <ctime>
29 #include <iomanip>
30 #include <cmath>
31 #include <iterator>
32 
33 #include "viennacl/ocl/kernel.hpp"
34 #include "viennacl/ocl/infos.hpp"
35 
38 
39 #include "viennacl/tools/timer.hpp"
40 
41 namespace viennacl{
42 
43  namespace generator{
44 
45  namespace autotune{
46 
/** @brief A single tuning parameter: an ordered list of candidate values plus a cursor on the current one. */
class tuning_param{
  public:

    /** @brief The constructor. The cursor starts on the first value.
    *
    * Intentionally not 'explicit': tuning_config::add_tuning_param() inserts a
    * (name, std::vector<int>) pair and relies on the implicit conversion.
    *
    * @param values   The candidate values, visited in the given order
    */
    tuning_param(std::vector<int> const & values) : values_(values){ reset(); }

    /** @brief Returns true if the parameter has reached its last candidate value.
    *
    * Written as 'current_ + 1 >= size()' rather than 'current_ == size() - 1' so that an
    * empty value list counts as exhausted instead of wrapping around to a huge unsigned number
    * (which would make the parameter report "not max" forever).
    */
    bool is_max() const { return current_ + 1 >= values_.size(); }

    /** @brief Moves the cursor to the next candidate value.
    *
    * @return false if the cursor now sits on a valid value; true if it ran past the end,
    *         in which case it has been reset to the first value (i.e. a carry occurred)
    */
    bool inc(){
      ++current_;
      if(current_ < values_.size())
        return false;
      reset();
      return true;
    }

    /** @brief Returns the value under the cursor. The value list must not be empty. */
    int current() const{ return values_[current_]; }

    /** @brief Puts the cursor back on the first value. */
    void reset() { current_ = 0; }

  private:
    typedef std::vector<int>::size_type index_type;

    std::vector<int> values_;
    index_type current_;   // same type as values_.size(), avoids mixed-width comparisons
};
79 
88  template<class ConfigType>
90  private:
92  typedef std::map<std::string, viennacl::generator::autotune::tuning_param> params_t;
93 
94  public:
95  typedef ConfigType config_type;
96 
98  typedef typename config_type::profile_type profile_type;
99 
101  void add_tuning_param(std::string const & name, std::vector<int> const & values){
102  params_.insert(std::make_pair(name,values));
103  }
104 
106  bool has_next() const{
107  bool res = false;
108  for(typename params_t::const_iterator it = params_.begin() ; it != params_.end() ; ++it)
109  res = res || !it->second.is_max();
110  return res;
111  }
112 
114  void update(){
115  for(typename params_t::iterator it = params_.begin() ; it != params_.end() ; ++it)
116  if(it->second.inc()==false)
117  break;
118  }
119 
121  bool is_invalid(viennacl::ocl::device const & dev) const{
122  return config_type::is_invalid(dev,params_);
123  }
124 
126  typename config_type::profile_type get_current(){
127  return config_type::create_profile(params_);
128  }
129 
131  void reset(){
132  for(params_t::iterator it = params_.begin() ; it != params_.end() ; ++it){
133  it->second.reset();
134  }
135  }
136 
137  private:
138  params_t params_;
139  };
140 
142  template<class ProfileT>
143  double benchmark_impl(viennacl::scheduler::statement const & statement, code_generator::forced_profile_key_type key, ProfileT const & prof, unsigned int n_runs){
144 
145  tools::timer t;
146  std::list<viennacl::ocl::kernel *> kernels;
148  gen.force_profile(key, prof);
149  gen.add(statement, statement.array()[0]);
153  t.start();
154 
155  for(unsigned int i = 0 ; i < n_runs ; ++i)
158  return (double)t.get()/n_runs;
159  }
160 
161 
173  template<class ConfigType>
174  void benchmark(std::map<double, typename ConfigType::profile_type> * timings, scheduler::statement const & op, code_generator::forced_profile_key_type const & key, tuning_config<ConfigType> & config, unsigned int n_runs, std::ofstream * out){
176  unsigned int n_conf = 0;
177  while(config.has_next()){
178  config.update();
179  typename ConfigType::profile_type const & profile = config.get_current();
180  if(config.is_invalid(dev) || profile.is_slow(dev))
181  continue;
182  ++n_conf;
183  }
184  config.reset();
185 
186  unsigned int n = 0;
187  while(config.has_next()){
188  config.update();
189  typename ConfigType::profile_type const & profile = config.get_current();
190  if(config.is_invalid(dev) || profile.is_slow(dev))
191  continue;
192  double percent = (double)n++*100/n_conf;
193  double exec_time = benchmark_impl(op,key,profile,n_runs);
194  timings->insert(std::make_pair(exec_time, profile));
195  std::cout << '\r' << "Autotuning..." << "[" << std::setprecision(2) << std::setfill (' ') << std::setw(6) << std::fixed << percent << "%" << "]"
196  << " | Best : " << timings->begin()->second << " => " << std::scientific << std::right << std::setprecision(2) << timings->begin()->first << std::flush;
197  if(out)
198  *out << std::setprecision(3) << std::scientific << exec_time << "," << profile.csv_representation() << std::endl ;
199  }
200  std::cout << '\r' << "Autotuning..." << "[100.00%]" << std::endl;
201  }
202 
203  }
204 
205  }
206 
207 }
208 #endif // AUTOTUNE_HPP
config_type::profile_type get_current()
Returns the current profile.
Definition: autotune.hpp:126
Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
Definition: timer.hpp:87
bool has_next() const
Returns true if the tuning config has not yet explored all of its possibilities.
Definition: autotune.hpp:106
viennacl::ocl::program & get_configured_program(viennacl::generator::code_generator const &generator, std::list< viennacl::ocl::kernel * > &kernels, bool force_recompilation=false)
Creates the program associated with a generator object and fills the kernels. Checks the context for ...
Definition: generate.hpp:351
void reset()
Resets the parameter to its minimum value.
Definition: autotune.hpp:73
void enqueue(viennacl::generator::code_generator const &generator, bool force_recompilation=false)
Set the arguments and enqueue a generator object.
Definition: generate.hpp:372
bool is_invalid(viennacl::ocl::device const &dev) const
Returns true if the compilation/execution of the underlying profile has an undefined behavior...
Definition: autotune.hpp:121
void force_profile(forced_profile_key_type key, T const &t)
Force the generator to use a specific profile for an operation.
Definition: generate.hpp:225
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
double get() const
Definition: timer.hpp:101
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
class for a tuning parameter
Definition: autotune.hpp:48
the user interface for the code generator
tuning_param(std::vector< int > const &values)
The constructor.
Definition: autotune.hpp:55
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:330
double benchmark_impl(viennacl::scheduler::statement const &statement, code_generator::forced_profile_key_type key, ProfileT const &prof, unsigned int n_runs)
Add the timing value for a given profile and a statement.
Definition: autotune.hpp:143
Implementation of convenience functions to get infos.
Tuning configuration.
Definition: autotune.hpp:89
bool inc()
Increments the parameter.
Definition: autotune.hpp:61
void update()
Update the parameters of the config.
Definition: autotune.hpp:114
void add_tuning_param(std::string const &name, std::vector< int > const &values)
Add a tuning parameter to the config.
Definition: autotune.hpp:101
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
void benchmark(std::map< double, typename ConfigType::profile_type > *timings, scheduler::statement const &op, code_generator::forced_profile_key_type const &key, tuning_config< ConfigType > &config, unsigned int n_runs, std::ofstream *out)
Fills a timing map for a given statement and a benchmark configuration.
Definition: autotune.hpp:174
int current() const
Returns the current value of the parameter.
Definition: autotune.hpp:70
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.
container_type const & array() const
Definition: forwards.h:473
Representation of an OpenCL kernel in ViennaCL.
Class for handling code generation.
Definition: generate.hpp:47
bool add(scheduler::statement const &statement, scheduler::statement_node const &root_node)
Add a statement and the root node to the expression list.
Definition: generate.hpp:232
config_type::profile_type profile_type
Accessor for profile_type.
Definition: autotune.hpp:98
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:447
void reset()
Reset the config.
Definition: autotune.hpp:131
std::pair< expression_type, vcl_size_t > forced_profile_key_type
typedef of the key used in the forced profiles. Contains the expression type and the size of the scal...
Definition: generate.hpp:50
bool is_max() const
Returns true if the parameter has reached its maximum value.
Definition: autotune.hpp:58
void start()
Definition: timer.hpp:94
ConfigType config_type
Definition: autotune.hpp:95