Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
gfx_factory.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 #ifndef __TBB_flow_graph_gfx_factory_H
22 #define __TBB_flow_graph_gfx_factory_H
23 
24 #include "tbb/tbb_config.h"
25 
26 #if __TBB_PREVIEW_GFX_FACTORY
27 
28 #include <vector>
29 #include <future>
30 #include <mutex>
31 #include <iostream>
32 
33 #include <gfx/gfx_rt.h>
34 #include <gfx/gfx_intrin.h>
35 #include <gfx/gfx_types.h>
36 
37 namespace tbb {
38 
39 namespace flow {
40 
41 namespace interface9 {
42 
43 template <typename T>
44 class gfx_buffer;
45 
46 namespace gfx_offload {
47 
48  typedef GfxTaskId task_id_type;
49 
50  //-----------------------------------------------------------------------
51  // GFX error checkers.
52  // For more debug output, set the GFX_LOG_OFFLOAD macro to 2.
53  //-----------------------------------------------------------------------
54 
55  // TODO: reconsider error handling approach. If exception is the right way
56  // then need to define and document a specific exception type.
57  inline void throw_gfx_exception() {
58  std::string msg = "GFX error occurred: " + std::to_string(_GFX_get_last_error());
59  std::cerr << msg << std::endl;
60  throw msg;
61  }
62 
64  if (err == 0) {
66  }
67  }
68 
69  inline void check_gfx_retcode(task_id_type err) {
70  if (err != GFX_SUCCESS) {
72  }
73  }
74 
75  //---------------------------------------------------------------------
76  // GFX asynchronous offload and share API
77  //---------------------------------------------------------------------
78 
79  // Sharing and unsharing data API
80  template<typename DataType, typename SizeType>
81  void share(DataType* p, SizeType n) { check_gfx_retcode(_GFX_share(p, sizeof(*p)*n)); }
82  template<typename DataType>
83  void unshare(DataType* p) { check_gfx_retcode(_GFX_unshare(p)); }
84 
85  // Retrieving array pointer from shared gfx_buffer
86  // Other types remain the same
87  template <typename T>
88  T* raw_data(gfx_buffer<T>& buffer) { return buffer.data(); }
89  template <typename T>
90  const T* raw_data(const gfx_buffer<T>& buffer) { return buffer.data(); }
91  template <typename T>
92  T& raw_data(T& data) { return data; }
93  template <typename T>
94  const T& raw_data(const T& data) { return data; }
95 
96  // Kernel enqueuing on device with arguments
97  template <typename F, typename ...ArgType>
98  task_id_type run_kernel(F ptr, ArgType&... args) {
99  task_id_type id = _GFX_offload(ptr, raw_data(args)...);
100 
101  // Check if something during offload went wrong (ex: driver initialization failure)
103 
104  return id;
105  }
106 
107  // Waiting for tasks completion
108  void wait_for_task(task_id_type id) { check_gfx_retcode(_GFX_wait(id)); }
109 
110 } // namespace gfx_offload
111 
// Array-like buffer of T whose storage is a std::vector<T> owned through a
// std::shared_ptr. No copy operations are declared, so copying a gfx_buffer
// copies the shared_ptr: all copies refer to the same underlying vector.
template <typename T>
class gfx_buffer {
public:

    typedef typename std::vector<T>::iterator iterator;
    typedef typename std::vector<T>::const_iterator const_iterator;

    typedef std::size_t size_type;

    // Creates an empty buffer.
    gfx_buffer() : my_vector_ptr(std::make_shared< std::vector<T> >()) {}
    // Creates a buffer of `size` value-initialized elements.
    gfx_buffer(size_type size) : my_vector_ptr(std::make_shared< std::vector<T> >(size)) {}

    // Pointer to the first element of the underlying vector.
    // std::vector::data() is used instead of &front(): front() has undefined
    // behavior on an empty vector, and a default-constructed buffer is empty.
    T* data() { return my_vector_ptr->data(); }
    const T* data() const { return my_vector_ptr->data(); }

    // Number of elements currently stored.
    size_type size() const { return my_vector_ptr->size(); }

    // Iterators over the underlying vector.
    const_iterator cbegin() const { return my_vector_ptr->cbegin(); }
    const_iterator cend() const { return my_vector_ptr->cend(); }
    iterator begin() { return my_vector_ptr->begin(); }
    iterator end() { return my_vector_ptr->end(); }

    // Unchecked element access, same contract as std::vector::operator[].
    T& operator[](size_type pos) { return (*my_vector_ptr)[pos]; }
    const T& operator[](size_type pos) const { return (*my_vector_ptr)[pos]; }

private:
    // Shared ownership of the storage; see the class comment for copy semantics.
    std::shared_ptr< std::vector<T> > my_vector_ptr;
};
140 
141 template<typename T>
143 public:
145 
147  gfx_async_msg(const T& input_data) : my_data(input_data), my_task_id(0) {}
148 
149  T& data() { return my_data; }
150  const T& data() const { return my_data; }
151 
153  kernel_id_type task_id() const { return my_task_id; }
154 
155 private:
158 };
159 
160 class gfx_factory {
161 private:
162 
163  // Wrapper for GFX kernel which is just a function
164  class func_wrapper {
165  public:
166 
167  template <typename F>
168  func_wrapper(F ptr) { my_ptr = reinterpret_cast<void*>(ptr); }
169 
170  template<typename ...Args>
171  void operator()(Args&&... args) {}
172 
173  operator void*() { return my_ptr; }
174 
175  private:
176  void* my_ptr;
177  };
178 
179 public:
180 
181  // Device specific types
182  template<typename T> using async_msg_type = gfx_async_msg<T>;
183 
185 
186  // Empty device type that is needed for Factory Concept
187  // but is not used in gfx_factory
188  typedef struct {} device_type;
189 
191 
193 
194  // Upload data to the device
195  template <typename ...Args>
196  void send_data(device_type /*device*/, Args&... args) {
197  send_data_impl(args...);
198  }
199 
200  // Run kernel on the device
201  template <typename ...Args>
202  void send_kernel(device_type /*device*/, const kernel_type& kernel, Args&... args) {
203  // Get packed T data from async_msg<T> and pass it to kernel
204  kernel_id_type id = gfx_offload::run_kernel(kernel, args.data()...);
205 
206  // Set id to async_msg
207  set_kernel_id(id, args...);
208 
209  // Extend the graph lifetime until the callback completion.
211 
212  // Mutex for future assignment
213  std::lock_guard<std::mutex> lock(future_assignment_mutex);
214 
215  // Set callback that waits for kernel execution
216  callback_future = std::async(std::launch::async, &gfx_factory::callback<Args...>, this, id, args...);
217  }
218 
219  // Finalization action after the kernel run
220  template <typename FinalizeFn, typename ...Args>
221  void finalize(device_type /*device*/, FinalizeFn fn, Args&... /*args*/) {
222  fn();
223  }
224 
225  // Empty device selector.
226  // No way to choose a device with GFX API.
228  public:
230  return device_type();
231  }
232  };
233 
234 private:
235 
236  //---------------------------------------------------------------------
237  // Callback for kernel result
238  //---------------------------------------------------------------------
239 
240  template <typename ...Args>
241  void callback(kernel_id_type id, Args... args) {
242  // Wait for the task with this specific id to complete
243  {
244  std::lock_guard<std::mutex> lock(task_wait_mutex);
245  if (current_task_id < id) {
248  }
249  }
250 
251  // Get result from device and set to async_msg (args)
252  receive_data(args...);
253 
254  // Data was sent to the graph, release the reference
256  }
257 
258  //---------------------------------------------------------------------
259  // send_data() arguments processing
260  //---------------------------------------------------------------------
261 
262  // GFX buffer shared data with device that will be executed on
263  template <typename T>
264  void share_data(T) {}
265 
266  template <typename T>
267  void share_data(gfx_buffer<T>& buffer) {
268  gfx_offload::share(buffer.data(), buffer.size());
269  }
270 
271  template <typename T>
272  void send_arg(T) {}
273 
274  template <typename T>
276  share_data(msg.data());
277  }
278 
279  void send_data_impl() {}
280 
281  template <typename T, typename ...Rest>
282  void send_data_impl(T& arg, Rest&... args) {
283  send_arg(arg);
284  send_data_impl(args...);
285  }
286 
287  //----------------------------------------------------------------------
288  // send_kernel() arguments processing
289  //----------------------------------------------------------------------
290 
291  template <typename T>
293 
294  template <typename T>
296  msg.set_task_id(id);
297  }
298 
300 
301  template <typename T, typename ...Rest>
302  void set_kernel_id(kernel_id_type id, T& arg, Rest&... args) {
303  set_kernel_id_arg(id, arg);
304  set_kernel_id(id, args...);
305  }
306 
307  //-----------------------------------------------------------------------
308  // Arguments processing after kernel execution.
309  // Unsharing buffers and forwarding results to the graph
310  //-----------------------------------------------------------------------
311 
312  // After kernel execution the data should be unshared
313  template <typename T>
314  void unshare_data(T) {}
315 
316  template <typename T>
317  void unshare_data(gfx_buffer<T>& buffer) {
318  gfx_offload::unshare(buffer.data());
319  }
320 
321  template <typename T>
322  void receive_arg(T) {}
323 
324  template <typename T>
326  unshare_data(msg.data());
327  msg.set(msg.data());
328  }
329 
330  void receive_data() {}
331 
332  template <typename T, typename ...Rest>
333  void receive_data(T& arg, Rest&... args) {
334  receive_arg(arg);
335  receive_data(args...);
336  }
337 
338  //-----------------------------------------------------------------------
340 
341  std::future<void> callback_future;
343 
345  std::mutex task_wait_mutex;
346 };
347 
348 } // namespace interface9
349 
352 
353 } // namespace flow
354 
355 } // namespace tbb
356 
357 #endif // __TBB_PREVIEW_GFX_FACTORY
358 
359 #endif // __TBB_flow_graph_gfx_factory_H
std::future< void > callback_future
Definition: gfx_factory.h:341
const_iterator cend() const
Definition: gfx_factory.h:130
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id id
void share_data(gfx_buffer< T > &buffer)
Definition: gfx_factory.h:267
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p 
const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
void finalize(device_type, FinalizeFn fn, Args &...)
Definition: gfx_factory.h:221
std::shared_ptr< std::vector< T > > my_vector_ptr
Definition: gfx_factory.h:138
void send_arg(async_msg_type< T > &msg)
Definition: gfx_factory.h:275
void share(DataType *p, SizeType n)
Definition: gfx_factory.h:81
void send_data_impl(T &arg, Rest &... args)
Definition: gfx_factory.h:282
const T & operator[](size_type pos) const
Definition: gfx_factory.h:135
void unshare_data(gfx_buffer< T > &buffer)
Definition: gfx_factory.h:317
void send_data(device_type, Args &... args)
Definition: gfx_factory.h:196
void const char const char int ITT_FORMAT __itt_group_sync p
gfx_offload::task_id_type kernel_id_type
Definition: gfx_factory.h:190
gfx_async_msg(const T &input_data)
Definition: gfx_factory.h:147
gfx_factory(tbb::flow::graph &g)
Definition: gfx_factory.h:192
void set_kernel_id(kernel_id_type id, T &arg, Rest &... args)
Definition: gfx_factory.h:302
void set_kernel_id(kernel_id_type)
Definition: gfx_factory.h:299
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void callback(kernel_id_type id, Args... args)
Definition: gfx_factory.h:241
void receive_arg(async_msg_type< T > &msg)
Definition: gfx_factory.h:325
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
void release_wait() __TBB_override
Deregisters an external entity that may have interacted with the graph.
Definition: flow_graph.h:782
void set_task_id(kernel_id_type id)
Definition: gfx_factory.h:152
The graph class.
void set_kernel_id_arg(kernel_id_type id, async_msg_type< T > &msg)
Definition: gfx_factory.h:295
void set_kernel_id_arg(kernel_id_type, T)
Definition: gfx_factory.h:292
void wait_for_task(task_id_type id)
Definition: gfx_factory.h:108
const_iterator cbegin() const
Definition: gfx_factory.h:129
void check_enqueue_retcode(task_id_type err)
Definition: gfx_factory.h:63
std::vector< T >::iterator iterator
Definition: gfx_factory.h:116
kernel_id_type task_id() const
Definition: gfx_factory.h:153
void check_gfx_retcode(task_id_type err)
Definition: gfx_factory.h:69
T * raw_data(gfx_buffer< T > &buffer)
Definition: gfx_factory.h:88
std::vector< T >::const_iterator const_iterator
Definition: gfx_factory.h:117
void reserve_wait() __TBB_override
Used to register that an external entity may still interact with the graph.
Definition: flow_graph.h:775
task_id_type run_kernel(F ptr, ArgType &... args)
Definition: gfx_factory.h:98
void receive_data(T &arg, Rest &... args)
Definition: gfx_factory.h:333
void send_kernel(device_type, const kernel_type &kernel, Args &... args)
Definition: gfx_factory.h:202
gfx_offload::task_id_type kernel_id_type
Definition: gfx_factory.h:144

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.