Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb::flow::interface10::opencl_factory< DeviceFilter > Class Template Reference

#include <flow_graph_opencl_node.h>

Inheritance diagram for tbb::flow::interface10::opencl_factory< DeviceFilter >:
Collaboration diagram for tbb::flow::interface10::opencl_factory< DeviceFilter >:

Classes

struct  finalize_fn
 
struct  finalize_fn_leaf
 
class  kernel
 

Public Types

template<typename T >
using async_msg_type = opencl_async_msg< T, opencl_factory< DeviceFilter > >
 
typedef opencl_device device_type
 
typedef kernel kernel_type
 
typedef opencl_range range_type
 

Public Member Functions

 opencl_factory ()
 
 ~opencl_factory ()
 
bool init (const opencl_device_list &device_list)
 
template<typename ... Args>
void send_kernel (opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
 
template<typename T , typename ... Rest>
void send_data (opencl_device device, T &t, Rest &... args)
 
void send_data (opencl_device)
 
template<typename FinalizeFn , typename ... Args>
void finalize (opencl_device device, FinalizeFn fn, Args &... args)
 
const opencl_device_list & devices ()
 

Private Member Functions

template<typename Factory >
void enqueue_map_buffer (opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
 
template<typename Factory >
void enqueue_unmap_buffer (opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
 
template<size_t NUM_ARGS, typename T >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
 
template<size_t NUM_ARGS, typename T , typename F >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
 
template<size_t NUM_ARGS, typename T , typename ... Rest>
void process_arg_list (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
 
template<size_t NUM_ARGS>
void process_arg_list (const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
 
template<typename T >
void update_one_arg (cl_event, T &)
 
template<typename T , typename F >
void update_one_arg (cl_event e, opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
void update_arg_list (cl_event e, T &t, Rest &... args)
 
void update_arg_list (cl_event)
 
cl_event send_kernel_impl (opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
 
template<typename T >
bool get_event_from_one_arg (cl_event &, const T &)
 
template<typename T , typename F >
bool get_event_from_one_arg (cl_event &e, const opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
bool get_event_from_args (cl_event &e, const T &t, const Rest &... args)
 
bool get_event_from_args (cl_event &)
 
bool is_same_context (opencl_device::device_id_type d1, opencl_device::device_id_type d2)
 
 opencl_factory (const opencl_factory &)
 
opencl_factory & operator= (const opencl_factory &)
 
cl_context context ()
 
void init_once ()
 

Static Private Member Functions

static void CL_CALLBACK finalize_callback (cl_event, cl_int event_command_exec_status, void *data)
 

Private Attributes

std::once_flag my_once_flag
 
opencl_device_list my_devices
 
cl_context my_cl_context
 
tbb::spin_mutex my_devices_mutex
 

Friends

template<typename Factory >
class opencl_program
 
template<typename Factory >
class opencl_buffer_impl
 
template<typename Factory >
class opencl_memory
 

Detailed Description

template<typename DeviceFilter>
class tbb::flow::interface10::opencl_factory< DeviceFilter >

Definition at line 49 of file flow_graph_opencl_node.h.

Member Typedef Documentation

◆ async_msg_type

template<typename DeviceFilter>
template<typename T >
using tbb::flow::interface10::opencl_factory< DeviceFilter >::async_msg_type = opencl_async_msg<T, opencl_factory<DeviceFilter> >

Definition at line 813 of file flow_graph_opencl_node.h.

◆ device_type

template<typename DeviceFilter>
typedef opencl_device tbb::flow::interface10::opencl_factory< DeviceFilter >::device_type

Definition at line 814 of file flow_graph_opencl_node.h.

◆ kernel_type

template<typename DeviceFilter>
typedef kernel tbb::flow::interface10::opencl_factory< DeviceFilter >::kernel_type

Definition at line 856 of file flow_graph_opencl_node.h.

◆ range_type

template<typename DeviceFilter>
typedef opencl_range tbb::flow::interface10::opencl_factory< DeviceFilter >::range_type

Definition at line 860 of file flow_graph_opencl_node.h.

Constructor & Destructor Documentation

◆ opencl_factory() [1/2]

template<typename DeviceFilter>
tbb::flow::interface10::opencl_factory< DeviceFilter >::opencl_factory ( )
inline

Definition at line 862 of file flow_graph_opencl_node.h.

862 {}

◆ ~opencl_factory()

template<typename DeviceFilter>
tbb::flow::interface10::opencl_factory< DeviceFilter >::~opencl_factory ( )
inline

Definition at line 863 of file flow_graph_opencl_node.h.

863  {
864  if ( my_devices.size() ) {
865  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
866  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
867  }
868  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
869  }
870  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void enforce_cl_retcode(cl_int err, std::string msg)

◆ opencl_factory() [2/2]

template<typename DeviceFilter>
tbb::flow::interface10::opencl_factory< DeviceFilter >::opencl_factory ( const opencl_factory< DeviceFilter > &  )
private

Member Function Documentation

◆ context()

template<typename DeviceFilter>
cl_context tbb::flow::interface10::opencl_factory< DeviceFilter >::context ( )
inline private

◆ devices()

template<typename DeviceFilter>
const opencl_device_list& tbb::flow::interface10::opencl_factory< DeviceFilter >::devices ( )
inline

Definition at line 1062 of file flow_graph_opencl_node.h.

◆ enqueue_map_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::enqueue_map_buffer ( opencl_device  device,
opencl_buffer_impl< Factory > &  buffer,
opencl_async_msg< void *, Factory > &  dmsg 
)
inline private

Definition at line 884 of file flow_graph_opencl_node.h.

884  {
885  cl_event const* e1 = dmsg.get_event();
886  cl_event e2;
887  cl_int err;
888  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
889  e1 == NULL ? 0 : 1, e1, &e2, &err );
890  enforce_cl_retcode( err, "Failed to map a buffer" );
891  dmsg.data( false ) = ptr;
892  dmsg.set_event( e2 );
893  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
894  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ enqueue_unmap_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::enqueue_unmap_buffer ( opencl_device  device,
opencl_memory< Factory > &  memory,
opencl_async_msg< void *, Factory > &  dmsg 
)
inline private

Definition at line 898 of file flow_graph_opencl_node.h.

898  {
899  cl_event const* e1 = dmsg.get_event();
900  cl_event e2;
901  enforce_cl_retcode(
902  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
903  "Failed to unmap a buffer" );
904  dmsg.set_event( e2 );
905  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
906  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ finalize()

template<typename DeviceFilter>
template<typename FinalizeFn , typename ... Args>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::finalize ( opencl_device  device,
FinalizeFn  fn,
Args &...  args 
)
inline

Definition at line 1051 of file flow_graph_opencl_node.h.

1051  {
1052  cl_event e;
1053 
1054  if ( get_event_from_args( e, args... ) ) {
1055  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1056  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1057  }
1058 
1059  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1060  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p 
const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
void enforce_cl_retcode(cl_int err, std::string msg)

◆ finalize_callback()

template<typename DeviceFilter>
static void CL_CALLBACK tbb::flow::interface10::opencl_factory< DeviceFilter >::finalize_callback ( cl_event  ,
cl_int  event_command_exec_status,
void *  data 
)
inline static private

Definition at line 1038 of file flow_graph_opencl_node.h.

1038  {
1039  tbb::internal::suppress_unused_warning(event_command_exec_status);
1040  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1041 
1042  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1043  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1044  (*fn_ptr)();
1045 
1046  // Function pointer was created by 'new' & this callback must be called once only
1047  delete fn_ptr;
1048  }
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:381
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::finalize().

Here is the caller graph for this function:

◆ get_event_from_args() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  e,
const T &  t,
const Rest &...  args 
)
inline private

Definition at line 1013 of file flow_graph_opencl_node.h.

1013  {
1014  if ( get_event_from_one_arg( e, t ) ) {
1015  return true;
1016  }
1017 
1018  return get_event_from_args( e, args... );
1019  }
bool get_event_from_one_arg(cl_event &, const T &)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::finalize(), and tbb::flow::interface10::opencl_factory< default_device_filter >::get_event_from_args().

Here is the caller graph for this function:

◆ get_event_from_args() [2/2]

template<typename DeviceFilter>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  )
inline private

Definition at line 1021 of file flow_graph_opencl_node.h.

1021  {
1022  return false;
1023  }

◆ get_event_from_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  ,
const T &   
)
inline private

Definition at line 996 of file flow_graph_opencl_node.h.

996  {
997  return false;
998  }

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::get_event_from_args().

Here is the caller graph for this function:

◆ get_event_from_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  e,
const opencl_async_msg< T, F > &  msg 
)
inline private

Definition at line 1001 of file flow_graph_opencl_node.h.

1001  {
1002  cl_event const *e_ptr = msg.get_event();
1003 
1004  if ( e_ptr != NULL ) {
1005  e = *e_ptr;
1006  return true;
1007  }
1008 
1009  return false;
1010  }

◆ init()

template<typename DeviceFilter>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::init ( const opencl_device_list &  device_list)
inline

Definition at line 872 of file flow_graph_opencl_node.h.

872  {
873  tbb::spin_mutex::scoped_lock lock( my_devices_mutex );
874  if ( !my_devices.size() ) {
875  my_devices = device_list;
876  return true;
877  }
878  return false;
879  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
Represents acquisition of a mutex.
Definition: spin_mutex.h:54

◆ init_once()

template<typename DeviceFilter>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::init_once ( )
inline private

Definition at line 1084 of file flow_graph_opencl_node.h.

1084  {
1085  {
1086  tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1087  if (!my_devices.size())
1088  my_devices = DeviceFilter()( opencl_info::available_devices() );
1089  }
1090 
1091  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1092  cl_platform_id platform_id = my_devices.begin()->platform_id();
1093  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1094  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1095 
1096  std::vector<cl_device_id> cl_device_ids;
1097  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1098  cl_device_ids.push_back((*d).my_cl_device_id);
1099  }
1100 
1101  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1102  cl_int err;
1103  cl_context ctx = clCreateContext(context_properties,
1104  (cl_uint)cl_device_ids.size(),
1105  cl_device_ids.data(),
1106  NULL, NULL, &err);
1107  enforce_cl_retcode(err, "Failed to create context");
1108  my_cl_context = ctx;
1109 
1110  size_t device_counter = 0;
1111  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1112  (*d).my_device_id = device_counter++;
1113  cl_int err2;
1114  cl_command_queue cq;
1115 #if CL_VERSION_2_0
1116  if ((*d).major_version() >= 2) {
1117  if ((*d).out_of_order_exec_mode_on_host_present()) {
1118  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1119  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1120  } else {
1121  cl_queue_properties props[] = { 0 };
1122  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1123  }
1124  } else
1125 #endif
1126  {
1127  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1128  // Suppress "declared deprecated" warning for the next line.
1129 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1130 #pragma GCC diagnostic push
1131 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1132 #endif
1133 #if _MSC_VER || __INTEL_COMPILER
1134 #pragma warning( push )
1135 #if __INTEL_COMPILER
1136 #pragma warning (disable: 1478)
1137 #else
1138 #pragma warning (disable: 4996)
1139 #endif
1140 #endif
1141  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1142 #if _MSC_VER || __INTEL_COMPILER
1143 #pragma warning( pop )
1144 #endif
1145 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1146 #pragma GCC diagnostic pop
1147 #endif
1148  }
1149  enforce_cl_retcode(err2, "Failed to create command queue");
1150  (*d).my_cl_command_queue = cq;
1151  }
1152  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void enforce_cl_retcode(cl_int err, std::string msg)
Represents acquisition of a mutex.
Definition: spin_mutex.h:54
const opencl_device_list & available_devices()

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::context(), and tbb::flow::interface10::opencl_factory< default_device_filter >::devices().

Here is the caller graph for this function:

◆ is_same_context()

template<typename DeviceFilter>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::is_same_context ( opencl_device::device_id_type  d1,
opencl_device::device_id_type  d2 
)
inline private

Definition at line 1068 of file flow_graph_opencl_node.h.

1068  {
1070  // Currently, factory supports only one context so if both devices are not host it means they are in the same context.
1071  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1072  return true;
1073  return d1 == d2;
1074  }
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169

◆ operator=()

template<typename DeviceFilter>
opencl_factory& tbb::flow::interface10::opencl_factory< DeviceFilter >::operator= ( const opencl_factory< DeviceFilter > &  )
private

◆ process_arg_list() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename ... Rest>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  events,
int &  num_events,
int &  place,
const T &  t,
const Rest &...  args 
)
inline private

Definition at line 928 of file flow_graph_opencl_node.h.

928  {
929  process_one_arg( kernel, events, num_events, place, t );
930  process_arg_list( kernel, events, num_events, place, args... );
931  }
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::process_arg_list(), and tbb::flow::interface10::opencl_factory< default_device_filter >::send_kernel().

Here is the caller graph for this function:

◆ process_arg_list() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type &  ,
std::array< cl_event, NUM_ARGS > &  ,
int &  ,
int &   
)
inline private

Definition at line 934 of file flow_graph_opencl_node.h.

934 {}

◆ process_one_arg() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  ,
int &  ,
int &  place,
const T &  t 
)
inline private

Definition at line 910 of file flow_graph_opencl_node.h.

910  {
911  auto p = get_native_object(t);
912  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
913  }
void const char const char int ITT_FORMAT __itt_group_sync p
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
void enforce_cl_retcode(cl_int err, std::string msg)

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::process_arg_list(), and tbb::flow::interface10::opencl_factory< default_device_filter >::process_one_arg().

Here is the caller graph for this function:

◆ process_one_arg() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename F >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  events,
int &  num_events,
int &  place,
const opencl_async_msg< T, F > &  msg 
)
inline private

Definition at line 916 of file flow_graph_opencl_node.h.

916  {
917  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
918 
919  const cl_event * const e = msg.get_event();
920  if (e != NULL) {
921  events[num_events++] = *e;
922  }
923 
924  process_one_arg( kernel, events, num_events, place, msg.data(false) );
925  }
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)

◆ send_data() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::send_data ( opencl_device  device,
T &  t,
Rest &...  args 
)
inline

Definition at line 970 of file flow_graph_opencl_node.h.

970  {
971  send_if_memory_object( device, t );
972  send_data( device, args... );
973  }
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)
void send_data(opencl_device device, T &t, Rest &... args)

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::send_data().

Here is the caller graph for this function:

◆ send_data() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::send_data ( opencl_device  )
inline

Definition at line 975 of file flow_graph_opencl_node.h.

975 {}

◆ send_kernel()

template<typename DeviceFilter>
template<typename ... Args>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::send_kernel ( opencl_device  device,
const kernel_type &  kernel,
const range_type &  work_size,
Args &...  args 
)
inline

Definition at line 954 of file flow_graph_opencl_node.h.

954  {
955  std::array<cl_event, sizeof...(Args)> events;
956  int num_events = 0;
957  int place = 0;
958  process_arg_list( kernel, events, num_events, place, args... );
959 
960  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
961 
962  update_arg_list(e, args...);
963 
964  // Release our own reference to cl_event
965  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
966  }
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
void enforce_cl_retcode(cl_int err, std::string msg)
void update_arg_list(cl_event e, T &t, Rest &... args)

◆ send_kernel_impl()

template<typename DeviceFilter>
cl_event tbb::flow::interface10::opencl_factory< DeviceFilter >::send_kernel_impl ( opencl_device  device,
const cl_kernel &  kernel,
const range_type &  work_size,
cl_uint  num_events,
cl_event *  event_list 
)
inline private

Definition at line 979 of file flow_graph_opencl_node.h.

980  {
981  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
982  const typename range_type::nd_range_type& g_size = work_size.global_range();
983  const typename range_type::nd_range_type& l_size = work_size.local_range();
984  cl_uint s;
985  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
986  cl_event event;
987  enforce_cl_retcode(
988  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
989  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
990  "Failed to enqueue a kernel" );
991  return event;
992  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
void enforce_cl_retcode(cl_int err, std::string msg)
void const char const char int ITT_FORMAT __itt_group_sync s
std::array< range_index_type, 3 > nd_range_type

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::send_kernel().

Here is the caller graph for this function:

◆ update_arg_list() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  e,
T &  t,
Rest &...  args 
)
inline private

Definition at line 945 of file flow_graph_opencl_node.h.

945  {
946  update_one_arg( e, t );
947  update_arg_list( e, args... );
948  }
void update_arg_list(cl_event e, T &t, Rest &... args)

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::send_kernel(), and tbb::flow::interface10::opencl_factory< default_device_filter >::update_arg_list().

Here is the caller graph for this function:

◆ update_arg_list() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  )
inline private

Definition at line 950 of file flow_graph_opencl_node.h.

950 {}

◆ update_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  ,
T &   
)
inline private

Definition at line 937 of file flow_graph_opencl_node.h.

937 {}

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::update_arg_list().

Here is the caller graph for this function:

◆ update_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  e,
opencl_async_msg< T, F > &  msg 
)
inline private

Definition at line 940 of file flow_graph_opencl_node.h.

940  {
941  msg.set_event( e );
942  }

Friends And Related Function Documentation

◆ opencl_buffer_impl

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_buffer_impl
friend

Definition at line 1163 of file flow_graph_opencl_node.h.

◆ opencl_memory

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_memory
friend

Definition at line 1165 of file flow_graph_opencl_node.h.

◆ opencl_program

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_program
friend

Definition at line 1161 of file flow_graph_opencl_node.h.

Member Data Documentation

◆ my_cl_context

◆ my_devices

◆ my_devices_mutex

◆ my_once_flag


The documentation for this class was generated from the following file:

  * flow_graph_opencl_node.h

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.