diff --git a/docs/dpCtl_api.rst b/docs/dpCtl_api.rst index bba32ae4f5..5da706288f 100644 --- a/docs/dpCtl_api.rst +++ b/docs/dpCtl_api.rst @@ -55,9 +55,7 @@ Functions .. autofunction:: get_include .. autofunction:: get_num_activated_queues .. autofunction:: get_num_platforms -.. autofunction:: get_num_queues -.. autofunction:: has_cpu_queues -.. autofunction:: has_gpu_queues +.. autofunction:: get_num_devices .. autofunction:: has_sycl_platforms .. autofunction:: is_in_device_context -.. autofunction:: set_default_queue +.. autofunction:: set_global_queue diff --git a/dpctl-capi/helper/include/dpctl_async_error_handler.h b/dpctl-capi/helper/include/dpctl_async_error_handler.h new file mode 100644 index 0000000000..37343c8901 --- /dev/null +++ b/dpctl-capi/helper/include/dpctl_async_error_handler.h @@ -0,0 +1,45 @@ +//===-- dpctl_async_error_handler.h - An async error handler -*-C++-*- ===// +// +// Data Parallel Control (dpCtl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A functor to use for passing an error handler callback function to sycl +/// context and queue contructors. +//===----------------------------------------------------------------------===// + +#pragma once + +#include "dpctl_error_handler_type.h" +#include + +/*! + * @brief Functor class used by DPCTL to handle SYCL asynchronous errors. + */ +class DPCTL_AsyncErrorHandler +{ + error_handler_callback *handler_ = nullptr; + +public: + DPCTL_AsyncErrorHandler(error_handler_callback *err_handler) + : handler_(err_handler) + { + } + + void operator()(const cl::sycl::exception_list &exceptions); +}; diff --git a/dpctl-capi/helper/source/dpctl_async_error_handler.cpp b/dpctl-capi/helper/source/dpctl_async_error_handler.cpp new file mode 100644 index 0000000000..948f1c25e8 --- /dev/null +++ b/dpctl-capi/helper/source/dpctl_async_error_handler.cpp @@ -0,0 +1,42 @@ +//===-- dpctl_async_error_handler.h - An async error handler -*-C++-*- ===// +// +// Data Parallel Control (dpCtl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A functor to use for passing an error handler callback function to sycl +/// context and queue contructors. +//===----------------------------------------------------------------------===// + +#include "dpctl_async_error_handler.h" + +void DPCTL_AsyncErrorHandler::operator()( + const cl::sycl::exception_list &exceptions) +{ + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception const &e) { + std::cerr << "Caught asynchronous SYCL exception:\n" + << e.what() << std::endl; + // FIXME: Change get_cl_code() to code() once DPCPP supports it. + auto err_code = e.get_cl_code(); + handler_(err_code); + } + } +} diff --git a/dpctl-capi/include/dpctl_error_handler_type.h b/dpctl-capi/include/dpctl_error_handler_type.h new file mode 100644 index 0000000000..db9aa9190a --- /dev/null +++ b/dpctl-capi/include/dpctl_error_handler_type.h @@ -0,0 +1,35 @@ +//===--- dpctl_error_handler_types.h - Error handler callbacks -*-C++-*- ===// +// +// Data Parallel Control (dpCtl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines types for callback functions to use for error handling in dpctl. +/// +//===----------------------------------------------------------------------===// + +#pragma once + +/*! + * @brief Type of function to be used in SYCL async error handler provide by + * DPCTL. + * + * @param err_code Error code extracted from an SYCL asynchronous + * error. + */ +typedef void error_handler_callback(int err_code); diff --git a/dpctl-capi/include/dpctl_sycl_device_selector_interface.h b/dpctl-capi/include/dpctl_sycl_device_selector_interface.h index a7576dff91..3e1063e9da 100644 --- a/dpctl-capi/include/dpctl_sycl_device_selector_interface.h +++ b/dpctl-capi/include/dpctl_sycl_device_selector_interface.h @@ -105,4 +105,19 @@ __dpctl_give DPCTLSyclDeviceSelectorRef DPCTLHostSelector_Create(); DPCTL_API void DPCTLDeviceSelector_Delete(__dpctl_take DPCTLSyclDeviceSelectorRef DSRef); +/*! + *@brief Scores the device specified by DRef by device selector specified by + *DSRef. + * + * @param DSRef An opaque DPCTLSyclDeviceSelectorRef pointer. + * @param DRef An opaque DPCTLSyclDeviceRef pointer. + * + * @return A integer score. The negative value indicates select rejected the + *device. + * @ingroup DeviceSelectors + */ +DPCTL_API +int DPCTLDeviceSelector_Score(__dpctl_keep DPCTLSyclDeviceSelectorRef DSRef, + __dpctl_keep DPCTLSyclDeviceRef DRef); + DPCTL_C_EXTERN_C_END diff --git a/dpctl-capi/include/dpctl_sycl_enum_types.h b/dpctl-capi/include/dpctl_sycl_enum_types.h index b69b67ea07..177c928eac 100644 --- a/dpctl-capi/include/dpctl_sycl_enum_types.h +++ b/dpctl-capi/include/dpctl_sycl_enum_types.h @@ -130,6 +130,7 @@ enum DPCTLSyclAspectType typedef enum { // clang-format off + DPCTL_DEFAULT_PROPERTY = 0, DPCTL_ENABLE_PROFILING = 1 << 1, DPCTL_IN_ORDER = 1 << 2 // clang-format on diff --git a/dpctl-capi/include/dpctl_sycl_queue_interface.h b/dpctl-capi/include/dpctl_sycl_queue_interface.h index 81e5d562b8..c1a60d7258 100644 --- a/dpctl-capi/include/dpctl_sycl_queue_interface.h +++ b/dpctl-capi/include/dpctl_sycl_queue_interface.h @@ -31,25 +31,98 @@ #include "Support/ExternC.h" #include "Support/MemOwnershipAttrs.h" #include "dpctl_data_types.h" +#include "dpctl_error_handler_type.h" #include "dpctl_sycl_enum_types.h" #include "dpctl_sycl_types.h" DPCTL_C_EXTERN_C_BEGIN +/** + * @defgroup QueueInterface sycl::queue class wrapper functions. + */ + +/*! + * @brief A wrapper for sycl::queue contructor to construct a new queue from the + * provided context, device, async handler and propertis bit flags. + * + * @param CRef An opaque pointer to a sycl::context. + * @param DRef An opaque pointer to a sycl::device + * @param error_handler A callback function that will be invoked by the + * async_handler used during queue creation. Can be + * NULL if no async_handler is needed. + * @param properties A combination of bit flags using the values defined + * in the DPCTLQueuePropertyType enum. The bit flags + * are used to create a sycl::property_list that is + * passed to the SYCL queue constructor. + * @return An opaque DPCTLSyclQueueRef pointer containing the new sycl::queue + * object. A nullptr is returned if the queue could not be created. + * @ingroup QueueInterface + */ +DPCTL_API +__dpctl_give DPCTLSyclQueueRef +DPCTLQueue_Create(__dpctl_keep const DPCTLSyclContextRef CRef, + __dpctl_keep const DPCTLSyclDeviceRef DRef, + error_handler_callback *error_handler, + int properties); + +/*! + * @brief Constructs a sycl::queue object of the specified SYCL device. + * + * Constructs a new SYCL queue for the specified SYCL device. The behaviour of + * this function differs from the SYCL `queue(const device &syclDevice, const + * async_handler &asyncHandler, const property_list &propList = {})` constructor + * of the queue class. Unlike the SYCL queue class constructor, we try not to + * create a new SYCL context for the device and instead look to reuse a + * previously cached SYCL context for the device (refer + * dpctl_sycl_device_manager.cpp). DPCTL caches contexts only for root devices + * and for all custom devices the function begaves the same way as the SYCL + * constructor. + * + * @param dRef An opaque pointer to a sycl::device. + * @param error_handler A callback function that will be invoked by the + * async_handler used during queue creation. Can be + * NULL if no async_handler is needed. + * @param properties A combination of bit flags using the values defined + * in the DPCTLQueuePropertyType enum. The bit flags + * are used to create a sycl::property_list that is + * passed to the SYCL queue constructor. + * @return An opaque DPCTLSyclQueueRef pointer containing the new sycl::queue + * object. A nullptr is returned if the queue could not be created. + * @ingroup QueueInterface + */ +DPCTL_API +__dpctl_give DPCTLSyclQueueRef +DPCTLQueue_CreateForDevice(__dpctl_keep const DPCTLSyclDeviceRef dRef, + error_handler_callback *error_handler, + int properties); + /*! * @brief Delete the pointer after casting it to sycl::queue. * * @param QRef A DPCTLSyclQueueRef pointer that gets deleted. + * @ingroup QueueInterface */ DPCTL_API void DPCTLQueue_Delete(__dpctl_take DPCTLSyclQueueRef QRef); +/*! + * @brief Returns a copy of the DPCTLSyclQueueRef object. + * + * @param DRef DPCTLSyclQueueRef object to be copied. + * @return A new DPCTLSyclQueueRef created by copying the passed in + * DPCTLSyclQueueRef object. + */ +DPCTL_API +__dpctl_give DPCTLSyclQueueRef +DPCTLQueue_Copy(__dpctl_keep const DPCTLSyclQueueRef QRef); + /*! * @brief Checks if two DPCTLSyclQueueRef objects point to the same sycl::queue. * * @param QRef1 First opaque pointer to the sycl queue. * @param QRef2 Second opaque pointer to the sycl queue. * @return True if the underlying sycl::queue are same, false otherwise. + * @ingroup QueueInterface */ DPCTL_API bool DPCTLQueue_AreEq(__dpctl_keep const DPCTLSyclQueueRef QRef1, @@ -61,6 +134,7 @@ bool DPCTLQueue_AreEq(__dpctl_keep const DPCTLSyclQueueRef QRef1, * @param QRef An opaque pointer to the sycl queue. * @return A enum DPCTLSyclBackendType corresponding to the backed for the * queue. + * @ingroup QueueInterface */ DPCTL_API DPCTLSyclBackendType DPCTLQueue_GetBackend(__dpctl_keep DPCTLSyclQueueRef QRef); @@ -70,6 +144,7 @@ DPCTLSyclBackendType DPCTLQueue_GetBackend(__dpctl_keep DPCTLSyclQueueRef QRef); * * @param QRef An opaque pointer to the sycl queue. * @return A DPCTLSyclContextRef pointer to the sycl context for the queue. + * @ingroup QueueInterface */ DPCTL_API __dpctl_give DPCTLSyclContextRef @@ -80,6 +155,7 @@ DPCTLQueue_GetContext(__dpctl_keep const DPCTLSyclQueueRef QRef); * * @param QRef An opaque pointer to the sycl queue. * @return A DPCTLSyclDeviceRef pointer to the sycl device for the queue. + * @ingroup QueueInterface */ DPCTL_API __dpctl_give DPCTLSyclDeviceRef @@ -115,6 +191,7 @@ DPCTLQueue_GetDevice(__dpctl_keep const DPCTLSyclQueueRef QRef); * @param NDepEvents Size of the DepEvents list. * @return An opaque pointer to the sycl::event returned by the * sycl::queue.submit() function. + * @ingroup QueueInterface */ DPCTL_API DPCTLSyclEventRef @@ -162,6 +239,7 @@ DPCTLQueue_SubmitRange(__dpctl_keep const DPCTLSyclKernelRef KRef, * @param NDepEvents Size of the DepEvents list. * @return An opaque pointer to the sycl::event returned by the * sycl::queue.submit() function. + * @ingroup QueueInterface */ DPCTL_API DPCTLSyclEventRef @@ -181,6 +259,7 @@ DPCTLQueue_SubmitNDRange(__dpctl_keep const DPCTLSyclKernelRef KRef, * enqueued tasks in the queue. * * @param QRef Opaque pointer to a sycl::queue. + * @ingroup QueueInterface */ DPCTL_API void DPCTLQueue_Wait(__dpctl_keep const DPCTLSyclQueueRef QRef); @@ -193,6 +272,7 @@ void DPCTLQueue_Wait(__dpctl_keep const DPCTLSyclQueueRef QRef); * @param Dest An USM pointer to the destination memory. * @param Src An USM pointer to the source memory. * @param Count A number of bytes to copy. + * @ingroup QueueInterface */ DPCTL_API void DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef, @@ -207,6 +287,7 @@ void DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef, * @param QRef An opaque pointer to the sycl queue. * @param Ptr An USM pointer to memory. * @param Count A number of bytes to prefetch. + * @ingroup QueueInterface */ DPCTL_API void DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef, @@ -223,6 +304,7 @@ void DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef, * @param Advice Device-defined advice for the specified allocation. * A value of 0 reverts the advice for Ptr to the * default behavior. + * @ingroup QueueInterface */ DPCTL_API void DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef, diff --git a/dpctl-capi/include/dpctl_sycl_queue_manager.h b/dpctl-capi/include/dpctl_sycl_queue_manager.h index 0fee49e41d..7847a43229 100644 --- a/dpctl-capi/include/dpctl_sycl_queue_manager.h +++ b/dpctl-capi/include/dpctl_sycl_queue_manager.h @@ -19,15 +19,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This header declares a C interface to DPCTL's sycl::queue manager to -/// maintain a thread local stack of sycl::queues objects for use inside -/// Python programs. The C interface is designed in a way to not have to -/// include the Sycl headers inside a Python extension module, since that would -/// require the extension to be compiled using dpc++ or another Sycl compiler. -/// Compiling the extension with a compiler different from what was used to -/// compile the Python interpreter can cause run-time problems especially on MS -/// Windows. Additionally, the C interface makes it easier to interoperate with -/// Numba without having to deal with C++ name mangling. +/// This header declares a set of functions to support a concept of current +/// queue for applications using dpCtl. /// //===----------------------------------------------------------------------===// @@ -37,145 +30,107 @@ #include "Support/ExternC.h" #include "Support/MemOwnershipAttrs.h" #include "dpctl_data_types.h" -#include "dpctl_sycl_context_interface.h" -#include "dpctl_sycl_device_interface.h" #include "dpctl_sycl_types.h" DPCTL_C_EXTERN_C_BEGIN -/*! - * @brief Get the sycl::queue object that is currently activated for this - * thread. - * - * @return A copy of the current (top of the stack) sycl::queue is returned - * wrapped inside an opaque DPCTLSyclQueueRef pointer. +/** + * @defgroup QueueManager Queue management helper functions */ -DPCTL_API -__dpctl_give DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue(); /*! - * @brief Get a sycl::queue object of the specified type and device id. + * @brief Get the current sycl::queue for the thread of execution. * - * @param BETy A valid Sycl backend value. - * @param DeviceTy The type of Sycl device (sycl_device_type) - * @param DNum Device id for the device (defaults to 0) + * DpCtl lets an application access a "current queue" as soon as the application + * loads dpCtl. The initial current queue also termed the global queue is a + * queue created using SYCL's default_selector. The current queue is set per + * thread and can be changed for a specific execution scope using the PushQueue + * and PopQueue functions in this module. The global queue can also be changed + * by using SetGlobalQueue. * - * @return A copy of the sycl::queue corresponding to the device is returned - * wrapped inside a DPCTLSyclDeviceType pointer. A runtime_error exception is - * raised if no such device exists. - */ -DPCTL_API -__dpctl_give DPCTLSyclQueueRef -DPCTLQueueMgr_GetQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum); - -/*! - * @brief Get the number of activated queues not including the global or - * default queue. + * The DPCTLQueueMgr_GetCurrentQueue function returns the current queue in the + * current scope from where the function was called. * - * @return The number of activated queues. + * @return An opaque DPCTLSyclQueueRef pointer wrapping a sycl::queue*. + * @ingroup QueueManager */ DPCTL_API -size_t DPCTLQueueMgr_GetNumActivatedQueues(); +__dpctl_give DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue(); /*! - * @brief Get the number of available queues for given backend and device type - * combination. + * @brief Returns true if the global queue set for the queue manager is also the + * current queue. * - * @param BETy Type of Sycl backend. - * @param DeviceTy Type of Sycl device. - * @return The number of available queues. + * The default current queue provided by the queue manager is termed as the + * global queue. If DPCTLQueueMgr_PushQueue is used to make another queue the + * current queue, then the global queue no longer remains the current queue till + * all pushed queues are popped using DPCTLQueueMgr_PopQueue. The + * DPCTLQueueMgr_GlobalQueueIsCurrent checks if the global queue is also the + * current queue, i.e., no queues have been pushed and are yet to be popped. + * + * @return True if the global queue is the current queue, else false. + * @ingroup QueueManager */ DPCTL_API -size_t DPCTLQueueMgr_GetNumQueues(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy); +bool DPCTLQueueMgr_GlobalQueueIsCurrent(); /*! - * @brief Returns True if the passed in queue and the current queue are the - * same, else returns False. + * @brief Check if the queue argument is also the current queue. * * @param QRef An opaque pointer to a sycl::queue. - * @return True or False depending on whether the QRef argument is the same as - * the currently activated queue. + * @return True if QRef argument is the the current queue, else False. + * @ingroup QueueManager */ DPCTL_API bool DPCTLQueueMgr_IsCurrentQueue(__dpctl_keep const DPCTLSyclQueueRef QRef); /*! - * @brief Set the default DPCTL queue to the sycl::queue for the given backend - * and device type combination and return a DPCTLSyclQueueRef for that queue. - * If no queue was created Null is returned to caller. + * @brief Resets the global queue using the passed in DPCTLSyclQueueRef the + * previous global queue is deleted. * - * @param BETy Type of Sycl backend. - * @param DeviceTy The type of Sycl device (sycl_device_type) - * @param DNum Device id for the device - * @return A copy of the sycl::queue that was set as the new default queue. If - * no queue could be created then returns Null. + * @param QRef An opaque reference to a sycl::device. + * @ingroup QueueManager */ DPCTL_API -__dpctl_give DPCTLSyclQueueRef -DPCTLQueueMgr_SetAsDefaultQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum); +void DPCTLQueueMgr_SetGlobalQueue(__dpctl_keep const DPCTLSyclQueueRef QRef); /*! - * @brief Pushes a new sycl::queue object to the top of DPCTL's thread-local - * stack of a "activated" queues, and returns a copy of the queue to caller. - * - * The DPCTL queue manager maintains a thread-local stack of sycl::queue objects - * to facilitate nested parallelism. The sycl::queue at the top of the stack is - * termed as the currently activated queue, and is always the one returned by - * DPCTLQueueMgr_GetCurrentQueue(). DPCTLPushSyclQueueToStack creates a new - * sycl::queue corresponding to the specified device and pushes it to the top - * of the stack. A copy of the sycl::queue is returned to the caller wrapped - * inside the opaque DPCTLSyclQueueRef pointer. A runtime_error exception is - * thrown when a new sycl::queue could not be created for the specified device. + * @brief Pushes the passed in sycl::queue object to the queue manager's + * internal stack of queues and makes the queue the current queue. * - * @param BETy Type of Sycl backend. - * @param DeviceTy The type of Sycl device (sycl_device_type) - * @param DNum Device id for the device (defaults to 0) + * The queue manager maintains a thread-local stack of sycl::queue + * objects. The DPCTLQueueMgr_PushQueue() function pushes to the stack and sets + * the passed in DPCTLSyclQueueRef object as the current queue. The + * current queue is the queue returned by the DPCTLQueueMgr_GetCurrentQueue() + * function. * - * @return A copy of the sycl::queue that was pushed to the top of DPCTL's - * stack of sycl::queue objects. Nullptr is returned if no such device exists. + * @param QRef An opaque reference to a syc::queue. + * @ingroup QueueManager */ DPCTL_API -__dpctl_give DPCTLSyclQueueRef -DPCTLQueueMgr_PushQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum); +void DPCTLQueueMgr_PushQueue(__dpctl_keep const DPCTLSyclQueueRef QRef); /*! - * @brief Pops the top of stack element from DPCTL's stack of activated - * sycl::queue objects. - * - * DPCTLPopSyclQueue only removes the reference from the DPCTL stack of - * sycl::queue objects. Any instance of the popped queue that were previously - * acquired by calling DPCTLPushSyclQueue() or DPCTLQueueMgr_GetCurrentQueue() - * needs to be freed separately. In addition, a runtime_error is thrown when - * the stack contains only one sycl::queue, i.e., the default queue. + * @brief Pops the top of stack sycl::queue object from the queue manager's * + * internal stack of queues and makes the next queue in the stack the current + * queue. * + * DPCTLPopSyclQueue removes the top of stack queue and changes the + * current queue. If no queue was previously pushed, then a + * DPCTLQueueMgr_PopQueue call is a no-op. + * @ingroup QueueManager */ DPCTL_API void DPCTLQueueMgr_PopQueue(); /*! - * @brief Creates a new instance of SYCL queue from SYCL context and - * SYCL device. - * - * The instance is not placed into queue manager. The user assumes - * ownership of the queue reference and should deallocate it using - * DPCTLQueue_Delete. - * - * @param CRef Sycl context reference - * @param DRef Sycl device reference + * @brief A helper function meant for unit testing. Returns the current number + * of queues pushed to the queue manager's internal stack of sycl::queue + * objects. * - * @return A copy of the sycl::queue created from given context and device - * references. + * @return The current size of the queue manager's stack of queues. */ DPCTL_API -__dpctl_give DPCTLSyclQueueRef DPCTLQueueMgr_GetQueueFromContextAndDevice( - __dpctl_keep DPCTLSyclContextRef CRef, - __dpctl_keep DPCTLSyclDeviceRef DRef); +size_t DPCTLQueueMgr_GetQueueStackSize(); DPCTL_C_EXTERN_C_END diff --git a/dpctl-capi/source/dpctl_sycl_device_selector_interface.cpp b/dpctl-capi/source/dpctl_sycl_device_selector_interface.cpp index d597386142..64cf1bc261 100644 --- a/dpctl-capi/source/dpctl_sycl_device_selector_interface.cpp +++ b/dpctl-capi/source/dpctl_sycl_device_selector_interface.cpp @@ -33,6 +33,7 @@ namespace { // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(device_selector, DPCTLSyclDeviceSelectorRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(device, DPCTLSyclDeviceRef) } /* end of anonymous namespace */ @@ -121,6 +122,18 @@ __dpctl_give DPCTLSyclDeviceSelectorRef DPCTLHostSelector_Create() } } +int DPCTLDeviceSelector_Score(__dpctl_keep DPCTLSyclDeviceSelectorRef DSRef, + __dpctl_keep DPCTLSyclDeviceRef DRef) +{ + constexpr int REJECT_DEVICE_SCORE = -1; + if (DSRef && DRef) { + auto dev = *(unwrap(DRef)); + return (*unwrap(DSRef))(dev); + } + else + return REJECT_DEVICE_SCORE; +} + void DPCTLDeviceSelector_Delete(__dpctl_take DPCTLSyclDeviceSelectorRef DSRef) { auto Selector = unwrap(DSRef); diff --git a/dpctl-capi/source/dpctl_sycl_queue_interface.cpp b/dpctl-capi/source/dpctl_sycl_queue_interface.cpp index 56fd77286f..1e595b5a19 100644 --- a/dpctl-capi/source/dpctl_sycl_queue_interface.cpp +++ b/dpctl-capi/source/dpctl_sycl_queue_interface.cpp @@ -25,8 +25,11 @@ //===----------------------------------------------------------------------===// #include "dpctl_sycl_queue_interface.h" +#include "../helper/include/dpctl_async_error_handler.h" #include "Support/CBindingWrapping.h" #include "dpctl_sycl_context_interface.h" +#include "dpctl_sycl_device_interface.h" +#include "dpctl_sycl_device_manager.h" #include /* SYCL headers */ #include #include @@ -113,8 +116,138 @@ bool set_kernel_arg(handler &cgh, return arg_set; } +std::unique_ptr create_property_list(int properties) +{ + std::unique_ptr propList; + if (properties & (DPCTL_ENABLE_PROFILING | DPCTL_IN_ORDER)) { + propList = std::make_unique( + sycl::property::queue::enable_profiling(), + sycl::property::queue::in_order()); + } + else if (properties & DPCTL_ENABLE_PROFILING) { + propList = std::make_unique( + sycl::property::queue::enable_profiling()); + } + else if (properties & DPCTL_IN_ORDER) { + propList = + std::make_unique(sycl::property::queue::in_order()); + } + + return propList; +} + +__dpctl_give DPCTLSyclQueueRef +getQueueImpl(__dpctl_take DPCTLSyclContextRef cRef, + __dpctl_take DPCTLSyclDeviceRef dRef, + error_handler_callback *handler, + int properties) +{ + DPCTLSyclQueueRef qRef = nullptr; + qRef = DPCTLQueue_Create(cRef, dRef, handler, properties); + DPCTLContext_Delete(cRef); + DPCTLDevice_Delete(dRef); + + return qRef; +} + } /* end of anonymous namespace */ +DPCTL_API +__dpctl_give DPCTLSyclQueueRef +DPCTLQueue_Create(__dpctl_keep const DPCTLSyclContextRef CRef, + __dpctl_keep const DPCTLSyclDeviceRef DRef, + error_handler_callback *error_handler, + int properties) +{ + DPCTLSyclQueueRef q = nullptr; + auto dev = unwrap(DRef); + auto ctx = unwrap(CRef); + + if (!(dev && ctx)) { + /* \todo handle error */ + return q; + } + auto propList = create_property_list(properties); + + if (propList && error_handler) { + try { + auto Queue = new queue( + *ctx, *dev, DPCTL_AsyncErrorHandler(error_handler), *propList); + q = wrap(Queue); + } catch (std::bad_alloc const &ba) { + std::cerr << ba.what() << '\n'; + } catch (runtime_error &re) { + std::cerr << re.what() << '\n'; + } + } + else if (properties) { + try { + auto Queue = new queue(*ctx, *dev, *propList); + q = wrap(Queue); + } catch (std::bad_alloc const &ba) { + std::cerr << ba.what() << '\n'; + } catch (runtime_error &re) { + std::cerr << re.what() << '\n'; + } + } + else if (error_handler) { + try { + auto Queue = + new queue(*ctx, *dev, DPCTL_AsyncErrorHandler(error_handler)); + q = wrap(Queue); + } catch (std::bad_alloc const &ba) { + std::cerr << ba.what() << '\n'; + } catch (runtime_error &re) { + std::cerr << re.what() << '\n'; + } + } + else { + try { + auto Queue = new queue(*ctx, *dev); + q = wrap(Queue); + } catch (std::bad_alloc const &ba) { + std::cerr << ba.what() << '\n'; + } catch (runtime_error &re) { + std::cerr << re.what() << '\n'; + } + } + + return q; +} + +__dpctl_give DPCTLSyclQueueRef +DPCTLQueue_CreateForDevice(__dpctl_keep const DPCTLSyclDeviceRef dRef, + error_handler_callback *handler, + int properties) +{ + DPCTLSyclQueueRef qRef = nullptr; + auto Device = unwrap(dRef); + + if (!Device) { + std::cerr << "Cannot create queue from NULL device reference.\n"; + return qRef; + } + auto cached = DPCTLDeviceMgr_GetDeviceAndContextPair(dRef); + if (cached.CRef) { + qRef = getQueueImpl(cached.CRef, cached.DRef, handler, properties); + } + // We only cache contexts for root devices. If the dRef argument points to + // a sub-device, then the queue manager allocates a new context and creates + // a new queue to retrun to caller. Note that any context for a sub-device + // is not cached. + else { + try { + auto CRef = wrap(new context(*Device)); + auto DRef_copy = wrap(new device(*Device)); + qRef = getQueueImpl(CRef, DRef_copy, handler, properties); + } catch (std::bad_alloc const &ba) { + std::cerr << ba.what() << std::endl; + } + } + + return qRef; +} + /*! * Delete the passed in pointer after verifying it points to a sycl::queue. */ @@ -123,6 +256,29 @@ void DPCTLQueue_Delete(__dpctl_take DPCTLSyclQueueRef QRef) delete unwrap(QRef); } +/*! + * Make copy of sycl::queue referenced by passed pointer + */ +__dpctl_give DPCTLSyclQueueRef +DPCTLQueue_Copy(__dpctl_keep const DPCTLSyclQueueRef QRef) +{ + auto Queue = unwrap(QRef); + if (Queue) { + try { + auto CopiedQueue = new queue(*Queue); + return wrap(CopiedQueue); + } catch (std::bad_alloc &ba) { + std::cerr << ba.what() << std::endl; + return nullptr; + } + } + else { + std::cerr << "Can not copy DPCTLSyclQueueRef as input is a nullptr" + << std::endl; + return nullptr; + } +} + bool DPCTLQueue_AreEq(__dpctl_keep const DPCTLSyclQueueRef QRef1, __dpctl_keep const DPCTLSyclQueueRef QRef2) { diff --git a/dpctl-capi/source/dpctl_sycl_queue_manager.cpp b/dpctl-capi/source/dpctl_sycl_queue_manager.cpp index 1c63a3eeac..60eb7bb498 100644 --- a/dpctl-capi/source/dpctl_sycl_queue_manager.cpp +++ b/dpctl-capi/source/dpctl_sycl_queue_manager.cpp @@ -25,8 +25,8 @@ //===----------------------------------------------------------------------===// #include "dpctl_sycl_queue_manager.h" #include "Support/CBindingWrapping.h" +#include "dpctl_sycl_device_manager.h" #include /* SYCL headers */ -#include #include using namespace cl::sycl; @@ -36,503 +36,147 @@ using namespace cl::sycl; // Anonymous namespace for private helpers namespace { - // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPCTLSyclQueueRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(device, DPCTLSyclDeviceRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(context, DPCTLSyclContextRef) -/*! - * @brief A helper class to support the DPCTLSyclQueuemanager. - * - * The QMgrHelper is needed so that sycl headers are not exposed at the - * top-level DPCTL API. - * - */ -class QMgrHelper +struct QueueManager { -public: - using QVec = vector_class; - - static QVec *init_queues(backend BE, info::device_type DTy) + using QueueStack = vector_class; + static QueueStack &getQueueStack() { - QVec *queues = new QVec(); - auto Platforms = platform::get_platforms(); - for (auto &p : Platforms) { - if (p.is_host()) - continue; - auto be = p.get_backend(); - auto Devices = p.get_devices(); - - if (Devices.size() == 1) { - auto d = Devices[0]; - auto devty = d.get_info(); - if (devty == DTy && be == BE) { - auto Ctx = context(d); - queues->emplace_back(Ctx, d); - break; + thread_local static QueueStack *activeQueues = new QueueStack([] { + QueueStack qs; + auto DS = default_selector(); + try { + auto DRef = wrap(new device(DS.select_device())); + auto cached = DPCTLDeviceMgr_GetDeviceAndContextPair(DRef); + if (cached.CRef) { + qs.emplace_back(*unwrap(cached.CRef), *unwrap(cached.DRef)); } - } - else { - vector_class SelectedDevices; - for (auto &d : Devices) { - auto devty = d.get_info(); - if (devty == DTy && be == BE) { - SelectedDevices.push_back(d); - } - } - if (SelectedDevices.size() > 0) { - auto Ctx = context(SelectedDevices); - auto d = SelectedDevices[0]; - queues->emplace_back(Ctx, d); + else { + std::cerr << "Fatal Error: No cached context for default " + "device.\n"; + std::terminate(); } + delete unwrap(DRef); + delete unwrap(cached.DRef); + delete unwrap(cached.CRef); + } catch (std::bad_alloc const &ba) { + std::cerr << ba.what() << '\n'; } - } - return queues; - } - static QVec *init_active_queues() - { - QVec *active_queues; - try { - auto def_device{default_selector().select_device()}; - auto BE = def_device.get_platform().get_backend(); - auto DevTy = def_device.get_info(); + return qs; + }()); - // \todo : We need to have a better way to match the default device - // to what SYCL returns based on the same scoring logic. Just - // storing the first device is not correct when we will have - // multiple devices of same type. - if (BE == backend::opencl && DevTy == info::device_type::cpu) { - active_queues = new QVec({get_opencl_cpu_queues()[0]}); - } - else if (BE == backend::opencl && DevTy == info::device_type::gpu) { - active_queues = new QVec({get_opencl_gpu_queues()[0]}); - } - else if (BE == backend::level_zero && - DevTy == info::device_type::gpu) { - active_queues = new QVec({get_level0_gpu_queues()[0]}); - } - else { - active_queues = new QVec(); - } - } catch (runtime_error &re) { - // \todo Handle the error - active_queues = new QVec(); - } - - return active_queues; + return *activeQueues; } +}; - static QVec &get_opencl_cpu_queues() - { - static QVec *queues = - init_queues(backend::opencl, info::device_type::cpu); - return *queues; - } - - static QVec &get_opencl_gpu_queues() - { - static QVec *queues = - init_queues(backend::opencl, info::device_type::gpu); - return *queues; - } - - static QVec get_level0_gpu_queues() - { - static QVec *queues = - init_queues(backend::level_zero, info::device_type::gpu); - return *queues; - } - - static QVec &get_active_queues() - { - thread_local static QVec *active_queues = init_active_queues(); - return *active_queues; - } - - static __dpctl_give DPCTLSyclQueueRef getQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum); - - static __dpctl_give DPCTLSyclQueueRef getCurrentQueue(); - - static bool isCurrentQueue(__dpctl_keep const DPCTLSyclQueueRef QRef); - - static __dpctl_give DPCTLSyclQueueRef - setAsDefaultQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum); - - static __dpctl_give DPCTLSyclQueueRef - pushSyclQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum); +} /* end of anonymous namespace */ - static void popSyclQueue(); -}; +//----------------------------- Public API -----------------------------------// -/*! - * Allocates a new copy of the present top of stack queue, which can be the - * default queue and returns to caller. The caller owns the pointer and is - * responsible for deallocating it. The helper function DPCTLQueue_Delete should - * be used for that purpose. - */ -DPCTLSyclQueueRef QMgrHelper::getCurrentQueue() +// If there are any queues in the QueueStack except the global queue return +// true, else return false. +bool DPCTLQueueMgr_GlobalQueueIsCurrent() { - auto &activated_q = get_active_queues(); - if (activated_q.empty()) { + auto &qs = QueueManager::getQueueStack(); + if (qs.empty()) { // \todo handle error - std::cerr << "No currently active queues.\n"; - return nullptr; + std::cerr << "Error: No global queue found.\n"; + return false; } - auto last = activated_q.size() - 1; - return wrap(new queue(activated_q[last])); + // The first entry of the QueueStack is always the global queue. If there + // are any more queues in the QueueStack, that indicates that the global + // queue is not the current queue. + return (qs.size() - 1) ? false : true; } /*! - * Allocates a sycl::queue by copying from the cached {cpu|gpu}_queues vector - * and returns it to the caller. The caller owns the pointer and is responsible - * for deallocating it. The helper function DPCTLQueue_Delete should + * Allocates a new copy of the current queue. The caller owns the pointer and is + * responsible for deallocating it. The helper function DPCTLQueue_Delete should * be used for that purpose. */ -__dpctl_give DPCTLSyclQueueRef -QMgrHelper::getQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) +DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue() { - queue *QRef = nullptr; - - switch (BETy | DeviceTy) { - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_CPU: - { - auto cpuQs = get_opencl_cpu_queues(); - if (DNum >= cpuQs.size()) { - // \todo handle error - std::cerr << "OpenCL CPU device " << DNum - << " not found on system.\n"; - return nullptr; - } - QRef = new queue(cpuQs[DNum]); - break; - } - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_GPU: - { - auto gpuQs = get_opencl_gpu_queues(); - if (DNum >= gpuQs.size()) { - // \todo handle error - std::cerr << "OpenCL GPU device " << DNum - << " not found on system.\n"; - return nullptr; - } - QRef = new queue(gpuQs[DNum]); - break; - } - case DPCTLSyclBackendType::DPCTL_LEVEL_ZERO | - DPCTLSyclDeviceType::DPCTL_GPU: - { - auto l0GpuQs = get_level0_gpu_queues(); - if (DNum >= l0GpuQs.size()) { - // \todo handle error - std::cerr << "Level-0 GPU device " << DNum - << " not found on system.\n"; - return nullptr; - } - QRef = new queue(l0GpuQs[DNum]); - break; - } - default: - std::cerr << "Unsupported device type.\n"; + auto &qs = QueueManager::getQueueStack(); + if (qs.empty()) { + // \todo handle error + std::cerr << "No currently active queues.\n"; return nullptr; } - - return wrap(QRef); + auto last = qs.size() - 1; + return wrap(new queue(qs[last])); } -/*! - * Compares the context and device of the current queue to the context and - * device of the queue passed as input. Return true if both queues have the - * same context and device. - */ -bool QMgrHelper::isCurrentQueue(__dpctl_keep const DPCTLSyclQueueRef QRef) +// Relies on sycl::queue class' operator= to check for equivalent of queues. +bool DPCTLQueueMgr_IsCurrentQueue(__dpctl_keep const DPCTLSyclQueueRef QRef) { - auto &activated_q = get_active_queues(); - if (activated_q.empty()) { + auto &qs = QueueManager::getQueueStack(); + if (qs.empty()) { // \todo handle error std::cerr << "No currently active queues.\n"; return false; } - auto last = activated_q.size() - 1; - auto currQ = activated_q[last]; + auto last = qs.size() - 1; + auto currQ = qs[last]; return (*unwrap(QRef) == currQ); } -/*! - * Changes the first entry into the stack, i.e., the default queue to a new - * sycl::queue corresponding to the device type and device number. - */ -__dpctl_give DPCTLSyclQueueRef -QMgrHelper::setAsDefaultQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) +// The function sets the global queue, i.e., the sycl::queue object at +// getQueueStack()[0] to the passed in sycl::queue. +void DPCTLQueueMgr_SetGlobalQueue(__dpctl_keep const DPCTLSyclQueueRef qRef) { - queue *QRef = nullptr; - auto &activeQ = get_active_queues(); - if (activeQ.empty()) { - std::cerr << "active queue vector is corrupted.\n"; - return nullptr; - } - - switch (BETy | DeviceTy) { - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_CPU: - { - auto oclcpu_q = get_opencl_cpu_queues(); - if (DNum >= oclcpu_q.size()) { - // \todo handle error - std::cerr << "OpenCL CPU device " << DNum - << " not found on system\n."; - return nullptr; - } - activeQ[0] = oclcpu_q[DNum]; - break; - } - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_GPU: - { - auto oclgpu_q = get_opencl_gpu_queues(); - if (DNum >= oclgpu_q.size()) { - // \todo handle error - std::cerr << "OpenCL GPU device " << DNum - << " not found on system\n."; - return nullptr; - } - activeQ[0] = oclgpu_q[DNum]; - break; - } - case DPCTLSyclBackendType::DPCTL_LEVEL_ZERO | - DPCTLSyclDeviceType::DPCTL_GPU: - { - auto l0gpu_q = get_level0_gpu_queues(); - if (DNum >= l0gpu_q.size()) { - // \todo handle error - std::cerr << "Level-0 GPU device " << DNum - << " not found on system\n."; - return nullptr; - } - activeQ[0] = l0gpu_q[DNum]; - break; - } - default: - { - std::cerr << "Unsupported device type.\n"; - return nullptr; + auto &qs = QueueManager::getQueueStack(); + if (qRef) { + qs[0] = *unwrap(qRef); } + else { + // TODO: This should be an error and we should not fail silently. + std::cerr << "Error: Failed to set the global queue.\n"; } - - QRef = new queue(activeQ[0]); - return wrap(QRef); } -/*! - * Allocates a new sycl::queue by copying from the cached {cpu|gpu}_queues - * vector. The pointer returned is now owned by the caller and must be properly - * cleaned up. The helper function DPCTLDeleteSyclQueue() can be used is for - * that purpose. - */ -__dpctl_give DPCTLSyclQueueRef -QMgrHelper::pushSyclQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) +// Push the passed in queue to the QueueStack +void DPCTLQueueMgr_PushQueue(__dpctl_keep const DPCTLSyclQueueRef qRef) { - queue *QRef = nullptr; - auto &activeQ = get_active_queues(); - if (activeQ.empty()) { - std::cerr << "Why is there no previous global context?\n"; - return nullptr; - } - - switch (BETy | DeviceTy) { - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_CPU: - { - if (DNum >= get_opencl_cpu_queues().size()) { - // \todo handle error - std::cerr << "OpenCL CPU device " << DNum - << " not found on system\n."; - return nullptr; - } - activeQ.emplace_back(get_opencl_cpu_queues()[DNum]); - QRef = new queue(activeQ[activeQ.size() - 1]); - break; - } - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_GPU: - { - if (DNum >= get_opencl_gpu_queues().size()) { - // \todo handle error - std::cerr << "OpenCL GPU device " << DNum - << " not found on system\n."; - return nullptr; - } - activeQ.emplace_back(get_opencl_gpu_queues()[DNum]); - QRef = new queue(activeQ[get_active_queues().size() - 1]); - break; - } - case DPCTLSyclBackendType::DPCTL_LEVEL_ZERO | - DPCTLSyclDeviceType::DPCTL_GPU: - { - if (DNum >= get_level0_gpu_queues().size()) { - // \todo handle error - std::cerr << "Level-0 GPU device " << DNum - << " not found on system\n."; - return nullptr; - } - activeQ.emplace_back(get_level0_gpu_queues()[DNum]); - QRef = new queue(activeQ[get_active_queues().size() - 1]); - break; - } - default: - { - std::cerr << "Unsupported device type.\n"; - return nullptr; + auto &qs = QueueManager::getQueueStack(); + if (qRef) { + qs.emplace_back(*unwrap(qRef)); } + else { + // TODO: This should be an error and we should not fail silently. + std::cerr << "Error: Failed to set the current queue.\n"; } - - return wrap(QRef); } -/*! - * If there were any sycl::queue that were activated and added to the stack of - * activated queues then the top of the stack entry is popped. Note that since - * the same std::vector is used to keep track of the activated queues and the - * global queue a popSyclQueue call can never make the stack empty. Even - * after all activated queues are popped, the global queue is still available as - * the first element added to the stack. - */ -void QMgrHelper::popSyclQueue() +// Pop's a previously pushed queue from the QueueStack. Note that since the +// global queue is always stored at getQueueStack()[0] we check that the size of +// the QueueStack is >=1 before popping. +void DPCTLQueueMgr_PopQueue() { - // The first queue which is the "default" queue can not be removed. - if (get_active_queues().size() <= 1) { - std::cerr << "No active contexts.\n"; + auto &qs = QueueManager::getQueueStack(); + // The first entry in the QueueStack is the global queue, and should not be + // removed. + if (qs.size() <= 1) { + std::cerr << "No queue to pop.\n"; return; } - get_active_queues().pop_back(); + qs.pop_back(); } -} /* end of anonymous namespace */ - -//----------------------------- Public API -----------------------------------// - -/*! - * Returns inside the number of activated queues not including the global queue - * (QMgrHelper::active_queues[0]). - */ -size_t DPCTLQueueMgr_GetNumActivatedQueues() +size_t DPCTLQueueMgr_GetQueueStackSize() { - if (QMgrHelper::get_active_queues().empty()) { - // \todo handle error - std::cerr << "No active contexts.\n"; - return 0; - } - return QMgrHelper::get_active_queues().size() - 1; -} - -/*! - * Returns the number of available queues for a specific backend and device - * type combination. - */ -size_t DPCTLQueueMgr_GetNumQueues(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy) -{ - switch (BETy | DeviceTy) { - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_CPU: - { - return QMgrHelper::get_opencl_cpu_queues().size(); - } - case DPCTLSyclBackendType::DPCTL_OPENCL | DPCTLSyclDeviceType::DPCTL_GPU: - { - return QMgrHelper::get_opencl_gpu_queues().size(); - } - case DPCTLSyclBackendType::DPCTL_LEVEL_ZERO | - DPCTLSyclDeviceType::DPCTL_GPU: - { - return QMgrHelper::get_level0_gpu_queues().size(); - } - default: - { + auto &qs = QueueManager::getQueueStack(); + if (qs.empty()) { // \todo handle error - std::cerr << "Unsupported device type.\n"; - return 0; - } + std::cerr << "Error: No global queue found.\n"; + return -1; } -} - -/*! - * \see QMgrHelper::getCurrentQueue() - */ -DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue() -{ - return QMgrHelper::getCurrentQueue(); -} - -/*! - * Returns a copy of a sycl::queue corresponding to the specified device type - * and device number. A runtime_error gets thrown if no such device exists. - */ -DPCTLSyclQueueRef DPCTLQueueMgr_GetQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) -{ - return QMgrHelper::getQueue(BETy, DeviceTy, DNum); -} - -/*! - -* */ -bool DPCTLQueueMgr_IsCurrentQueue(__dpctl_keep const DPCTLSyclQueueRef QRef) -{ - return QMgrHelper::isCurrentQueue(QRef); -} -/*! - * The function sets the global queue, i.e., the sycl::queue object at - * QMgrHelper::active_queues[0] vector to the sycl::queue corresponding to the - * specified device type and id. If not queue was found for the backend and - * device, Null is returned. - */ -__dpctl_give DPCTLSyclQueueRef -DPCTLQueueMgr_SetAsDefaultQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) -{ - return QMgrHelper::setAsDefaultQueue(BETy, DeviceTy, DNum); -} - -/*! - * \see QMgrHelper::pushSyclQueue() - */ -__dpctl_give DPCTLSyclQueueRef -DPCTLQueueMgr_PushQueue(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) -{ - return QMgrHelper::pushSyclQueue(BETy, DeviceTy, DNum); -} - -/*! - * \see QMgrHelper::popSyclQueue() - */ -void DPCTLQueueMgr_PopQueue() -{ - QMgrHelper::popSyclQueue(); -} - -/*! - * The function constructs a new SYCL queue instance from SYCL conext and - * SYCL device. - */ -DPCTLSyclQueueRef DPCTLQueueMgr_GetQueueFromContextAndDevice( - __dpctl_keep DPCTLSyclContextRef CRef, - __dpctl_keep DPCTLSyclDeviceRef DRef) -{ - auto dev = unwrap(DRef); - auto ctx = unwrap(CRef); - - return wrap(new queue(*ctx, *dev)); + // The first entry of the QueueStack is always the global queue. If there + // are any more queues in the QueueStack, that indicates that the global + // queue is not the current queue. + return (qs.size() - 1); } diff --git a/dpctl-capi/tests/test_sycl_device_interface.cpp b/dpctl-capi/tests/test_sycl_device_interface.cpp index 97483d1ce6..a25f35d926 100644 --- a/dpctl-capi/tests/test_sycl_device_interface.cpp +++ b/dpctl-capi/tests/test_sycl_device_interface.cpp @@ -58,6 +58,19 @@ struct TestDPCTLSyclDeviceInterface } }; +TEST_P(TestDPCTLSyclDeviceInterface, Chk_Copy) +{ + DPCTLSyclDeviceRef DRef = nullptr; + DPCTLSyclDeviceRef Copied_DRef = nullptr; + EXPECT_NO_FATAL_FAILURE(DRef = DPCTLDevice_CreateFromSelector(DSRef)); + if (!DRef) + GTEST_SKIP_("Device not found"); + EXPECT_NO_FATAL_FAILURE(Copied_DRef = DPCTLDevice_Copy(DRef)); + EXPECT_TRUE(bool(Copied_DRef)); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(Copied_DRef)); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(DRef)); +} + TEST_P(TestDPCTLSyclDeviceInterface, Chk_GetBackend) { DPCTLSyclDeviceRef DRef = nullptr; diff --git a/dpctl-capi/tests/test_sycl_device_selector_interface.cpp b/dpctl-capi/tests/test_sycl_device_selector_interface.cpp index 2225c4378a..862845c804 100644 --- a/dpctl-capi/tests/test_sycl_device_selector_interface.cpp +++ b/dpctl-capi/tests/test_sycl_device_selector_interface.cpp @@ -182,6 +182,26 @@ TEST_P(TestUnsupportedFilters, Chk_DPCTLFilterSelector_Create) EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(DRef)); } +TEST_F(TestDeviceSelectorInterface, Chk_DPCTLGPUSelector_Score) +{ + DPCTLSyclDeviceSelectorRef DSRef_GPU = nullptr; + DPCTLSyclDeviceSelectorRef DSRef_CPU = nullptr; + EXPECT_NO_FATAL_FAILURE(DSRef_GPU = DPCTLGPUSelector_Create()); + EXPECT_NO_FATAL_FAILURE(DSRef_CPU = DPCTLCPUSelector_Create()); + if (DSRef_CPU && DSRef_GPU) { + DPCTLSyclDeviceRef DRef = nullptr; + EXPECT_NO_FATAL_FAILURE(DRef = + DPCTLDevice_CreateFromSelector(DSRef_CPU)); + ASSERT_TRUE(DRef != nullptr); + EXPECT_TRUE(DPCTLDevice_IsCPU(DRef)); + EXPECT_TRUE(DPCTLDeviceSelector_Score(DSRef_GPU, DRef) < 0); + EXPECT_TRUE(DPCTLDeviceSelector_Score(DSRef_CPU, DRef) > 0); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(DRef)); + } + EXPECT_NO_FATAL_FAILURE(DPCTLDeviceSelector_Delete(DSRef_GPU)); + EXPECT_NO_FATAL_FAILURE(DPCTLDeviceSelector_Delete(DSRef_CPU)); +} + INSTANTIATE_TEST_SUITE_P(FilterSelectorCreation, TestFilterSelector, ::testing::Values("opencl", diff --git a/dpctl-capi/tests/test_sycl_kernel_interface.cpp b/dpctl-capi/tests/test_sycl_kernel_interface.cpp index 01d26cdae5..4fb38f6f2e 100644 --- a/dpctl-capi/tests/test_sycl_kernel_interface.cpp +++ b/dpctl-capi/tests/test_sycl_kernel_interface.cpp @@ -25,6 +25,8 @@ //===----------------------------------------------------------------------===// #include "dpctl_sycl_context_interface.h" +#include "dpctl_sycl_device_interface.h" +#include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_kernel_interface.h" #include "dpctl_sycl_program_interface.h" #include "dpctl_sycl_queue_interface.h" @@ -36,7 +38,10 @@ using namespace cl::sycl; -struct TestDPCTLSyclKernelInterface : public ::testing::Test +namespace +{ +struct TestDPCTLSyclKernelInterface + : public ::testing::TestWithParam { const char *CLProgramStr = R"CLC( kernel void add(global int* a, global int* b, global int* c) { @@ -50,20 +55,35 @@ struct TestDPCTLSyclKernelInterface : public ::testing::Test } )CLC"; const char *CompileOpts = "-cl-fast-relaxed-math"; + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; - size_t nOpenCLGpuQ = 0; TestDPCTLSyclKernelInterface() - : nOpenCLGpuQ(DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU)) { + DSRef = DPCTLFilterSelector_Create(GetParam()); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + } + + ~TestDPCTLSyclKernelInterface() + { + DPCTLDeviceSelector_Delete(DSRef); + DPCTLDevice_Delete(DRef); + } + + void SetUp() + { + if (!DRef) { + auto message = "Skipping as no device of type " + + std::string(GetParam()) + "."; + GTEST_SKIP_(message.c_str()); + } } }; +} // namespace -TEST_F(TestDPCTLSyclKernelInterface, CheckGetFunctionName) +TEST_P(TestDPCTLSyclKernelInterface, CheckGetFunctionName) { - if (!nOpenCLGpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto QueueRef = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); auto CtxRef = DPCTLQueue_GetContext(QueueRef); auto PRef = DPCTLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, CompileOpts); @@ -86,12 +106,9 @@ TEST_F(TestDPCTLSyclKernelInterface, CheckGetFunctionName) DPCTLKernel_Delete(AxpyKernel); } -TEST_F(TestDPCTLSyclKernelInterface, CheckGetNumArgs) +TEST_P(TestDPCTLSyclKernelInterface, CheckGetNumArgs) { - if (!nOpenCLGpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto QueueRef = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); auto CtxRef = DPCTLQueue_GetContext(QueueRef); auto PRef = DPCTLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, CompileOpts); @@ -107,3 +124,7 @@ TEST_F(TestDPCTLSyclKernelInterface, CheckGetNumArgs) DPCTLKernel_Delete(AddKernel); DPCTLKernel_Delete(AxpyKernel); } + +INSTANTIATE_TEST_SUITE_P(TestKernelInterfaceFunctions, + TestDPCTLSyclKernelInterface, + ::testing::Values("opencl:gpu:0", "opencl:cpu:0")); diff --git a/dpctl-capi/tests/test_sycl_program_interface.cpp b/dpctl-capi/tests/test_sycl_program_interface.cpp index eb089f1c61..77972d59f1 100644 --- a/dpctl-capi/tests/test_sycl_program_interface.cpp +++ b/dpctl-capi/tests/test_sycl_program_interface.cpp @@ -1,4 +1,4 @@ -//===-- test_sycl_program_interface.cpp - Test cases for program interface ===// +//===-- test_sycl_program_interface.cpp - Test cases for module interface -===// // // Data Parallel Control (dpCtl) // @@ -20,12 +20,15 @@ /// /// \file /// This file has unit test cases for functions defined in -/// dpctl_sycl_program_interface.h. +/// dpctl_sycl_module_interface.h. /// //===----------------------------------------------------------------------===// #include "Config/dpctl_config.h" #include "dpctl_sycl_context_interface.h" +#include "dpctl_sycl_device_interface.h" +#include "dpctl_sycl_device_manager.h" +#include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_kernel_interface.h" #include "dpctl_sycl_program_interface.h" #include "dpctl_sycl_queue_interface.h" @@ -132,7 +135,7 @@ struct TestDPCTLSyclProgramInterface : public ::testing::Test : spirvFile{"./multi_kernel.spv", std::ios::binary | std::ios::ate}, spirvFileSize(std::filesystem::file_size("./multi_kernel.spv")), spirvBuffer(spirvFileSize), - nOpenCLGpuQ(DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU)) + nOpenCLGpuQ(DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_GPU)) { spirvFile.seekg(0, std::ios::beg); spirvFile.read(spirvBuffer.data(), spirvFileSize); @@ -148,8 +151,9 @@ TEST_F(TestDPCTLSyclProgramInterface, CheckCreateFromOCLSource) { if (!nOpenCLGpuQ) GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto QueueRef = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto FSRef = DPCTLFilterSelector_Create("opencl:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); auto CtxRef = DPCTLQueue_GetContext(QueueRef); auto PRef = DPCTLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, CompileOpts); @@ -160,14 +164,17 @@ TEST_F(TestDPCTLSyclProgramInterface, CheckCreateFromOCLSource) DPCTLQueue_Delete(QueueRef); DPCTLContext_Delete(CtxRef); DPCTLProgram_Delete(PRef); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); } TEST_F(TestDPCTLSyclProgramInterface, CheckCreateFromSpirvOCL) { if (!nOpenCLGpuQ) GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto QueueRef = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto FSRef = DPCTLFilterSelector_Create("opencl:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); auto CtxRef = DPCTLQueue_GetContext(QueueRef); auto PRef = DPCTLProgram_CreateFromSpirv(CtxRef, spirvBuffer.data(), spirvFileSize, nullptr); @@ -178,16 +185,19 @@ TEST_F(TestDPCTLSyclProgramInterface, CheckCreateFromSpirvOCL) DPCTLQueue_Delete(QueueRef); DPCTLContext_Delete(CtxRef); DPCTLProgram_Delete(PRef); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); } #ifdef DPCTL_ENABLE_LO_PROGRAM_CREATION TEST_F(TestDPCTLSyclProgramInterface, CheckCreateFromSpirvL0) { - auto nL0GpuQ = DPCTLQueueMgr_GetNumQueues(DPCTL_LEVEL_ZERO, DPCTL_GPU); + auto nL0GpuQ = DPCTLDeviceMgr_GetNumDevices(DPCTL_LEVEL_ZERO | DPCTL_GPU); if (!nL0GpuQ) GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto QueueRef = DPCTLQueueMgr_GetQueue(DPCTL_LEVEL_ZERO, DPCTL_GPU, 0); + auto FSRef = DPCTLFilterSelector_Create("level_zero:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); auto CtxRef = DPCTLQueue_GetContext(QueueRef); auto PRef = DPCTLProgram_CreateFromSpirv(CtxRef, spirvBuffer.data(), spirvFileSize, nullptr); @@ -198,6 +208,8 @@ TEST_F(TestDPCTLSyclProgramInterface, CheckCreateFromSpirvL0) DPCTLQueue_Delete(QueueRef); DPCTLContext_Delete(CtxRef); DPCTLProgram_Delete(PRef); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); } #endif @@ -205,8 +217,9 @@ TEST_F(TestDPCTLSyclProgramInterface, CheckGetKernelOCLSource) { if (!nOpenCLGpuQ) GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto QueueRef = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto FSRef = DPCTLFilterSelector_Create("opencl:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); auto CtxRef = DPCTLQueue_GetContext(QueueRef); auto PRef = DPCTLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, CompileOpts); @@ -226,14 +239,17 @@ TEST_F(TestDPCTLSyclProgramInterface, CheckGetKernelOCLSource) DPCTLQueue_Delete(QueueRef); DPCTLContext_Delete(CtxRef); DPCTLProgram_Delete(PRef); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); } TEST_F(TestDPCTLSyclProgramInterface, CheckGetKernelSpirv) { if (!nOpenCLGpuQ) GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto QueueRef = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto FSRef = DPCTLFilterSelector_Create("opencl:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); auto CtxRef = DPCTLQueue_GetContext(QueueRef); auto PRef = DPCTLProgram_CreateFromSpirv(CtxRef, spirvBuffer.data(), spirvFileSize, nullptr); @@ -253,4 +269,6 @@ TEST_F(TestDPCTLSyclProgramInterface, CheckGetKernelSpirv) DPCTLQueue_Delete(QueueRef); DPCTLContext_Delete(CtxRef); DPCTLProgram_Delete(PRef); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); } diff --git a/dpctl-capi/tests/test_sycl_queue_interface.cpp b/dpctl-capi/tests/test_sycl_queue_interface.cpp index c6e1669ad0..9e14e49980 100644 --- a/dpctl-capi/tests/test_sycl_queue_interface.cpp +++ b/dpctl-capi/tests/test_sycl_queue_interface.cpp @@ -26,6 +26,9 @@ #include "Support/CBindingWrapping.h" #include "dpctl_sycl_context_interface.h" +#include "dpctl_sycl_device_interface.h" +#include "dpctl_sycl_device_manager.h" +#include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_event_interface.h" #include "dpctl_sycl_kernel_interface.h" #include "dpctl_sycl_program_interface.h" @@ -42,6 +45,7 @@ namespace constexpr size_t SIZE = 1024; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPCTLSyclUSMRef); +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPCTLSyclQueueRef); void add_kernel_checker(const float *a, const float *b, const float *c) { @@ -75,7 +79,7 @@ bool has_devices() return ret; } -} // namespace +} /* End of anonymous namespace */ struct TestDPCTLSyclQueueInterface : public ::testing::Test { @@ -103,34 +107,93 @@ struct TestDPCTLSyclQueueInterface : public ::testing::Test ~TestDPCTLSyclQueueInterface() {} }; -TEST_F(TestDPCTLSyclQueueInterface, CheckAreEq) +struct TestDPCTLQueueMemberFunctions + : public ::testing::TestWithParam { - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); +protected: + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + DPCTLSyclQueueRef QRef = nullptr; + + TestDPCTLQueueMemberFunctions() + { + DSRef = DPCTLFilterSelector_Create(GetParam()); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + } + + void SetUp() + { + if (!QRef) { + auto message = "Skipping as no device of type " + + std::string(GetParam()) + "."; + GTEST_SKIP_(message.c_str()); + } + } + + ~TestDPCTLQueueMemberFunctions() + { + DPCTLQueue_Delete(QRef); + DPCTLDeviceSelector_Delete(DSRef); + DPCTLDevice_Delete(DRef); + } +}; + +TEST_F(TestDPCTLSyclQueueInterface, Check_CreateForDevice) +{ + /* We are testing that we do not crash even when input is garbage. */ + DPCTLSyclQueueRef QRef = nullptr; + + EXPECT_NO_FATAL_FAILURE( + QRef = DPCTLQueue_CreateForDevice(nullptr, nullptr, 0)); + ASSERT_TRUE(QRef == nullptr); +} - auto nOclGPU = DPCTLQueueMgr_GetNumQueues( - DPCTLSyclBackendType::DPCTL_OPENCL, DPCTLSyclDeviceType::DPCTL_GPU); +TEST_F(TestDPCTLSyclQueueInterface, Check_Copy) +{ + DPCTLSyclQueueRef Q1 = nullptr; + DPCTLSyclQueueRef Q2 = nullptr; + EXPECT_NO_FATAL_FAILURE(Q1 = DPCTLQueueMgr_GetCurrentQueue()); + EXPECT_NO_FATAL_FAILURE(Q2 = DPCTLQueue_Copy(Q1)); + EXPECT_TRUE(bool(Q2)); + EXPECT_NO_FATAL_FAILURE(DPCTLQueue_Delete(Q1)); + EXPECT_NO_FATAL_FAILURE(DPCTLQueue_Delete(Q2)); +} + +TEST_F(TestDPCTLSyclQueueInterface, CheckAreEq) +{ + auto nOclGPU = DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_GPU); if (!nOclGPU) GTEST_SKIP_("Skipping: No OpenCL GPUs available.\n"); auto Q1 = DPCTLQueueMgr_GetCurrentQueue(); auto Q2 = DPCTLQueueMgr_GetCurrentQueue(); + EXPECT_TRUE(DPCTLQueue_AreEq(Q1, Q2)); - auto Def_Q = DPCTLQueueMgr_SetAsDefaultQueue( - DPCTLSyclBackendType::DPCTL_OPENCL, DPCTLSyclDeviceType::DPCTL_GPU, 0); - auto OclGPU_Q0 = DPCTLQueueMgr_PushQueue(DPCTLSyclBackendType::DPCTL_OPENCL, - DPCTLSyclDeviceType::DPCTL_GPU, 0); - auto OclGPU_Q1 = DPCTLQueueMgr_PushQueue(DPCTLSyclBackendType::DPCTL_OPENCL, - DPCTLSyclDeviceType::DPCTL_GPU, 0); - EXPECT_TRUE(DPCTLQueue_AreEq(Def_Q, OclGPU_Q0)); - EXPECT_TRUE(DPCTLQueue_AreEq(Def_Q, OclGPU_Q1)); - EXPECT_TRUE(DPCTLQueue_AreEq(OclGPU_Q0, OclGPU_Q1)); - DPCTLQueue_Delete(Def_Q); - DPCTLQueue_Delete(OclGPU_Q0); - DPCTLQueue_Delete(OclGPU_Q1); - DPCTLQueueMgr_PopQueue(); - DPCTLQueueMgr_PopQueue(); + auto FSRef = DPCTLFilterSelector_Create("opencl:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto Q3 = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); + auto Q4 = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); + + // These are different queues + EXPECT_FALSE(DPCTLQueue_AreEq(Q3, Q4)); + + auto C0 = DPCTLQueue_GetContext(Q3); + auto C1 = DPCTLQueue_GetContext(Q4); + + // All the queues should share the same context + EXPECT_TRUE(DPCTLContext_AreEq(C0, C1)); + + DPCTLContext_Delete(C0); + DPCTLContext_Delete(C1); + DPCTLQueue_Delete(Q1); + DPCTLQueue_Delete(Q2); + DPCTLQueue_Delete(Q3); + DPCTLQueue_Delete(Q4); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); } TEST_F(TestDPCTLSyclQueueInterface, CheckAreEq2) @@ -138,139 +201,88 @@ TEST_F(TestDPCTLSyclQueueInterface, CheckAreEq2) if (!has_devices()) GTEST_SKIP_("Skipping: No Sycl devices.\n"); - auto nOclGPU = DPCTLQueueMgr_GetNumQueues( - DPCTLSyclBackendType::DPCTL_OPENCL, DPCTLSyclDeviceType::DPCTL_GPU); - auto nOclCPU = DPCTLQueueMgr_GetNumQueues( - DPCTLSyclBackendType::DPCTL_OPENCL, DPCTLSyclDeviceType::DPCTL_CPU); + auto nOclGPU = DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_GPU); + auto nOclCPU = DPCTLDeviceMgr_GetNumDevices( + DPCTLSyclBackendType::DPCTL_OPENCL | DPCTL_CPU); if (!nOclGPU || !nOclCPU) GTEST_SKIP_("OpenCL GPUs and CPU not available.\n"); - auto GPU_Q = DPCTLQueueMgr_PushQueue(DPCTLSyclBackendType::DPCTL_OPENCL, - DPCTLSyclDeviceType::DPCTL_GPU, 0); - auto CPU_Q = DPCTLQueueMgr_PushQueue(DPCTLSyclBackendType::DPCTL_OPENCL, - DPCTLSyclDeviceType::DPCTL_CPU, 0); + + auto FSRef = DPCTLFilterSelector_Create("opencl:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto FSRef2 = DPCTLFilterSelector_Create("opencl:cpu:0"); + auto DRef2 = DPCTLDevice_CreateFromSelector(FSRef2); + auto GPU_Q = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CPU_Q = + DPCTLQueue_CreateForDevice(DRef2, nullptr, DPCTL_DEFAULT_PROPERTY); + EXPECT_FALSE(DPCTLQueue_AreEq(GPU_Q, CPU_Q)); - DPCTLQueueMgr_PopQueue(); - DPCTLQueueMgr_PopQueue(); + + DPCTLQueue_Delete(GPU_Q); + DPCTLQueue_Delete(CPU_Q); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); + DPCTLDeviceSelector_Delete(FSRef2); + DPCTLDevice_Delete(DRef2); } -TEST_F(TestDPCTLSyclQueueInterface, CheckGetBackend) +TEST_P(TestDPCTLQueueMemberFunctions, CheckGetBackend) { - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - - auto Q1 = DPCTLQueueMgr_GetCurrentQueue(); - auto BE = DPCTLQueue_GetBackend(Q1); - EXPECT_TRUE((BE == DPCTL_OPENCL) || (BE == DPCTL_LEVEL_ZERO) || - (BE == DPCTL_CUDA) || (BE == DPCTL_HOST)); - DPCTLQueue_Delete(Q1); - if (DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_GPU, 0); - EXPECT_TRUE(DPCTLQueue_GetBackend(Q) == DPCTL_OPENCL); - DPCTLQueue_Delete(Q); - DPCTLQueueMgr_PopQueue(); - } - if (DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_CPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_CPU, 0); - EXPECT_TRUE(DPCTLQueue_GetBackend(Q) == DPCTL_OPENCL); - DPCTLQueue_Delete(Q); - DPCTLQueueMgr_PopQueue(); - } - if (DPCTLQueueMgr_GetNumQueues(DPCTL_LEVEL_ZERO, DPCTL_GPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_LEVEL_ZERO, DPCTL_GPU, 0); - EXPECT_TRUE(DPCTLQueue_GetBackend(Q) == DPCTL_LEVEL_ZERO); - DPCTLQueue_Delete(Q); - DPCTLQueueMgr_PopQueue(); + auto q = unwrap(QRef); + auto Backend = q->get_device().get_platform().get_backend(); + auto Bty = DPCTLQueue_GetBackend(QRef); + switch (Bty) { + case DPCTL_CUDA: + EXPECT_TRUE(Backend == backend::cuda); + break; + case DPCTL_HOST: + EXPECT_TRUE(Backend == backend::host); + break; + case DPCTL_LEVEL_ZERO: + EXPECT_TRUE(Backend == backend::level_zero); + break; + case DPCTL_OPENCL: + EXPECT_TRUE(Backend == backend::opencl); + break; + default: + FAIL(); } } -TEST_F(TestDPCTLSyclQueueInterface, CheckGetContext) +TEST_P(TestDPCTLQueueMemberFunctions, CheckGetContext) { - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - - auto Q1 = DPCTLQueueMgr_GetCurrentQueue(); - auto Ctx = DPCTLQueue_GetContext(Q1); + auto Ctx = DPCTLQueue_GetContext(QRef); ASSERT_TRUE(Ctx != nullptr); - DPCTLQueue_Delete(Q1); DPCTLContext_Delete(Ctx); - - if (DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_GPU, 0); - auto OclGpuCtx = DPCTLQueue_GetContext(Q); - ASSERT_TRUE(OclGpuCtx != nullptr); - DPCTLQueue_Delete(Q); - DPCTLContext_Delete(OclGpuCtx); - DPCTLQueueMgr_PopQueue(); - } - if (DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_CPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_CPU, 0); - auto OclCpuCtx = DPCTLQueue_GetContext(Q); - ASSERT_TRUE(OclCpuCtx != nullptr); - DPCTLQueue_Delete(Q); - DPCTLContext_Delete(OclCpuCtx); - DPCTLQueueMgr_PopQueue(); - } - if (DPCTLQueueMgr_GetNumQueues(DPCTL_LEVEL_ZERO, DPCTL_GPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_LEVEL_ZERO, DPCTL_GPU, 0); - auto L0Ctx = DPCTLQueue_GetContext(Q); - ASSERT_TRUE(Ctx != nullptr); - DPCTLQueue_Delete(Q); - DPCTLContext_Delete(L0Ctx); - DPCTLQueueMgr_PopQueue(); - } } -TEST_F(TestDPCTLSyclQueueInterface, CheckGetDevice) +TEST_P(TestDPCTLQueueMemberFunctions, CheckGetDevice) { - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - - auto Q1 = DPCTLQueueMgr_GetCurrentQueue(); - auto D = DPCTLQueue_GetDevice(Q1); + auto D = DPCTLQueue_GetDevice(QRef); ASSERT_TRUE(D != nullptr); - DPCTLQueue_Delete(Q1); DPCTLDevice_Delete(D); - - if (DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_GPU, 0); - auto OCLGPU_D = DPCTLQueue_GetDevice(Q); - ASSERT_TRUE(OCLGPU_D != nullptr); - EXPECT_TRUE(DPCTLDevice_IsGPU(OCLGPU_D)); - DPCTLQueue_Delete(Q); - DPCTLDevice_Delete(OCLGPU_D); - DPCTLQueueMgr_PopQueue(); - } - if (DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_CPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_CPU, 0); - auto OCLCPU_D = DPCTLQueue_GetDevice(Q); - ASSERT_TRUE(OCLCPU_D != nullptr); - EXPECT_TRUE(DPCTLDevice_IsCPU(OCLCPU_D)); - DPCTLQueue_Delete(Q); - DPCTLDevice_Delete(OCLCPU_D); - DPCTLQueueMgr_PopQueue(); - } - if (DPCTLQueueMgr_GetNumQueues(DPCTL_LEVEL_ZERO, DPCTL_GPU)) { - auto Q = DPCTLQueueMgr_PushQueue(DPCTL_LEVEL_ZERO, DPCTL_GPU, 0); - auto L0GPU_D = DPCTLQueue_GetDevice(Q); - ASSERT_TRUE(L0GPU_D != nullptr); - EXPECT_TRUE(DPCTLDevice_IsGPU(L0GPU_D)); - DPCTLQueue_Delete(Q); - DPCTLDevice_Delete(L0GPU_D); - DPCTLQueueMgr_PopQueue(); - } } +INSTANTIATE_TEST_SUITE_P(DPCTLQueueMemberFuncTests, + TestDPCTLQueueMemberFunctions, + ::testing::Values("opencl:gpu:0", + "opencl:cpu:0", + "level_zero:gpu:0")); + TEST_F(TestDPCTLSyclQueueInterface, CheckSubmit) { if (!has_devices()) GTEST_SKIP_("Skipping: No Sycl devices.\n"); - auto nOpenCLGpuQ = DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU); + auto nOpenCLGpuQ = DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_GPU); if (!nOpenCLGpuQ) GTEST_SKIP_("Skipping: No OpenCL GPU device.\n"); - auto Queue = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto FSRef = DPCTLFilterSelector_Create("opencl:gpu:0"); + auto DRef = DPCTLDevice_CreateFromSelector(FSRef); + auto Queue = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); auto CtxRef = DPCTLQueue_GetContext(Queue); auto PRef = DPCTLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, CompileOpts); @@ -349,4 +361,6 @@ TEST_F(TestDPCTLSyclQueueInterface, CheckSubmit) DPCTLQueue_Delete(Queue); DPCTLContext_Delete(CtxRef); DPCTLProgram_Delete(PRef); + DPCTLDeviceSelector_Delete(FSRef); + DPCTLDevice_Delete(DRef); } diff --git a/dpctl-capi/tests/test_sycl_queue_manager.cpp b/dpctl-capi/tests/test_sycl_queue_manager.cpp index a5eb406b62..bf30ee9d2a 100644 --- a/dpctl-capi/tests/test_sycl_queue_manager.cpp +++ b/dpctl-capi/tests/test_sycl_queue_manager.cpp @@ -23,159 +23,121 @@ /// dpctl_sycl_queue_interface.h and dpctl_sycl_queue_manager.h. /// //===----------------------------------------------------------------------===// +#include "Support/CBindingWrapping.h" #include "dpctl_sycl_context_interface.h" #include "dpctl_sycl_device_interface.h" #include "dpctl_sycl_device_manager.h" +#include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_queue_interface.h" #include "dpctl_sycl_queue_manager.h" +#include #include #include -#include - using namespace std; using namespace cl::sycl; namespace { + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPCTLSyclQueueRef); + void foo(size_t &num) { - auto q1 = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_CPU, 0); - auto q2 = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto DS1 = DPCTLFilterSelector_Create("opencl:gpu"); + auto DS2 = DPCTLFilterSelector_Create("opencl:cpu"); + auto D1 = DPCTLDevice_CreateFromSelector(DS1); + auto D2 = DPCTLDevice_CreateFromSelector(DS2); + auto Q1 = DPCTLQueue_CreateForDevice(D1, nullptr, DPCTL_DEFAULT_PROPERTY); + auto Q2 = DPCTLQueue_CreateForDevice(D2, nullptr, DPCTL_DEFAULT_PROPERTY); + DPCTLQueueMgr_PushQueue(Q2); + DPCTLQueueMgr_PushQueue(Q1); + // Capture the number of active queues in first - num = DPCTLQueueMgr_GetNumActivatedQueues(); + num = DPCTLQueueMgr_GetQueueStackSize(); DPCTLQueueMgr_PopQueue(); DPCTLQueueMgr_PopQueue(); - DPCTLQueue_Delete(q1); - DPCTLQueue_Delete(q2); + DPCTLQueue_Delete(Q1); + DPCTLQueue_Delete(Q2); + DPCTLDeviceSelector_Delete(DS1); + DPCTLDeviceSelector_Delete(DS2); + DPCTLDevice_Delete(D1); + DPCTLDevice_Delete(D2); } void bar(size_t &num) { - auto q1 = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + auto DS1 = DPCTLFilterSelector_Create("opencl:gpu"); + auto D1 = DPCTLDevice_CreateFromSelector(DS1); + auto Q1 = DPCTLQueue_CreateForDevice(D1, nullptr, DPCTL_DEFAULT_PROPERTY); + DPCTLQueueMgr_PushQueue(Q1); // Capture the number of active queues in second - num = DPCTLQueueMgr_GetNumActivatedQueues(); + num = DPCTLQueueMgr_GetQueueStackSize(); DPCTLQueueMgr_PopQueue(); - DPCTLQueue_Delete(q1); + DPCTLQueue_Delete(Q1); + DPCTLDeviceSelector_Delete(DS1); + DPCTLDevice_Delete(D1); } -bool has_devices() +} /* end of anonymous namespace */ + +struct TestDPCTLSyclQueueManager : public ::testing::TestWithParam { - bool ret = false; - for (auto &p : platform::get_platforms()) { - if (p.is_host()) - continue; - if (!p.get_devices().empty()) { - ret = true; - break; - } + DPCTLSyclDeviceSelectorRef DSRef = DPCTLFilterSelector_Create(GetParam()); + DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); + + TestDPCTLSyclQueueManager() + { + DSRef = DPCTLFilterSelector_Create(GetParam()); + DRef = DPCTLDevice_CreateFromSelector(DSRef); } - return ret; -} -} // namespace + void SetUp() + { + if (!DRef) { + auto message = "Skipping as no device of type " + + std::string(GetParam()) + "."; + GTEST_SKIP_(message.c_str()); + } + } -struct TestDPCTLSyclQueueManager : public ::testing::Test -{ + ~TestDPCTLSyclQueueManager() + { + DPCTLDeviceSelector_Delete(DSRef); + DPCTLDevice_Delete(DRef); + } }; -TEST_F(TestDPCTLSyclQueueManager, CheckDPCTLGetCurrentQueue) +TEST_P(TestDPCTLSyclQueueManager, CheckDPCTLGetCurrentQueue) { - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - - DPCTLSyclQueueRef q = nullptr; - ASSERT_NO_THROW(q = DPCTLQueueMgr_GetCurrentQueue()); + DPCTLSyclQueueRef q = DPCTLQueueMgr_GetCurrentQueue(); ASSERT_TRUE(q != nullptr); } -TEST_F(TestDPCTLSyclQueueManager, CheckDPCTLGetOpenCLCpuQ) -{ - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - - auto nOpenCLCpuQ = DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_CPU); - if (!nOpenCLCpuQ) - GTEST_SKIP_("Skipping: No OpenCL CPU device found."); - - auto q = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_CPU, 0); - EXPECT_TRUE(q != nullptr); - auto sycl_q = reinterpret_cast(q); - auto be = sycl_q->get_context().get_platform().get_backend(); - EXPECT_EQ(be, backend::opencl); - auto devty = sycl_q->get_device().get_info(); - EXPECT_EQ(devty, info::device_type::cpu); - - auto non_existent_device_num = nOpenCLCpuQ + 1; - // Non-existent device number should return nullptr - auto null_q = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_CPU, - non_existent_device_num); - ASSERT_TRUE(null_q == nullptr); -} - -TEST_F(TestDPCTLSyclQueueManager, CheckDPCTLGetOpenCLGpuQ) +TEST_P(TestDPCTLSyclQueueManager, CheckIsCurrentQueue) { - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - - auto nOpenCLGpuQ = DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU); - if (!nOpenCLGpuQ) - GTEST_SKIP_("Skipping: No OpenCL GPU device found.\n"); - - auto q = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, 0); - EXPECT_TRUE(q != nullptr); - auto sycl_q = reinterpret_cast(q); - auto be = sycl_q->get_context().get_platform().get_backend(); - EXPECT_EQ(be, backend::opencl); - auto devty = sycl_q->get_device().get_info(); - EXPECT_EQ(devty, info::device_type::gpu); - - auto non_existent_device_num = nOpenCLGpuQ + 1; - // Non-existent device number should return nullptr - auto null_q = DPCTLQueueMgr_GetQueue(DPCTL_OPENCL, DPCTL_GPU, - non_existent_device_num); - ASSERT_TRUE(null_q == nullptr); -} - -TEST_F(TestDPCTLSyclQueueManager, CheckDPCTLGetLevel0GpuQ) -{ - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - - auto nL0GpuQ = DPCTLQueueMgr_GetNumQueues(DPCTL_LEVEL_ZERO, DPCTL_GPU); - if (!nL0GpuQ) - GTEST_SKIP_("Skipping: No OpenCL GPU device found.\n"); - - auto q = DPCTLQueueMgr_GetQueue(DPCTL_LEVEL_ZERO, DPCTL_GPU, 0); - EXPECT_TRUE(q != nullptr); - auto sycl_q = reinterpret_cast(q); - auto be = sycl_q->get_context().get_platform().get_backend(); - EXPECT_EQ(be, backend::level_zero); - auto devty = sycl_q->get_device().get_info(); - EXPECT_EQ(devty, info::device_type::gpu); - - auto non_existent_device_num = nL0GpuQ + 1; - // Non-existent device number should return nullptr - auto null_q = DPCTLQueueMgr_GetQueue(DPCTL_LEVEL_ZERO, DPCTL_GPU, - non_existent_device_num); - ASSERT_TRUE(null_q == nullptr); + auto Q0 = DPCTLQueueMgr_GetCurrentQueue(); + EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q0)); + auto Q1 = DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + DPCTLQueueMgr_PushQueue(Q1); + EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q1)); + DPCTLQueue_Delete(Q1); + DPCTLQueueMgr_PopQueue(); + EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q0)); + DPCTLQueue_Delete(Q0); } -TEST_F(TestDPCTLSyclQueueManager, CheckGetNumActivatedQueues) +TEST(TestDPCTLSyclQueueManager, CheckGetNumActivatedQueues) { - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); + if (!(DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_GPU) && + DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_CPU))) + GTEST_SKIP_("Both OpenCL gpu and cpu drivers needed for this test."); size_t num0, num1, num2, num4; - - auto nOpenCLCpuQ = DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_CPU); - auto nOpenCLGpuQ = DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU); - - // Add a queue to main thread - if (!nOpenCLCpuQ || !nOpenCLGpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); - - auto q = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_CPU, 0); + auto DS1 = DPCTLFilterSelector_Create("opencl:gpu"); + auto D1 = DPCTLDevice_CreateFromSelector(DS1); + auto Q1 = DPCTLQueue_CreateForDevice(D1, nullptr, DPCTL_DEFAULT_PROPERTY); + DPCTLQueueMgr_PushQueue(Q1); std::thread first(foo, std::ref(num1)); std::thread second(bar, std::ref(num2)); @@ -185,9 +147,9 @@ TEST_F(TestDPCTLSyclQueueManager, CheckGetNumActivatedQueues) second.join(); // Capture the number of active queues in first - num0 = DPCTLQueueMgr_GetNumActivatedQueues(); + num0 = DPCTLQueueMgr_GetQueueStackSize(); DPCTLQueueMgr_PopQueue(); - num4 = DPCTLQueueMgr_GetNumActivatedQueues(); + num4 = DPCTLQueueMgr_GetQueueStackSize(); // Verify what the expected number of activated queues each time a thread // called getNumActivatedQueues. @@ -196,45 +158,26 @@ TEST_F(TestDPCTLSyclQueueManager, CheckGetNumActivatedQueues) EXPECT_EQ(num2, 1ul); EXPECT_EQ(num4, 0ul); - DPCTLQueue_Delete(q); -} - -TEST_F(TestDPCTLSyclQueueManager, CheckDPCTLDumpDeviceInfo) -{ - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - auto q = DPCTLQueueMgr_GetCurrentQueue(); - EXPECT_NO_FATAL_FAILURE( - DPCTLDeviceMgr_PrintDeviceInfo(DPCTLQueue_GetDevice(q))); - EXPECT_NO_FATAL_FAILURE(DPCTLQueue_Delete(q)); -} - -TEST_F(TestDPCTLSyclQueueManager, CheckIsCurrentQueue) -{ - if (!has_devices()) - GTEST_SKIP_("Skipping: No Sycl devices.\n"); - if (!DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU)) - GTEST_SKIP_("Skipping: No OpenCL GPU.\n"); - - auto Q0 = DPCTLQueueMgr_GetCurrentQueue(); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q0)); - auto Q1 = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_GPU, 0); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q1)); DPCTLQueue_Delete(Q1); - DPCTLQueueMgr_PopQueue(); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q0)); - DPCTLQueue_Delete(Q0); + DPCTLDeviceSelector_Delete(DS1); + DPCTLDevice_Delete(D1); } -TEST_F(TestDPCTLSyclQueueManager, CheckIsCurrentQueue2) +TEST(TestDPCTLSyclQueueManager, CheckIsCurrentQueue2) { - if (!DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_CPU) || - !DPCTLQueueMgr_GetNumQueues(DPCTL_OPENCL, DPCTL_GPU)) - GTEST_SKIP_("Skipping: No OpenCL GPU and OpenCL CPU.\n"); - - auto Q1 = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_GPU, 0); + if (!(DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_GPU) && + DPCTLDeviceMgr_GetNumDevices(DPCTL_OPENCL | DPCTL_CPU))) + GTEST_SKIP_("Both OpenCL gpu and cpu drivers needed for this test."); + + auto DS1 = DPCTLFilterSelector_Create("opencl:gpu"); + auto DS2 = DPCTLFilterSelector_Create("opencl:cpu"); + auto D1 = DPCTLDevice_CreateFromSelector(DS1); + auto D2 = DPCTLDevice_CreateFromSelector(DS2); + auto Q1 = DPCTLQueue_CreateForDevice(D1, nullptr, DPCTL_DEFAULT_PROPERTY); + DPCTLQueueMgr_PushQueue(Q1); EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q1)); - auto Q2 = DPCTLQueueMgr_PushQueue(DPCTL_OPENCL, DPCTL_CPU, 0); + auto Q2 = DPCTLQueue_CreateForDevice(D2, nullptr, DPCTL_DEFAULT_PROPERTY); + DPCTLQueueMgr_PushQueue(Q2); EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q2)); EXPECT_FALSE(DPCTLQueueMgr_IsCurrentQueue(Q1)); DPCTLQueue_Delete(Q2); @@ -242,21 +185,14 @@ TEST_F(TestDPCTLSyclQueueManager, CheckIsCurrentQueue2) EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q1)); DPCTLQueue_Delete(Q1); DPCTLQueueMgr_PopQueue(); + DPCTLDeviceSelector_Delete(DS1); + DPCTLDeviceSelector_Delete(DS2); + DPCTLDevice_Delete(D1); + DPCTLDevice_Delete(D2); } -TEST_F(TestDPCTLSyclQueueManager, CreateQueueFromDeviceAndContext) -{ - auto Q = DPCTLQueueMgr_GetCurrentQueue(); - auto D = DPCTLQueue_GetDevice(Q); - auto C = DPCTLQueue_GetContext(Q); - - auto Q2 = DPCTLQueueMgr_GetQueueFromContextAndDevice(C, D); - auto D2 = DPCTLQueue_GetDevice(Q2); - auto C2 = DPCTLQueue_GetContext(Q2); - - EXPECT_TRUE(DPCTLDevice_AreEq(D, D2)); - EXPECT_TRUE(DPCTLContext_AreEq(C, C2)); - - DPCTLQueue_Delete(Q2); - DPCTLQueue_Delete(Q); -} +INSTANTIATE_TEST_SUITE_P(QueueMgrFunctions, + TestDPCTLSyclQueueManager, + ::testing::Values("opencl:gpu:0", + "opencl:cpu:0", + "level_zero:gpu:0")); diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 183dad3dea..7aab8ca8d5 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -25,6 +25,9 @@ from libcpp cimport bool from libc.stdint cimport uint32_t +cdef extern from "dpctl_error_handler_type.h": + ctypedef void error_handler_callback(int err_code) + cdef extern from "dpctl_utils.h": cdef void DPCTLCString_Delete(const char *str) cdef void DPCTLSize_t_Array_Delete(size_t *arr) @@ -73,6 +76,11 @@ cdef extern from "dpctl_sycl_enum_types.h": ctypedef _arg_data_type DPCTLKernelArgType + ctypedef enum _queue_property_type 'DPCTLQueuePropertyType': + _DEFAULT_PROPERTY 'DPCTL_DEFAULT_PROPERTY' + _ENABLE_PROFILING 'DPCTL_ENABLE_PROFILING' + _IN_ORDER 'DPCTL_IN_ORDER' + cdef enum _aspect_type 'DPCTLSyclAspectType': _host 'host', _cpu 'cpu', @@ -152,6 +160,9 @@ cdef extern from "dpctl_sycl_device_interface.h": cdef extern from "dpctl_sycl_device_manager.h": cdef struct DPCTLDeviceVector ctypedef DPCTLDeviceVector *DPCTLDeviceVectorRef + ctypedef struct DPCTL_DeviceAndContextPair: + DPCTLSyclDeviceRef DRef + DPCTLSyclContextRef CRef cdef void DPCTLDeviceVector_Delete(DPCTLDeviceVectorRef DVRef) cdef void DPCTLDeviceVector_Clear(DPCTLDeviceVectorRef DVRef) @@ -162,6 +173,8 @@ cdef extern from "dpctl_sycl_device_manager.h": cdef DPCTLDeviceVectorRef DPCTLDeviceMgr_GetDevices(int device_identifier) cdef size_t DPCTLDeviceMgr_GetNumDevices(int device_identifier) cdef void DPCTLDeviceMgr_PrintDeviceInfo(const DPCTLSyclDeviceRef DRef) + cdef DPCTL_DeviceAndContextPair DPCTLDeviceMgr_GetDeviceAndContextPair( + const DPCTLSyclDeviceRef DRef) cdef extern from "dpctl_sycl_device_selector_interface.h": @@ -173,6 +186,7 @@ cdef extern from "dpctl_sycl_device_selector_interface.h": DPCTLSyclDeviceSelectorRef DPCTLGPUSelector_Create() DPCTLSyclDeviceSelectorRef DPCTLHostSelector_Create() void DPCTLDeviceSelector_Delete(DPCTLSyclDeviceSelectorRef DSRef) + int DPCTLDeviceSelector_Score(DPCTLSyclDeviceSelectorRef, DPCTLSyclDeviceRef) cdef extern from "dpctl_sycl_event_interface.h": @@ -224,7 +238,17 @@ cdef extern from "dpctl_sycl_program_interface.h": cdef extern from "dpctl_sycl_queue_interface.h": cdef bool DPCTLQueue_AreEq(const DPCTLSyclQueueRef QRef1, const DPCTLSyclQueueRef QRef2) + cdef DPCTLSyclQueueRef DPCTLQueue_Create( + const DPCTLSyclContextRef CRef, + const DPCTLSyclDeviceRef DRef, + error_handler_callback *error_handler, + int properties) + cdef DPCTLSyclQueueRef DPCTLQueue_CreateForDevice( + const DPCTLSyclDeviceRef dRef, + error_handler_callback *handler, + int properties) cdef void DPCTLQueue_Delete(DPCTLSyclQueueRef QRef) + cdef DPCTLSyclQueueRef DPCTLQueue_Copy(DPCTLSyclQueueRef QRef) cdef DPCTLSyclBackendType DPCTLQueue_GetBackend(const DPCTLSyclQueueRef Q) cdef DPCTLSyclContextRef DPCTLQueue_GetContext(const DPCTLSyclQueueRef Q) cdef DPCTLSyclDeviceRef DPCTLQueue_GetDevice(const DPCTLSyclQueueRef Q) @@ -268,26 +292,12 @@ cdef extern from "dpctl_sycl_queue_interface.h": cdef extern from "dpctl_sycl_queue_manager.h": cdef DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue() - cdef size_t DPCTLQueueMgr_GetNumQueues(DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy) - cdef size_t DPCTLQueueMgr_GetNumActivatedQueues() - cdef DPCTLSyclQueueRef DPCTLQueueMgr_GetQueue( - DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) + cdef bool DPCTLQueueMgr_GlobalQueueIsCurrent() cdef bool DPCTLQueueMgr_IsCurrentQueue(const DPCTLSyclQueueRef QRef) cdef void DPCTLQueueMgr_PopQueue() - cdef DPCTLSyclQueueRef DPCTLQueueMgr_PushQueue( - DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) - cdef DPCTLSyclQueueRef DPCTLQueueMgr_SetAsDefaultQueue( - DPCTLSyclBackendType BETy, - DPCTLSyclDeviceType DeviceTy, - size_t DNum) - cdef DPCTLSyclQueueRef DPCTLQueueMgr_GetQueueFromContextAndDevice( - DPCTLSyclContextRef CRef, - DPCTLSyclDeviceRef DRef) + cdef void DPCTLQueueMgr_PushQueue(const DPCTLSyclQueueRef dRef) + cdef void DPCTLQueueMgr_SetGlobalQueue(const DPCTLSyclQueueRef dRef) + cdef size_t DPCTLQueueMgr_GetQueueStackSize() cdef extern from "dpctl_sycl_usm_interface.h": diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 01c0c5642f..016f249aa5 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -46,6 +46,7 @@ from ._backend cimport ( DPCTLDeviceMgr_PrintDeviceInfo, DPCTLFilterSelector_Create, DPCTLDeviceSelector_Delete, + DPCTLDeviceSelector_Score, DPCTLSize_t_Array_Delete, DPCTLSyclBackendType, DPCTLSyclDeviceRef, @@ -124,7 +125,7 @@ cdef class SyclDevice(_SyclDevice): device._host_device = DPCTLDevice_IsHost(DRef) device._max_compute_units = DPCTLDevice_GetMaxComputeUnits(DRef) if (device._host_device): - device._max_num_sub_groups = 0 + device._max_num_sub_groups = -1 else: device._max_num_sub_groups = DPCTLDevice_GetMaxNumSubGroups(DRef) device._max_work_group_size = DPCTLDevice_GetMaxWorkGroupSize(DRef) @@ -463,6 +464,15 @@ cdef class SyclDevice(_SyclDevice): cdef _aspect_type AT = _aspect_type._usm_system_allocator return DPCTLDevice_HasAspect(self._device_ref, AT) + @property + def default_selector_score(self): + cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create() + cdef int score = -1 + if (DSRef): + score = DPCTLDeviceSelector_Score(DSRef, self._device_ref) + DPCTLDeviceSelector_Delete(DSRef) + return score + @property def __name__(self): return "SyclDevice" diff --git a/dpctl/_sycl_device_factory.pxd b/dpctl/_sycl_device_factory.pxd index 88f9d3bc77..1a37c425cf 100644 --- a/dpctl/_sycl_device_factory.pxd +++ b/dpctl/_sycl_device_factory.pxd @@ -33,7 +33,6 @@ cpdef SyclDevice select_host_device() cpdef list get_devices(backend=*, device_type=*) cpdef int get_num_devices(backend=*, device_type=*) cpdef cpp_bool has_gpu_devices() -cpdef cpp_bool has_cpu_devices() -cpdef cpp_bool has_accelerator_devices() +cpdef cpp_bool has_cpu_devices() +cpdef cpp_bool has_accelerator_devices() cpdef cpp_bool has_host_device() - diff --git a/dpctl/_sycl_device_factory.pyx b/dpctl/_sycl_device_factory.pyx index 389553f877..7bac86e176 100644 --- a/dpctl/_sycl_device_factory.pyx +++ b/dpctl/_sycl_device_factory.pyx @@ -179,7 +179,9 @@ cpdef list get_devices(backend=backend_type.all, device_type=device_type_t.all): return devices -cpdef int get_num_devices(backend=backend_type.all, device_type=device_type_t.all): +cpdef int get_num_devices( + backend=backend_type.all, device_type=device_type_t.all +): cdef DPCTLSyclBackendType BTy = _backend_type._ALL_BACKENDS cdef DPCTLSyclDeviceType DTy = _device_type._ALL_DEVICES cdef int num_devices = 0 diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index f7bb0a4355..eab701dc05 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -20,39 +20,66 @@ """ This file declares the SyclQueue extension type. """ -from ._backend cimport * +from ._backend cimport ( + DPCTLSyclDeviceRef, + DPCTLKernelArgType, + DPCTLSyclQueueRef, +) from ._sycl_context cimport SyclContext from ._sycl_event cimport SyclEvent from ._sycl_device cimport SyclDevice from .program._program cimport SyclKernel +from libcpp cimport bool as cpp_bool -cdef class SyclQueue: +cdef class _SyclQueue: """ Python wrapper class for a sycl::queue. """ - cdef DPCTLSyclQueueRef _queue_ref cdef SyclContext _context cdef SyclDevice _device - cdef _raise_queue_submit_error (self, fname, errcode) - cdef _raise_invalid_range_error (self, fname, ndims, errcode) - cdef int _populate_args (self, list args, void **kargs, - DPCTLKernelArgType *kargty) - cdef int _populate_range (self, size_t Range[3], list gS, size_t nGS) +cdef class SyclQueue (_SyclQueue): + """ Python wrapper class for a sycl::queue. + """ + cdef int _init_queue_default(self, int) + cdef int _init_queue_from__SyclQueue(self, _SyclQueue) + cdef int _init_queue_from_DPCTLSyclDeviceRef(self, DPCTLSyclDeviceRef, int) + cdef int _init_queue_from_device(self, SyclDevice, int) + cdef int _init_queue_from_filter_string(self, const char *, int) + cdef int _init_queue_from_context_and_device( + self, SyclContext, SyclDevice, int + ) + cdef _raise_queue_submit_error(self, fname, errcode) + cdef _raise_invalid_range_error(self, fname, ndims, errcode) + cdef int _populate_args( + self, + list args, + void **kargs, + DPCTLKernelArgType *kargty + ) + cdef int _populate_range(self, size_t Range[3], list gS, size_t nGS) @staticmethod - cdef SyclQueue _create (DPCTLSyclQueueRef qref) + cdef SyclQueue _create(DPCTLSyclQueueRef qref) @staticmethod - cdef SyclQueue _create_from_context_and_device (SyclContext ctx, SyclDevice dev) - cpdef bool equals (self, SyclQueue q) - cpdef SyclContext get_sycl_context (self) - cpdef SyclDevice get_sycl_device (self) - cdef DPCTLSyclQueueRef get_queue_ref (self) - cpdef SyclEvent submit (self, SyclKernel kernel, list args, list gS, - list lS=*, list dEvents=*) - cpdef void wait (self) - cdef DPCTLSyclQueueRef get_queue_ref (self) - cpdef memcpy (self, dest, src, size_t count) - cpdef prefetch (self, ptr, size_t count=*) - cpdef mem_advise (self, ptr, size_t count, int mem) + cdef SyclQueue _create_from_context_and_device( + SyclContext ctx, SyclDevice dev + ) + cpdef cpp_bool equals(self, SyclQueue q) + cpdef SyclContext get_sycl_context(self) + cpdef SyclDevice get_sycl_device(self) + cdef DPCTLSyclQueueRef get_queue_ref(self) + cpdef SyclEvent submit( + self, + SyclKernel kernel, + list args, + list gS, + list lS=*, + list dEvents=* + ) + cpdef void wait(self) + cdef DPCTLSyclQueueRef get_queue_ref(self) + cpdef memcpy(self, dest, src, size_t count) + cpdef prefetch(self, ptr, size_t count=*) + cpdef mem_advise(self, ptr, size_t count, int mem) diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index e4430568a7..ed1c95e7c8 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -21,7 +21,37 @@ """ from __future__ import print_function -from ._backend cimport * +from ._backend cimport ( + _arg_data_type, + _backend_type, + _queue_property_type, + DPCTL_DeviceAndContextPair, + DPCTLContext_Delete, + DPCTLDefaultSelector_Create, + DPCTLDevice_CreateFromSelector, + DPCTLDeviceMgr_GetDeviceAndContextPair, + DPCTLDeviceSelector_Delete, + DPCTLDevice_Delete, + DPCTLFilterSelector_Create, + DPCTLQueue_AreEq, + DPCTLQueue_Copy, + DPCTLQueue_Create, + DPCTLQueue_Delete, + DPCTLQueue_GetBackend, + DPCTLQueue_GetContext, + DPCTLQueue_GetDevice, + DPCTLQueue_MemAdvise, + DPCTLQueue_Memcpy, + DPCTLQueue_Prefetch, + DPCTLQueue_SubmitNDRange, + DPCTLQueue_SubmitRange, + DPCTLQueue_Wait, + DPCTLSyclBackendType, + DPCTLSyclContextRef, + DPCTLSyclDeviceSelectorRef, + DPCTLSyclEventRef, + error_handler_callback, +) from .memory._memory cimport _Memory from . import backend_type import ctypes @@ -67,54 +97,312 @@ cdef class SyclQueueCreationError(Exception): pass +cdef class SyclAsynchronousError(Exception): + """ + A SyclAsynchronousError exception is raised when SYCL operation submission + or execution encounters an error. + """ + + +cdef void default_async_error_handler(int err) nogil except *: + with gil: + raise SyclAsynchronousError(err) + + +cdef int _parse_queue_properties(object prop) except *: + cdef int res = 0 + cdef object props + if isinstance(prop, int): + return prop + if not isinstance(prop, (tuple, list)): + props = (prop, ) + else: + props = prop + for p in props: + if isinstance(p, int): + res = res | p + elif isinstance(p, str): + if (p == "in_order"): + res = res | _queue_property_type._IN_ORDER + elif (p == "enable_profiling"): + res = res | _queue_property_type._ENABLE_PROFILING + elif (p == "default"): + res = res | _queue_property_type._DEFAULT_PROPERTY + else: + raise ValueError("queue property '{}' is not understood.".format(prop)) + else: + raise ValueError("queue property '{}' is not understood.".format(prop)) + return res + + +cdef class _SyclQueue: + """ Internal helper metaclass to abstract `cl::sycl::queue` instance. + """ + def __dealloc__(self): + if (self._queue_ref): + DPCTLQueue_Delete(self._queue_ref) + # self._context is a Python object and will be GC-ed + # self._device is a Python object + + cdef class SyclQueue: """ Python wrapper class for cl::sycl::queue. """ + def __cinit__(self, *args, **kwargs): + """ + SyclQueue(*, /, property=None) + create SyclQueue from default selector + SyclQueue(filter_string, *, /, propery=None) + create SyclQueue from filter selector string + SyclQueue(SyclDevice, *, / property=None) + create SyclQueue from give SyclDevice automatically + finding/creating SyclContext. + SyclQueue(SyclContext, SyclDevice, *, /, property=None) + create SyclQueue from give SyclContext, SyclDevice + """ + cdef int len_args + cdef int status = 0 + cdef const char *filter_c_str = NULL + if len(args) > 2: + raise TypeError( + "SyclQueue constructor takes 0, 1, or 2 positinal arguments, " + "but {} were given.".format(len(args)) + ) + props = _parse_queue_properties( + kwargs.pop('property', _queue_property_type._DEFAULT_PROPERTY) + ) + len_args = len(args) + if len_args == 0: + status = self._init_queue_default(props) + elif len_args == 1: + arg = args[0] + if type(arg) is unicode: + string = bytes(arg, "utf-8") + filter_c_str = string + status = self._init_queue_from_filter_string( + filter_c_str, props) + elif type(arg) is _SyclQueue: + status = self._init_queue_from__SyclQueue(<_SyclQueue>arg) + elif isinstance(arg, unicode): + string = bytes(unicode(arg), "utf-8") + filter_c_str = string + status = self._init_queue_from_filter_string( + filter_c_str, props) + elif isinstance(arg, SyclDevice): + status = self._init_queue_from_device( arg, props) + else: + raise TypeError( + "Positional argument {} is not a filter string or a " + "SyclDevice".format(arg) + ) + else: + ctx, dev = args + if not isinstance(ctx, SyclContext): + raise TypeError( + "SyclQueue constructor with two positional arguments " + "expected SyclContext as its first argument, but got {}." + .format(type(ctx)) + ) + if not isinstance(dev, SyclDevice): + raise TypeError( + "SyclQueue constructor with two positional arguments " + "expected SyclDevice as its second argument, but got {}." + .format(type(dev)) + ) + status = self._init_queue_from_context_and_device( + ctx, dev, props + ) + if status < 0: + if status == -1: + raise SyclQueueCreationError( + "Device filter selector string '{}' is not understood." + .format(arg) + ) + elif status == -2: + raise SyclQueueCreationError( + "SYCL Device '{}' could not be created.".format(arg) + ) + elif status == -3: + raise SyclQueueCreationError( + "SYCL Context could not be created from '{}'.".format(arg) + ) + elif status == -4: + if len_args == 2: + arg = args + raise SyclQueueCreationError( + "SYCL Queue failed to be created from '{}'.".format(arg) + ) + + cdef int _init_queue_from__SyclQueue(self, _SyclQueue other): + """ Copy data container _SyclQueue fields over. + """ + cdef DPCTLSyclQueueRef QRef = DPCTLQueue_Copy(other._queue_ref) + if (QRef is NULL): + return -4 + self._queue_ref = QRef + self._context = other._context + self._device = other._device + + cdef int _init_queue_from_DPCTLSyclDeviceRef( + self, DPCTLSyclDeviceRef DRef, int props + ): + """ + Initializes self by creating SyclQueue with specified error handler and + specified properties from the given device instance. SyclContext is + looked-up by DPCTL from a cache to avoid repeated construction of new + context for performance reasons. + + Returns: 0 : normal execution + -3 : Context creation/look-up failed + -4 : queue could not be created from context,device, error + handler and properties + """ + cdef DPCTLSyclContextRef CRef + cdef DPCTLSyclQueueRef QRef + cdef DPCTL_DeviceAndContextPair dev_ctx + + dev_ctx = DPCTLDeviceMgr_GetDeviceAndContextPair(DRef) + if (dev_ctx.CRef is NULL) or (dev_ctx.DRef is NULL): + return -3 + DRef = dev_ctx.DRef + CRef = dev_ctx.CRef + QRef = DPCTLQueue_Create( + CRef, + DRef, + &default_async_error_handler, + props + ) + if QRef is NULL: + DPCTLDevice_Delete(DRef) + DPCTLContext_Delete(CRef) + return -4 + _dev = SyclDevice._create(DRef) + _ctxt = SyclContext._create(CRef) + self._device = _dev + self._context = _ctxt + self._queue_ref = QRef + return 0 # normal return + + cdef int _init_queue_from_filter_string(self, const char *c_str, int props): + """ + Initializes self from filter string, error handler and properties. + Creates device from device selector, then calls helper function above. + + Returns: + 0 : normal execution + -1 : filter selector could not be created (malformed?) + -2 : Device could not be created from filter selector + -3 : Context creation/look-up failed + -4 : queue could not be created from context,device, error handler + and properties + """ + cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLFilterSelector_Create(c_str) + cdef DPCTLSyclDeviceRef DRef + cdef int ret = 0 + + if DSRef is NULL: + ret = -1 # Filter selector failed to be created + else: + DRef = DPCTLDevice_CreateFromSelector(DSRef) + DPCTLDeviceSelector_Delete(DSRef) + if (DRef is NULL): + ret = -2 # Device could not be created + else: + ret = self._init_queue_from_DPCTLSyclDeviceRef(DRef, props) + DPCTLDevice_Delete(DRef) + return ret + + cdef int _init_queue_from_device(self, SyclDevice dev, int props): + cdef DPCTLSyclDeviceRef DRef = dev.get_device_ref() + + if (DRef is NULL): + return -2 # Device could not be created + else: + return self._init_queue_from_DPCTLSyclDeviceRef(DRef, props) + + cdef int _init_queue_default(self, int props): + cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create() + cdef int ret = 0 + + DRef = DPCTLDevice_CreateFromSelector(DSRef) + DPCTLDeviceSelector_Delete(DSRef) + if (DRef is NULL): + ret = -2 # Device could not be created + else: + ret = self._init_queue_from_DPCTLSyclDeviceRef(DRef, props) + DPCTLDevice_Delete(DRef) + return ret + + cdef int _init_queue_from_context_and_device( + self, SyclContext ctxt, SyclDevice dev, int props + ): + """ + """ + cdef DPCTLSyclContextRef CRef + cdef DPCTLSyclDeviceRef DRef + cdef DPCTLSyclQueueRef QRef + CRef = ctxt.get_context_ref() + DRef = dev.get_device_ref() + QRef = DPCTLQueue_Create( + CRef, + DRef, + &default_async_error_handler, + props + ) + if (QRef is NULL): + return -4 + self._device = dev + self._context = ctxt + self._queue_ref = QRef + return 0 # normal return @staticmethod cdef SyclQueue _create(DPCTLSyclQueueRef qref): if qref is NULL: raise SyclQueueCreationError("Queue creation failed.") - cdef SyclQueue ret = SyclQueue.__new__(SyclQueue) + cdef _SyclQueue ret = _SyclQueue.__new__(_SyclQueue) ret._context = SyclContext._create(DPCTLQueue_GetContext(qref)) ret._device = SyclDevice._create(DPCTLQueue_GetDevice(qref)) ret._queue_ref = qref - return ret + return SyclQueue(ret) @staticmethod - cdef SyclQueue _create_from_context_and_device(SyclContext ctx, SyclDevice dev): - cdef SyclQueue ret = SyclQueue.__new__(SyclQueue) + cdef SyclQueue _create_from_context_and_device( + SyclContext ctx, SyclDevice dev + ): + cdef _SyclQueue ret = _SyclQueue.__new__(_SyclQueue) cdef DPCTLSyclContextRef cref = ctx.get_context_ref() cdef DPCTLSyclDeviceRef dref = dev.get_device_ref() - cdef DPCTLSyclQueueRef qref = DPCTLQueueMgr_GetQueueFromContextAndDevice( - cref, dref) + cdef DPCTLSyclQueueRef qref = DPCTLQueue_Create(cref, dref, NULL, 0) if qref is NULL: raise SyclQueueCreationError("Queue creation failed.") ret._queue_ref = qref ret._context = ctx ret._device = dev - return ret - - def __dealloc__ (self): - DPCTLQueue_Delete(self._queue_ref) + return SyclQueue(ret) - cdef _raise_queue_submit_error (self, fname, errcode): + cdef _raise_queue_submit_error(self, fname, errcode): e = SyclKernelSubmitError("Kernel submission to Sycl queue failed.") e.fname = fname e.code = errcode raise e - cdef _raise_invalid_range_error (self, fname, ndims, errcode): - e = SyclKernelInvalidRangeError("Range with ", ndims, " not allowed. " - "Range should have between one and " - "three dimensions.") + cdef _raise_invalid_range_error(self, fname, ndims, errcode): + e = SyclKernelInvalidRangeError( + "Range with ", ndims, " not allowed. Range should have between " + " one and three dimensions." + ) e.fname = fname e.code = errcode raise e - cdef int _populate_args (self, list args, void **kargs, \ - DPCTLKernelArgType *kargty): + cdef int _populate_args( + self, + list args, + void **kargs, + DPCTLKernelArgType *kargty + ): cdef int ret = 0 for idx, arg in enumerate(args): if isinstance(arg, ctypes.c_char): @@ -160,8 +448,7 @@ cdef class SyclQueue: ret = -1 return ret - - cdef int _populate_range (self, size_t Range[3], list S, size_t nS): + cdef int _populate_range(self, size_t Range[3], list S, size_t nS): cdef int ret = 0 @@ -182,7 +469,7 @@ cdef class SyclQueue: return ret - cpdef bool equals (self, SyclQueue q): + cpdef cpp_bool equals(self, SyclQueue q): """ Returns true if the SyclQueue argument has the same _queue_ref as this SycleQueue. """ @@ -203,16 +490,24 @@ cdef class SyclQueue: else: raise ValueError("Unknown backend type.") - cpdef SyclContext get_sycl_context (self): + @property + def sycl_context(self): + return self._context + + @property + def sycl_device(self): + return self._device + + cpdef SyclContext get_sycl_context(self): return self._context - cpdef SyclDevice get_sycl_device (self): + cpdef SyclDevice get_sycl_device(self): return self._device - cdef DPCTLSyclQueueRef get_queue_ref (self): + cdef DPCTLSyclQueueRef get_queue_ref(self): return self._queue_ref - def addressof_ref (self): + def addressof_ref(self): """ Returns the address of the C API DPCTLSyclQueueRef pointer as a size_t. @@ -222,9 +517,14 @@ cdef class SyclQueue: """ return int(self._queue_ref) - cpdef SyclEvent submit (self, SyclKernel kernel, list args, list gS, \ - list lS = None, list dEvents = None): - + cpdef SyclEvent submit( + self, + SyclKernel kernel, + list args, + list gS, + list lS = None, + list dEvents = None + ): cdef void **kargs = NULL cdef DPCTLKernelArgType *kargty = NULL cdef DPCTLSyclEventRef *depEvents = NULL @@ -264,22 +564,23 @@ cdef class SyclQueue: raise TypeError("Unsupported type for a kernel argument") if lS is None: - ret = self._populate_range (gRange, gS, nGS) + ret = self._populate_range(gRange, gS, nGS) if ret == -1: free(kargs) free(kargty) free(depEvents) self._raise_invalid_range_error("SyclQueue.submit", nGS, -1) - - Eref = DPCTLQueue_SubmitRange(kernel.get_kernel_ref(), - self.get_queue_ref(), - kargs, - kargty, - len(args), - gRange, - nGS, - depEvents, - nDE) + Eref = DPCTLQueue_SubmitRange( + kernel.get_kernel_ref(), + self.get_queue_ref(), + kargs, + kargty, + len(args), + gRange, + nGS, + depEvents, + nDE + ) else: ret = self._populate_range (gRange, gS, nGS) if ret == -1: @@ -293,38 +594,39 @@ cdef class SyclQueue: free(kargty) free(depEvents) self._raise_invalid_range_error("SyclQueue.submit", nLS, -1) - if nGS != nLS: free(kargs) free(kargty) free(depEvents) - raise ValueError("Local and global ranges need to have same " - "number of dimensions.") - - Eref = DPCTLQueue_SubmitNDRange(kernel.get_kernel_ref(), - self.get_queue_ref(), - kargs, - kargty, - len(args), - gRange, - lRange, - nGS, - depEvents, - nDE) + raise ValueError( + "Local and global ranges need to have same " + "number of dimensions." + ) + Eref = DPCTLQueue_SubmitNDRange( + kernel.get_kernel_ref(), + self.get_queue_ref(), + kargs, + kargty, + len(args), + gRange, + lRange, + nGS, + depEvents, + nDE + ) free(kargs) free(kargty) free(depEvents) if Eref is NULL: - # \todo get the error number from dpctl-capi self._raise_queue_submit_error("DPCTLQueue_Submit", -1) return SyclEvent._create(Eref, args) - cpdef void wait (self): + cpdef void wait(self): DPCTLQueue_Wait(self._queue_ref) - cpdef memcpy (self, dest, src, size_t count): + cpdef memcpy(self, dest, src, size_t count): cdef void *c_dest cdef void *c_src @@ -340,7 +642,7 @@ cdef class SyclQueue: DPCTLQueue_Memcpy(self._queue_ref, c_dest, c_src, count) - cpdef prefetch (self, mem, size_t count=0): + cpdef prefetch(self, mem, size_t count=0): cdef void *ptr if isinstance(mem, _Memory): @@ -353,7 +655,7 @@ cdef class SyclQueue: DPCTLQueue_Prefetch(self._queue_ref, ptr, count) - cpdef mem_advise (self, mem, size_t count, int advice): + cpdef mem_advise(self, mem, size_t count, int advice): cdef void *ptr if isinstance(mem, _Memory): @@ -365,3 +667,10 @@ cdef class SyclQueue: count = self.nbytes DPCTLQueue_MemAdvise(self._queue_ref, ptr, count, advice) + + @property + def __name__(self): + return "SyclQueue" + + def __repr__(self): + return "".format(hex(id(self))) diff --git a/dpctl/_sycl_queue_manager.pyx b/dpctl/_sycl_queue_manager.pyx index dec9c07390..848456cac8 100644 --- a/dpctl/_sycl_queue_manager.pyx +++ b/dpctl/_sycl_queue_manager.pyx @@ -21,18 +21,18 @@ from __future__ import print_function from enum import Enum, auto import logging from . import backend_type, device_type -from ._backend cimport ( +from ._backend cimport( _backend_type, _device_type, DPCTLPlatform_DumpInfo, DPCTLPlatform_GetNumNonHostPlatforms, DPCTLQueueMgr_GetCurrentQueue, - DPCTLQueueMgr_GetNumActivatedQueues, - DPCTLQueueMgr_GetNumQueues, + DPCTLQueueMgr_GlobalQueueIsCurrent, DPCTLQueueMgr_PushQueue, DPCTLQueueMgr_PopQueue, - DPCTLQueueMgr_SetAsDefaultQueue, + DPCTLQueueMgr_SetGlobalQueue, DPCTLSyclQueueRef, + DPCTLQueueMgr_GetQueueStackSize, ) from ._sycl_context cimport SyclContext @@ -45,82 +45,32 @@ __all__ = [ "get_current_queue", "get_num_activated_queues", "get_num_platforms", - "get_num_queues", - "has_cpu_queues", - "has_gpu_queues", "has_sycl_platforms", "is_in_device_context", - "set_default_queue", - "UnsupportedBackendError", - "UnsupportedDeviceError", + "set_global_queue", ] _logger = logging.getLogger(__name__) -cdef class UnsupportedBackendError(Exception): - """ - An UnsupportedBackendError exception is raised when a backend value - is other than `backend_type.opencl` or `backend_type.level_zero` is - encountered. All other backends are currently not supported. - +cdef class _SyclQueueManager: + """ Provides a SYCL queue manager interface for Python. """ - pass + def _set_as_current_queue(self, arg): + cdef SyclQueue q + cdef DPCTLSyclQueueRef queue_ref = NULL -cdef class UnsupportedDeviceError(Exception): - """ - An UnsupportedDeviceError exception is raised when a device type value - other than `device_type.cpu` or `device_type.gpu` is encountered. + if isinstance(arg, SyclQueue): + q_obj = arg + else: + q_obj = SyclQueue(arg) - """ - pass + q = q_obj + queue_ref = q.get_queue_ref() + DPCTLQueueMgr_PushQueue(queue_ref) - -cdef class _SyclQueueManager: - """ Provides a SYCL queue manager interface for Python. - """ - cdef dict _backend_str_ty_dict - cdef dict _device_str_ty_dict - cdef dict _backend_enum_ty_dict - cdef dict _device_enum_ty_dict - - def __cinit__(self): - - self._backend_str_ty_dict = { - "opencl" : _backend_type._OPENCL, - "level0" : _backend_type._LEVEL_ZERO, - } - - self._device_str_ty_dict = { - "gpu" : _device_type._GPU, - "cpu" : _device_type._CPU, - } - - self._backend_enum_ty_dict = { - backend_type.opencl : _backend_type._OPENCL, - backend_type.level_zero : _backend_type._LEVEL_ZERO, - } - - self._device_enum_ty_dict = { - device_type.cpu : _device_type._CPU, - device_type.gpu : _device_type._GPU, - } - - def _set_as_current_queue(self, backend_ty, device_ty, device_id): - cdef DPCTLSyclQueueRef queue_ref - - try : - beTy = self._backend_str_ty_dict[backend_ty] - try : - devTy = self._device_str_ty_dict[device_ty] - queue_ref = DPCTLQueueMgr_PushQueue(beTy, devTy, device_id) - return SyclQueue._create(queue_ref) - except KeyError: - raise UnsupportedDeviceError("Device can only be gpu or cpu") - except KeyError: - raise UnsupportedBackendError("Backend can only be opencl or " - "level0") + return q_obj def _remove_current_queue(self): DPCTLQueueMgr_PopQueue() @@ -177,12 +127,6 @@ cdef class _SyclQueueManager: """ DPCTLPlatform_DumpInfo() - def print_available_backends(self): - """ - Prints the list of available SYCL backends. - """ - print(self._backend_str_ty_dict.keys()) - cpdef get_current_backend(self): """ Returns the backend for the current queue as a `backend_type` enum. @@ -236,7 +180,7 @@ cdef class _SyclQueueManager: int: The number of currently activated queues. """ - return DPCTLQueueMgr_GetNumActivatedQueues() + return DPCTLQueueMgr_GetQueueStackSize() def get_num_platforms(self): """ @@ -248,106 +192,14 @@ cdef class _SyclQueueManager: """ return DPCTLPlatform_GetNumNonHostPlatforms() - def get_num_queues(self, backend_ty, device_ty): - """ - Returns the number of devices for the input backend and device type - combination. *WARNING: To be depracated in the near future.* - - Args: - backend_ty (backend_type): Enum value specifying a SYCL backend. - device_ty (device_type): Enum value specifying a SYCL device type. - - Returns: - int: Number of devices for the input backend and device type - combination. - Raises: - UnsupportedDeviceError: If the device type value is invalid. - UnsupportedBackendError: If the backend value is invalid. - """ - cdef size_t num = 0 - try : - beTy = self._backend_enum_ty_dict[backend_ty] - try : - devTy = self._device_enum_ty_dict[device_ty] - num = DPCTLQueueMgr_GetNumQueues(beTy, devTy) - except KeyError: - raise UnsupportedDeviceError( - "Device can only be device_type.gpu or device_type.cpu" - ) - except KeyError: - raise UnsupportedBackendError( - "Backend can only be backend_type.opencl or " - "backend_type.level_zero" - ) - - return num - - def has_gpu_queues(self, backend_ty=backend_type.opencl): - """ - Checks if the system has a GPU device for the specified SYCL backend - type. *WARNING: To be depracated in the near future.* - - Args: - backend_ty (backend_type) : Enum value specifying a SYCL backend \ - defaults to `backend_type.opencl`. - - Returns: - bool:True if the backend has a GPU device else False. - - Raises: - UnsupportedBackendError: If the backend value is invalid. - """ - cdef size_t num = 0 - try : - beTy = self._backend_enum_ty_dict[backend_ty] - num = DPCTLQueueMgr_GetNumQueues(beTy, _device_type._GPU) - except KeyError: - raise UnsupportedBackendError( - "Backend can only be backend_type.opencl or " - "backend_type.level_zero" - ) - if num: - return True - else: - return False - - def has_cpu_queues(self, backend_ty=backend_type.opencl): - """ - Checks if the system has a CPU device for the specified SYCL backend - type. *WARNING: To be depracated in the near future.* - - Args: - backend_ty (backend_type) : Enum value specifying a SYCL backend \ - defaults to `backend_type.opencl`. - - Returns: - bool:True if the backend has a CPU device else False. - - Raises: - UnsupportedBackendError: If the backend value is invalid. - """ - cdef size_t num = 0 - try : - beTy = self._backend_enum_ty_dict[backend_ty] - num = DPCTLQueueMgr_GetNumQueues(beTy, _device_type._CPU) - except KeyError: - raise UnsupportedBackendError( - "Backend can only be backend_type.opencl or " - "backend_type.level_zero" - ) - if num: - return True - else: - return False - def has_sycl_platforms(self): """ - Checks if the system has any non-host SYCL platforms. *WARNING: The \ - behavior of the function may change in the future to include the host \ + Checks if the system has any non-host SYCL platforms. *WARNING: The + behavior of the function may change in the future to include the host platform.* Returns: - bool: Returns True if there is at least one non-host SYCL, \ + bool: Returns True if there is at least one non-host SYCL, platform, otherwise returns False. """ @@ -357,62 +209,42 @@ cdef class _SyclQueueManager: else: return False + def is_in_device_context(self): """ Checks if the control is inside a :func:`dpctl.device_context()` scope. Returns: - bool: True if the control is within a \ + bool: True if the control is within a :func:`dpctl.device_context()` scope, otherwise False. """ - cdef size_t num = DPCTLQueueMgr_GetNumActivatedQueues() - if num: - return True - else: - return False + cdef int inCtx = DPCTLQueueMgr_GlobalQueueIsCurrent() + return not bool(inCtx) - def set_default_queue(self, backend_ty, device_ty, device_id): + def set_global_queue(self, arg): """ - Sets the global (default) queue to the SYCL queue specified using the - backend, device type, and relative device id parameters. *WARNING: To \ - be depracated in the near future.* + Sets the global queue to the SYCL queue specified explicitly, + or created from given arguments. Args: - backend_ty (backend_type) : Enum value specifying a SYCL backend. - device_ty (device_type) : Enum value specifying a SYCL device type. - device_id (int) : A relative device number. The relative device \ - id is based on the ordering of the devices in the list returned \ - by SYCL's `platform::get_platforms().get_devices()` function. + A SyclQueue instance to be used as a global queue. + Alternatively, a filter selector string, or a SyclDevice + instance to be used to construct SyclQueue. Raises: SyclQueueCreationError: If a SYCL queue could not be created. - UnsupportedDeviceError: If the device type is invalid. - UnsupportedBackendError: If the backend type is invalid. - """ - cdef DPCTLSyclQueueRef ret - try : - if isinstance(backend_ty, str): - beTy = self._backend_str_ty_dict[backend_ty] - else: - beTy = self._backend_enum_ty_dict[backend_ty] - try : - if isinstance(device_ty, str): - devTy = self._device_str_ty_dict[device_ty] - else: - devTyp = self._device_enum_ty_dist[device_ty] - ret = DPCTLQueueMgr_SetAsDefaultQueue(beTy, devTy, device_id) - if ret is NULL: - self._raise_queue_creation_error( - backend_ty, device_ty, device_id, - "DPCTLQueueMgr_PushQueue" - ) - - except KeyError: - raise UnsupportedDeviceError("Device can only be gpu or cpu") - except KeyError: - raise UnsupportedBackendError("Backend can only be opencl or " - "level0") + cdef SyclQueue q + cdef DPCTLSyclQueueRef queue_ref = NULL + + if type(arg) is SyclQueue: + q = arg + else: + q_obj = SyclQueue(arg) + q = q_obj + + queue_ref = q.get_queue_ref() + DPCTLQueueMgr_SetGlobalQueue(queue_ref) # This private instance of the _SyclQueueManager should not be directly @@ -423,13 +255,11 @@ _mgr = _SyclQueueManager() dump = _mgr.dump get_num_platforms = _mgr.get_num_platforms get_num_activated_queues = _mgr.get_num_activated_queues -get_num_queues = _mgr.get_num_queues -has_cpu_queues = _mgr.has_cpu_queues -has_gpu_queues = _mgr.has_gpu_queues has_sycl_platforms = _mgr.has_sycl_platforms -set_default_queue = _mgr.set_default_queue +set_global_queue = _mgr.set_global_queue is_in_device_context = _mgr.is_in_device_context + cpdef SyclQueue get_current_queue(): """ Returns the currently activate SYCL queue as a new SyclQueue object. @@ -446,6 +276,7 @@ cpdef SyclQueue get_current_queue(): """ return _mgr.get_current_queue() + cpdef get_current_device_type(): """ Returns current device type as a `device_type` enum. @@ -456,6 +287,7 @@ cpdef get_current_device_type(): """ return _mgr.get_current_device_type() + cpdef get_current_backend(): """ Returns the backend for the current queue as a `backend_type` enum. @@ -465,33 +297,31 @@ cpdef get_current_backend(): """ return _mgr.get_current_backend() + from contextlib import contextmanager + @contextmanager -def device_context(str queue_str="opencl:gpu:0"): +def device_context(arg): """ Yields a SYCL queue corresponding to the input filter string. This context manager "activates", *i.e.*, sets as the currently usable - queue, the SYCL queue defined by the "backend:device type:device id" tuple. + queue, the SYCL queue defined by the argument `arg`. The activated queue is yielded by the context manager and can also be accessed by any subsequent call to :func:`dpctl.get_current_queue()` inside the context manager's scope. The yielded queue is removed as the currently usable queue on exiting the context manager. Args: - queue_str (str) : A string corresponding to the DPC++ filter spec \ - that should be a three tuple specified as \ - "backend:device-type:device-id", defaults to "opencl:gpu:0". + + queue_str (str) : A string corresponding to the DPC++ filter selector. Yields: - :class:`.SyclQueue`: A SYCL queue corresponding to the specified \ + :class:`.SyclQueue`: A SYCL queue corresponding to the specified filter string. Raises: - ValueError: If the filter string is malformed. - UnsupportedDeviceError: If the device type value is invalid. - UnsupportedBackendError: If the backend value is invalid. SyclQueueCreationError: If the SYCL queue creation failed. :Example: @@ -507,16 +337,7 @@ def device_context(str queue_str="opencl:gpu:0"): """ ctxt = None try: - attrs = queue_str.split(':') - nattrs = len(attrs) - if (nattrs < 2 or nattrs > 3): - raise ValueError("Invalid queue filter string. Should be " - "backend:device:device_number or " - "backend:device. In the later case the " - "device_number defaults to 0") - if nattrs == 2: - attrs.append("0") - ctxt = _mgr._set_as_current_queue(attrs[0], attrs[1], int(attrs[2])) + ctxt = _mgr._set_as_current_queue(arg) yield ctxt finally: # Code to release resource diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index 330e13d611..287dae2756 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -96,7 +96,7 @@ def test_create_program_from_spirv(self): spirv_file = os.path.join(CURR_DIR, "input_files/multi_kernel.spv") with open(spirv_file, "rb") as fin: spirv = fin.read() - with dpctl.device_context("level0:gpu:0"): + with dpctl.device_context("level_zero:gpu:0"): q = dpctl.get_current_queue() prog = dpctl_prog.create_program_from_spirv(q, spirv) @@ -111,7 +111,7 @@ def test_create_program_from_source(self): size_t index = get_global_id(0); \ c[index] = a[index] + d*b[index]; \ }" - with dpctl.device_context("level0:gpu:0"): + with dpctl.device_context("level_zero:gpu:0"): q = dpctl.get_current_queue() prog = dpctl_prog.create_program_from_source(q, oclSrc) diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py index bd31769cfc..cc0e342d12 100644 --- a/dpctl/tests/test_sycl_queue.py +++ b/dpctl/tests/test_sycl_queue.py @@ -18,28 +18,342 @@ """ import dpctl -import unittest -from ._helper import has_cpu, has_gpu - - -class TestSyclQueue(unittest.TestCase): - @unittest.skipUnless(has_gpu(), "No OpenCL GPU queues available") - @unittest.skipUnless(has_cpu(), "No OpenCL CPU queues available") - def test_queue_not_equals(self): - with dpctl.device_context("opencl:gpu") as gpuQ: - ctx_gpu = gpuQ.get_sycl_context() - with dpctl.device_context("opencl:cpu") as cpuQ: - ctx_cpu = cpuQ.get_sycl_context() - self.assertFalse(ctx_cpu.equals(ctx_gpu)) - - @unittest.skipUnless(has_gpu(), "No OpenCL GPU queues available") - def test_queue_equals(self): - with dpctl.device_context("opencl:gpu") as gpuQ0: - ctx0 = gpuQ0.get_sycl_context() - with dpctl.device_context("opencl:gpu") as gpuQ1: - ctx1 = gpuQ1.get_sycl_context() - self.assertTrue(ctx0.equals(ctx1)) - - -if __name__ == "__main__": - unittest.main() +import pytest + +list_of_standard_selectors = [ + dpctl.select_accelerator_device, + dpctl.select_cpu_device, + dpctl.select_default_device, + dpctl.select_gpu_device, + dpctl.select_host_device, +] + +list_of_valid_filter_selectors = [ + "opencl", + "opencl:gpu", + "opencl:cpu", + "opencl:gpu:0", + "gpu", + "cpu", + "level_zero", + "level_zero:gpu", + "opencl:cpu:0", + "level_zero:gpu:0", + "gpu:0", + "gpu:1", + "1", +] + +list_of_invalid_filter_selectors = [ + "-1", + "opencl:gpu:-1", + "level_zero:cpu:0", + "abc", +] + +# Unit test cases that will be run for every device +def check_get_max_compute_units(device): + max_compute_units = device.get_max_compute_units() + assert max_compute_units > 0 + + +def check_get_max_work_item_dims(device): + max_work_item_dims = device.get_max_work_item_dims() + assert max_work_item_dims > 0 + + +def check_get_max_work_item_sizes(device): + max_work_item_sizes = device.get_max_work_item_sizes() + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_group_size(device): + max_work_group_size = device.get_max_work_group_size() + # Special case for FPGA simulator + if device.is_accelerator(): + assert max_work_group_size >= 0 + else: + assert max_work_group_size > 0 + + +def check_get_max_num_sub_groups(device): + max_num_sub_groups = device.get_max_num_sub_groups() + # Special case for FPGA simulator + if device.is_accelerator() or device.is_host(): + assert max_num_sub_groups >= 0 + else: + assert max_num_sub_groups > 0 + + +def check_has_aspect_host(device): + try: + device.has_aspect_host + except Exception: + pytest.fail("has_aspect_host call failed") + + +def check_has_aspect_cpu(device): + try: + device.has_aspect_cpu + except Exception: + pytest.fail("has_aspect_cpu call failed") + + +def check_has_aspect_gpu(device): + try: + device.has_aspect_gpu + except Exception: + pytest.fail("has_aspect_gpu call failed") + + +def check_has_aspect_accelerator(device): + try: + device.has_aspect_accelerator + except Exception: + pytest.fail("has_aspect_accelerator call failed") + + +def check_has_aspect_custom(device): + try: + device.has_aspect_custom + except Exception: + pytest.fail("has_aspect_custom call failed") + + +def check_has_aspect_fp16(device): + try: + device.has_aspect_fp16 + except Exception: + pytest.fail("has_aspect_fp16 call failed") + + +def check_has_aspect_fp64(device): + try: + device.has_aspect_fp64 + except Exception: + pytest.fail("has_aspect_fp64 call failed") + + +def check_has_aspect_int64_base_atomics(device): + try: + device.has_aspect_int64_base_atomics + except Exception: + pytest.fail("has_aspect_int64_base_atomics call failed") + + +def check_has_aspect_int64_extended_atomics(device): + try: + device.has_aspect_int64_extended_atomics + except Exception: + pytest.fail("has_aspect_int64_extended_atomics call failed") + + +def check_has_aspect_image(device): + try: + device.has_aspect_image + except Exception: + pytest.fail("has_aspect_image call failed") + + +def check_has_aspect_online_compiler(device): + try: + device.has_aspect_online_compiler + except Exception: + pytest.fail("has_aspect_online_compiler call failed") + + +def check_has_aspect_online_linker(device): + try: + device.has_aspect_online_linker + except Exception: + pytest.fail("has_aspect_online_linker call failed") + + +def check_has_aspect_queue_profiling(device): + try: + device.has_aspect_queue_profiling + except Exception: + pytest.fail("has_aspect_queue_profiling call failed") + + +def check_has_aspect_usm_device_allocations(device): + try: + device.has_aspect_usm_device_allocations + except Exception: + pytest.fail("has_aspect_usm_device_allocations call failed") + + +def check_has_aspect_usm_host_allocations(device): + try: + device.has_aspect_usm_host_allocations + except Exception: + pytest.fail("has_aspect_usm_host_allocations call failed") + + +def check_has_aspect_usm_shared_allocations(device): + try: + device.has_aspect_usm_shared_allocations + except Exception: + pytest.fail("has_aspect_usm_shared_allocations call failed") + + +def check_has_aspect_usm_restricted_shared_allocations(device): + try: + device.has_aspect_usm_restricted_shared_allocations + except Exception: + pytest.fail("has_aspect_usm_restricted_shared_allocations call failed") + + +def check_has_aspect_usm_system_allocator(device): + try: + device.has_aspect_usm_system_allocator + except Exception: + pytest.fail("has_aspect_usm_system_allocator call failed") + + +def check_is_accelerator(device): + try: + device.is_accelerator() + except Exception: + pytest.fail("is_accelerator call failed") + + +def check_is_cpu(device): + try: + device.is_cpu() + except Exception: + pytest.fail("is_cpu call failed") + + +def check_is_gpu(device): + try: + device.is_gpu() + except Exception: + pytest.fail("is_gpu call failed") + + +def check_is_host(device): + try: + device.is_host() + except Exception: + pytest.fail("is_hostcall failed") + + +list_of_checks = [ + check_get_max_compute_units, + check_get_max_work_item_dims, + check_get_max_work_item_sizes, + check_get_max_work_group_size, + check_get_max_num_sub_groups, + check_is_accelerator, + check_is_cpu, + check_is_gpu, + check_is_host, + check_has_aspect_host, + check_has_aspect_cpu, + check_has_aspect_gpu, + check_has_aspect_accelerator, + check_has_aspect_custom, + check_has_aspect_fp16, + check_has_aspect_fp64, + check_has_aspect_int64_base_atomics, + check_has_aspect_int64_extended_atomics, + check_has_aspect_image, + check_has_aspect_online_compiler, + check_has_aspect_online_linker, + check_has_aspect_queue_profiling, + check_has_aspect_usm_device_allocations, + check_has_aspect_usm_host_allocations, + check_has_aspect_usm_shared_allocations, + check_has_aspect_usm_restricted_shared_allocations, + check_has_aspect_usm_system_allocator, +] + + +@pytest.fixture(params=list_of_valid_filter_selectors) +def valid_filter(request): + return request.param + + +@pytest.fixture(params=list_of_invalid_filter_selectors) +def invalid_filter(request): + return request.param + + +@pytest.fixture(params=list_of_standard_selectors) +def device_selector(request): + return request.param + + +@pytest.fixture(params=list_of_checks) +def check(request): + return request.param + + +def test_standard_selectors(device_selector, check): + """Tests if the standard SYCL device_selectors are able to select a + device. + """ + try: + device = device_selector() + if device.default_selector_score < 0: + pytest.skip() + q = dpctl.SyclQueue(device) + check(q.get_sycl_device()) + except ValueError: + pytest.skip() + + +def test_current_device(check): + """Test is the device for the current queue is valid.""" + try: + q = dpctl.get_current_queue() + except Exception: + pytest.fail("Encountered an exception inside get_current_queue().") + device = q.get_sycl_device() + check(device) + + +def test_valid_filter_selectors(valid_filter, check): + """Tests if we can create a SyclDevice using a supported filter selector string.""" + device = None + try: + q = dpctl.SyclQueue(valid_filter) + device = q.get_sycl_device() + except dpctl.SyclQueueCreationError: + pytest.skip("Failed to create device with supported filter") + check(device) + + +def test_invalid_filter_selectors(invalid_filter): + """An invalid filter string should always be caught and a SyclQueueCreationError + raised. + """ + with pytest.raises(dpctl.SyclQueueCreationError): + q = dpctl.SyclQueue(invalid_filter) + + +def test_context_not_equals(): + try: + gpuQ = dpctl.SyclQueue("gpu") + except dpctl.SyclQueueCreationError: + pytest.skip() + ctx_gpu = gpuQ.get_sycl_context() + try: + cpuQ = dpctl.SyclQueue("cpu") + except dpctl.SyclQueueCreationError: + pytest.skip() + ctx_cpu = cpuQ.get_sycl_context() + assert not ctx_cpu.equals(ctx_gpu) + + +def test_context_equals(): + try: + gpuQ1 = dpctl.SyclQueue("gpu") + gpuQ0 = dpctl.SyclQueue("gpu") + except dpctl.SyclQueueCreationError: + pytest.skip() + ctx0 = gpuQ0.get_sycl_context() + ctx1 = gpuQ1.get_sycl_context() + assert ctx0.equals(ctx1) diff --git a/examples/cython/sycl_buffer/bench.py b/examples/cython/sycl_buffer/bench.py index 8e05f84b69..36c66780c7 100644 --- a/examples/cython/sycl_buffer/bench.py +++ b/examples/cython/sycl_buffer/bench.py @@ -24,7 +24,7 @@ print("=" * 10 + " Executing warm-up " + "=" * 10) print("NumPy result: ", X.sum(axis=0)) -dpctl.set_default_queue("opencl", "cpu", 0) +dpctl.set_global_queue("opencl:cpu") print( "SYCL({}) result: {}".format( dpctl.get_current_queue().get_sycl_device().get_device_name(), @@ -32,7 +32,7 @@ ) ) -dpctl.set_default_queue("opencl", "gpu", 0) +dpctl.set_default_queue("opencl:gpu") print( "SYCL({}) result: {}".format( dpctl.get_current_queue().get_sycl_device().get_device_name(), @@ -42,22 +42,22 @@ import timeit -print("Times for 'opencl:cpu:0'") +print("Times for 'opencl:cpu'") print( timeit.repeat( stmt="sb.columnwise_total(X)", - setup='dpctl.set_default_queue("opencl", "cpu", 0); ' + setup='dpctl.set_global_queue("opencl:cpu"); ' "sb.columnwise_total(X)", # ensure JIT compilation is not counted number=100, globals=globals(), ) ) -print("Times for 'opencl:gpu:0'") +print("Times for 'opencl:gpu'") print( timeit.repeat( stmt="sb.columnwise_total(X)", - setup='dpctl.set_default_queue("opencl", "gpu", 0); sb.columnwise_total(X)', + setup='dpctl.set_default_queue("opencl:gpu"); sb.columnwise_total(X)', number=100, globals=globals(), ) diff --git a/examples/cython/usm_memory/blackscholes.pyx b/examples/cython/usm_memory/blackscholes.pyx index fb5e9c54dd..bc26c7c750 100644 --- a/examples/cython/usm_memory/blackscholes.pyx +++ b/examples/cython/usm_memory/blackscholes.pyx @@ -26,12 +26,13 @@ import dpctl import numpy as np cdef extern from "sycl_blackscholes.hpp": - cdef void cpp_blackscholes[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T* callput) - cdef void cpp_populate_params[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) + cdef void cpp_blackscholes[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T* callput) except + + cdef void cpp_populate_params[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) except + def black_scholes_price(floating[:, ::1] option_params): cdef size_t n_opts = option_params.shape[0] cdef size_t n_params = option_params.shape[1] + cdef size_t n_bytes = 0 cdef c_dpctl.SyclQueue q cdef c_dpctl.DPCTLSyclQueueRef q_ptr cdef c_dpctl_mem.MemoryUSMShared mobj @@ -51,14 +52,16 @@ def black_scholes_price(floating[:, ::1] option_params): q = c_dpctl.get_current_queue() q_ptr = q.get_queue_ref() if (floating is double): - mobj = c_dpctl_mem.MemoryUSMShared(nbytes=2*n_opts * sizeof(double)) + n_bytes = 2*n_opts * sizeof(double) + mobj = c_dpctl_mem.MemoryUSMShared(n_bytes) callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='d') call_put_prices = callput_arr dp1 = &option_params[0,0] dp2 = &call_put_prices[0,0]; cpp_blackscholes[double](q_ptr, n_opts, dp1, dp2) elif (floating is float): - mobj = c_dpctl_mem.MemoryUSMShared(nbytes=2*n_opts * sizeof(float)) + n_bytes = 2*n_opts * sizeof(float) + mobj = c_dpctl_mem.MemoryUSMShared(n_bytes) callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='f') call_put_prices = callput_arr fp1 = &option_params[0,0] diff --git a/examples/cython/usm_memory/run.py b/examples/cython/usm_memory/run.py index 7576ac5b89..fb9305ea65 100644 --- a/examples/cython/usm_memory/run.py +++ b/examples/cython/usm_memory/run.py @@ -56,7 +56,7 @@ def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype): for _ in range(3): - dpctl.set_default_queue("opencl", "cpu", 0) + dpctl.set_global_queue("opencl:cpu:0") print( "Using : {}".format( dpctl.get_current_queue().get_sycl_device().get_device_name() @@ -73,7 +73,7 @@ def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype): print("Elapsed: {}".format(t1 - t0)) # compute on GPU sycl device - dpctl.set_default_queue("level0", "gpu", 0) + dpctl.set_global_queue("level_zero:gpu:0") print( "Using : {}".format( dpctl.get_current_queue().get_sycl_device().get_device_name()