From 2a6314c96bfaa798a9739762a1b53b11c809f500 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 15 May 2022 09:04:37 -0500 Subject: [PATCH 1/3] Fixed docstring, added wait method to Device class --- dpctl/tensor/_device.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/_device.py b/dpctl/tensor/_device.py index 3703237957..7a0b1c02e9 100644 --- a/dpctl/tensor/_device.py +++ b/dpctl/tensor/_device.py @@ -22,8 +22,8 @@ class Device: This is a wrapper around :class:`dpctl.SyclQueue` with custom formatting. The class does not have public constructor, - but a class method to construct it from device= keyword - in Array-API functions. + but a class method `create_device` to construct it from device= keyword + argument in Array-API functions. Instance can be queried for ``sycl_queue``, ``sycl_context``, or ``sycl_device``. @@ -111,6 +111,12 @@ def __repr__(self): # This is a sub-device return repr(self.sycl_queue) + def wait(self): + """ + Call ``wait`` method of the underlying ``sycl_queue``. + """ + self.sycl_queue_.wait() + def normalize_queue_device(sycl_queue=None, device=None): """ From c372cb71e790584f50347b09f6f6aafd3ba3414a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 16 May 2022 12:14:15 -0500 Subject: [PATCH 2/3] Untangled input type overloads for functions to get default type category for queue/device --- dpctl/tensor/libtensor/source/tensor_py.cpp | 32 ++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 8d385c99b8..4ef68a30a0 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -1946,28 +1946,34 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("fill_value"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("default_device_fp_type", [](sycl::queue q) { + m.def("default_device_fp_type", [](sycl::queue q) -> std::string { return get_default_device_fp_type(q.get_device()); }); - m.def("default_device_fp_type", - [](sycl::device dev) { return get_default_device_fp_type(dev); }); + m.def("default_device_fp_type_device", [](sycl::device dev) -> std::string { + return get_default_device_fp_type(dev); + }); - m.def("default_device_int_type", [](sycl::queue q) { + m.def("default_device_int_type", [](sycl::queue q) -> std::string { return get_default_device_int_type(q.get_device()); }); - m.def("default_device_int_type", - [](sycl::device dev) { return get_default_device_int_type(dev); }); + m.def("default_device_int_type_device", + [](sycl::device dev) -> std::string { + return get_default_device_int_type(dev); + }); - m.def("default_device_bool_type", [](sycl::queue q) { + m.def("default_device_bool_type", [](sycl::queue q) -> std::string { return get_default_device_bool_type(q.get_device()); }); - m.def("default_device_bool_type", - [](sycl::device dev) { return get_default_device_bool_type(dev); }); + m.def("default_device_bool_type_device", + [](sycl::device dev) -> std::string { + return get_default_device_bool_type(dev); + }); - m.def("default_device_complex_type", [](sycl::queue q) { + m.def("default_device_complex_type", [](sycl::queue q) -> std::string { return get_default_device_complex_type(q.get_device()); }); - m.def("default_device_complex_type", [](sycl::device dev) { - return get_default_device_complex_type(dev); - }); + m.def("default_device_complex_type_device", + [](sycl::device dev) -> std::string { + return get_default_device_complex_type(dev); + }); } From ba50f98c4b0b85683ef6dd9fa73e0a7caad18dba Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 16 May 2022 12:14:58 -0500 Subject: [PATCH 3/3] Improve performance of default constructors for usm_memory and ums_ndarray --- dpctl/apis/include/dpctl4pybind11.hpp | 207 +++++++++++++++++++------- 1 file changed, 151 insertions(+), 56 deletions(-) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index cc9bfa3171..ee43dbcbbf 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -39,6 +39,45 @@ namespace pybind11 namespace detail { +#define DPCTL_TYPE_CASTER(type, py_name) \ +protected: \ + std::unique_ptr value; \ + \ +public: \ + static constexpr auto name = py_name; \ + template < \ + typename T_, \ + ::pybind11::detail::enable_if_t< \ + std::is_same>::value, \ + int> = 0> \ + static ::pybind11::handle cast(T_ *src, \ + ::pybind11::return_value_policy policy, \ + ::pybind11::handle parent) \ + { \ + if (!src) \ + return ::pybind11::none().release(); \ + if (policy == ::pybind11::return_value_policy::take_ownership) { \ + auto h = cast(std::move(*src), policy, parent); \ + delete src; \ + return h; \ + } \ + return cast(*src, policy, parent); \ + } \ + operator type *() \ + { \ + return value.get(); \ + } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &() \ + { \ + return *value; \ + } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &&() && \ + { \ + return std::move(*value); \ + } /* NOLINT(bugprone-macro-parentheses) */ \ + template \ + using cast_op_type = ::pybind11::detail::movable_cast_op_type + /* This type caster associates ``sycl::queue`` C++ class with * :class:`dpctl.SyclQueue` for the purposes of generation of * Python bindings by pybind11. @@ -46,16 +85,14 @@ namespace detail template <> struct type_caster { public: - PYBIND11_TYPE_CASTER(sycl::queue, _("dpctl.SyclQueue")); - bool load(handle src, bool) { PyObject *source = src.ptr(); if (PyObject_TypeCheck(source, &PySyclQueueType)) { DPCTLSyclQueueRef QRef = SyclQueue_GetQueueRef( reinterpret_cast(source)); - sycl::queue *q = reinterpret_cast(QRef); - value = *q; + value = std::make_unique( + *(reinterpret_cast(QRef))); return true; } else { @@ -69,6 +106,8 @@ template <> struct type_caster auto tmp = SyclQueue_Make(reinterpret_cast(&src)); return handle(reinterpret_cast(tmp)); } + + DPCTL_TYPE_CASTER(sycl::queue, _("dpctl.SyclQueue")); }; /* This type caster associates ``sycl::device`` C++ class with @@ -78,20 +117,14 @@ template <> struct type_caster template <> struct type_caster { public: - PYBIND11_TYPE_CASTER(sycl::device, _("dpctl.SyclDevice")); - bool load(handle src, bool) { PyObject *source = src.ptr(); if (PyObject_TypeCheck(source, &PySyclDeviceType)) { DPCTLSyclDeviceRef DRef = SyclDevice_GetDeviceRef( reinterpret_cast(source)); - sycl::device *d = reinterpret_cast(DRef); - value = *d; - return true; - } - else if (source == Py_None) { - value = sycl::device{}; + value = std::make_unique( + *(reinterpret_cast(DRef))); return true; } else { @@ -105,6 +138,8 @@ template <> struct type_caster auto tmp = SyclDevice_Make(reinterpret_cast(&src)); return handle(reinterpret_cast(tmp)); } + + DPCTL_TYPE_CASTER(sycl::device, _("dpctl.SyclDevice")); }; /* This type caster associates ``sycl::context`` C++ class with @@ -114,16 +149,14 @@ template <> struct type_caster template <> struct type_caster { public: - PYBIND11_TYPE_CASTER(sycl::context, _("dpctl.SyclContext")); - bool load(handle src, bool) { PyObject *source = src.ptr(); if (PyObject_TypeCheck(source, &PySyclContextType)) { DPCTLSyclContextRef CRef = SyclContext_GetContextRef( reinterpret_cast(source)); - sycl::context *ctx = reinterpret_cast(CRef); - value = *ctx; + value = std::make_unique( + *(reinterpret_cast(CRef))); return true; } else { @@ -138,6 +171,8 @@ template <> struct type_caster SyclContext_Make(reinterpret_cast(&src)); return handle(reinterpret_cast(tmp)); } + + DPCTL_TYPE_CASTER(sycl::context, _("dpctl.SyclContext")); }; /* This type caster associates ``sycl::event`` C++ class with @@ -147,16 +182,14 @@ template <> struct type_caster template <> struct type_caster { public: - PYBIND11_TYPE_CASTER(sycl::event, _("dpctl.SyclEvent")); - bool load(handle src, bool) { PyObject *source = src.ptr(); if (PyObject_TypeCheck(source, &PySyclEventType)) { DPCTLSyclEventRef ERef = SyclEvent_GetEventRef( reinterpret_cast(source)); - sycl::event *ev = reinterpret_cast(ERef); - value = *ev; + value = std::make_unique( + *(reinterpret_cast(ERef))); return true; } else { @@ -170,12 +203,102 @@ template <> struct type_caster auto tmp = SyclEvent_Make(reinterpret_cast(&src)); return handle(reinterpret_cast(tmp)); } + + DPCTL_TYPE_CASTER(sycl::event, _("dpctl.SyclEvent")); }; } // namespace detail } // namespace pybind11 namespace dpctl { + +namespace detail +{ + +struct dpctl_api +{ +public: + static dpctl_api &get() + { + static dpctl_api api; + return api; + } + + py::object sycl_queue_() + { + return *sycl_queue; + } + py::object default_usm_memory_() + { + return *default_usm_memory; + } + py::object default_usm_ndarray_() + { + return *default_usm_ndarray; + } + py::object as_usm_memory_() + { + return *as_usm_memory; + } + +private: + struct Deleter + { + void operator()(py::object *p) const + { + bool guard = (Py_IsInitialized() && !_Py_IsFinalizing()); + + if (guard) { + delete p; + } + } + }; + + std::shared_ptr sycl_queue; + std::shared_ptr default_usm_memory; + std::shared_ptr default_usm_ndarray; + std::shared_ptr as_usm_memory; + + dpctl_api() : sycl_queue{}, default_usm_memory{}, default_usm_ndarray{} + { + import_dpctl(); + + sycl::queue q_; + py::object py_sycl_queue = py::cast(q_); + sycl_queue = std::shared_ptr(new py::object{py_sycl_queue}, + Deleter{}); + + py::module_ mod_memory = py::module_::import("dpctl.memory"); + py::object py_as_usm_memory = mod_memory.attr("as_usm_memory"); + as_usm_memory = std::shared_ptr( + new py::object{py_as_usm_memory}, Deleter{}); + + auto mem_kl = mod_memory.attr("MemoryUSMHost"); + py::object py_default_usm_memory = + mem_kl(1, py::arg("queue") = py_sycl_queue); + default_usm_memory = std::shared_ptr( + new py::object{py_default_usm_memory}, Deleter{}); + + py::module_ mod_usmarray = + py::module_::import("dpctl.tensor._usmarray"); + auto tensor_kl = mod_usmarray.attr("usm_ndarray"); + + py::object py_default_usm_ndarray = + tensor_kl(py::tuple(), py::arg("dtype") = py::str("u1"), + py::arg("buffer") = py_default_usm_memory); + + default_usm_ndarray = std::shared_ptr( + new py::object{py_default_usm_ndarray}, Deleter{}); + } + +public: + dpctl_api(dpctl_api const &) = delete; + void operator=(dpctl_api const &) = delete; + ~dpctl_api(){}; +}; + +} // namespace detail + namespace memory { @@ -232,7 +355,9 @@ class usm_memory : public py::object } // END_TOKEN - usm_memory() : py::object(default_constructed(), stolen_t{}) + usm_memory() + : py::object(::dpctl::detail::dpctl_api::get().default_usm_memory_(), + borrowed_t{}) { if (!m_ptr) throw py::error_already_set(); @@ -267,26 +392,12 @@ class usm_memory : public py::object "cannot create a usm_memory from a nullptr"); return nullptr; } - py::module_ m = py::module_::import("dpctl.memory"); - auto convertor = m.attr("as_usm_memory"); - py::object res; - try { - res = convertor(py::handle(o)); - } catch (const py::error_already_set &e) { - return nullptr; - } - return res.ptr(); - } + auto convertor = ::dpctl::detail::dpctl_api::get().as_usm_memory_(); - static PyObject *default_constructed() - { - py::module_ m = py::module_::import("dpctl.memory"); - auto kl = m.attr("MemoryUSMDevice"); py::object res; try { - // allocate 1 byte - res = kl(1); + res = convertor(py::handle(o)); } catch (const py::error_already_set &e) { return nullptr; } @@ -295,10 +406,7 @@ class usm_memory : public py::object }; } // end namespace memory -} // end namespace dpctl -namespace dpctl -{ namespace tensor { class usm_ndarray : public py::object @@ -349,7 +457,9 @@ class usm_ndarray : public py::object } // END_TOKEN - usm_ndarray() : py::object(default_constructed(), stolen_t{}) + usm_ndarray() + : py::object(::dpctl::detail::dpctl_api::get().default_usm_ndarray_(), + borrowed_t{}) { if (!m_ptr) throw py::error_already_set(); @@ -481,21 +591,6 @@ class usm_ndarray : public py::object return UsmNDArray_GetElementSize(raw_ar); } - -private: - static PyObject *default_constructed() - { - py::module_ m = py::module_::import("dpctl.tensor"); - auto kl = m.attr("usm_ndarray"); - py::object res; - try { - // allocate 1 byte - res = kl(py::make_tuple(), py::arg("dtype") = "u1"); - } catch (const py::error_already_set &e) { - return nullptr; - } - return res.ptr(); - } }; } // end namespace tensor