From 5e5ddb941ed231e7c590a4d6b18d8d1fff70efe0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Aug 2021 16:53:24 -0500 Subject: [PATCH 1/2] Use public api keyword in declaring Cython classes Use api keyword in declaring API functions. Use of API defines initializer functions which allows native extensions that work with dpctl Python types to not link to Python-produced shared objects on Linux, and allows for pybind11 to portably work with Python types defined in dpctl. Example for working with dpctl types is to be added to dpctl/examples --- MANIFEST.in | 8 +++++++- dpctl/_sycl_context.pxd | 4 ++-- dpctl/_sycl_context.pyx | 2 +- dpctl/_sycl_device.pxd | 4 ++-- dpctl/_sycl_device.pyx | 2 +- dpctl/_sycl_event.pxd | 2 +- dpctl/_sycl_event.pyx | 7 +++++++ dpctl/_sycl_queue.pxd | 4 ++-- dpctl/_sycl_queue.pyx | 2 +- dpctl/memory/_memory.pxd | 12 ++++++------ dpctl/memory/_memory.pyx | 6 +++--- dpctl/tensor/_usmarray.pxd | 2 +- 12 files changed, 34 insertions(+), 21 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index d2fa8a807f..8452ac3331 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,9 +2,15 @@ include versioneer.py recursive-include dpctl/include *.h recursive-include dpctl *.pxd include dpctl/_sycl_context.h +include dpctl/_sycl_context_api.h include dpctl/_sycl_device.h +include dpctl/_sycl_device_api.h include dpctl/_sycl_queue.h -include dpctl/_sycl_queue_manager.h +include dpctl/_sycl_queue_api.h include dpctl/_sycl_event.h +include dpctl/_sycl_event_api.h include dpctl/memory/_memory.h +include dpctl/memory/_memory_api.h +include dpctl/tensor/_usmarray.h +include dpctl/tensor/_usmarray_api.h include dpctl/tests/input_files/* diff --git a/dpctl/_sycl_context.pxd b/dpctl/_sycl_context.pxd index a7760b6b81..a69910cd16 100644 --- a/dpctl/_sycl_context.pxd +++ b/dpctl/_sycl_context.pxd @@ -26,7 +26,7 @@ from ._backend cimport DPCTLSyclContextRef from ._sycl_device cimport SyclDevice -cdef public class _SyclContext [ +cdef public api class _SyclContext [ object Py_SyclContextObject, type Py_SyclContextType ]: @@ -35,7 +35,7 @@ cdef public class _SyclContext [ cdef DPCTLSyclContextRef _ctxt_ref -cdef public class SyclContext(_SyclContext) [ +cdef public api class SyclContext(_SyclContext) [ object PySyclContextObject, type PySyclContextType ]: diff --git a/dpctl/_sycl_context.pyx b/dpctl/_sycl_context.pyx index 3abc8db6ce..fa584e4ae2 100644 --- a/dpctl/_sycl_context.pyx +++ b/dpctl/_sycl_context.pyx @@ -474,7 +474,7 @@ cdef class SyclContext(_SyclContext): &_context_capsule_deleter ) -cdef public DPCTLSyclContextRef get_context_ref(SyclContext ctx): +cdef api DPCTLSyclContextRef get_context_ref(SyclContext ctx): """ C-API function to get opaque context reference from :class:`dpctl.SyclContext` instance. diff --git a/dpctl/_sycl_device.pxd b/dpctl/_sycl_device.pxd index 3e9262b2a2..a083ba4993 100644 --- a/dpctl/_sycl_device.pxd +++ b/dpctl/_sycl_device.pxd @@ -29,7 +29,7 @@ from ._backend cimport ( ) -cdef public class _SyclDevice [ +cdef public api class _SyclDevice [ object Py_SyclDeviceObject, type Py_SyclDeviceType ]: @@ -42,7 +42,7 @@ cdef public class _SyclDevice [ cdef size_t *_max_work_item_sizes -cdef public class SyclDevice(_SyclDevice) [ +cdef public api class SyclDevice(_SyclDevice) [ object PySyclDeviceObject, type PySyclDeviceType ]: diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index de9345a237..4bb02a5812 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -1118,7 +1118,7 @@ cdef class SyclDevice(_SyclDevice): else: return str(relId) -cdef public DPCTLSyclDeviceRef get_device_ref(SyclDevice dev): +cdef api DPCTLSyclDeviceRef get_device_ref(SyclDevice dev): """ C-API function to get opaque device reference from :class:`dpctl.SyclDevice` instance. diff --git a/dpctl/_sycl_event.pxd b/dpctl/_sycl_event.pxd index 7f397cb716..64f4b30fac 100644 --- a/dpctl/_sycl_event.pxd +++ b/dpctl/_sycl_event.pxd @@ -23,7 +23,7 @@ from ._backend cimport DPCTLSyclEventRef -cdef public class SyclEvent [object PySyclEventObject, type PySyclEventType]: +cdef public api class SyclEvent [object PySyclEventObject, type PySyclEventType]: ''' Wrapper class for a Sycl Event ''' cdef DPCTLSyclEventRef _event_ref diff --git a/dpctl/_sycl_event.pyx b/dpctl/_sycl_event.pyx index eac541fbb1..29b7733913 100644 --- a/dpctl/_sycl_event.pyx +++ b/dpctl/_sycl_event.pyx @@ -32,6 +32,13 @@ __all__ = [ _logger = logging.getLogger(__name__) +cdef api DPCTLSyclEventRef get_event_ref(SyclEvent ev): + """ C-API function to access opaque event reference from + Python object of type :class:`dpctl.SyclEvent`. + """ + return ev.get_event_ref() + + cdef class SyclEvent: """ Python wrapper class for cl::sycl::event. """ diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 0ce90a966f..92a9102021 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -31,7 +31,7 @@ from .program._program cimport SyclKernel cdef void default_async_error_handler(int) nogil except * -cdef public class _SyclQueue [ +cdef public api class _SyclQueue [ object Py_SyclQueueObject, type Py_SyclQueueType ]: """ Python data owner class for a sycl::queue. @@ -41,7 +41,7 @@ cdef public class _SyclQueue [ cdef SyclDevice _device -cdef public class SyclQueue (_SyclQueue) [ +cdef public api class SyclQueue (_SyclQueue) [ object PySyclQueueObject, type PySyclQueueType ]: """ Python wrapper class for a sycl::queue. diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 817ad23bb9..2abfd3039f 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -969,7 +969,7 @@ cdef class SyclQueue(_SyclQueue): self.sycl_device.print_device_info() -cdef public DPCTLSyclQueueRef get_queue_ref(SyclQueue q): +cdef api DPCTLSyclQueueRef get_queue_ref(SyclQueue q): """ C-API function to get opaque queue reference from :class:`dpctl.SyclQueue` instance. diff --git a/dpctl/memory/_memory.pxd b/dpctl/memory/_memory.pxd index 1e9b796ff7..ec94eb6b0f 100644 --- a/dpctl/memory/_memory.pxd +++ b/dpctl/memory/_memory.pxd @@ -32,7 +32,7 @@ cdef DPCTLSyclQueueRef get_queue_ref_from_ptr_and_syclobj( DPCTLSyclUSMRef ptr, object syclobj) -cdef public class _Memory [object Py_MemoryObject, type Py_MemoryType]: +cdef public api class _Memory [object Py_MemoryObject, type Py_MemoryType]: cdef DPCTLSyclUSMRef memory_ptr cdef Py_ssize_t nbytes cdef SyclQueue queue @@ -51,12 +51,12 @@ cdef public class _Memory [object Py_MemoryObject, type Py_MemoryType]: cpdef bytes tobytes(self) @staticmethod - cdef public SyclDevice get_pointer_device( + cdef SyclDevice get_pointer_device( DPCTLSyclUSMRef p, SyclContext ctx) @staticmethod - cdef public bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx) + cdef bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx) @staticmethod - cdef public object create_from_usm_pointer_size_qref( + cdef object create_from_usm_pointer_size_qref( DPCTLSyclUSMRef USMRef, Py_ssize_t nbytes, DPCTLSyclQueueRef QRef, @@ -64,12 +64,12 @@ cdef public class _Memory [object Py_MemoryObject, type Py_MemoryType]: ) -cdef public class MemoryUSMShared(_Memory) [object PyMemoryUSMSharedObject, +cdef public api class MemoryUSMShared(_Memory) [object PyMemoryUSMSharedObject, type PyMemoryUSMSharedType]: pass -cdef public class MemoryUSMHost(_Memory) [object PyMemoryUSMHostObject, +cdef public api class MemoryUSMHost(_Memory) [object PyMemoryUSMHostObject, type PyMemoryUSMHostType]: pass diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 8aa07a6f82..a0164d235c 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -708,11 +708,11 @@ def as_usm_memory(obj): ) -cdef public DPCTLSyclUSMRef get_usm_pointer(_Memory obj): +cdef api DPCTLSyclUSMRef get_usm_pointer(_Memory obj): return obj.memory_ptr -cdef public DPCTLSyclContextRef get_context(_Memory obj): +cdef api DPCTLSyclContextRef get_context(_Memory obj): return obj.queue._context.get_context_ref() -cdef public size_t get_nbytes(_Memory obj): +cdef api size_t get_nbytes(_Memory obj): return obj.nbytes diff --git a/dpctl/tensor/_usmarray.pxd b/dpctl/tensor/_usmarray.pxd index 0c97a3982f..1063e9135e 100644 --- a/dpctl/tensor/_usmarray.pxd +++ b/dpctl/tensor/_usmarray.pxd @@ -9,7 +9,7 @@ cdef public int USM_ARRAY_F_CONTIGUOUS cdef public int USM_ARRAY_WRITEABLE -cdef public class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]: +cdef public api class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]: # data fields cdef char* data_ cdef readonly int nd_ From 5560978bc00b145a237a244a4b17c8e73d61d668 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Aug 2021 17:28:26 -0500 Subject: [PATCH 2/2] Added pybind11 example that uses dpctl.SyclQueue to indicate offload --- .../pybind11/use_dpctl_syclqueue/README.md | 25 +++++ .../pybind11/use_dpctl_syclqueue/example.py | 48 ++++++++++ .../use_dpctl_syclqueue/pybind11_example.cpp | 92 +++++++++++++++++++ .../pybind11/use_dpctl_syclqueue/setup.py | 33 +++++++ 4 files changed, 198 insertions(+) create mode 100644 examples/pybind11/use_dpctl_syclqueue/README.md create mode 100644 examples/pybind11/use_dpctl_syclqueue/example.py create mode 100644 examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp create mode 100644 examples/pybind11/use_dpctl_syclqueue/setup.py diff --git a/examples/pybind11/use_dpctl_syclqueue/README.md b/examples/pybind11/use_dpctl_syclqueue/README.md new file mode 100644 index 0000000000..3e7b6804a9 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/README.md @@ -0,0 +1,25 @@ +# Usage of dpctl entities in Pybind11 + +This extension demonstrates how dpctl Python types, +such as dpctl.SyclQueue could be used in Pybind11 +extensions. + + +# Building extension + +``` +source /opt/intel/oneapi/compiler/latest/env/vars.sh +CXX=dpcpp CC=dpcpp python setup.py build_ext --inplace +python example.py +``` + +# Sample output + +``` +(idp) [17:25:27 ansatnuc04 use_dpctl_syclqueue]$ python example.py +EU count returned by Pybind11 extension 24 +EU count computed by dpctl 24 + +Computing modular reduction using SYCL on a NumPy array +Offloaded result agrees with reference one computed by NumPy +``` diff --git a/examples/pybind11/use_dpctl_syclqueue/example.py b/examples/pybind11/use_dpctl_syclqueue/example.py new file mode 100644 index 0000000000..88860c1569 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/example.py @@ -0,0 +1,48 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +import numpy as np +import pybind11_example as eg + +import dpctl + +q = dpctl.SyclQueue() + +# Pass dpctl.SyclQueue to Pybind11 extension +eu_count = eg.get_max_compute_units(q) + +print(f"EU count returned by Pybind11 extension {eu_count}") +print("EU count computed by dpctl {}".format(q.sycl_device.max_compute_units)) + +print("") +print("Computing modular reduction using SYCL on a NumPy array") + +X = np.random.randint(low=1, high=2 ** 16 - 1, size=10 ** 6, dtype=np.longlong) +modulus_p = 347 + +Y = eg.offloaded_array_mod( + q, X, modulus_p +) # Y is a regular array with host memory underneath it +Ynp = X % modulus_p + +check = np.array_equal(Y, Ynp) + +if check: + print("Offloaded result agrees with reference one computed by NumPy") +else: + print("Offloaded array differs from reference result computed by NumPy") diff --git a/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp b/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp new file mode 100644 index 0000000000..a63f2bb617 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include + +#include "../_sycl_queue.h" +#include "../_sycl_queue_api.h" +#include "dpctl_sycl_types.h" + +namespace py = pybind11; + +size_t get_max_compute_units(py::object queue) +{ + PyObject *queue_ptr = queue.ptr(); + if (PyObject_TypeCheck(queue_ptr, &PySyclQueueType)) { + DPCTLSyclQueueRef QRef = + get_queue_ref(reinterpret_cast(queue_ptr)); + sycl::queue *q = reinterpret_cast(QRef); + + return q->get_device() + .get_info(); + } + else { + throw std::runtime_error("expected dpctl.SyclQueue as argument"); + } +} + +py::array_t +offloaded_array_mod(py::object queue, + py::array_t array, + int64_t mod) +{ + sycl::queue *q_ptr; + + PyObject *queue_pycapi = queue.ptr(); + if (PyObject_TypeCheck(queue_pycapi, &PySyclQueueType)) { + DPCTLSyclQueueRef QRef = + get_queue_ref(reinterpret_cast(queue_pycapi)); + q_ptr = reinterpret_cast(QRef); + } + else { + throw std::runtime_error("expected dpctl.SyclQueue as argument"); + } + + py::buffer_info arg_pybuf = array.request(); + if (arg_pybuf.ndim != 1) { + throw std::runtime_error("Expecting a vector"); + } + if (mod <= 0) { + throw std::runtime_error("Modulus must be non-negative"); + } + + size_t n = arg_pybuf.size; + + auto res = py::array_t(n); + py::buffer_info res_pybuf = res.request(); + + int64_t *a = static_cast(arg_pybuf.ptr); + int64_t *r = static_cast(res_pybuf.ptr); + + { + const sycl::property_list props = { + sycl::property::buffer::use_host_ptr()}; + sycl::buffer a_buf(a, sycl::range<1>(n), props); + sycl::buffer r_buf(r, sycl::range<1>(n), props); + + q_ptr + ->submit([&](sycl::handler &cgh) { + sycl::accessor a_acc(a_buf, cgh, sycl::read_only); + sycl::accessor r_acc(r_buf, cgh, sycl::write_only, + sycl::noinit); + + cgh.parallel_for(sycl::range<1>(n), [=](sycl::id<1> idx) { + r_acc[idx] = a_acc[idx] % mod; + }); + }) + .wait_and_throw(); + } + + return res; +} + +PYBIND11_MODULE(pybind11_example, m) +{ + // Import the dpctl._sycl_queue extension + import_dpctl___sycl_queue(); + m.def("get_max_compute_units", &get_max_compute_units, + "Computes max_compute_units property of the device underlying given " + "dpctl.SyclQueue"); + m.def("offloaded_array_mod", &offloaded_array_mod, + "Compute offloaded modular reduction of integer-valued NumPy array"); +} diff --git a/examples/pybind11/use_dpctl_syclqueue/setup.py b/examples/pybind11/use_dpctl_syclqueue/setup.py new file mode 100644 index 0000000000..4569c99029 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/setup.py @@ -0,0 +1,33 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pybind11.setup_helpers import Pybind11Extension +from setuptools import setup + +import dpctl + +exts = [ + Pybind11Extension( + "pybind11_example", + ["./pybind11_example.cpp"], + include_dirs=[dpctl.get_include()], + extra_compile_args=["-fPIC"], + extra_link_args=["-fPIC"], + language="c++", + ), +] + +setup(name="pybind11_example", ext_modules=exts)