diff --git a/MANIFEST.in b/MANIFEST.in index d2fa8a807f..8452ac3331 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,9 +2,15 @@ include versioneer.py recursive-include dpctl/include *.h recursive-include dpctl *.pxd include dpctl/_sycl_context.h +include dpctl/_sycl_context_api.h include dpctl/_sycl_device.h +include dpctl/_sycl_device_api.h include dpctl/_sycl_queue.h -include dpctl/_sycl_queue_manager.h +include dpctl/_sycl_queue_api.h include dpctl/_sycl_event.h +include dpctl/_sycl_event_api.h include dpctl/memory/_memory.h +include dpctl/memory/_memory_api.h +include dpctl/tensor/_usmarray.h +include dpctl/tensor/_usmarray_api.h include dpctl/tests/input_files/* diff --git a/dpctl/_sycl_context.pxd b/dpctl/_sycl_context.pxd index a7760b6b81..a69910cd16 100644 --- a/dpctl/_sycl_context.pxd +++ b/dpctl/_sycl_context.pxd @@ -26,7 +26,7 @@ from ._backend cimport DPCTLSyclContextRef from ._sycl_device cimport SyclDevice -cdef public class _SyclContext [ +cdef public api class _SyclContext [ object Py_SyclContextObject, type Py_SyclContextType ]: @@ -35,7 +35,7 @@ cdef public class _SyclContext [ cdef DPCTLSyclContextRef _ctxt_ref -cdef public class SyclContext(_SyclContext) [ +cdef public api class SyclContext(_SyclContext) [ object PySyclContextObject, type PySyclContextType ]: diff --git a/dpctl/_sycl_context.pyx b/dpctl/_sycl_context.pyx index 3abc8db6ce..fa584e4ae2 100644 --- a/dpctl/_sycl_context.pyx +++ b/dpctl/_sycl_context.pyx @@ -474,7 +474,7 @@ cdef class SyclContext(_SyclContext): &_context_capsule_deleter ) -cdef public DPCTLSyclContextRef get_context_ref(SyclContext ctx): +cdef api DPCTLSyclContextRef get_context_ref(SyclContext ctx): """ C-API function to get opaque context reference from :class:`dpctl.SyclContext` instance. diff --git a/dpctl/_sycl_device.pxd b/dpctl/_sycl_device.pxd index 3e9262b2a2..a083ba4993 100644 --- a/dpctl/_sycl_device.pxd +++ b/dpctl/_sycl_device.pxd @@ -29,7 +29,7 @@ from ._backend cimport ( ) -cdef public class _SyclDevice [ +cdef public api class _SyclDevice [ object Py_SyclDeviceObject, type Py_SyclDeviceType ]: @@ -42,7 +42,7 @@ cdef public class _SyclDevice [ cdef size_t *_max_work_item_sizes -cdef public class SyclDevice(_SyclDevice) [ +cdef public api class SyclDevice(_SyclDevice) [ object PySyclDeviceObject, type PySyclDeviceType ]: diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index de9345a237..4bb02a5812 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -1118,7 +1118,7 @@ cdef class SyclDevice(_SyclDevice): else: return str(relId) -cdef public DPCTLSyclDeviceRef get_device_ref(SyclDevice dev): +cdef api DPCTLSyclDeviceRef get_device_ref(SyclDevice dev): """ C-API function to get opaque device reference from :class:`dpctl.SyclDevice` instance. diff --git a/dpctl/_sycl_event.pxd b/dpctl/_sycl_event.pxd index 7f397cb716..64f4b30fac 100644 --- a/dpctl/_sycl_event.pxd +++ b/dpctl/_sycl_event.pxd @@ -23,7 +23,7 @@ from ._backend cimport DPCTLSyclEventRef -cdef public class SyclEvent [object PySyclEventObject, type PySyclEventType]: +cdef public api class SyclEvent [object PySyclEventObject, type PySyclEventType]: ''' Wrapper class for a Sycl Event ''' cdef DPCTLSyclEventRef _event_ref diff --git a/dpctl/_sycl_event.pyx b/dpctl/_sycl_event.pyx index eac541fbb1..29b7733913 100644 --- a/dpctl/_sycl_event.pyx +++ b/dpctl/_sycl_event.pyx @@ -32,6 +32,13 @@ __all__ = [ _logger = logging.getLogger(__name__) +cdef api DPCTLSyclEventRef get_event_ref(SyclEvent ev): + """ C-API function to access opaque event reference from + Python object of type :class:`dpctl.SyclEvent`. + """ + return ev.get_event_ref() + + cdef class SyclEvent: """ Python wrapper class for cl::sycl::event. """ diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 0ce90a966f..92a9102021 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -31,7 +31,7 @@ from .program._program cimport SyclKernel cdef void default_async_error_handler(int) nogil except * -cdef public class _SyclQueue [ +cdef public api class _SyclQueue [ object Py_SyclQueueObject, type Py_SyclQueueType ]: """ Python data owner class for a sycl::queue. @@ -41,7 +41,7 @@ cdef public class _SyclQueue [ cdef SyclDevice _device -cdef public class SyclQueue (_SyclQueue) [ +cdef public api class SyclQueue (_SyclQueue) [ object PySyclQueueObject, type PySyclQueueType ]: """ Python wrapper class for a sycl::queue. diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 817ad23bb9..2abfd3039f 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -969,7 +969,7 @@ cdef class SyclQueue(_SyclQueue): self.sycl_device.print_device_info() -cdef public DPCTLSyclQueueRef get_queue_ref(SyclQueue q): +cdef api DPCTLSyclQueueRef get_queue_ref(SyclQueue q): """ C-API function to get opaque queue reference from :class:`dpctl.SyclQueue` instance. diff --git a/dpctl/memory/_memory.pxd b/dpctl/memory/_memory.pxd index 1e9b796ff7..ec94eb6b0f 100644 --- a/dpctl/memory/_memory.pxd +++ b/dpctl/memory/_memory.pxd @@ -32,7 +32,7 @@ cdef DPCTLSyclQueueRef get_queue_ref_from_ptr_and_syclobj( DPCTLSyclUSMRef ptr, object syclobj) -cdef public class _Memory [object Py_MemoryObject, type Py_MemoryType]: +cdef public api class _Memory [object Py_MemoryObject, type Py_MemoryType]: cdef DPCTLSyclUSMRef memory_ptr cdef Py_ssize_t nbytes cdef SyclQueue queue @@ -51,12 +51,12 @@ cdef public class _Memory [object Py_MemoryObject, type Py_MemoryType]: cpdef bytes tobytes(self) @staticmethod - cdef public SyclDevice get_pointer_device( + cdef SyclDevice get_pointer_device( DPCTLSyclUSMRef p, SyclContext ctx) @staticmethod - cdef public bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx) + cdef bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx) @staticmethod - cdef public object create_from_usm_pointer_size_qref( + cdef object create_from_usm_pointer_size_qref( DPCTLSyclUSMRef USMRef, Py_ssize_t nbytes, DPCTLSyclQueueRef QRef, @@ -64,12 +64,12 @@ cdef public class _Memory [object Py_MemoryObject, type Py_MemoryType]: ) -cdef public class MemoryUSMShared(_Memory) [object PyMemoryUSMSharedObject, +cdef public api class MemoryUSMShared(_Memory) [object PyMemoryUSMSharedObject, type PyMemoryUSMSharedType]: pass -cdef public class MemoryUSMHost(_Memory) [object PyMemoryUSMHostObject, +cdef public api class MemoryUSMHost(_Memory) [object PyMemoryUSMHostObject, type PyMemoryUSMHostType]: pass diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 8aa07a6f82..a0164d235c 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -708,11 +708,11 @@ def as_usm_memory(obj): ) -cdef public DPCTLSyclUSMRef get_usm_pointer(_Memory obj): +cdef api DPCTLSyclUSMRef get_usm_pointer(_Memory obj): return obj.memory_ptr -cdef public DPCTLSyclContextRef get_context(_Memory obj): +cdef api DPCTLSyclContextRef get_context(_Memory obj): return obj.queue._context.get_context_ref() -cdef public size_t get_nbytes(_Memory obj): +cdef api size_t get_nbytes(_Memory obj): return obj.nbytes diff --git a/dpctl/tensor/_usmarray.pxd b/dpctl/tensor/_usmarray.pxd index 0c97a3982f..1063e9135e 100644 --- a/dpctl/tensor/_usmarray.pxd +++ b/dpctl/tensor/_usmarray.pxd @@ -9,7 +9,7 @@ cdef public int USM_ARRAY_F_CONTIGUOUS cdef public int USM_ARRAY_WRITEABLE -cdef public class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]: +cdef public api class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]: # data fields cdef char* data_ cdef readonly int nd_ diff --git a/examples/pybind11/use_dpctl_syclqueue/README.md b/examples/pybind11/use_dpctl_syclqueue/README.md new file mode 100644 index 0000000000..3e7b6804a9 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/README.md @@ -0,0 +1,25 @@ +# Usage of dpctl entities in Pybind11 + +This extension demonstrates how dpctl Python types, +such as dpctl.SyclQueue could be used in Pybind11 +extensions. + + +# Building extension + +``` +source /opt/intel/oneapi/compiler/latest/env/vars.sh +CXX=dpcpp CC=dpcpp python setup.py build_ext --inplace +python example.py +``` + +# Sample output + +``` +(idp) [17:25:27 ansatnuc04 use_dpctl_syclqueue]$ python example.py +EU count returned by Pybind11 extension 24 +EU count computed by dpctl 24 + +Computing modular reduction using SYCL on a NumPy array +Offloaded result agrees with reference one computed by NumPy +``` diff --git a/examples/pybind11/use_dpctl_syclqueue/example.py b/examples/pybind11/use_dpctl_syclqueue/example.py new file mode 100644 index 0000000000..88860c1569 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/example.py @@ -0,0 +1,48 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +import numpy as np +import pybind11_example as eg + +import dpctl + +q = dpctl.SyclQueue() + +# Pass dpctl.SyclQueue to Pybind11 extension +eu_count = eg.get_max_compute_units(q) + +print(f"EU count returned by Pybind11 extension {eu_count}") +print("EU count computed by dpctl {}".format(q.sycl_device.max_compute_units)) + +print("") +print("Computing modular reduction using SYCL on a NumPy array") + +X = np.random.randint(low=1, high=2 ** 16 - 1, size=10 ** 6, dtype=np.longlong) +modulus_p = 347 + +Y = eg.offloaded_array_mod( + q, X, modulus_p +) # Y is a regular array with host memory underneath it +Ynp = X % modulus_p + +check = np.array_equal(Y, Ynp) + +if check: + print("Offloaded result agrees with reference one computed by NumPy") +else: + print("Offloaded array differs from reference result computed by NumPy") diff --git a/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp b/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp new file mode 100644 index 0000000000..a63f2bb617 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include + +#include "../_sycl_queue.h" +#include "../_sycl_queue_api.h" +#include "dpctl_sycl_types.h" + +namespace py = pybind11; + +size_t get_max_compute_units(py::object queue) +{ + PyObject *queue_ptr = queue.ptr(); + if (PyObject_TypeCheck(queue_ptr, &PySyclQueueType)) { + DPCTLSyclQueueRef QRef = + get_queue_ref(reinterpret_cast(queue_ptr)); + sycl::queue *q = reinterpret_cast(QRef); + + return q->get_device() + .get_info(); + } + else { + throw std::runtime_error("expected dpctl.SyclQueue as argument"); + } +} + +py::array_t +offloaded_array_mod(py::object queue, + py::array_t array, + int64_t mod) +{ + sycl::queue *q_ptr; + + PyObject *queue_pycapi = queue.ptr(); + if (PyObject_TypeCheck(queue_pycapi, &PySyclQueueType)) { + DPCTLSyclQueueRef QRef = + get_queue_ref(reinterpret_cast(queue_pycapi)); + q_ptr = reinterpret_cast(QRef); + } + else { + throw std::runtime_error("expected dpctl.SyclQueue as argument"); + } + + py::buffer_info arg_pybuf = array.request(); + if (arg_pybuf.ndim != 1) { + throw std::runtime_error("Expecting a vector"); + } + if (mod <= 0) { + throw std::runtime_error("Modulus must be non-negative"); + } + + size_t n = arg_pybuf.size; + + auto res = py::array_t(n); + py::buffer_info res_pybuf = res.request(); + + int64_t *a = static_cast(arg_pybuf.ptr); + int64_t *r = static_cast(res_pybuf.ptr); + + { + const sycl::property_list props = { + sycl::property::buffer::use_host_ptr()}; + sycl::buffer a_buf(a, sycl::range<1>(n), props); + sycl::buffer r_buf(r, sycl::range<1>(n), props); + + q_ptr + ->submit([&](sycl::handler &cgh) { + sycl::accessor a_acc(a_buf, cgh, sycl::read_only); + sycl::accessor r_acc(r_buf, cgh, sycl::write_only, + sycl::noinit); + + cgh.parallel_for(sycl::range<1>(n), [=](sycl::id<1> idx) { + r_acc[idx] = a_acc[idx] % mod; + }); + }) + .wait_and_throw(); + } + + return res; +} + +PYBIND11_MODULE(pybind11_example, m) +{ + // Import the dpctl._sycl_queue extension + import_dpctl___sycl_queue(); + m.def("get_max_compute_units", &get_max_compute_units, + "Computes max_compute_units property of the device underlying given " + "dpctl.SyclQueue"); + m.def("offloaded_array_mod", &offloaded_array_mod, + "Compute offloaded modular reduction of integer-valued NumPy array"); +} diff --git a/examples/pybind11/use_dpctl_syclqueue/setup.py b/examples/pybind11/use_dpctl_syclqueue/setup.py new file mode 100644 index 0000000000..4569c99029 --- /dev/null +++ b/examples/pybind11/use_dpctl_syclqueue/setup.py @@ -0,0 +1,33 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pybind11.setup_helpers import Pybind11Extension +from setuptools import setup + +import dpctl + +exts = [ + Pybind11Extension( + "pybind11_example", + ["./pybind11_example.cpp"], + include_dirs=[dpctl.get_include()], + extra_compile_args=["-fPIC"], + extra_link_args=["-fPIC"], + language="c++", + ), +] + +setup(name="pybind11_example", ext_modules=exts)