From 05f8d5a0f3289c1cf8555f3fffafe0daf62f5f86 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 12 Apr 2021 16:40:07 -0500 Subject: [PATCH 1/3] Modified queue=None behavior for Memory objects queue=None is now understood as use `dpctl.SyclQueue()` queue. --- dpctl/memory/_memory.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index dfa96f8f31..9ac4799d91 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -28,7 +28,6 @@ from dpctl._backend cimport * from .._sycl_context cimport SyclContext from .._sycl_device cimport SyclDevice from .._sycl_queue cimport SyclQueue -from .._sycl_queue_manager cimport get_current_queue from cpython cimport Py_buffer from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize @@ -222,7 +221,7 @@ cdef class _Memory: if (nbytes > 0): if queue is None: - queue = get_current_queue() + queue = dpctl.SyclQueue() if (ptr_type == b"shared"): if alignment > 0: @@ -493,7 +492,7 @@ cdef class MemoryUSMShared(_Memory): USM shared memory. Non-positive alignments are not used (malloc_shared is used instead). - The queue=None the current `dpctl.get_current_queue()` is used to allocate memory. + For the queue=None case `dpctl.SyclQueue()` is used to allocate memory. MemoryUSMShared(usm_obj) constructor create instance from `usm_obj` expected to implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of @@ -525,7 +524,7 @@ cdef class MemoryUSMHost(_Memory): USM host memory. Non-positive alignments are not used (malloc_host is used instead). - The queue=None the current `dpctl.get_current_queue()` is used to allocate memory. + For the queue=None case `dpctl.SyclQueue()` is used to allocate memory.
MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` expected to implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of @@ -557,7 +556,7 @@ cdef class MemoryUSMDevice(_Memory): USM device memory. Non-positive alignments are not used (malloc_device is used instead). - The queue=None the current `dpctl.get_current_queue()` is used to allocate memory. + For the queue=None case `dpctl.SyclQueue()` is used to allocate memory. MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` expected to implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of From 136e5fb7b0d996c889fdd0d96735ce7b03567fbe Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 12 Apr 2021 16:41:25 -0500 Subject: [PATCH 2/3] use raise newError from oldError syntax --- dpctl/_sycl_device.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index ad3beb8d1c..65f0a709d2 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -762,7 +762,7 @@ cdef class SyclDevice(_SyclDevice): partition = int(partition) return self.create_sub_devices_equally(partition) except Exception as e: - raise TypeError("Unsupported type of sub-device argument") + raise TypeError("Unsupported type of sub-device argument") from e @property def parent_device(self): From e89a6c76e4f25838627ccd9371373c6a657c63e9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 12 Apr 2021 17:14:03 -0500 Subject: [PATCH 3/3] Use explicit queue= argument in memory buffer constructors, now that it no longer uses get_current_queue --- dpctl/tests/test_sycl_kernel_submit.py | 44 +++++++++++++------------- dpctl/tests/test_sycl_queue_memcpy.py | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index de05935c5e..0edae2cf94 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++
b/dpctl/tests/test_sycl_kernel_submit.py @@ -34,32 +34,32 @@ def test_create_program_from_source(self): size_t index = get_global_id(0); \ c[index] = d*a[index] + b[index]; \ }" - with dpctl.device_context("opencl:gpu:0"): - q = dpctl.get_current_queue() - prog = dpctl_prog.create_program_from_source(q, oclSrc) - axpyKernel = prog.get_sycl_kernel("axpy") + q = dpctl.SyclQueue("opencl:gpu") + prog = dpctl_prog.create_program_from_source(q, oclSrc) + axpyKernel = prog.get_sycl_kernel("axpy") - abuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) - bbuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) - cbuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) - a = np.ndarray((1024), buffer=abuf, dtype="i") - b = np.ndarray((1024), buffer=bbuf, dtype="i") - c = np.ndarray((1024), buffer=cbuf, dtype="i") - a[:] = np.arange(1024) - b[:] = np.arange(1024, 0, -1) - c[:] = 0 - d = 2 - args = [] + bufBytes = 1024 * np.dtype("i").itemsize + abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) + bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) + cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) + a = np.ndarray((1024), buffer=abuf, dtype="i") + b = np.ndarray((1024), buffer=bbuf, dtype="i") + c = np.ndarray((1024), buffer=cbuf, dtype="i") + a[:] = np.arange(1024) + b[:] = np.arange(1024, 0, -1) + c[:] = 0 + d = 2 + args = [] - args.append(a.base) - args.append(b.base) - args.append(c.base) - args.append(ctypes.c_int(d)) + args.append(a.base) + args.append(b.base) + args.append(c.base) + args.append(ctypes.c_int(d)) - r = [1024] + r = [1024] - q.submit(axpyKernel, args, r) - self.assertTrue(np.allclose(c, a * d + b)) + q.submit(axpyKernel, args, r) + self.assertTrue(np.allclose(c, a * d + b)) if __name__ == "__main__": diff --git a/dpctl/tests/test_sycl_queue_memcpy.py b/dpctl/tests/test_sycl_queue_memcpy.py index 2728a60b3b..15a943b665 100644 --- a/dpctl/tests/test_sycl_queue_memcpy.py +++ b/dpctl/tests/test_sycl_queue_memcpy.py @@ -51,7 +51,7 
@@ def test_memcpy_copy_usm_to_usm(self): ) def test_memcpy_type_error(self): mobj = self._create_memory() - q = dpctl.get_current_queue() + q = mobj._queue with self.assertRaises(TypeError) as cm: q.memcpy(None, mobj, 3)