diff --git a/dpctl/memory/_memory.pxd b/dpctl/memory/_memory.pxd index 9eaf9b6046..2723d3435a 100644 --- a/dpctl/memory/_memory.pxd +++ b/dpctl/memory/_memory.pxd @@ -51,18 +51,22 @@ cdef public class _Memory [object Py_MemoryObject, type Py_MemoryType]: cpdef bytes tobytes(self) @staticmethod - cdef public SyclDevice get_pointer_device(DPCTLSyclUSMRef p, SyclContext ctx) + cdef public SyclDevice get_pointer_device( + DPCTLSyclUSMRef p, SyclContext ctx) @staticmethod cdef public bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx) -cdef public class MemoryUSMShared(_Memory) [object PyMemoryUSMSharedObject, type PyMemoryUSMSharedType]: +cdef public class MemoryUSMShared(_Memory) [object PyMemoryUSMSharedObject, + type PyMemoryUSMSharedType]: pass -cdef public class MemoryUSMHost(_Memory) [object PyMemoryUSMHostObject, type PyMemoryUSMHostType]: +cdef public class MemoryUSMHost(_Memory) [object PyMemoryUSMHostObject, + type PyMemoryUSMHostType]: pass -cdef public class MemoryUSMDevice(_Memory) [object PyMemoryUSMDeviceObject, type PyMemoryUSMDeviceType]: +cdef public class MemoryUSMDevice(_Memory) [object PyMemoryUSMDeviceObject, + type PyMemoryUSMDeviceType]: pass diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 6934744d55..46ef819a38 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -35,6 +35,7 @@ from cpython cimport pycapsule import numpy as np import numbers +import collections __all__ = [ "MemoryUSMShared", @@ -42,61 +43,7 @@ __all__ = [ "MemoryUSMDevice" ] -cdef object _sycl_usm_ary_iface_error(): - return ValueError("__sycl_usm_array_interface__ is malformed") - - -cdef DPCTLSyclQueueRef _queue_ref_copy_from_SyclQueue(SyclQueue q): - return DPCTLQueue_Copy(q.get_queue_ref()) - - -cdef DPCTLSyclQueueRef _queue_ref_copy_from_USMRef_and_SyclContext( - DPCTLSyclUSMRef ptr, SyclContext ctx): - """ Obtain device from pointer and sycl context, use - context and device to create a queue from which this memory - can be accessible. - """ - cdef SyclDevice dev = _Memory.get_pointer_device(ptr, ctx) - cdef DPCTLSyclContextRef CRef = NULL - cdef DPCTLSyclDeviceRef DRef = NULL - CRef = ctx.get_context_ref() - DRef = dev.get_device_ref() - return DPCTLQueue_Create(CRef, DRef, NULL, 0) - - -cdef DPCTLSyclQueueRef get_queue_ref_from_ptr_and_syclobj( - DPCTLSyclUSMRef ptr, object syclobj): - """ Constructs queue from pointer and syclobject from - __sycl_usm_array_interface__ - """ - cdef DPCTLSyclQueueRef QRef = NULL - cdef SyclContext ctx - if type(syclobj) is SyclQueue: - return _queue_ref_copy_from_SyclQueue( syclobj) - elif type(syclobj) is SyclContext: - ctx = syclobj - return _queue_ref_copy_from_USMRef_and_SyclContext(ptr, ctx) - elif type(syclobj) is str: - q = SyclQueue(syclobj) - return _queue_ref_copy_from_SyclQueue( q) - elif pycapsule.PyCapsule_IsValid(syclobj, "SyclQueueRef"): - q = SyclQueue(syclobj) - return _queue_ref_copy_from_SyclQueue( q) - elif pycapsule.PyCapsule_IsValid(syclobj, "SyclContextRef"): - ctx = SyclContext(syclobj) - return _queue_ref_copy_from_USMRef_and_SyclContext(ptr, ctx) - elif hasattr(syclobj, '_get_capsule'): - cap = syclobj._get_capsule() - if pycapsule.PyCapsule_IsValid(cap, "SyclQueueRef"): - q = SyclQueue(cap) - return _queue_ref_copy_from_SyclQueue( q) - elif pycapsule.PyCapsule_IsValid(cap, "SyclContexRef"): - ctx = SyclContext(cap) - return _queue_ref_copy_from_USMRef_and_SyclContext(ptr, ctx) - else: - return QRef - else: - return QRef +include "_sycl_usm_array_interface_utils.pxi" cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue, @@ -126,66 +73,6 @@ cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue, ) -cdef class _BufferData: - """ - Internal data struct populated from parsing - `__sycl_usm_array_interface__` dictionary - """ - cdef DPCTLSyclUSMRef p - cdef int writeable - cdef object dt - cdef Py_ssize_t itemsize - cdef Py_ssize_t nbytes - cdef SyclQueue queue - - @staticmethod - cdef _BufferData from_sycl_usm_ary_iface(dict ary_iface): - cdef object ary_data_tuple = ary_iface.get('data', None) - cdef object ary_typestr = ary_iface.get('typestr', None) - cdef object ary_shape = ary_iface.get('shape', None) - cdef object ary_strides = ary_iface.get('strides', None) - cdef object ary_syclobj = ary_iface.get('syclobj', None) - cdef Py_ssize_t ary_offset = ary_iface.get('offset', 0) - cdef int ary_version = ary_iface.get('version', 0) - cdef object dt - cdef _BufferData buf - cdef Py_ssize_t arr_data_ptr - cdef SyclDevice dev - cdef SyclContext ctx - cdef DPCTLSyclQueueRef QRef = NULL - - if ary_version != 1: - raise _sycl_usm_ary_iface_error() - if not ary_data_tuple or len(ary_data_tuple) != 2: - raise _sycl_usm_ary_iface_error() - if not ary_shape or len(ary_shape) != 1 or ary_shape[0] < 1: - raise ValueError - try: - dt = np.dtype(ary_typestr) - except TypeError: - raise _sycl_usm_ary_iface_error() - if (ary_strides and len(ary_strides) != 1 - and ary_strides[0] != dt.itemsize): - raise ValueError("Must be contiguous") - - if (not ary_syclobj or - not isinstance(ary_syclobj, - (dpctl.SyclQueue, dpctl.SyclContext))): - raise _sycl_usm_ary_iface_error() - - buf = _BufferData.__new__(_BufferData) - arr_data_ptr = ary_data_tuple[0] - buf.p = (arr_data_ptr) - buf.writeable = 1 if ary_data_tuple[1] else 0 - buf.itemsize = (dt.itemsize) - buf.nbytes = (ary_shape[0]) * buf.itemsize - - QRef = get_queue_ref_from_ptr_and_syclobj(buf.p, ary_syclobj) - buf.queue = SyclQueue._create(QRef) - - return buf - - def _to_memory(unsigned char [::1] b, str usm_kind): """ Constructs Memory of the same size as the argument @@ -272,7 +159,7 @@ cdef class _Memory: elif hasattr(other, '__sycl_usm_array_interface__'): other_iface = other.__sycl_usm_array_interface__ if isinstance(other_iface, dict): - other_buf = _BufferData.from_sycl_usm_ary_iface(other_iface) + other_buf = _USMBufferData.from_sycl_usm_ary_iface(other_iface) self.memory_ptr = other_buf.p self.nbytes = other_buf.nbytes self.queue = other_buf.queue @@ -415,13 +302,16 @@ cdef class _Memory: return obj cpdef copy_from_host(self, object obj): - """Copy content of Python buffer provided by `obj` to instance memory.""" + """ + Copy content of Python buffer provided by `obj` to instance memory. + """ cdef const unsigned char[::1] host_buf = obj cdef Py_ssize_t buf_len = len(host_buf) if (buf_len > self.nbytes): raise ValueError("Source object is too large to be " - "accommodated in {} bytes buffer".format(self.nbytes)) + "accommodated in {} bytes buffer".format( + self.nbytes)) # call kernel to copy from DPCTLQueue_Memcpy( self.queue.get_queue_ref(), @@ -433,7 +323,7 @@ cdef class _Memory: cpdef copy_from_device(self, object sycl_usm_ary): """Copy SYCL memory underlying the argument object into the memory of the instance""" - cdef _BufferData src_buf + cdef _USMBufferData src_buf cdef const char* kind if not hasattr(sycl_usm_ary, '__sycl_usm_array_interface__'): @@ -441,11 +331,12 @@ cdef class _Memory: "`__sycl_usm_array_interface__` protocol") sycl_usm_ary_iface = sycl_usm_ary.__sycl_usm_array_interface__ if isinstance(sycl_usm_ary_iface, dict): - src_buf = _BufferData.from_sycl_usm_ary_iface(sycl_usm_ary_iface) + src_buf = _USMBufferData.from_sycl_usm_ary_iface(sycl_usm_ary_iface) if (src_buf.nbytes > self.nbytes): raise ValueError("Source object is too large to " - "be accommondated in {} bytes buffer".format(self.nbytes)) + "be accommondated in {} bytes buffer".format( + self.nbytes)) kind = DPCTLUSM_GetPointerType( src_buf.p, self.queue.get_sycl_context().get_context_ref()) if (kind == b'unknown'): @@ -477,46 +368,57 @@ cdef class _Memory: @staticmethod cdef SyclDevice get_pointer_device(DPCTLSyclUSMRef p, SyclContext ctx): - """Returns sycl device used to allocate given pointer `p` in given sycl context `ctx`""" - cdef DPCTLSyclDeviceRef dref = DPCTLUSM_GetPointerDevice(p, ctx.get_context_ref()) + """ + Returns sycl device used to allocate given pointer `p` in + given sycl context `ctx` + """ + cdef DPCTLSyclDeviceRef dref = DPCTLUSM_GetPointerDevice( + p, ctx.get_context_ref()) return SyclDevice._create(dref) @staticmethod cdef bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx): """Returns USM-type of given pointer `p` in given sycl context `ctx`""" - cdef const char * usm_type = DPCTLUSM_GetPointerType(p, ctx.get_context_ref()) + cdef const char * usm_type = DPCTLUSM_GetPointerType( + p, ctx.get_context_ref()) return usm_type cdef class MemoryUSMShared(_Memory): """ - MemoryUSMShared(nbytes, alignment=0, queue=None, copy=False) allocates nbytes of - USM shared memory. + MemoryUSMShared(nbytes, alignment=0, queue=None, copy=False) + allocates nbytes of USM shared memory. Non-positive alignments are not used (malloc_shared is used instead). For the queue=None cast the `dpctl.SyclQueue()` is used to allocate memory. - MemoryUSMShared(usm_obj) constructor create instance from `usm_obj` expected to - implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of - USM memory of USM shared type. Using copy=True to perform a copy if USM type is other - than 'shared'. + MemoryUSMShared(usm_obj) constructor create instance from `usm_obj` + expected to implement `__sycl_usm_array_interface__` protocol and exposing + a contiguous block of USM memory of USM shared type. Using copy=True to + perform a copy if USM type is other than 'shared'. """ - def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): + def __cinit__(self, other, *, Py_ssize_t alignment=0, + SyclQueue queue=None, int copy=False): if (isinstance(other, numbers.Integral)): self._cinit_alloc(alignment, other, b"shared", queue) else: self._cinit_other(other) if (self.get_usm_type() != "shared"): if copy: - self._cinit_alloc(0, self.nbytes, b"shared", queue) + self._cinit_alloc(0, self.nbytes, + b"shared", queue) self.copy_from_device(other) else: - raise ValueError("USM pointer in the argument {} is not a USM shared pointer. " - "Zero-copy operation is not possible with copy=False. " - "Either use copy=True, or use a constructor appropriate for " - "type '{}'".format(other, self.get_usm_type())) + raise ValueError( + "USM pointer in the argument {} is not a " + "USM shared pointer. " + "Zero-copy operation is not possible with " + "copy=False. " + "Either use copy=True, or use a constructor " + "appropriate for " + "type '{}'".format(other, self.get_usm_type())) def __getbuffer__(self, Py_buffer *buffer, int flags): self._getbuffer(buffer, flags) @@ -524,31 +426,36 @@ cdef class MemoryUSMShared(_Memory): cdef class MemoryUSMHost(_Memory): """ - MemoryUSMHost(nbytes, alignment=0, queue=None, copy=False) allocates nbytes of - USM host memory. + MemoryUSMHost(nbytes, alignment=0, queue=None, copy=False) + allocates nbytes of USM host memory. Non-positive alignments are not used (malloc_host is used instead). For the queue=None case `dpctl.SyclQueue()` is used to allocate memory. - MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` expected to - implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of - USM memory of USM host type. Using copy=True to perform a copy if USM type is other - than 'host'. + MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` + expected to implement `__sycl_usm_array_interface__` protocol and exposing + a contiguous block of USM memory of USM host type. Using copy=True to + perform a copy if USM type is other than 'host'. """ - def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): + def __cinit__(self, other, *, Py_ssize_t alignment=0, + SyclQueue queue=None, int copy=False): if (isinstance(other, numbers.Integral)): self._cinit_alloc(alignment, other, b"host", queue) else: self._cinit_other(other) if (self.get_usm_type() != "host"): if copy: - self._cinit_alloc(0, self.nbytes, b"host", queue) + self._cinit_alloc(0, self.nbytes, + b"host", queue) self.copy_from_device(other) else: - raise ValueError("USM pointer in the argument {} is not a USM host pointer. " - "Zero-copy operation is not possible with copy=False. " - "Either use copy=True, or use a constructor appropriate for " - "type '{}'".format(other, self.get_usm_type())) + raise ValueError( + "USM pointer in the argument {} is " + "not a USM host pointer. " + "Zero-copy operation is not possible with copy=False. " + "Either use copy=True, or use a constructor " + "appropriate for type '{}'".format( + other, self.get_usm_type())) def __getbuffer__(self, Py_buffer *buffer, int flags): self._getbuffer(buffer, flags) @@ -556,28 +463,33 @@ cdef class MemoryUSMHost(_Memory): cdef class MemoryUSMDevice(_Memory): """ - MemoryUSMDevice(nbytes, alignment=0, queue=None, copy=False) allocates nbytes of - USM device memory. + MemoryUSMDevice(nbytes, alignment=0, queue=None, copy=False) + allocates nbytes of USM device memory. Non-positive alignments are not used (malloc_device is used instead). For the queue=None cast the `dpctl.SyclQueue()` is used to allocate memory. - MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` expected to - implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of - USM memory of USM device type. Using copy=True to perform a copy if USM type is other - than 'device'. + MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` + expected to implement `__sycl_usm_array_interface__` protocol and exposing + a contiguous block of USM memory of USM device type. Using copy=True to + perform a copy if USM type is other than 'device'. """ - def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): + def __cinit__(self, other, *, Py_ssize_t alignment=0, + SyclQueue queue=None, int copy=False): if (isinstance(other, numbers.Integral)): self._cinit_alloc(alignment, other, b"device", queue) else: self._cinit_other(other) if (self.get_usm_type() != "device"): if copy: - self._cinit_alloc(0, self.nbytes, b"device", queue) + self._cinit_alloc(0, self.nbytes, + b"device", queue) self.copy_from_device(other) else: - raise ValueError("USM pointer in the argument {} is not a USM device pointer. " - "Zero-copy operation is not possible with copy=False. " - "Either use copy=True, or use a constructor appropriate for " - "type '{}'".format(other, self.get_usm_type())) + raise ValueError( + "USM pointer in the argument {} is not " + "a USM device pointer. " + "Zero-copy operation is not possible with copy=False. " + "Either use copy=True, or use a constructor " + "appropriate for type '{}'".format( + other, self.get_usm_type())) diff --git a/dpctl/memory/_sycl_usm_array_interface_utils.pxi b/dpctl/memory/_sycl_usm_array_interface_utils.pxi new file mode 100644 index 0000000000..2c20d2ea0c --- /dev/null +++ b/dpctl/memory/_sycl_usm_array_interface_utils.pxi @@ -0,0 +1,200 @@ + + +cdef bint _valid_usm_ptr_and_context(DPCTLSyclUSMRef ptr, SyclContext ctx): + usm_type = _Memory.get_pointer_type(ptr, ctx) + return usm_type in (b'shared', b'device', b'host') + + +cdef DPCTLSyclQueueRef _queue_ref_copy_from_SyclQueue( + DPCTLSyclUSMRef ptr, SyclQueue q): + """ Check that USM ptr is consistent with SYCL context in the queue, + and return a copy of QueueRef if so, or NULL otherwise. + """ + cdef SyclContext ctx = q.get_sycl_context() + if (_valid_usm_ptr_and_context(ptr, ctx)): + return DPCTLQueue_Copy(q.get_queue_ref()) + else: + return NULL + + +cdef DPCTLSyclQueueRef _queue_ref_copy_from_USMRef_and_SyclContext( + DPCTLSyclUSMRef ptr, SyclContext ctx): + """ Obtain device from pointer and sycl context, use + context and device to create a queue from which this memory + can be accessible. + """ + cdef SyclDevice dev = _Memory.get_pointer_device(ptr, ctx) + cdef DPCTLSyclContextRef CRef = ctx.get_context_ref() + cdef DPCTLSyclDeviceRef DRef = dev.get_device_ref() + return DPCTLQueue_Create(CRef, DRef, NULL, 0) + + +cdef DPCTLSyclQueueRef get_queue_ref_from_ptr_and_syclobj( + DPCTLSyclUSMRef ptr, object syclobj): + """ Constructs queue from pointer and syclobject from + __sycl_usm_array_interface__ + """ + cdef SyclContext ctx + if type(syclobj) is SyclQueue: + return _queue_ref_copy_from_SyclQueue(ptr, syclobj) + elif type(syclobj) is SyclContext: + ctx = syclobj + return _queue_ref_copy_from_USMRef_and_SyclContext(ptr, ctx) + elif type(syclobj) is str: + q = SyclQueue(syclobj) + return _queue_ref_copy_from_SyclQueue(ptr, q) + elif pycapsule.PyCapsule_IsValid(syclobj, "SyclQueueRef"): + q = SyclQueue(syclobj) + return _queue_ref_copy_from_SyclQueue(ptr, q) + elif pycapsule.PyCapsule_IsValid(syclobj, "SyclContextRef"): + ctx = SyclContext(syclobj) + return _queue_ref_copy_from_USMRef_and_SyclContext(ptr, ctx) + elif hasattr(syclobj, '_get_capsule'): + cap = syclobj._get_capsule() + if pycapsule.PyCapsule_IsValid(cap, "SyclQueueRef"): + q = SyclQueue(cap) + return _queue_ref_copy_from_SyclQueue(ptr, q) + elif pycapsule.PyCapsule_IsValid(cap, "SyclContexRef"): + ctx = SyclContext(cap) + return _queue_ref_copy_from_USMRef_and_SyclContext(ptr, ctx) + else: + return NULL + else: + return NULL + + +cdef object _pointers_from_shape_and_stride( + int nd, object ary_shape, Py_ssize_t itemsize, Py_ssize_t ary_offset, + object ary_strides): + """ + Internal utility: for given array data about shape/layout/element + compute left-most displacement when enumerating all elements of the array + and the number of bytes of memory between the left-most and right-most + displacements. + + Returns: tuple(min_disp, nbytes) + """ + if (nd > 0): + if (ary_strides is None): + nelems = 1 + for si in ary_shape: + sh_i = int(si) + if (sh_i <= 0): + raise ValueError("Array shape elements need to be positive") + nelems = nelems * sh_i + return (ary_offset, nelems * itemsize) + else: + min_disp = ary_offset + max_disp = ary_offset + for i in range(nd): + str_i = int(ary_strides[i]) + sh_i = int(ary_shape[i]) + if (str_i > 0): + max_disp += str_i * (sh_i - 1) + else: + min_disp += str_i * (sh_i - 1); + return (min_disp, (max_disp - min_disp + 1) * itemsize) + elif (nd == 0): + return (ary_offset, itemsize) + else: + raise ValueError("Array dimensions can not be negative") + + +cdef class _USMBufferData: + """ + Internal data struct populated from parsing + `__sycl_usm_array_interface__` dictionary + """ + cdef DPCTLSyclUSMRef p + cdef int writeable + cdef object dt + cdef Py_ssize_t itemsize + cdef Py_ssize_t nbytes + cdef SyclQueue queue + + @staticmethod + cdef _USMBufferData from_sycl_usm_ary_iface(dict ary_iface): + cdef object ary_data_tuple = ary_iface.get('data', None) + cdef object ary_typestr = ary_iface.get('typestr', None) + cdef object ary_shape = ary_iface.get('shape', None) + cdef object ary_strides = ary_iface.get('strides', None) + cdef object ary_syclobj = ary_iface.get('syclobj', None) + cdef Py_ssize_t ary_offset = ary_iface.get('offset', 0) + cdef int ary_version = ary_iface.get('version', 0) + cdef Py_ssize_t arr_data_ptr = 0 + cdef DPCTLSyclUSMRef memRef = NULL + cdef Py_ssize_t itemsize = -1 + cdef int writeable = -1 + cdef int nd = -1 + cdef DPCTLSyclQueueRef QRef = NULL + cdef object dt + cdef _USMBufferData buf + cdef SyclDevice dev + cdef SyclContext ctx + + if ary_version != 1: + raise ValueError(("__sycl_usm_array_interface__ is malformed:" + " dict('version': {}) is unexpected." + " The only recognized version is 1.").format( + ary_version)) + if not ary_data_tuple or len(ary_data_tuple) != 2: + raise ValueError("__sycl_usm_array_interface__ is malformed:" + " 'data' field is required, and must be a tuple" + " (usm_pointer, is_writeable_boolean).") + arr_data_ptr = ary_data_tuple[0] + writeable = 1 if ary_data_tuple[1] else 0 + # Check that memory and syclobj are consistent: + # (USM pointer is bound to this sycl context) + memRef = arr_data_ptr + QRef = get_queue_ref_from_ptr_and_syclobj(memRef, ary_syclobj) + if (QRef is NULL): + raise ValueError("__sycl_usm_array_interface__ is malformed:" + " 'data' field is not consistent with 'syclobj'" + " field, the pointer {} is not bound to" + " SyclContext derived from" + " dict('syclobj': {}).".format( + hex(arr_data_ptr), ary_syclobj)) + # shape must be present + if ary_shape is None or not ( + isinstance(ary_shape, collections.abc.Sized) and + isinstance(ary_shape, collections.abc.Iterable)): + DPCTLQueue_Delete(QRef) + raise ValueError("Shape entry is a required element of " + "`__sycl_usm_array_interface__` dictionary") + nd = len(ary_shape) + try: + dt = np.dtype(ary_typestr) + if (dt.hasobject or not (np.issubdtype(dt.type, np.integer) or + np.issubdtype(dt.type, np.inexact))): + DPCTLQueue_Delete(QRef) + raise TypeError("Only integer types, floating and complex " + "floating types are supported.") + itemsize = (dt.itemsize) + except TypeError as e: + raise ValueError( + "__sycl_usm_array_interface__ is malformed:" + " dict('typestr': {}) is unexpected. ".format(ary_typestr) + ) from e + + if (ary_strides is None or ( + isinstance(ary_strides, collections.abc.Sized) and + isinstance(ary_strides, collections.abc.Iterable) and + len(ary_strides) == nd)): + min_disp, nbytes = _pointers_from_shape_and_stride( + nd, ary_shape, itemsize, ary_offset, ary_strides) + else: + DPCTLQueue_Delete(QRef) + raise ValueError("__sycl_usm_array_interface__ is malformed: " + "'strides' must be a tuple or " + "list of the same length as shape") + + buf = _USMBufferData.__new__(_USMBufferData) + buf.p = ( + arr_data_ptr + (min_disp) * itemsize) + buf.writeable = writeable + buf.itemsize = itemsize + buf.nbytes = nbytes + + buf.queue = SyclQueue._create(QRef) + + return buf diff --git a/dpctl/tests/test_sycl_usm.py b/dpctl/tests/test_sycl_usm.py index 9b82b9acf5..7e3c056ff2 100644 --- a/dpctl/tests/test_sycl_usm.py +++ b/dpctl/tests/test_sycl_usm.py @@ -243,5 +243,82 @@ def setUp(self): self.usm_type = "device" +class View: + def __init__(self, buf, shape, strides, offset): + self.buffer = buf + self.shape = shape + self.strides = strides + self.offset = offset + + @property + def __sycl_usm_array_interface__(self): + sua_iface = self.buffer.__sycl_usm_array_interface__ + sua_iface["offset"] = self.offset + sua_iface["shape"] = self.shape + sua_iface["strides"] = self.strides + return sua_iface + + +class TestMemoryWithView(unittest.TestCase): + def test_suai_non_contig_1D(self): + """ Test of zero-copy using sycl_usm_array_interface with non-contiguous data """ + + MemoryUSMClass = MemoryUSMShared + try: + buf = MemoryUSMClass(32) + except: + self.skipTest("MemoryUSMShared could not be allocated") + host_canary = np.full((buf.nbytes,), 77, dtype="|u1") + buf.copy_from_host(host_canary) + n1d = 10 + step_1d = 2 + offset = 8 + v = View(buf, shape=(n1d,), strides=(step_1d,), offset=offset) + buf2 = MemoryUSMClass(v) + expected_nbytes = ( + np.flip(host_canary[offset : offset + n1d * step_1d : step_1d]).ctypes.data + + 1 + - host_canary[offset:].ctypes.data + ) + self.assertEqual(buf2.nbytes, expected_nbytes) + inset_canary = np.arange(0, buf2.nbytes, dtype="|u1") + buf2.copy_from_host(inset_canary) + res = buf.copy_to_host() + del buf + del buf2 + expected_res = host_canary.copy() + expected_res[offset : offset + (n1d - 1) * step_1d + 1] = inset_canary + self.assertTrue(np.array_equal(res, expected_res)) + + def test_suai_non_contig_2D(self): + MemoryUSMClass = MemoryUSMDevice + try: + buf = MemoryUSMClass(20) + except: + self.skipTest("MemoryUSMShared could not be allocated") + host_canary = np.arange(20, dtype="|u1") + buf.copy_from_host(host_canary) + shape_2d = (2, 2) + strides_2d = (10, -2) + offset = 9 + idx = [] + for i0 in range(shape_2d[0]): + for i1 in range(shape_2d[1]): + idx.append(offset + i0 * strides_2d[0] + i1 * strides_2d[1]) + idx.sort() + v = View(buf, shape=shape_2d, strides=strides_2d, offset=offset) + buf2 = MemoryUSMClass(v) + expected_nbytes = idx[-1] - idx[0] + 1 + self.assertEqual(buf2.nbytes, expected_nbytes) + inset_canary = np.full((buf2.nbytes), 255, dtype="|u1") + buf2.copy_from_host(inset_canary) + res = buf.copy_to_host() + del buf + del buf2 + expected_res = host_canary.copy() + expected_res[idx[0] : idx[-1] + 1] = inset_canary + self.assertTrue(np.array_equal(res, expected_res)) + + if __name__ == "__main__": unittest.main()