Skip to content
Merged
4 changes: 2 additions & 2 deletions dpctl/_sycl_queue_manager.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ cdef class _SyclQueueManager:
Returns:
backend_type: The SYCL backend for the currently selected queue.
"""
return self.get_current_queue().get_sycl_backend()
return self.get_current_queue().backend

cpdef get_current_device_type(self):
"""
Expand All @@ -88,7 +88,7 @@ cdef class _SyclQueueManager:
device_type: The SYCL device type for the currently selected queue.
Possible values can be gpu, cpu, accelerator, or host.
"""
return self.get_current_queue().get_sycl_device().device_type
return self.get_current_queue().sycl_device.device_type

cpdef SyclQueue get_current_queue(self):
"""
Expand Down
22 changes: 14 additions & 8 deletions dpctl/memory/_memory.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ from dpctl._backend cimport ( # noqa: E211
DPCTLaligned_alloc_host,
DPCTLaligned_alloc_shared,
DPCTLContext_Delete,
DPCTLDevice_Copy,
DPCTLEvent_Delete,
DPCTLEvent_Wait,
DPCTLfree_with_queue,
Expand All @@ -48,6 +49,7 @@ from dpctl._backend cimport ( # noqa: E211
DPCTLSyclContextRef,
DPCTLSyclDeviceRef,
DPCTLSyclEventRef,
DPCTLSyclQueueRef,
DPCTLSyclUSMRef,
DPCTLUSM_GetPointerDevice,
DPCTLUSM_GetPointerType,
Expand Down Expand Up @@ -138,7 +140,7 @@ cdef class _Memory:

cdef _cinit_alloc(self, Py_ssize_t alignment, Py_ssize_t nbytes,
bytes ptr_type, SyclQueue queue):
cdef DPCTLSyclUSMRef p
cdef DPCTLSyclUSMRef p = NULL

self._cinit_empty()

Expand Down Expand Up @@ -215,10 +217,12 @@ cdef class _Memory:
)

def __dealloc__(self):
if (self.refobj is None and self.memory_ptr):
DPCTLfree_with_queue(
self.memory_ptr, self.queue.get_queue_ref()
)
if (self.refobj is None):
if self.memory_ptr:
if (type(self.queue) is SyclQueue):
DPCTLfree_with_queue(
self.memory_ptr, self.queue.get_queue_ref()
)
self._cinit_empty()

cdef _getbuffer(self, Py_buffer *buffer, int flags):
Expand Down Expand Up @@ -267,7 +271,7 @@ cdef class _Memory:
property _queue:
"""
:class:`dpctl.SyclQueue` with :class:`dpctl.SyclContext` the
USM pointer is bound to and :class:`dpctl.SyclDevice` it was
USM allocation is bound to and :class:`dpctl.SyclDevice` it was
allocated on.
"""
def __get__(self):
Expand Down Expand Up @@ -477,8 +481,10 @@ cdef class _Memory:
cdef DPCTLSyclDeviceRef dref = DPCTLUSM_GetPointerDevice(
p, ctx.get_context_ref()
)

return SyclDevice._create(dref)
cdef DPCTLSyclDeviceRef dref_copy = DPCTLDevice_Copy(dref)
if (dref_copy is NULL):
raise RuntimeError("Could not create a copy of sycl device")
return SyclDevice._create(dref_copy) # deletes the argument

@staticmethod
cdef bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx):
Expand Down
2 changes: 1 addition & 1 deletion dpctl/memory/_sycl_usm_array_interface_utils.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ cdef DPCTLSyclQueueRef get_queue_ref_from_ptr_and_syclobj(
if pycapsule.PyCapsule_IsValid(cap, "SyclQueueRef"):
q = SyclQueue(cap)
return _queue_ref_copy_from_SyclQueue(ptr, <SyclQueue> q)
elif pycapsule.PyCapsule_IsValid(cap, "SyclContexRef"):
elif pycapsule.PyCapsule_IsValid(cap, "SyclContextRef"):
ctx = <SyclContext>SyclContext(cap)
return _queue_ref_copy_from_USMRef_and_SyclContext(ptr, ctx)
else:
Expand Down
139 changes: 79 additions & 60 deletions dpctl/tests/test_dparray.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,85 +17,104 @@
"""Unit test cases for dpctl.tensor.numpy_usm_shared.
"""

import unittest

import numpy

from dpctl.tensor import numpy_usm_shared as dparray


class Test_dparray(unittest.TestCase):
def setUp(self):
self.X = dparray.ndarray((256, 4), dtype="d")
self.X.fill(1.0)
def get_arg():
    """Build the array shared by the tests in this module.

    Returns:
        A freshly allocated ``dparray.ndarray`` of shape ``(256, 4)`` and
        dtype ``"d"`` with every element set to ``1.0``. A new array is
        created on each call so tests cannot affect one another.
    """
    arr = dparray.ndarray((256, 4), dtype="d")
    arr.fill(1.0)
    return arr


def test_dparray_type():
    """The array returned by ``get_arg`` is a ``dparray.ndarray``."""
    assert isinstance(get_arg(), dparray.ndarray)


def test_dparray_as_ndarray_self():
    """The ``as_ndarray`` method yields exactly ``numpy.ndarray``."""
    converted = get_arg().as_ndarray()
    # Exact type comparison on purpose: the assertion is about the concrete
    # class of the result, so an isinstance() check would not be equivalent.
    assert type(converted) is numpy.ndarray


def test_dparray_as_ndarray():
    """The module-level ``dparray.as_ndarray`` yields exactly ``numpy.ndarray``."""
    converted = dparray.as_ndarray(get_arg())
    # Exact type comparison on purpose: the assertion is about the concrete
    # class of the result, so an isinstance() check would not be equivalent.
    assert type(converted) is numpy.ndarray


def test_dparray_from_ndarray():
    """Round trip: ``as_ndarray`` followed by ``from_ndarray`` gives a dparray."""
    as_numpy = dparray.as_ndarray(get_arg())
    round_tripped = dparray.from_ndarray(as_numpy)
    assert isinstance(round_tripped, dparray.ndarray)


def test_multiplication_dparray():
    """Multiplying a dparray by an integer scalar keeps the dparray type."""
    scaled = get_arg() * 5
    assert isinstance(scaled, dparray.ndarray)


def test_inplace_sub():
    """In-place subtraction on a dparray runs without raising.

    There is no assertion: the test passes as long as ``-=`` completes.
    """
    arr = get_arg()
    arr -= 1

def test_dparray_type(self):
self.assertIsInstance(self.X, dparray.ndarray)

def test_dparray_as_ndarray_self(self):
Y = self.X.as_ndarray()
self.assertEqual(type(Y), numpy.ndarray)
def test_dparray_through_python_func():
def func_operation_with_const(dpctl_array):
return dpctl_array * 2.0 + 13

def test_dparray_as_ndarray(self):
Y = dparray.as_ndarray(self.X)
self.assertEqual(type(Y), numpy.ndarray)
C = get_arg() * 5
dp_func = func_operation_with_const(C)
assert isinstance(dp_func, dparray.ndarray)

def test_dparray_from_ndarray(self):
Y = dparray.as_ndarray(self.X)
dp1 = dparray.from_ndarray(Y)
self.assertIsInstance(dp1, dparray.ndarray)

def test_multiplication_dparray(self):
C = self.X * 5
self.assertIsInstance(C, dparray.ndarray)
def test_dparray_mixing_dpctl_and_numpy():
    """A plain NumPy array times a dparray produces a dparray.

    The NumPy operand is on the left-hand side of the multiplication; the
    dparray operand must also still be a dparray afterwards.
    """
    host_ones = numpy.ones((256, 4), dtype="d")
    usm_ones = get_arg()
    product = host_ones * usm_ones
    assert isinstance(usm_ones, dparray.ndarray)
    assert isinstance(product, dparray.ndarray)

def test_inplace_sub(self):
self.X -= 1

def test_dparray_through_python_func(self):
def func_operation_with_const(dpctl_array):
return dpctl_array * 2.0 + 13
def test_dparray_shape():
    """``shape`` reports the dimensions the array was created with."""
    assert get_arg().shape == (256, 4)

C = self.X * 5
dp_func = func_operation_with_const(C)
self.assertIsInstance(dp_func, dparray.ndarray)

def test_dparray_mixing_dpctl_and_numpy(self):
dp_numpy = numpy.ones((256, 4), dtype="d")
res = dp_numpy * self.X
self.assertIsInstance(self.X, dparray.ndarray)
self.assertIsInstance(res, dparray.ndarray)
def test_dparray_T():
    """The ``T`` attribute of a (256, 4) dparray has shape (4, 256)."""
    transposed = get_arg().T
    assert transposed.shape == (4, 256)

def test_dparray_shape(self):
res = self.X.shape
self.assertEqual(res, (256, 4))

def test_dparray_T(self):
res = self.X.T
self.assertEqual(res.shape, (4, 256))
def test_numpy_ravel_with_dparray():
    """``numpy.ravel`` accepts a dparray and flattens it to 256 * 4 elements."""
    flat = numpy.ravel(get_arg())
    assert flat.shape == (1024,)

def test_numpy_ravel_with_dparray(self):
res = numpy.ravel(self.X)
self.assertEqual(res.shape, (1024,))

def test_numpy_sum_with_dparray(self):
res = numpy.sum(self.X)
self.assertEqual(res, 1024.0)
def test_numpy_sum_with_dparray():
    """``numpy.sum`` over the all-ones (256, 4) dparray equals 1024.0."""
    total = numpy.sum(get_arg())
    assert total == 1024.0

def test_numpy_sum_with_dparray_out(self):
res = dparray.empty((self.X.shape[1],), dtype=self.X.dtype)
res2 = numpy.sum(self.X, axis=0, out=res)
self.assertTrue(res is res2)
self.assertIsInstance(res2, dparray.ndarray)

def test_frexp_with_out(self):
X = dparray.array([0.5, 4.7])
mant = dparray.empty((2,), dtype="d")
exp = dparray.empty((2,), dtype="i4")
res = numpy.frexp(X, out=(mant, exp))
self.assertTrue(res[0] is mant)
self.assertTrue(res[1] is exp)
def test_numpy_sum_with_dparray_out():
    """``numpy.sum(..., out=...)`` returns the very dparray passed as ``out``."""
    src = get_arg()
    # One output slot per column, with the same dtype as the input.
    out_buf = dparray.empty((src.shape[1],), dtype=src.dtype)
    returned = numpy.sum(src, axis=0, out=out_buf)
    # Identity, not equality: the reduction must hand back the same object.
    assert out_buf is returned
    assert isinstance(returned, dparray.ndarray)


if __name__ == "__main__":
unittest.main()
def test_frexp_with_out():
    """``numpy.frexp`` with a tuple ``out`` returns those same dparray objects."""
    values = dparray.array([0.5, 4.7])
    mant = dparray.empty((2,), dtype="d")
    exp = dparray.empty((2,), dtype="i4")
    got_mant, got_exp = numpy.frexp(values, out=(mant, exp))
    # Identity checks: each returned element must be the supplied buffer.
    assert got_mant is mant
    assert got_exp is exp
85 changes: 40 additions & 45 deletions dpctl/tests/test_sycl_kernel_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,60 +18,55 @@
"""

import ctypes
import unittest

import numpy as np
import pytest

import dpctl
import dpctl.memory as dpctl_mem
import dpctl.program as dpctl_prog

from ._helper import has_gpu

def test_create_program_from_source():
try:
q = dpctl.SyclQueue("opencl", property="enable_profiling")
except dpctl.SyclQueueCreationError:
pytest.skip("OpenCL queue could not be created")
oclSrc = " \
kernel void axpy(global int* a, global int* b, global int* c, int d) { \
size_t index = get_global_id(0); \
c[index] = d*a[index] + b[index]; \
}"
prog = dpctl_prog.create_program_from_source(q, oclSrc)
axpyKernel = prog.get_sycl_kernel("axpy")

@unittest.skipUnless(has_gpu(), "No OpenCL GPU queues available")
class Test1DKernelSubmit(unittest.TestCase):
def test_create_program_from_source(self):
oclSrc = " \
kernel void axpy(global int* a, global int* b, global int* c, int d) { \
size_t index = get_global_id(0); \
c[index] = d*a[index] + b[index]; \
}"
q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling")
prog = dpctl_prog.create_program_from_source(q, oclSrc)
axpyKernel = prog.get_sycl_kernel("axpy")
n_elems = 1024 * 512
bufBytes = n_elems * np.dtype("i").itemsize
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
a = np.ndarray((n_elems,), buffer=abuf, dtype="i")
b = np.ndarray((n_elems,), buffer=bbuf, dtype="i")
c = np.ndarray((n_elems,), buffer=cbuf, dtype="i")
a[:] = np.arange(n_elems)
b[:] = np.arange(n_elems, 0, -1)
c[:] = 0
d = 2
args = []

n_elems = 1024 * 512
bufBytes = n_elems * np.dtype("i").itemsize
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
a = np.ndarray((n_elems,), buffer=abuf, dtype="i")
b = np.ndarray((n_elems,), buffer=bbuf, dtype="i")
c = np.ndarray((n_elems,), buffer=cbuf, dtype="i")
a[:] = np.arange(n_elems)
b[:] = np.arange(n_elems, 0, -1)
c[:] = 0
d = 2
args = []
args.append(a.base)
args.append(b.base)
args.append(c.base)
args.append(ctypes.c_int(d))

args.append(a.base)
args.append(b.base)
args.append(c.base)
args.append(ctypes.c_int(d))
r = [
n_elems,
]

r = [
n_elems,
]

timer = dpctl.SyclTimer()
with timer(q):
q.submit(axpyKernel, args, r)
ref_c = a * d + b
host_dt, device_dt = timer.dt
self.assertTrue(host_dt > device_dt)
self.assertTrue(np.allclose(c, ref_c))


if __name__ == "__main__":
unittest.main()
timer = dpctl.SyclTimer()
with timer(q):
q.submit(axpyKernel, args, r)
ref_c = a * d + b
host_dt, device_dt = timer.dt
assert host_dt > device_dt
assert np.allclose(c, ref_c)
Loading