Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion dpctl/tensor/_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def create_device(cls, dev):
"targeting this device".format(dev)
)
else:
obj.sycl_queue_ = dpctl.SyclQueue(dev)
if dev is None:
obj.sycl_queue_ = dpctl.SyclQueue()
else:
obj.sycl_queue_ = dpctl.SyclQueue(dev)
return obj

@property
Expand Down
25 changes: 21 additions & 4 deletions dpctl/tensor/_slicing.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,23 @@
import numbers


cdef Py_ssize_t _slice_len(
    Py_ssize_t sl_start,
    Py_ssize_t sl_stop,
    Py_ssize_t sl_step
):
    """
    Compute len(range(sl_start, sl_stop, sl_step)).

    The arguments are expected to be a normalized (start, stop, step)
    triple; sl_step is assumed to be non-zero.

    Returns 0 for an empty range, including the case where sl_stop lies
    on the wrong side of sl_start for the sign of sl_step (for example
    start=5, stop=2, step=1), rather than a negative value.
    """
    if sl_step > 0:
        if sl_start >= sl_stop:
            # empty range for a forward step
            return 0
        # 1 + argmax k such that sl_start + sl_step*k < sl_stop
        # numerator is >= 0 and divisor > 0 here, so floor and
        # truncating division give the same quotient
        return 1 + ((sl_stop - sl_start - 1) // sl_step)
    else:
        if sl_start <= sl_stop:
            # empty range for a backward step
            return 0
        # 1 + argmax k such that sl_start + sl_step*k > sl_stop
        # numerator is <= 0 and divisor < 0 here, so the quotient is >= 0
        return 1 + ((sl_stop - sl_start + 1) // sl_step)


cdef object _basic_slice_meta(object ind, tuple shape,
tuple strides, Py_ssize_t offset):
"""
Expand All @@ -33,9 +50,9 @@ cdef object _basic_slice_meta(object ind, tuple shape,
return ((1,) + shape, (0,) + strides, offset)
elif isinstance(ind, slice):
sl_start, sl_stop, sl_step = ind.indices(shape[0])
sh0 = (sl_stop - sl_start) // sl_step
sh0 = _slice_len(sl_start, sl_stop, sl_step)
str0 = sl_step * strides[0]
new_strides = strides if (sl_step == 1) else (str0,) + strides[1:]
new_strides = strides if (sl_step == 1 or sh0 == 0) else (str0,) + strides[1:]
return (
(sh0, ) + shape[1:],
new_strides,
Expand Down Expand Up @@ -101,8 +118,8 @@ cdef object _basic_slice_meta(object ind, tuple shape,
elif isinstance(ind_i, slice):
k_new = k + 1
sl_start, sl_stop, sl_step = ind_i.indices(shape[k])
sh_i = (sl_stop - sl_start) // sl_step
str_i = sl_step * strides[k]
sh_i = _slice_len(sl_start, sl_stop, sl_step)
str_i = (1 if sh_i == 0 else sl_step) * strides[k]
new_shape.append(sh_i)
new_strides.append(str_i)
new_offset = new_offset + sl_start * strides[k]
Expand Down
23 changes: 19 additions & 4 deletions dpctl/tensor/_stride_utils.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ cdef int _from_input_shape_strides(

# 0-d array
if (nd == 0):
contig[0] = USM_ARRAY_C_CONTIGUOUS
contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
nelems[0] = 1
min_disp[0] = 0
max_disp[0] = 0
Expand All @@ -88,17 +88,28 @@ cdef int _from_input_shape_strides(
shape_arr[i] = <Py_ssize_t> shape[i]
elem_count *= shape_arr[i]
if elem_count == 0:
contig[0] = USM_ARRAY_C_CONTIGUOUS
contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
nelems[0] = 1
min_disp[0] = 0
max_disp[0] = 0
strides_ptr[0] = <Py_ssize_t *>(<size_t>0)
if strides is None:
strides_ptr[0] = <Py_ssize_t *>(<size_t>0)
else:
strides_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
if (not strides_arr):
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_MALLOC
strides_ptr[0] = strides_arr
for i in range(0, nd):
strides_arr[i] = <Py_ssize_t> strides[i]
return 0
nelems[0] = elem_count

if (strides is None):
# no need to allocate and populate strides
if (int(order) not in [ord('C'), ord('F'), ord('c'), ord('f')]):
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_INCORRECT_ORDER
if order == <char> ord('C') or order == <char> ord('c'):
contig[0] = USM_ARRAY_C_CONTIGUOUS
Expand All @@ -112,6 +123,8 @@ cdef int _from_input_shape_strides(
and len(strides) == nd):
strides_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
if (not strides_arr):
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_MALLOC
strides_ptr[0] = strides_arr
for i in range(0, nd):
Expand Down Expand Up @@ -143,6 +156,8 @@ cdef int _from_input_shape_strides(
contig[0] = 0 # non-contiguous
return 0
else:
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_UNEXPECTED_STRIDES
# return ERROR_INTERNAL

Expand Down
8 changes: 4 additions & 4 deletions dpctl/tensor/_types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cdef str _make_typestr(int typenum):
Make typestring from type number
"""
cdef type_to_str = ['|b1', '|i1', '|u1', '|i2', '|u2',
'|i4', '|u4', '', '', '|i8', '|u8',
'|i4', '|u4', '|i4', '|u4', '|i8', '|u8',
'|f4', '|f8', '', '|c8', '|c16', '']

if (typenum < 0):
Expand All @@ -63,8 +63,8 @@ cdef int type_bytesize(int typenum):
NPY_USHORT=4 : 2
NPY_INT=5 : 4
NPY_UINT=6 : 4
NPY_LONG=7 :
NPY_ULONG=8 :
NPY_LONG=7 : 4
NPY_ULONG=8 : 4
NPY_LONGLONG=9 : 8
NPY_ULONGLONG=10 : 8
NPY_FLOAT=11 : 4
Expand All @@ -76,7 +76,7 @@ cdef int type_bytesize(int typenum):
NPY_HALF=23 : 2
"""
cdef int *type_to_bytesize = [
1, 1, 1, 2, 2, 4, 4, 8, 8, 8, 8, 4, 8, -1, 8, 16, -1]
1, 1, 1, 2, 2, 4, 4, 4, 4, 8, 8, 4, 8, -1, 8, 16, -1]

if typenum < 0:
return -1
Expand Down
3 changes: 2 additions & 1 deletion dpctl/tensor/_usmarray.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ cdef class usm_ndarray:
cdef usm_ndarray res

res = usm_ndarray.__new__(
usm_ndarray, _meta[0],
usm_ndarray,
_meta[0],
dtype=_make_typestr(self.typenum_),
strides=_meta[1],
buffer=self.base_,
Expand Down
82 changes: 81 additions & 1 deletion dpctl/tests/test_usm_ndarray_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
import numbers

import numpy as np
import numpy.lib.stride_tricks as np_st
import pytest

import dpctl

# import dpctl.memory as dpmem
import dpctl.tensor as dpt
from dpctl.tensor._usmarray import Device


@pytest.mark.parametrize(
Expand Down Expand Up @@ -112,6 +114,8 @@ def test_properties():
(2, 2, None, 3, 4),
(Ellipsis,),
(None, slice(0, None, 2), Ellipsis, slice(0, None, 3)),
(None, slice(1, None, 2), Ellipsis, slice(1, None, 3)),
(None, slice(None, -1, -2), Ellipsis, slice(2, None, 3)),
(
slice(None, None, -1),
slice(None, None, -1),
Expand All @@ -121,10 +125,86 @@ def test_properties():
],
)
def test_basic_slice(ind):
    """
    Check that basic indexing of a usm_ndarray produces the same shape and
    strides as applying the same index to a NumPy array of equal shape and
    dtype, and that the dtype is preserved.
    """
    # Single allocation only: an earlier assignment of X with a different
    # shape was immediately overwritten and has been dropped.
    X = dpt.usm_ndarray((2 * 3, 2 * 4, 3 * 5, 2 * 7), dtype="u1")
    Xnp = np.empty(X.shape, dtype=X.dtype)
    S = X[ind]
    Snp = Xnp[ind]
    assert S.shape == Snp.shape
    # NOTE(review): strides are compared directly; with the one-byte "u1"
    # dtype this presumably makes element and byte strides coincide.
    assert S.strides == Snp.strides
    assert S.dtype == X.dtype


def _from_numpy(np_ary, device=None, usm_type="shared"):
if type(np_ary) is np.ndarray:
if np_ary.flags["FORC"]:
x = np_ary
else:
x = np.ascontiguous(np_ary)
R = dpt.usm_ndarray(
np_ary.shape,
dtype=np_ary.dtype,
buffer=usm_type,
buffer_ctor_kwargs={
"queue": Device.create_device(device).sycl_queue
},
)
R.usm_data.copy_from_host(x.reshape((-1)).view("|u1"))
return R
else:
raise ValueError("Expected numpy.ndarray, got {}".format(type(np_ary)))


def _to_numpy(usm_ary):
    """
    Build a NumPy view over a host copy of the memory behind a usm_ndarray.

    The whole underlying USM allocation is copied to the host; the result
    is a strided view into that copy with the shape of ``usm_ary``, with
    strides scaled from ``usm_ary.strides`` by the item size.

    Raises:
        ValueError: if ``usm_ary`` is not a dpctl.tensor.usm_ndarray.
    """
    if type(usm_ary) is not dpt.usm_ndarray:
        raise ValueError(
            "Expected dpctl.tensor.usm_ndarray, got {}".format(type(usm_ary))
        )
    memory = usm_ary.usm_data
    nbytes = memory.nbytes
    host_copy = memory.copy_to_host().view(usm_ary.dtype)
    itemsize = usm_ary.itemsize
    # The interface offset is scaled by the item size to get a byte offset.
    byte_offset = usm_ary.__sycl_usm_array_interface__["offset"] * itemsize
    raw_bytes = np.ndarray((nbytes,), dtype="u1", buffer=host_copy)
    typed = raw_bytes[byte_offset:].view(usm_ary.dtype)
    byte_strides = tuple(itemsize * st for st in usm_ary.strides)
    return np_st.as_strided(typed, shape=usm_ary.shape, strides=byte_strides)


def test_slice_constructor_1d():
    """
    Round-trip a 1-d int32 array through a usm_ndarray and check that
    slicing on the device side matches slicing the host array.
    """
    host = np.arange(37, dtype="i4")
    on_device = _from_numpy(host, device="gpu", usm_type="device")
    cases = [
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
    ]
    for ind in cases:
        actual = _to_numpy(on_device[ind])
        expected = host[ind]
        assert np.array_equal(actual, expected), "Failed for {}".format(ind)


def test_slice_constructor_3d():
    """
    Round-trip a 3-d int32 array through a usm_ndarray and check that
    indexing on the device side matches indexing the host array, for
    single slices and for one tuple index mixing slice, Ellipsis, None
    and an integer.
    """
    host = np.empty((37, 24, 35), dtype="i4")
    on_device = _from_numpy(host, device="gpu", usm_type="device")
    cases = [
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
        (slice(None, None, -2), Ellipsis, None, 15),
    ]
    for ind in cases:
        actual = _to_numpy(on_device[ind])
        expected = host[ind]
        assert np.array_equal(actual, expected), "Failed for {}".format(ind)