From 1529cea6aa3269bf29c0d3c8cd0641d84e404f51 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 26 Aug 2021 13:04:51 -0500 Subject: [PATCH 1/3] Moved sycl timer into dpctl.SyclTimer Added docstring, made dt into a property (rather than a method). Removed stray "SyclEventRaw" from __all__ in dpctl/__init__.py --- dpctl/__init__.py | 3 +- .../sycl_timer.py => dpctl/_sycl_timer.py | 55 ++++++++++++++++--- examples/python/dppy_kernel.py | 13 +++-- 3 files changed, 56 insertions(+), 15 deletions(-) rename examples/python/sycl_timer.py => dpctl/_sycl_timer.py (50%) diff --git a/dpctl/__init__.py b/dpctl/__init__.py index c033a75de2..a0b1d8a0cc 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -64,6 +64,7 @@ set_global_queue, ) +from ._sycl_timer import SyclTimer from ._version import get_versions from .enum_types import backend_type, device_type, event_status_type @@ -88,7 +89,7 @@ ] __all__ += [ "SyclEvent", - "SyclEventRaw", + "SyclTimer", ] __all__ += [ "get_platforms", diff --git a/examples/python/sycl_timer.py b/dpctl/_sycl_timer.py similarity index 50% rename from examples/python/sycl_timer.py rename to dpctl/_sycl_timer.py index 60422b9ebc..ac3e3814dd 100644 --- a/examples/python/sycl_timer.py +++ b/dpctl/_sycl_timer.py @@ -17,27 +17,65 @@ import timeit -import dpctl +from . import SyclQueue class SyclTimer: - def __init__(self, host_time=timeit.default_timer, time_scale=1): - self.timer = host_time + """ + SyclTimer(host_timer=timeit.default_timer, time_scale=1) + Python class to measure device time of execution of commands submitted to + :class:`dpctl.SyclQueue` as well as the wall-time. + + :Example: + .. 
code-block:: python + + import dpctl + + # Create a default SyclQueue + q = dpctl.SyclQueue(property='enable_profiling') + + # create the timer + milliseconds_sc = 1e-3 + timer = dpctl.SyclTimer(time_scale = milliseconds_sc) + + # use the timer + with timer(queue=q): + code_block + + # retrieve elapsed times in milliseconds + wall_dt, sycl_dt = timer.dt + + Remark: + The timer synchronizes the queue at the entrance and the + exit of the context. + + Args: + host_timer (callable): A callable such that host_timer() returns current + host time in seconds. + time_scale (int, float): Ratio of the unit of time of interest and + one second. + """ + + def __init__(self, host_timer=timeit.default_timer, time_scale=1): + self.timer = host_timer self.time_scale = time_scale + self.queue = None def __call__(self, queue=None): - if isinstance(queue, dpctl.SyclQueue): + if isinstance(queue, SyclQueue): if queue.has_enable_profiling: self.queue = queue else: raise ValueError( - "The queue does not contain the enable_profiling property" + "The given queue was not created with the " + "enable_profiling property" ) else: - raise ValueError( - "The passed queue must be " + raise TypeError( + "The passed queue must have type dpctl.SyclQueue, " + "got {}".format(type(queue)) ) - return self.__enter__() + return self def __enter__(self): self.event_start = self.queue.submit_barrier() @@ -48,6 +86,7 @@ def __exit__(self, *args): self.event_finish = self.queue.submit_barrier() self.host_finish = self.timer() + @property def dt(self): self.event_start.wait() self.event_finish.wait() diff --git a/examples/python/dppy_kernel.py b/examples/python/dppy_kernel.py index 3384fd5183..cfbbc947f6 100644 --- a/examples/python/dppy_kernel.py +++ b/examples/python/dppy_kernel.py @@ -17,9 +17,9 @@ import numba_dppy import numpy as np -from sycl_timer import SyclTimer import dpctl +from dpctl import SyclTimer @numba_dppy.kernel @@ -45,11 +45,12 @@ def dppy_gemm(a, b, c): c = np.ones_like(a).reshape(X, X) q 
= dpctl.SyclQueue("opencl:gpu", property="enable_profiling") +timer = SyclTimer(time_scale=1) with dpctl.device_context(q): - timers = SyclTimer(time_scale=1) - with timers(q): + with timer(q): dppy_gemm[griddim, blockdim](a, b, c) cc = np.dot(a, b) - host_time, device_time = timers.dt() - print("Wall time: ", host_time, "\n", "Device time: ", device_time) - print(np.allclose(c, cc)) + host_time, device_time = timer.dt + +print("Wall time: ", host_time, "\nDevice time: ", device_time) +print(np.allclose(c, cc)) From d7fec539315a501d2d5513cd684b13f833565c55 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 27 Aug 2021 12:06:32 -0500 Subject: [PATCH 2/3] Use dpctl.SyclTimer in test for kernel submit --- dpctl/tests/test_sycl_kernel_submit.py | 28 +++++++++++++++++--------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index ddcda2609f..5c8c2ce331 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -37,19 +37,20 @@ def test_create_program_from_source(self): size_t index = get_global_id(0); \ c[index] = d*a[index] + b[index]; \ }" - q = dpctl.SyclQueue("opencl:gpu") + q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling") prog = dpctl_prog.create_program_from_source(q, oclSrc) axpyKernel = prog.get_sycl_kernel("axpy") - bufBytes = 1024 * np.dtype("i").itemsize + n_elems = 1024 * 512 + bufBytes = n_elems * np.dtype("i").itemsize abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) - a = np.ndarray((1024), buffer=abuf, dtype="i") - b = np.ndarray((1024), buffer=bbuf, dtype="i") - c = np.ndarray((1024), buffer=cbuf, dtype="i") - a[:] = np.arange(1024) - b[:] = np.arange(1024, 0, -1) + a = np.ndarray((n_elems,), buffer=abuf, dtype="i") + b = np.ndarray((n_elems,), buffer=bbuf, dtype="i") + c = 
np.ndarray((n_elems,), buffer=cbuf, dtype="i") + a[:] = np.arange(n_elems) + b[:] = np.arange(n_elems, 0, -1) c[:] = 0 d = 2 args = [] @@ -59,10 +60,17 @@ def test_create_program_from_source(self): args.append(c.base) args.append(ctypes.c_int(d)) - r = [1024] + r = [ + n_elems, + ] - q.submit(axpyKernel, args, r) - self.assertTrue(np.allclose(c, a * d + b)) + timer = dpctl.SyclTimer() + with timer(q): + q.submit(axpyKernel, args, r) + ref_c = a * d + b + host_dt, device_dt = timer.dt + self.assertTrue(host_dt > device_dt) + self.assertTrue(np.allclose(c, ref_c)) if __name__ == "__main__": From 90ebdb740352873d0d8bbd6622a018f99cdf5034 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 27 Aug 2021 13:36:06 -0500 Subject: [PATCH 3/3] Added test_sycl_timer Times memcpy operation and a host computation --- dpctl/tests/test_sycl_event.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/dpctl/tests/test_sycl_event.py b/dpctl/tests/test_sycl_event.py index 289060ec1b..4515895d8c 100644 --- a/dpctl/tests/test_sycl_event.py +++ b/dpctl/tests/test_sycl_event.py @@ -157,3 +157,26 @@ def test_profiling_info(): assert event.profiling_info_end else: pytest.skip("No OpenCL CPU queues available") + + +def test_sycl_timer(): + try: + q = dpctl.SyclQueue(property="enable_profiling") + except dpctl.SyclQueueCreationError: + pytest.skip("Queue creation of default device failed") + timer = dpctl.SyclTimer() + m1 = dpctl_mem.MemoryUSMDevice(256 * 1024, queue=q) + m2 = dpctl_mem.MemoryUSMDevice(256 * 1024, queue=q) + with timer(q): + # device task + m1.copy_from_device(m2) + # host task + [x ** 2 for x in range(1024)] + host_dt, device_dt = timer.dt + assert host_dt > device_dt + q_no_profiling = dpctl.SyclQueue() + assert q_no_profiling.has_enable_profiling is False + with pytest.raises(ValueError): + timer(queue=q_no_profiling) + with pytest.raises(TypeError): + timer(queue=None)