diff --git a/.flake8 b/.flake8
index 6e9c78b236..5a20d20b6e 100644
--- a/.flake8
+++ b/.flake8
@@ -13,6 +13,7 @@ exclude =
.git
per-file-ignores =
+ dpctl/_diagnostics.pyx: E999
dpctl/_sycl_context.pyx: E999, E225, E227
dpctl/_sycl_device.pyx: E999, E225
dpctl/_sycl_device_factory.pyx: E999, E225
@@ -23,6 +24,7 @@ per-file-ignores =
dpctl/memory/_memory.pyx: E999, E225, E226, E227
dpctl/program/_program.pyx: E999, E225, E226, E227
dpctl/tensor/_usmarray.pyx: E999, E225, E226, E227
+ dpctl/tensor/_dlpack.pyx: E999, E225, E226, E227
dpctl/tensor/numpy_usm_shared.py: F821
dpctl/tests/_cython_api.pyx: E999, E225, E227, E402
dpctl/utils/_compute_follows_data.pyx: E999, E225, E227
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000..f10261fb12
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+dpctl/_version.py export-subst
diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 4e72d54517..9e07dc8f69 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -1,6 +1,10 @@
name: Conda package
-on: [push, pull_request]
+on:
+ push:
+ branches:
+ - master
+ pull_request:
env:
PACKAGE_NAME: dpctl
@@ -115,6 +119,7 @@ jobs:
- name: Add conda to system path
run: echo $CONDA/bin >> $GITHUB_PATH
- name: Install conda-build
+ # Needed to be able to run conda index
run: conda install conda-build
- name: Create conda channel
run: |
@@ -147,6 +152,11 @@ jobs:
conda install $PACKAGE_NAME pytest python=${{ matrix.python }} $CHANNELS
# Test installed packages
conda list
+ - name: Smoke test
+ run: |
+ export OCL_ICD_FILENAMES=libintelocl.so
+ export SYCL_ENABLE_HOST_DEVICE=1
+ python -c "import dpctl; dpctl.lsplatform()"
- name: Run tests
run: |
# echo "libintelocl.so" | tee /etc/OpenCL/vendors/intel-cpu.icd
@@ -178,6 +188,7 @@ jobs:
auto-activate-base: true
activate-environment: ""
- name: Install conda-build
+ # Needed to be able to run conda index
run: conda install conda-build
- name: Create conda channel
run: |
@@ -207,15 +218,52 @@ jobs:
# Test installed packages
conda list
- name: Add library
- run: echo "OCL_ICD_FILENAMES=C:\Miniconda\Library\lib\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+ run: |
+ echo "OCL_ICD_FILENAMES=C:\Miniconda\Library\lib\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+ try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()}
+ if ($list.count -eq 0) {
+ if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos)) {
+ New-Item -Path HKLM:\SOFTWARE\Khronos
+ }
+ if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL)) {
+ New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL
+ }
+ if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)) {
+ New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors
+ }
+ New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name C:\Miniconda\Library\lib\intelocl64.dll -Value 0
+ try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()}
+ Write-Output $(Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)
+ # Now copy OpenCL.dll into system folder
+ $system_ocl_icd_loader="C:\Windows\System32\OpenCL.dll"
+ $python_ocl_icd_loader="C:\Miniconda\Library\bin\OpenCL.dll"
+ Copy-Item -Path $python_ocl_icd_loader -Destination $system_ocl_icd_loader
+ if (Test-Path -Path $system_ocl_icd_loader) {
+ Write-Output "$system_ocl_icd_loader has been copied"
+ $acl = Get-Acl $system_ocl_icd_loader
+ Write-Output $acl
+ } else {
+ Write-Output "OCL-ICD-Loader was not copied"
+ }
+ # Variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default
+ echo "TBB_DLL_PATH=C:\Miniconda\Library\bin" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+ }
+ - name: Smoke test
+ run: |
+ $env:SYCL_ENABLE_HOST_DEVICE = "1"  # PowerShell: "set NAME=1" is Set-Variable and does not set an environment variable
+ & { [Environment]::SetEnvironmentVariable("Path", $env:Path + ";C:\Miniconda\Library\bin\", [EnvironmentVariableTarget]::Machine) }
+ python -c "import dpctl; dpctl.lsplatform()"
+ python -c "import dpctl; print(dpctl.get_devices(backend='opencl', device_type='gpu'))"
+ python -c "import dpctl; print(dpctl.get_num_devices(backend='opencl', device_type='gpu'))"
- name: Run tests
run: |
set SYCL_ENABLE_HOST_DEVICE=1
+ & { [Environment]::SetEnvironmentVariable("Path", $env:Path + ";C:\Miniconda\Library\bin\", [EnvironmentVariableTarget]::Machine) }
python -m pytest --pyargs ${{ env.MODULE_NAME }}
upload_linux:
needs: test_linux
- if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true)}}
+ if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}}
runs-on: ubuntu-latest
strategy:
matrix:
@@ -240,7 +288,7 @@ jobs:
upload_windows:
needs: test_windows
- if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true)}}
+ if: ${{github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')}}
runs-on: windows-latest
strategy:
matrix:
@@ -263,3 +311,111 @@ jobs:
run: |
conda install anaconda-client
anaconda --token ${{ env.ANACONDA_TOKEN }} upload --user dppy --label dev ${{ env.PACKAGE_NAME }}-*.tar.bz2
+
+ test_examples_linux:
+ needs: build_linux
+ runs-on: ${{ matrix.runner }}
+ strategy:
+ matrix:
+ python: [3.8]
+ experimental: [false]
+ runner: [ubuntu-latest]
+ continue-on-error: ${{ matrix.experimental }}
+ env:
+ CHANNELS: -c intel -c defaults --override-channels
+
+ steps:
+ - name: Install conda-build
+ # Needed to be able to run conda index
+ run: conda install conda-build python=${{ matrix.python }}
+ - name: Checkout dpctl repo
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+ - name: Download artifact
+ uses: actions/download-artifact@v2
+ with:
+ name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }}
+ - name: Add conda to system path
+ run: echo $CONDA/bin >> $GITHUB_PATH
+ - name: Create conda channel
+ run: |
+ mkdir -p $GITHUB_WORKSPACE/channel/linux-64
+ mv ${PACKAGE_NAME}-*.tar.bz2 $GITHUB_WORKSPACE/channel/linux-64
+ conda index $GITHUB_WORKSPACE/channel
+ # Test channel
+ conda search $PACKAGE_NAME -c $GITHUB_WORKSPACE/channel --override-channels
+ - name: Collect dependencies
+ run: |
+ CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}"
+ conda install $PACKAGE_NAME python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile
+ - name: Set pkgs_dirs
+ run: |
+ echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc
+ - name: Cache conda packages
+ uses: actions/cache@v2
+ env:
+ CACHE_NUMBER: 0 # Increase to reset cache
+ with:
+ path: ~/.conda/pkgs
+ key:
+ ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }}
+ restore-keys: |
+ ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-
+ ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-
+ - name: Install dpctl
+ shell: bash -l {0}
+ run: |
+ source $CONDA/etc/profile.d/conda.sh
+ conda activate
+ CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}"
+ conda install -y $PACKAGE_NAME pytest python=${{ matrix.python }} $CHANNELS
+ # Test installed packages
+ conda list
+ - name: Install example requirements
+ shell: bash -l {0}
+ run: |
+ source $CONDA/etc/profile.d/conda.sh
+ conda install -y pybind11 cython
+ conda install -y -c intel mkl-dpcpp mkl-devel-dpcpp numba-dppy
+ conda create -y -n build_env -c intel dpcpp_linux-64
+ - name: Build and run examples with native extensions
+ shell: bash -l {0}
+ run: |
+ source $CONDA/etc/profile.d/conda.sh
+ export OCL_ICD_FILENAMES=libintelocl.so
+ export SYCL_ENABLE_HOST_DEVICE=1
+ conda activate
+ cd examples/pybind11
+ export CC=dpcpp
+ export CXX=dpcpp
+ for d in $(ls)
+ do
+ pushd $d
+ conda activate --stack build_env
+ python setup.py build_ext --inplace || exit 1
+ conda deactivate
+ python example.py
+ popd
+ done
+ cd ../cython
+ for d in $(ls)
+ do
+ pushd $d
+ conda activate --stack build_env
+ python setup.py build_ext --inplace || exit 1
+ conda deactivate
+ python run.py
+ popd
+ done
+ - name: Run Python examples
+ shell: bash -l {0}
+ run: |
+ cd examples/python
+ export OCL_ICD_FILENAMES=libintelocl.so
+ export SYCL_ENABLE_HOST_DEVICE=1
+ for script in $(find . \( -not -name "_*" -and -name "*.py" \))
+ do
+ echo "Executing ${script}"
+ python ${script} || exit 1
+ done
diff --git a/.github/workflows/cpp_style_checks.yml b/.github/workflows/cpp_style_checks.yml
index b5a7def26f..b16530f967 100644
--- a/.github/workflows/cpp_style_checks.yml
+++ b/.github/workflows/cpp_style_checks.yml
@@ -16,7 +16,12 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Run clang-format style check for C/C++ programs.
- uses: jidicula/clang-format-action@v3.1.0
+ uses: jidicula/clang-format-action@v3.5.1
with:
clang-format-version: '11'
- check-path: 'dpctl-capi'
+ check-path: 'libsyclinterface'
+ - name: Run clang-format style check for api headers.
+ uses: jidicula/clang-format-action@v3.5.1
+ with:
+ clang-format-version: '11'
+ check-path: 'dpctl/apis'
diff --git a/.github/workflows/generate-coverage.yaml b/.github/workflows/generate-coverage.yaml
index feb7e71015..c46af799a8 100644
--- a/.github/workflows/generate-coverage.yaml
+++ b/.github/workflows/generate-coverage.yaml
@@ -11,7 +11,7 @@ jobs:
env:
ONEAPI_ROOT: /opt/intel/oneapi
- GTEST_ROOT: /home/runner/work/googletest-release-1.10.0/install
+ GTEST_ROOT: /home/runner/work/googletest-release-1.11.0/install
steps:
- name: Cancel Previous Runs
@@ -29,17 +29,17 @@ jobs:
- name: Install Intel OneAPI
run: |
- sudo apt-get install intel-oneapi-compiler-dpcpp-cpp=2021.3.0-3350
- sudo apt-get install intel-oneapi-tbb=2021.3.0-511
+ sudo apt-get install intel-oneapi-compiler-dpcpp-cpp
+ sudo apt-get install intel-oneapi-tbb
- - name: Install CMake
+ - name: Install CMake and Ninja
run: |
- sudo apt-get install cmake
+ sudo apt-get install cmake ninja-build
- name: Setup Python
uses: actions/setup-python@v2
with:
- python-version: '3.8'
+ python-version: '3.9'
architecture: x64
- name: Cache Gtest
@@ -47,8 +47,8 @@ jobs:
uses: actions/cache@v2
with:
path: |
- /home/runner/work/googletest-release-1.10.0/install
- key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/googletest-release-1.10.0/install/include/gtest/*') }}
+ /home/runner/work/googletest-release-1.11.0/install
+ key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/googletest-release-1.11.0/install/include/gtest/*') }}
restore-keys: |
${{ runner.os }}-build-${{ env.cache-name }}-
${{ runner.os }}-build-
@@ -59,12 +59,12 @@ jobs:
shell: bash -l {0}
run: |
cd /home/runner/work
- wget https://github.com/google/googletest/archive/refs/tags/release-1.10.0.tar.gz
- tar xf release-1.10.0.tar.gz
- cd googletest-release-1.10.0
+ wget https://github.com/google/googletest/archive/refs/tags/release-1.11.0.tar.gz
+ tar xf release-1.11.0.tar.gz
+ cd googletest-release-1.11.0
mkdir build
cd build
- cmake .. -DCMAKE_INSTALL_PREFIX=/home/runner/work/googletest-release-1.10.0/install
+ cmake .. -DCMAKE_INSTALL_PREFIX=/home/runner/work/googletest-release-1.11.0/install
make && make install
- name: Checkout repo
@@ -79,14 +79,29 @@ jobs:
- name: Install dpctl dependencies
shell: bash -l {0}
run: |
- pip install numpy cython setuptools pytest pytest-cov coverage[toml]
+ pip install numpy cython setuptools pytest pytest-cov scikit-build coverage[toml]
- name: Build dpctl with coverage
shell: bash -l {0}
run: |
source /opt/intel/oneapi/setvars.sh
- python setup.py develop --coverage=True
- python -c "import dpctl; print(dpctl.__version__); dpctl.lsplatform()"
+ export _SAVED_PATH=${PATH}
+ export PATH=$(dirname $(dirname $(which icx)))/bin-llvm:${PATH}
+ python setup.py develop -- \
+ -G "Ninja" \
+ -DCMAKE_BUILD_TYPE=Debug \
+ -DCMAKE_C_COMPILER:PATH=icx \
+ -DCMAKE_CXX_COMPILER:PATH=icpx \
+ -DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON \
+ -DDPCTL_GENERATE_COVERAGE=ON \
+ -DDPCTL_BUILD_CAPI_TESTS=ON \
+ -DDPCTL_COVERAGE_REPORT_OUTPUT_DIR=$(pwd)
+ pushd $(find _skbuild -name cmake-build)
+ cmake --build . --target lcov-genhtml || exit 1
+ popd
+ export PATH=${_SAVED_PATH}
+ unset _SAVED_PATH
+ python -c "import dpctl; print(dpctl.__version__); dpctl.lsplatform()" || exit 1
pytest -q -ra --disable-warnings --cov-config pyproject.toml --cov dpctl --cov-report term-missing --pyargs dpctl -vv
- name: Install coverall dependencies
@@ -96,8 +111,9 @@ jobs:
pip install coveralls
- name: Upload coverage data to coveralls.io
+ shell: bash -l {0}
run: |
- coveralls-lcov -v -n build_cmake/tests/dpctl.lcov > dpctl-c-api-coverage.json
+ coveralls-lcov -v -n $(find _skbuild -name tests)/dpctl.lcov > dpctl-c-api-coverage.json
coveralls --service=github --merge=dpctl-c-api-coverage.json
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/generate-docs.yml b/.github/workflows/generate-docs.yml
index be23c32363..adf8717192 100644
--- a/.github/workflows/generate-docs.yml
+++ b/.github/workflows/generate-docs.yml
@@ -3,6 +3,8 @@ on:
push:
branches:
- master
+ pull_request:
+ types: [opened, synchronize, reopened, closed]
jobs:
build-and-deploy:
@@ -14,6 +16,7 @@ jobs:
with:
access_token: ${{ github.token }}
- name: Add Intel repository
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
run: |
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
@@ -21,54 +24,61 @@ jobs:
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
sudo apt-get update
- name: Install Intel OneAPI
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
run: |
sudo apt-get install intel-oneapi-dpcpp-cpp-compiler
- name: Install Lua
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
run: |
sudo apt-get install liblua5.2-dev
- name: Install Doxygen
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
run: |
sudo apt-get install doxygen
- - name: Install CMake
+ - name: Install CMake and Ninja
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
run: |
- sudo apt-get install cmake
+ sudo apt-get install cmake ninja-build
- name: Setup Python
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
uses: actions/setup-python@v2
with:
python-version: '3.8'
architecture: x64
- name: Install sphinx dependencies
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
shell: bash -l {0}
run: |
- pip install numpy cython setuptools sphinx sphinx_rtd_theme pydot graphviz
+ pip install numpy cython setuptools scikit-build sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput
- name: Checkout repo
uses: actions/checkout@v2
with:
fetch-depth: 0
- - name: Build dpctl
- shell: bash -l {0}
- run: |
- source /opt/intel/oneapi/setvars.sh
- python setup.py develop
- python -c "import dpctl; print(dpctl.__version__)"
- - name: Build docs
+ persist-credentials: false
+ - name: Build dpctl+docs
+ if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
shell: bash -l {0}
run: |
# Ensure that SYCL libraries are on LD_LIBRARY_PATH
source /opt/intel/oneapi/setvars.sh
- cd docs
- mkdir -p build && cd build && rm -rf *
wget https://github.com/vovkos/doxyrest/releases/download/doxyrest-2.1.2/doxyrest-2.1.2-linux-amd64.tar.xz
tar xf doxyrest-2.1.2-linux-amd64.tar.xz
- cmake .. -DDPCTL_USE_MULTIVERSION_TEMPLATE=ON \
- -DDPCTL_ENABLE_DOXYREST=ON \
- -DDoxyrest_DIR=`pwd`/doxyrest-2.1.2-linux-amd64
- make Sphinx
- cd ..
- mv generated_docs/docs ~/docs
+ python setup.py develop -- \
+ -G "Ninja" \
+ -DCMAKE_BUILD_TYPE=Debug \
+ -DCMAKE_C_COMPILER:PATH=icx \
+ -DCMAKE_CXX_COMPILER:PATH=icpx \
+ -DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON \
+ -DDPCTL_GENERATE_DOCS=ON \
+ -DDPCTL_ENABLE_DOXYREST=ON \
+ -DDoxyrest_DIR=`pwd`/doxyrest-2.1.2-linux-amd64
+ python -c "import dpctl; print(dpctl.__version__)" || exit 1
+ cd "$(find _skbuild -name cmake-build)" || exit 1
+ cmake --build . --target Sphinx || exit 1
+ mv ../cmake-install/docs/docs ~/docs
git clean -dfx
- cd ..
- name: Publish docs
+ if: ${{ github.ref == 'refs/heads/master' }}
shell: bash -l {0}
run: |
git remote add tokened_docs https://IntelPython:${{ secrets.GITHUB_TOKEN }}@github.com/IntelPython/dpctl.git
@@ -77,9 +87,63 @@ jobs:
echo `pwd`
cd master
git rm -rf *
- mv ~/docs/* .
+ mv ~/docs/* . || exit 1
git add .
git config --global user.name 'github-actions[doc-deploy-bot]'
- git config --gloabl user.email 'github-actions[doc-deploy-bot]@users.noreply.github.com'
+ git config --global user.email 'github-actions[doc-deploy-bot]@users.noreply.github.com'
git commit -m "Latest docs."
git push tokened_docs gh-pages
+ - name: Publish pull-request docs
+ if: ${{ github.event.pull_request && github.event.action != 'closed' }}
+ env:
+ PR_NUM: ${{ github.event.number }}
+ shell: bash -l {0}
+ run: |
+ git remote add tokened_docs https://IntelPython:${{ secrets.GITHUB_TOKEN }}@github.com/IntelPython/dpctl.git
+ git fetch tokened_docs
+ git checkout --track tokened_docs/gh-pages
+ echo `pwd`
+ [ -d pulls/${PR_NUM} ] && git rm -rf pulls/${PR_NUM}
+ mkdir -p pulls/${PR_NUM}
+ cd pulls/${PR_NUM}
+ mv ~/docs/* .
+ git add .
+ git config --global user.name 'github-actions[doc-deploy-bot]'
+ git config --global user.email 'github-actions[doc-deploy-bot]@users.noreply.github.com'
+ git commit -m "Docs for pull request ${PR_NUM}"
+ git push tokened_docs gh-pages
+ - name: Unpublish pull-request docs
+ if: ${{ github.event.pull_request && github.event.action == 'closed' }}
+ env:
+ PR_NUM: ${{ github.event.number }}
+ shell: bash -l {0}
+ run: |
+ git remote add tokened_docs https://IntelPython:${{ secrets.GITHUB_TOKEN }}@github.com/IntelPython/dpctl.git
+ git fetch tokened_docs
+ git checkout --track tokened_docs/gh-pages
+ echo `pwd`
+ [ -d pulls/${PR_NUM} ] && git rm -rf pulls/${PR_NUM}
+ git config --global user.name 'github-actions[doc-deploy-bot]'
+ git config --global user.email 'github-actions[doc-deploy-bot]@users.noreply.github.com'
+ git commit -m "Removing docs for closed pull request ${PR_NUM}"
+ git push tokened_docs gh-pages
+ - name: Comment with URL to published pull-request docs
+ if: ${{ github.event.pull_request && github.event.action != 'closed' }}
+ env:
+ PR_NUM: ${{ github.event.number }}
+ uses: mshick/add-pr-comment@v1
+ with:
+ message: |
+ View rendered docs @ https://intelpython.github.io/dpctl/pulls/${{ env.PR_NUM }}/index.html
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ repo-token-user-login: 'github-actions[bot]'
+ - name: Comment with URL about removal of PR docs
+ if: ${{ github.event.pull_request && github.event.action == 'closed' }}
+ env:
+ PR_NUM: ${{ github.event.number }}
+ uses: mshick/add-pr-comment@v1
+ with:
+ message: |
+ Deleted rendered PR docs from intelpython.github.io/dpctl; the latest docs should be updated shortly. :crossed_fingers:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ repo-token-user-login: 'github-actions[bot]'
diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml
index b2101c24bb..c1c0329db7 100644
--- a/.github/workflows/os-llvm-sycl-build.yml
+++ b/.github/workflows/os-llvm-sycl-build.yml
@@ -10,9 +10,12 @@ jobs:
runs-on: ubuntu-20.04
env:
- OCLCPUEXP_FN: oclcpuexp-2021.12.6.0.19_rel.tar.gz
- FPGAEMU_FN: fpgaemu-2021.12.6.0.19_rel.tar.gz
- TBB_FN: oneapi-tbb-2021.4.0-lin.tgz
+ DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
+ DRIVER_PATH: 2021-WW50
+ OCLCPUEXP_FN: oclcpuexp-2021.13.11.0.23_rel.tar.gz
+ FPGAEMU_FN: fpgaemu-2021.13.11.0.23_rel.tar.gz
+ TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.5.0
+ TBB_FN: oneapi-tbb-2021.5.0-lin.tgz
steps:
- name: Cancel Previous Runs
@@ -45,19 +48,18 @@ jobs:
if [[ -f bundle_id.txt && ( "$(cat bundle_id.txt)" == "${LATEST_LLVM_TAG_SHA}" ) ]]; then
echo "Using cached download of ${LATEST_LLVM_TAG}"
else
- export DOWNLOAD_URL_PREFIX=https://github.com/intel/llvm/releases/download
rm -rf dpcpp-compiler.tar.gz
wget ${DOWNLOAD_URL_PREFIX}/${NIGHTLY_TAG}/dpcpp-compiler.tar.gz && echo ${LATEST_LLVM_TAG_SHA} > bundle_id.txt || rm -rf bundle_id.txt
- [ -f ${OCLCPUEXP_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/2021-07/${OCLCPUEXP_FN} || rm -rf bundle_id.txt
- [ -f ${FPGAEMU_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/2021-07/${FPGAEMU_FN} || rm -rf bundle_id.txt
- [ -f ${TBB_FN} ] || wget https://github.com/oneapi-src/oneTBB/releases/download/v2021.4.0/${TBB_FN} || rm -rf bundle_id.txt
+ [ -f ${OCLCPUEXP_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/${DRIVER_PATH}/${OCLCPUEXP_FN} || rm -rf bundle_id.txt
+ [ -f ${FPGAEMU_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/${DRIVER_PATH}/${FPGAEMU_FN} || rm -rf bundle_id.txt
+ [ -f ${TBB_FN} ] || wget ${TBB_URL}/${TBB_FN} || rm -rf bundle_id.txt
rm -rf dpcpp_compiler
tar xf dpcpp-compiler.tar.gz
mkdir -p oclcpuexp
mkdir -p fpgaemu
[ -d oclcpuexp/x64 ] || tar xf ${OCLCPUEXP_FN} -C oclcpuexp
[ -d fpgaemu/x64 ] || tar xf ${FPGAEMU_FN} -C fpgaemu
- [ -d oneapi-tbb-2021.4.0/lib ] || tar xf ${TBB_FN}
+ [ -d oneapi-tbb-2021.5.0/lib ] || tar xf ${TBB_FN}
mkdir -p dpcpp_compiler/lib
mkdir -p dpcpp_compiler/lib/oclfpga
touch dpcpp_compiler/lib/oclfpga/fpgavars.sh
@@ -66,7 +68,7 @@ jobs:
- name: Install system components
shell: bash -l {0}
run: |
- sudo apt-get install cmake libtinfo5
+ sudo apt-get install cmake ninja-build libtinfo5
- name: Setup Python
uses: actions/setup-python@v2
@@ -77,7 +79,7 @@ jobs:
- name: Install dpctl dependencies
shell: bash -l {0}
run: |
- pip install numpy cython setuptools pytest
+ pip install numpy cython setuptools pytest scikit-build
- name: Checkout repo
uses: actions/checkout@v2
@@ -91,9 +93,10 @@ jobs:
source ${SYCL_BUNDLE_FOLDER}/dpcpp_compiler/startup.sh
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/oclcpuexp/x64:${LD_LIBRARY_PATH}
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/fpgaemu/x64:${LD_LIBRARY_PATH}
- export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/oneapi-tbb-2021.4.0/lib/intel64/gcc4.8:${LD_LIBRARY_PATH}
+ export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/oneapi-tbb-2021.5.0/lib/intel64/gcc4.8:${LD_LIBRARY_PATH}
export OCL_ICD_FILENAMES=libintelocl.so:libintelocl_emu.so
clang++ --version
sycl-ls
- python setup.py develop --sycl-compiler-prefix=$(dirname $(dirname `which clang++`))
- python -m pytest -v dpctl/tests
+ python setup.py develop -- -G Ninja -DCMAKE_C_COMPILER:PATH=clang -DCMAKE_CXX_COMPILER:PATH=clang++ -DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON -DDPCTL_DPCPP_HOME_DIR=$(dirname $(dirname $(which clang))) -DDPCTL_DPCPP_FROM_ONEAPI=OFF
+ python -c "import dpctl; dpctl.lsplatform()" || exit 1
+ SYCL_ENABLE_HOST_DEVICE=1 python -m pytest -v dpctl/tests
diff --git a/.github/workflows/python_style_checks.yml b/.github/workflows/python_style_checks.yml
index 5e4b9a1d9f..92ae080f53 100644
--- a/.github/workflows/python_style_checks.yml
+++ b/.github/workflows/python_style_checks.yml
@@ -25,6 +25,10 @@ jobs:
# The type of runner that the job will run on
runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: [3.9]
+
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
@@ -33,16 +37,17 @@ jobs:
- uses: actions/setup-python@v2
# Run black code formatter
- - uses: psf/black@21.4b2
+ - uses: psf/black@stable
with:
- args: ". --check"
+ src: "."
+ options: "--check"
flake8:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.7]
+ python-version: [3.9]
steps:
- uses: actions/checkout@v2
diff --git a/.gitignore b/.gitignore
index 97cccfe031..ae22979595 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ __pycache__/
# CMake build and local install directory
build
+_skbuild
build_cmake
install
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ebc971590c..7311c9eb67 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,17 +8,17 @@ repos:
pass_filenames: false
args: ["-r", "dpctl", "-lll"]
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v2.3.0
+ rev: v4.0.1
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
- rev: 21.4b2
+ rev: 22.1.0
hooks:
- id: black
exclude: "versioneer.py|dpctl/_version.py"
- repo: https://github.com/pycqa/isort
- rev: 5.8.0
+ rev: 5.10.1
hooks:
- id: isort
name: isort (python)
@@ -29,7 +29,7 @@ repos:
name: isort (pyi)
types: [pyi]
- repo: https://gitlab.com/pycqa/flake8
- rev: 3.9.1
+ rev: 4.0.1
hooks:
- id: flake8
- repo: https://github.com/pocc/pre-commit-hooks
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb7a085d55..f14be39a1c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,15 +6,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+### Added
+- `dpctl.tensor.asarray`, `dpctl.tensor.empty` implemented (#646).
+- `dpctl.tensor.usm_ndarray` adds support for DLPack protocol. `dpctl.tensor.from_dlpack` implemented (#682).
+
+### Changed
+- `dpctl-capi` has been renamed to `libsyclinterface` (#666).
+
## [0.11.4] - 12/03/2021
### Fixed
-* Fix tests for nested context factories expecting for integration environment by @PokhodenkoSA in https://github.com/IntelPython/dpctl/pull/705
+- Fix tests for nested context factories expecting for integration environment by @PokhodenkoSA in https://github.com/IntelPython/dpctl/pull/705
## [0.11.3] - 11/30/2021
### Fixed
-* Set the last byte in allocated char array to zero [cherry picked from #650] (#699)
+- Set the last byte in allocated char array to zero [cherry picked from #650] (#699)
## [0.11.2] - 11/29/2021
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000..858c83c5fe
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.21...3.22 FATAL_ERROR)
+
+project(dpctl
+ LANGUAGES CXX
+ DESCRIPTION "Python interface for XPU programming"
+)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+
+find_package(IntelDPCPP REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH)
+
+add_subdirectory(libsyclinterface)
+
+file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*)
+install(FILES ${_dpctl_capi_headers}
+ DESTINATION dpctl/include
+)
+
+add_subdirectory(dpctl)
+
+if (DPCTL_GENERATE_DOCS)
+ add_subdirectory(docs)
+endif()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9cae87e86e..f8d961169b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,12 +15,12 @@ Run before each commit:
```bash
clang-format -style=file -i \
- dpctl-capi/include/*.h \
- dpctl-capi/include/Support/*.h \
- dpctl-capi/source/*.cpp \
- dpctl-capi/tests/*.cpp \
- dpctl-capi/helper/include/*.h \
- dpctl-capi/helper/source/*.cpp
+ libsyclinterface/include/*.h \
+ libsyclinterface/include/Support/*.h \
+ libsyclinterface/source/*.cpp \
+ libsyclinterface/tests/*.cpp \
+ libsyclinterface/helper/include/*.h \
+ libsyclinterface/helper/source/*.cpp
```
> **_NOTE:_** A much simpler option is to use `pre-commit` and the
@@ -161,13 +161,12 @@ these steps:
3. Build dpctl with code coverage support.
```bash
- python setup.py develop --coverage=True
- pytest -q -ra --disable-warnings --cov dpctl --cov-report term-missing --pyargs dpctl -vv
+ python scripts/gen_coverage.py --oneapi
coverage html
```
Note that code coverage builds the C sources with debug symbols. For this
- reason, the coverage flag is only available with the `develop` mode of
+ reason, the coverage script builds the package in `develop` mode of
`setup.py`.
The coverage results for the C and Python sources will be printed to the
@@ -191,3 +190,52 @@ these steps:
> ```
> The error is related to the `tcl` package. You should uninstall the `tcl`
> package to resolve the error.
+
+## Error Reporting and Logging
+
+The SyclInterface library honors the `DPCTL_VERBOSITY` environment variable, which controls the minimum severity of the messages printed to the console.
+Set it to one of the following severity levels (listed in increasing order of severity): `warning` or `error`.
+
+```bash
+export DPCTL_VERBOSITY=warning
+```
+
+Messages of the selected severity and of every higher severity are printed to the console. For example, setting the level to `warning` prints both `warning` and `error` messages.
+
+### Optional use of the Google logging library (glog)
+
+Dpctl's error handler for libsyclinterface can optionally be configured to use [glog](https://github.com/google/glog). To use glog, follow these steps:
+
+1. Install the latest version of the glog package (0.5.0)
+
+```bash
+conda install glog
+```
+2. Build dpctl with glog support
+
+```bash
+python scripts/build_locally.py --oneapi --glog
+```
+
+3. Use the `dpctl._diagnostics.syclinterface_diagnostics(verbosity="warning", log_dir=None)` context manager to switch library diagnostics on for a block of Python code.
+When developing the library itself, use the C functions `DPCTLService_InitLogger` and `DPCTLService_ShutdownLogger` to initialize and shut down Google's logging library, respectively.
+
+```python
+from dpctl._diagnostics import syclinterface_diagnostics
+import dpctl
+
+with syclinterface_diagnostics():
+ code
+```
+
+```c
+DPCTLService_InitLogger(const char *app_name, const char *log_dir);
+DPCTLService_ShutdownLogger();
+```
+
+ - `app_name` - name of the executable file (used as the prefix for log files of the various levels).
+ - `log_dir` - path of the directory where log files are written. Specifying `NULL` results in logging to `std::cerr`.
+
+> **_NOTE:_**
+>
+> If `InitGoogleLogging` is not called before the first use of glog, the library self-initializes in `logtostderr` mode and no log files are generated.
diff --git a/MANIFEST.in b/MANIFEST.in
index 19f37e5a30..9ace2ea6ab 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,5 @@
-include versioneer.py
recursive-include dpctl/include *.h
+include dpctl/include/dpctl4pybind11.hpp
recursive-include dpctl *.pxd
include dpctl/_sycl_context.h
include dpctl/_sycl_context_api.h
@@ -13,5 +13,6 @@ include dpctl/memory/_memory.h
include dpctl/memory/_memory_api.h
include dpctl/tensor/_usmarray.h
include dpctl/tensor/_usmarray_api.h
+recursive-include dpctl/tensor/include *
include dpctl/tests/input_files/*
include dpctl/tests/*.pyx
diff --git a/README.md b/README.md
index ea3e40d85f..031267f7e9 100644
--- a/README.md
+++ b/README.md
@@ -1,158 +1,114 @@
[](https://github.com/psf/black)
[](https://pycqa.github.io/isort/)
+[](https://github.com/pre-commit/pre-commit)
[](https://coveralls.io/github/IntelPython/dpctl?branch=master)
+
-About dpctl
-===========
-
-
-
-`dpctl` (data parallel control) is a lightweight [Python package](https://intelpython.github.io/dpctl) exposing a
-subset of the Intel(R) oneAPI DPC++ [runtime classes](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_sycl_runtime_classes)
-that is distributed as part of [Intel(R) Distribution for Python*](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/distribution-for-python.html) and
-is included in Intel(R) [oneAPI](https://oneapi.io) [Base ToolKit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit.html).
-`dpctl` lets Python users query SYCL platforms, discover and represent SYCL devices, and construct SYCL queues to control data-parallel code execution on [Intel(R) XPUs](https://www.intel.com/content/www/us/en/newsroom/news/xpu-vision-oneapi-server-gpu.html) from Python.
-
-`dpctl` features classes representing [SYCL unified shared memory](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:usm)
-allocations as well as higher-level objects such as [`dpctl.tensor.usm_ndarray`](https://intelpython.github.io/dpctl/latest/docfiles/dpctl.tensor_api.html#module-dpctl.tensor) on top of USM allocations.
-
-`dpctl` assists authors of Python native extensions written in C,
-Cython, or pybind11 to use its `dpctl.SyclQueue` object to indicate the offload
-target as well as objects in `dpctl.memory` and `dpctl.tensor` submodules to
-represent USM allocations that are accessible from within data-parallel code executed
-on the target queue.
-
-`dpctl.tensor` submodule provides an array container representing an array in a
-strided layout on top of a USM allocation. The submodule provides an array-API
-conforming oneAPI DPC++ powered library to manipulate the array container.
-
-Requirements
-============
-- Install Conda
-- Install Intel oneAPI
- - Set environment variable `ONEAPI_ROOT`
- - Windows: `C:\Program Files (x86)\Intel\oneAPI\`
- - Linux: `/opt/intel/oneapi`
-- Install OpenCL HD graphics drivers
-
-Build and Install Conda Package
-==================================
-1. Create and activate conda build environment
-```bash
-conda create -n build-env conda-build
-conda activate build-env
-```
-2. Set environment variable `ONEAPI_ROOT` and build conda package
-```bash
-export ONEAPI_ROOT=/opt/intel/oneapi
-conda build conda-recipe -c ${ONEAPI_ROOT}/conda_channel
-```
-On Windows to cope with [long file names](https://github.com/IntelPython/dpctl/issues/15)
-use `croot` with short folder path:
-```cmd
-set "ONEAPI_ROOT=C:\Program Files (x86)\Intel\oneAPI\"
-conda build --croot=C:/tmp conda-recipe -c "%ONEAPI_ROOT%\conda_channel"
-```
-
-:warning: **You could face issues with conda-build=3.20**: Use conda-build=3.18!
+About
+=====
-3. Install conda package
-```bash
-conda install dpctl
-```
+
+
+Data Parallel Control (`dpctl`) is a Python library that allows a user
+to *control* the execution placement of a [compute
+kernel](https://en.wikipedia.org/wiki/Compute_kernel) on an
+[XPU](https://www.intel.com/content/www/us/en/newsroom/news/xpu-vision-oneapi-server-gpu.html).
+The compute kernel can be either a code written by the user, *e.g.*,
+using `numba-dppy`, or a code that is part of a library like oneMKL. The `dpctl`
+library is built upon the [SYCL
+standard](https://www.khronos.org/sycl/) and implements Python
+bindings for a subset of the standard [runtime
+classes](https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_sycl_runtime_classes)
+that allow users to query platforms, discover and represent devices
+and sub-devices, and construct contexts and queues. In addition,
+`dpctl` features classes for [SYCL Unified Shared Memory
+(USM)](https://link.springer.com/chapter/10.1007/978-1-4842-5574-2_6)
+management and implements a tensor [array
+API](https://data-apis.org/array-api/latest/).
+
+The library also assists authors of Python native extensions written
+in C, Cython, or pybind11 to access `dpctl` objects representing SYCL
+devices, queues, memory, and tensors.
+
+`Dpctl` is the core part of a larger family of [data-parallel Python
+libraries and
+tools](https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-python.html)
+to program XPUs. The library is available via
+[conda](https://anaconda.org/intel/dpctl) and
+[pip](https://pypi.org/project/dpctl/). It is included in the [Intel(R)
+Distribution for
+Python*](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/distribution-for-python.html)
+(IDP).
+
+Installing
+==========
+
+From Intel oneAPI
+-----------------
+
+`dpctl` is packaged as part of the quarterly Intel oneAPI releases. To
+get the library from the latest oneAPI release please follow the
+instructions from Intel's [oneAPI installation
+guide](https://www.intel.com/content/www/us/en/developer/articles/guide/installation-guide-for-oneapi-toolkits.html).
+Note that you will need to install the Intel BaseKit toolkit to get
+IDP and `dpctl`.
+
+From Conda
+----------
+
+The `dpctl` package is available on the Intel channel on Anaconda
+cloud. You can use the following to install `dpctl` from there:
-Build and Install with setuptools
-=================================
-dpctl relies on DPC++ runtime. With Intel oneAPI installed you should activate it.
-`setup.py` requires environment variable `ONEAPI_ROOT` and following packages
-installed:
-- `cython`
-- `numpy`
-- `cmake` - for building C API
-- `ninja` - only on Windows
-
-You need DPC++ to build dpctl. If you want to build using the DPC++ in a
-oneAPI distribution, activate DPC++ compiler as follows:
```bash
-export ONEAPI_ROOT=/opt/intel/oneapi
-source ${ONEAPI_ROOT}/compiler/latest/env/vars.sh
+conda install dpctl -c intel
```
-For install:
-```cmd
-python setup.py install
-```
-
-For development:
-```cmd
-python setup.py develop
-```
+From PyPi
+---------
-It is also possible to build dpctl using [DPC++ toolchain](https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md) instead of oneAPI DPC++. Instead of activating the oneAPI environment, indicate the toolchain installation prefix with `--sycl-compiler-prefix` option, e.g.
+`dpctl` is also available from PyPi and can be installed using:
-```cmd
-python setup.py develop --sycl-compiler-prefix=${DPCPP_ROOT}/llvm/build
-```
-
-Please use `python setup.py develop --help` for more details.
-
-Install Wheel Package from Pypi
-==================================
-1. Install dpctl
-```cmd
-python -m pip install --index-url https://pypi.anaconda.org/intel/simple --extra-index-url https://pypi.org/simple dpctl
+```bash
+pip3 install dpctl
```
-Note: dpctl wheel package is placed on Pypi, but some of its dependencies (like Intel numpy) are in Anaconda Cloud.
-That is why install command requires additional intel Pypi channel from Anaconda Cloud.
-2. Set path to Performance Libraries in case of using venv or system Python:
-On Linux:
-```cmd
-export LD_LIBRARY_PATH=/lib
-```
-On Windows:
-```cmd
-set PATH=\bin;\Library\bin;%PATH%
-```
+Installing the bleeding edge
+------------------------
-Using dpctl
-===========
-dpctl relies on DPC++ runtime. With Intel oneAPI installed you could activate it.
+If you want to try out the current master, you can install it from our
+development channel on Anaconda cloud:
-On Windows:
-```cmd
-call "%ONEAPI_ROOT%\compiler\latest\env\vars.bat"
-```
-On Linux:
```bash
-source ${ONEAPI_ROOT}/compiler/latest/env/vars.sh
+conda install dpctl -c dppy/label/dev
```
-When dpctl is installed via conda package
-then it uses DPC++ runtime from `dpcpp_cpp_rt` package
-and it is not necessary to activate oneAPI DPC++ compiler environment.
+Building
+========
-`dpcpp_cpp_rt` package is provided by oneAPI `conda_channel`.
+Please refer to our [getting started user
+guide](https://intelpython.github.io/dpctl) for more information on
+setting up a development environment and building `dpctl` from source.
-Examples
-========
+Running Examples
+================
See examples in folder `examples`.
Run python examples:
+
```bash
for script in `ls examples/python/`; do echo "executing ${script}"; python examples/python/${script}; done
```
Examples of building Cython extensions with DPC++ compiler, that interoperate
-with dpctl can be found in folder `cython`.
+with `dpctl` can be found in folder `cython`.
Each example in `cython` folder can be built using
`CC=icx CXX=dpcpp python setup.py build_ext --inplace`.
Please refer to `run.py` script in respective folders to execute extensions.
-Tests
-=====
-See tests in folder `dpctl/tests`.
+Running Tests
+=============
+Tests are located in folder `dpctl/tests`.
Run tests:
```bash
diff --git a/cmake/IntelDPCPPConfig.cmake b/cmake/IntelDPCPPConfig.cmake
new file mode 100644
index 0000000000..2b73830f1e
--- /dev/null
+++ b/cmake/IntelDPCPPConfig.cmake
@@ -0,0 +1,293 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+IntelDPCPPConfig
+-------
+
+DPCPP Library to verify DPCPP/SYCL compatibility of CMAKE_CXX_COMPILER
+and passes relevant compiler flags.
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This will define the following variables:
+
+``IntelDPCPP_FOUND``
+ True if the system has the DPCPP library.
+``SYCL_LANGUAGE_VERSION``
+ The SYCL language spec version by Compiler.
+``SYCL_INCLUDE_DIR``
+ Include directories needed to use SYCL.
+``SYCL_IMPLEMENTATION_ID``
+ The SYCL compiler variant.
+``SYCL_FLAGS``
+ SYCL specific flags for the compiler.
+
+Cache Variables
+^^^^^^^^^^^^^^^
+
+The following cache variables may also be set:
+
+``SYCL_INCLUDE_DIR``
+ The directory containing ``sycl.hpp``.
+``SYCL_LIBRARY_DIR``
+ The path to the SYCL library.
+``SYCL_FLAGS``
+ SYCL specific flags for the compiler.
+``SYCL_LANGUAGE_VERSION``
+ The SYCL language spec version by Compiler.
+
+
+.. note::
+
+ For now, user needs to set -DCMAKE_CXX_COMPILER or environment of
+ CXX pointing to SYCL compatible compiler ( eg: icx, clang++, icpx)
+
+ Note: do not set to DPCPP compiler. If set to a Compiler family
+ that supports dpcpp ( eg: IntelLLVM) both DPCPP and SYCL
+ features are enabled.
+
+ And add this package to user's Cmake config file.
+
+ .. code-block:: cmake
+
+ find_package(IntelDPCPP REQUIRED)
+
+#]=======================================================================]
+
+include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)
+
+find_package(PkgConfig QUIET)
+if(PKG_CONFIG_FOUND)
+ # TODO add dependency package module checks, if any
+endif()
+
+
+# TODO: can't use find_program to override the CMAKE_CXX_COMPILER as
+# Platform/ files are executed, potentially for a different compiler.
+# Safer approach is to make user to define CMAKE_CXX_COMPILER.
+
+string(COMPARE EQUAL "${CMAKE_CXX_COMPILER}" "" nocmplr)
+if(nocmplr)
+ set(IntelDPCPP_FOUND False)
+ set(SYCL_REASON_FAILURE "SYCL: CMAKE_CXX_COMPILER not set!!")
+ set(IntelDPCPP_NOT_FOUND_MESSAGE "${SYCL_REASON_FAILURE}")
+endif()
+
+# Check for known compiler family that supports SYCL
+
+if( NOT "x${CMAKE_CXX_COMPILER_ID}" STREQUAL "xClang" AND
+ NOT "x${CMAKE_CXX_COMPILER_ID}" STREQUAL "xIntelLLVM")
+ set(IntelDPCPP_FOUND False)
+ set(SYCL_REASON_FAILURE "Unsupported compiler family ${CMAKE_CXX_COMPILER_ID} and compiler ${CMAKE_CXX_COMPILER}!!")
+ set(IntelDPCPP_NOT_FOUND_MESSAGE "${SYCL_REASON_FAILURE}")
+ return()
+endif()
+
+# Assume that CXX Compiler supports SYCL and then test to verify.
+set(SYCL_COMPILER ${CMAKE_CXX_COMPILER})
+
+
+# Function to write a test case to verify SYCL features.
+
+function(SYCL_FEATURE_TEST_WRITE src)
+
+ set(pp_if "#if")
+ set(pp_endif "#endif")
+
+ set(SYCL_TEST_CONTENT "")
+ string(APPEND SYCL_TEST_CONTENT "#include \nusing namespace std;\n")
+ string(APPEND SYCL_TEST_CONTENT "int main(){\n")
+
+ # Feature tests go here
+
+ string(APPEND SYCL_TEST_CONTENT "${pp_if} defined(SYCL_LANGUAGE_VERSION)\n")
+ string(APPEND SYCL_TEST_CONTENT "cout << \"SYCL_LANGUAGE_VERSION=\"<=3.21
- python
- - make # [unix]
- - ninja # [win]
+ - ninja
+ - scikit-build
- numpy 1.19
- wheel
run:
- python
- {{ pin_compatible('numpy') }}
- - dpcpp-cpp-rt >=2021.2
+ - dpcpp-cpp-rt >=2022.0
test:
requires:
diff --git a/docs/.gitignore b/docs/.gitignore
index c7b38c7ac8..c781c1805a 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1,6 +1,7 @@
docs
generated_docs
-docfiles/dpctl-capi
+docfiles/libsyclinterface
+docfiles/dpctl
api
build
conf.py
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index bba66d788a..ff3fbcb7cd 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -1,6 +1,3 @@
-cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project("Data-parallel Control (dpctl)")
-
# Option to generate rst for C API and add to Sphinx documentation
option(DPCTL_ENABLE_DOXYREST
"Enable generation of rst files for C API"
@@ -29,7 +26,7 @@ function(_setup_doxygen)
if(DPCTL_ENABLE_DOXYGEN_HTML)
set(GENERATE_HTML "YES")
endif()
- set(DOXYGEN_INPUT_DIR ../dpctl-capi/include)
+ set(DOXYGEN_INPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../libsyclinterface/include)
set(DOXYGEN_OUTPUT_DIR ${DOC_OUTPUT_DIR}/doxygen)
set(DOXYGEN_INDEX_FILE ${DOXYGEN_OUTPUT_DIR}/xml/index.xml)
set(DOXYFILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
@@ -55,27 +52,29 @@ function(_setup_doxygen)
# Target to generate only Doxygen documentation
add_custom_target(
Doxygen
- ALL
DEPENDS ${DOXYGEN_INDEX_FILE}
)
endfunction()
function(_setup_doxyrest)
- set(DOXYREST_OUTPUT_DIR_NAME docfiles/dpctl-capi)
+ set(DOXYREST_OUTPUT_DIR_NAME docfiles/libsyclinterface)
+ # Set the DOXYREST_OUTPUT_DIR variable in both current and parent scope.
+ # The variable is used by _setup_sphinx when generating the conf.py file.
set(DOXYREST_OUTPUT_DIR
${CMAKE_CURRENT_SOURCE_DIR}/${DOXYREST_OUTPUT_DIR_NAME}
PARENT_SCOPE
)
set(DOXYREST_OUTPUT_DIR
${CMAKE_CURRENT_SOURCE_DIR}/${DOXYREST_OUTPUT_DIR_NAME}
-
)
set(DOXYREST_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/doxyrest-config.lua.in)
set(DOXYREST_CONFIG_OUT ${CMAKE_CURRENT_SOURCE_DIR}/doxyrest-config.lua)
set(DOXYREST_OUTPUT ${DOXYREST_OUTPUT_DIR}/index.rst)
set(DOXYGEN_OUTPUT_DIR ${DOC_OUTPUT_DIR}/doxygen)
+
configure_file(${DOXYREST_CONFIG_IN} ${DOXYREST_CONFIG_OUT} @ONLY)
configure_file(${INDEX_DOXYREST_IN} ${INDEX_OUT} @ONLY)
+
add_custom_command(
OUTPUT ${DOXYREST_OUTPUT}
COMMAND
@@ -83,73 +82,71 @@ function(_setup_doxyrest)
${DOXYREST_CONFIG_OUT}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS
- # Other docs files that can be edited manually
- ${INDEX_OUT}
- ${DOXYGEN_INDEX_FILE}
+ # Other docs files that can be edited manually
+ ${INDEX_OUT}
+ ${DOXYGEN_INDEX_FILE}
MAIN_DEPENDENCY ${DOXYREST_CONFIG_OUT} ${DOXYREST_CONFIG_IN}
COMMENT "Generating Doxyrest documentation"
)
# Target to generate rst from Doxygen XML using Doxyrest
add_custom_target(
Doxyrest
- ALL
DEPENDS Doxygen ${DOXYREST_OUTPUT}
)
endfunction()
function(_setup_sphinx)
+ set(GENERATE_RST_OUTPUT_DIR
+ ${CMAKE_CURRENT_SOURCE_DIR}/docfiles/dpctl
+ )
set(SPHINX_SOURCE ${CMAKE_CURRENT_SOURCE_DIR})
set(SPHINX_OUTPUT_DIR ${DOC_OUTPUT_DIR}/docs)
set(SPHINX_INDEX_FILE ${SPHINX_OUTPUT_DIR}/index.html)
set(SPHINX_CONF_IN ${SPHINX_SOURCE}/conf.in)
set(SPHINX_CONF_OUT ${SPHINX_SOURCE}/conf.py)
- # Only regenerate Sphinx when:
- # - Doxygen has rerun
- # - Our doc files have been updated
- # - The Sphinx config has been updated
+ set(DPCTL_PYAPI_RST_FILE ${GENERATE_RST_OUTPUT_DIR}/dpctl_pyapi.rst)
+
if(DPCTL_ENABLE_DOXYREST)
- add_custom_command(
- OUTPUT ${SPHINX_INDEX_FILE}
- COMMAND
- ${SPHINX_EXECUTABLE} -b html
- ${SPHINX_SOURCE}
- ${SPHINX_OUTPUT_DIR}
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- DEPENDS
- # Other docs files that can be edited manually
- ${CMAKE_CURRENT_SOURCE_DIR}/index.rst
- ${DOXYGEN_INDEX_FILE}
- MAIN_DEPENDENCY ${SPHINX_CONF_OUT} ${SPHINX_CONF_IN}
- COMMENT "Generating Sphinx documentation"
- )
- # Target to generate Sphinx
- add_custom_target(
- Sphinx
- ALL
- DEPENDS Doxyrest ${SPHINX_INDEX_FILE}
- )
+ set(DEPEND_ON_DOXYREST "Doxyrest")
else()
configure_file(${INDEX_NO_DOXYREST_IN} ${INDEX_OUT} @ONLY)
- add_custom_command(
- OUTPUT ${SPHINX_INDEX_FILE}
- COMMAND
- ${SPHINX_EXECUTABLE} -b html
- ${SPHINX_SOURCE}
- ${SPHINX_OUTPUT_DIR}
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- DEPENDS
- # Other docs files that can be edited manually
- ${CMAKE_CURRENT_SOURCE_DIR}/index.rst
- MAIN_DEPENDENCY ${SPHINX_CONF_OUT} ${SPHINX_CONF_IN}
- COMMENT "Generating Sphinx documentation"
- )
- # Target to generate Sphinx
- add_custom_target(
- Sphinx
- ALL
- DEPENDS ${SPHINX_INDEX_FILE}
- )
endif()
+
+ configure_file(
+ "${CMAKE_CURRENT_SOURCE_DIR}/generate_rst.py"
+ ${CMAKE_CURRENT_BINARY_DIR}
+ )
+ # A custom command to generate the Python API rst files
+ add_custom_command(
+ OUTPUT ${DPCTL_PYAPI_RST_FILE}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${GENERATE_RST_OUTPUT_DIR}
+ COMMAND
+ "${Python_EXECUTABLE}"
+ "${CMAKE_CURRENT_BINARY_DIR}/generate_rst.py"
+ --dir "${GENERATE_RST_OUTPUT_DIR}"
+ --module "dpctl"
+ COMMENT "Generating RST files for Python API of dpctl"
+ )
+ add_custom_command(
+ OUTPUT ${SPHINX_INDEX_FILE}
+ COMMAND
+ ${SPHINX_EXECUTABLE} -b html
+ ${SPHINX_SOURCE}
+ ${SPHINX_OUTPUT_DIR}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/index.rst
+ MAIN_DEPENDENCY ${SPHINX_CONF_OUT} ${SPHINX_CONF_IN}
+ COMMENT "Generating Sphinx documentation"
+ )
+ # Target to generate Sphinx. Note that the order of the dependencies is
+ # important, we want the rst files to generate prior to sphinx build.
+ add_custom_target(
+ Sphinx
+ DEPENDS
+ ${DEPEND_ON_DOXYREST}
+ ${DPCTL_PYAPI_RST_FILE}
+ ${SPHINX_INDEX_FILE}
+ )
# Create a conf.py by replacing variables inside @@ with the current values
configure_file(${SPHINX_CONF_IN} ${SPHINX_CONF_OUT} @ONLY)
endfunction()
@@ -185,18 +182,15 @@ set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
find_package(Git)
find_package(Sphinx REQUIRED)
find_package(Doxygen REQUIRED)
+find_package(Python REQUIRED)
+
if (DPCTL_ENABLE_DOXYREST)
find_package(Lua REQUIRED)
find_package(Doxyrest REQUIRED)
endif()
# Set the location where the generated docs are saved
-if(DPCTL_DOCGEN_PREFIX)
- message(STATUS "Generating dpctl documents in " ${DPCTL_DOCGEN_PREFIX})
- set(DOC_OUTPUT_DIR ${DPCTL_DOCGEN_PREFIX})
-else()
- set(DOC_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/generated_docs)
-endif()
+set(DOC_OUTPUT_DIR ${CMAKE_INSTALL_PREFIX}/docs)
set(INDEX_NO_DOXYREST_IN ${CMAKE_CURRENT_SOURCE_DIR}/index_no_doxyrest.rst.in)
set(INDEX_DOXYREST_IN ${CMAKE_CURRENT_SOURCE_DIR}/index_doxyrest.rst.in)
@@ -208,3 +202,11 @@ if(DPCTL_ENABLE_DOXYREST)
_setup_doxyrest()
endif()
_setup_sphinx()
+
+set_property(
+ DIRECTORY
+ PROPERTY
+ ADDITIONAL_CLEAN_FILES
+ "${CMAKE_CURRENT_SOURCE_DIR}/docfiles/dpctl"
+ "${CMAKE_CURRENT_SOURCE_DIR}/docfiles/libsyclinterface"
+)
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index a1055b7256..49f8ec208b 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -562,7 +562,7 @@ INTERNAL_DOCS = NO
# (including Cygwin) ands Mac users are advised to set this option to NO.
# The default value is: system dependent.
-CASE_SENSE_NAMES = YES
+CASE_SENSE_NAMES = NO
# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
# their full class and namespace scopes in the documentation. If set to YES, the
@@ -917,11 +917,11 @@ RECURSIVE = YES
# Note that relative paths are relative to the directory from which doxygen is
# run.
-EXCLUDE = ../dpctl-capi/include/Support
-EXCLUDE += ../dpctl-capi/include/Config
-EXCLUDE += ../dpctl-capi/include/dpctl_vector.h
-EXCLUDE += ../dpctl-capi/include/dpctl_data_types.h
-EXCLUDE += ../dpctl-capi/include/dpctl_utils.h
+EXCLUDE = ../libsyclinterface/include/Support
+EXCLUDE += ../libsyclinterface/include/Config
+EXCLUDE += ../libsyclinterface/include/dpctl_vector.h
+EXCLUDE += ../libsyclinterface/include/dpctl_data_types.h
+EXCLUDE += ../libsyclinterface/include/dpctl_utils.h
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
diff --git a/docs/README.md b/docs/README.md
index 20e696f1a2..075889c9af 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -32,58 +32,13 @@ sudo apt-get install liblua5.2-dev
Generating the docs
===================
-The documentation should be generated using the provided `Cmake` build script.
-There are a few configurable options that can be used to select the type of
-documentation to generate.
+The helper script ``scripts/gen_docs.py`` is the preferred way to generate the
+documentation. The generated documentation html pages will be installed to the
+``CMAKE_INSTALL_PREFIX/docs`` directory.
-Build only Doxygen for C API
----------------------------
```bash
-cd dpctl/docs
-mkdir -p build
-cd build
-cmake ..
-make Doxygen
+python scripts/gen_docs.py --doxyrest-root=<path to Doxyrest installation>
```
-The above steps will generate the `Doxygen` files at
-`dpctl/docs/generated_docs/doxygen/html`. The documentation can also be
-generated at a custom location by providing the optional flag
-
-```bash
-cd dpctl/docs
-mkdir -p build
-cd build
-cmake .. -DDPCTL_DOCGEN_PREFIX=
-make Doxygen
-```
-
-Build only Sphinx for Python API
---------------------------------
-```bash
-cd dpctl/docs
-mkdir -p build
-cd build
-cmake .. -DDPCTL_DOCGEN_PREFIX=
-make Sphinx
-```
-
-The `make Sphinx` command will generate only the Python API docs for dpctl.
-
-Build consolidated docs
------------------------
-It is possible to generate a single site with both Python and C API docs. As
-mentioned before, `Doxyrest` and `Lua` are required to generate the consolidated
-site.
-
-```bash
-cd dpctl/docs
-mkdir -p build
-cd build
-cmake .. \
- -DDPCTL_ENABLE_DOXYREST=ON \
- -DDoxyrest_DIR= \
- -DDPCTL_DOCGEN_PREFIX=
-make Sphinx
-```
-The `Doxyrest_DIR` flag is optional, but is needed when Doxyrest is installed in
-a non-system location.
+To skip generating the documentation for ``libsyclinterface``, the
+``--doxyrest-root`` option should be omitted.
diff --git a/docs/conf.in b/docs/conf.in
index f904d1c26b..64af63037e 100644
--- a/docs/conf.in
+++ b/docs/conf.in
@@ -1,3 +1,19 @@
+# Data Parallel Control (dpctl)
+#
+# Copyright 2020-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
@@ -5,19 +21,23 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+
from docutils.parsers.rst import directives
from sphinx.ext.autosummary import Autosummary, get_documenter
from sphinx.util.inspect import safe_getattr
import dpctl
+sys.path.insert(0, os.path.abspath("."))
+
+import extlinks_gen as urlgen
+
# -- Project information -----------------------------------------------------
project = "Data-parallel Control (dpctl)"
-copyright = "2020, Intel Corp."
+copyright = "2020-21, Intel Corp."
author = "Intel Corp."
version = dpctl.__version__.strip(".dirty")
@@ -31,13 +51,15 @@ release = dpctl.__version__.strip(".dirty")
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
- "sphinx.ext.todo",
- "sphinx.ext.coverage",
- "sphinx.ext.viewcode",
- "sphinx.ext.githubpages",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
+ "sphinx.ext.coverage",
+ "sphinx.ext.extlinks",
+ "sphinx.ext.githubpages",
"sphinx.ext.napoleon",
+ "sphinx.ext.todo",
+ "sphinx.ext.viewcode",
+ "sphinxcontrib.programoutput",
]
todo_include_todos = True
@@ -168,7 +190,7 @@ class AutoAutoSummary(Autosummary):
if not include_public:
include_public = []
items = []
- for name in sorted(obj.__dict__.keys()):
+ for name in sorted(dir(obj)):
try:
documenter = get_documenter(app, safe_getattr(obj, name), obj)
except AttributeError:
@@ -209,3 +231,7 @@ class AutoAutoSummary(Autosummary):
def setup(app):
app.add_directive("autoautosummary", AutoAutoSummary)
+
+
+# A dictionary of urls
+extlinks = urlgen.create_extlinks()
diff --git a/docs/docfiles/dpctl.memory_api.rst b/docs/docfiles/dpctl.memory_api.rst
deleted file mode 100644
index c70041cdba..0000000000
--- a/docs/docfiles/dpctl.memory_api.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-.. _dpctl.memory_api:
-
-############
-dpctl.memory
-############
-
-.. automodule:: dpctl.memory
-
-Classes
--------
-
-.. autoclass:: dpctl.memory.MemoryUSMDevice
- :members:
- :inherited-members:
- :undoc-members:
-
-.. autoclass:: dpctl.memory.MemoryUSMHost
- :members:
- :inherited-members:
- :undoc-members:
-
-.. autoclass:: dpctl.memory.MemoryUSMShared
- :members:
- :inherited-members:
- :undoc-members:
-
-Functions
----------
-
-.. autofunction:: dpctl.memory.as_usm_memory
diff --git a/docs/docfiles/dpctl.program_api.rst b/docs/docfiles/dpctl.program_api.rst
deleted file mode 100644
index 64db9c55e4..0000000000
--- a/docs/docfiles/dpctl.program_api.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-.. _dpctl.program_api:
-
-#############
-dpctl.program
-#############
-
-.. automodule:: dpctl.program
-
-Classes
--------
-
-.. autoclass:: dpctl.program.SyclKernel
- :members:
- :undoc-members:
-
-.. autoclass:: dpctl.program.SyclProgram
- :members:
- :undoc-members:
-
-Exceptions
-----------
-
-.. autoexception:: dpctl.program.SyclProgramCompilationError
-
-Functions
----------
-
-.. autofunction:: create_program_from_source
-.. autofunction:: create_program_from_spirv
diff --git a/docs/docfiles/dpctl.tensor_api.rst b/docs/docfiles/dpctl.tensor_api.rst
deleted file mode 100644
index 837aa36ffb..0000000000
--- a/docs/docfiles/dpctl.tensor_api.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-.. _dpctl.tensor_api:
-
-############
-dpctl.tensor
-############
-
-.. automodule:: dpctl.tensor
- :members:
- :undoc-members:
diff --git a/docs/docfiles/dpctl_pyapi.rst b/docs/docfiles/dpctl_pyapi.rst
deleted file mode 100644
index 7c60676d09..0000000000
--- a/docs/docfiles/dpctl_pyapi.rst
+++ /dev/null
@@ -1,86 +0,0 @@
-.. _dpctl_pyapi:
-
-################
-dpctl Python API
-################
-
-.. currentmodule:: dpctl
-
-.. automodule:: dpctl
-
-Sub-modules
------------
-
- :mod:`dpctl.memory`
- USM allocators and deallocators and classes that implement Python's
- `buffer protocol`_.
- :mod:`dpctl.program`
- Experimental wrappers for SYCL 1.2 ``program`` and ``kernel`` classes.
- The module is going to be refactored in the future to support SYCL
- 2020's ``kernel_bundle`` feature and the wrapper for the ``program``
- class is going to be removed.
- :mod:`dpctl.tensor`
- Implementation of different types of tensor classes that use USM memory.
-
-Classes
--------
-
-.. toctree::
- :maxdepth: 1
-
- dpctl.SyclContext : A Python class representing cl::sycl::context
- dpctl.SyclDevice : A Python class representing cl::sycl::device
- dpctl.SyclEvent : A Python class representing cl::sycl::event
- dpctl.SyclPlatform : A Python class representing cl::sycl::event
- dpctl.SyclQueue : A Python class representing cl::sycl::event
-
-Enumerations
-------------
-
-.. autoclass:: dpctl.backend_type
- :members:
-
-.. autoclass:: dpctl.device_type
- :members:
-
-Exceptions
-----------
-
-.. autoexception:: dpctl.SyclKernelInvalidRangeError
-.. autoexception:: dpctl.SyclKernelSubmitError
-.. autoexception:: dpctl.SyclQueueCreationError
-
-Device Selection Functions
---------------------------
-
-.. autofunction:: get_devices
-.. autofunction:: select_accelerator_device
-.. autofunction:: select_cpu_device
-.. autofunction:: select_default_device
-.. autofunction:: select_gpu_device
-.. autofunction:: select_host_device
-.. autofunction:: get_num_devices
-.. autofunction:: has_cpu_devices
-.. autofunction:: has_gpu_devices
-.. autofunction:: has_accelerator_devices
-.. autofunction:: has_host_device
-
-Queue Management Functions
---------------------------
-
-.. autofunction:: device_context
-.. autofunction:: get_current_backend
-.. autofunction:: get_current_device_type
-.. autofunction:: get_current_queue
-.. autofunction:: get_num_activated_queues
-.. autofunction:: is_in_device_context
-.. autofunction:: set_global_queue
-
-Other Helper Functions
-----------------------
-.. autofunction:: get_platforms
-.. autofunction:: lsplatform
-
-.. _Section 4.6: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_sycl_runtime_classes
-.. _SYCL 2020 spec: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html
-.. _buffer protocol: https://docs.python.org/3/c-api/buffer.html
diff --git a/docs/docfiles/dpctl_pyapi/SyclContext.rst b/docs/docfiles/dpctl_pyapi/SyclContext.rst
deleted file mode 100644
index 8ec8728d04..0000000000
--- a/docs/docfiles/dpctl_pyapi/SyclContext.rst
+++ /dev/null
@@ -1,45 +0,0 @@
-.. _SyclContext_api:
-
-#################
-dpctl.SyclContext
-#################
-
-.. currentmodule:: dpctl
-
-.. autoclass:: dpctl.SyclContext
-
- .. rubric:: Attributes:
-
- .. autoautosummary:: dpctl.SyclContext
- :attributes:
-
- .. rubric:: Private methods:
-
- .. autoautosummary:: dpctl.SyclContext
- :private_methods:
-
- .. rubric:: Public methods:
-
- .. autoautosummary:: dpctl.SyclContext
- :methods:
-
-Detail
-======
-
-Attributes
-----------
-
-.. autoattribute:: dpctl.SyclContext.device_count
-
-
-Private methods
----------------
-
-.. autofunction:: dpctl.SyclContext._get_capsule
-
-
-Public methods
---------------
-
-.. autofunction:: dpctl.SyclContext.addressof_ref
-.. autofunction:: dpctl.SyclContext.get_devices
diff --git a/docs/docfiles/dpctl_pyapi/SyclDevice.rst b/docs/docfiles/dpctl_pyapi/SyclDevice.rst
deleted file mode 100644
index 43a13ce2fe..0000000000
--- a/docs/docfiles/dpctl_pyapi/SyclDevice.rst
+++ /dev/null
@@ -1,84 +0,0 @@
-.. _SyclDevice_api:
-
-################
-dpctl.SyclDevice
-################
-
-.. currentmodule:: dpctl
-
-.. autoclass:: SyclDevice
-
- .. rubric:: Attributes:
-
- .. autoautosummary:: dpctl.SyclDevice
- :attributes:
-
- .. rubric:: Public methods:
-
- .. autoautosummary:: dpctl.SyclDevice
- :methods:
-
-Detail
-======
-
-Attributes
-----------
-
-.. autoattribute:: dpctl.SyclDevice.backend
-.. autoattribute:: dpctl.SyclDevice.default_selector_score
-.. autoattribute:: dpctl.SyclDevice.device_type
-.. autoattribute:: dpctl.SyclDevice.driver_version
-.. autoattribute:: dpctl.SyclDevice.filter_string
-.. autoattribute:: dpctl.SyclDevice.has_aspect_accelerator
-.. autoattribute:: dpctl.SyclDevice.has_aspect_cpu
-.. autoattribute:: dpctl.SyclDevice.has_aspect_custom
-.. autoattribute:: dpctl.SyclDevice.has_aspect_fp16
-.. autoattribute:: dpctl.SyclDevice.has_aspect_fp64
-.. autoattribute:: dpctl.SyclDevice.has_aspect_gpu
-.. autoattribute:: dpctl.SyclDevice.has_aspect_host
-.. autoattribute:: dpctl.SyclDevice.has_aspect_image
-.. autoattribute:: dpctl.SyclDevice.has_aspect_int64_base_atomics
-.. autoattribute:: dpctl.SyclDevice.has_aspect_int64_extended_atomics
-.. autoattribute:: dpctl.SyclDevice.has_aspect_online_compiler
-.. autoattribute:: dpctl.SyclDevice.has_aspect_online_linker
-.. autoattribute:: dpctl.SyclDevice.has_aspect_queue_profiling
-.. autoattribute:: dpctl.SyclDevice.has_aspect_usm_device_allocations
-.. autoattribute:: dpctl.SyclDevice.has_aspect_usm_host_allocations
-.. autoattribute:: dpctl.SyclDevice.has_aspect_usm_restricted_shared_allocations
-.. autoattribute:: dpctl.SyclDevice.has_aspect_usm_shared_allocations
-.. autoattribute:: dpctl.SyclDevice.has_aspect_usm_system_allocator
-.. autoattribute:: dpctl.SyclDevice.image_2d_max_height
-.. autoattribute:: dpctl.SyclDevice.image_2d_max_width
-.. autoattribute:: dpctl.SyclDevice.image_3d_max_depth
-.. autoattribute:: dpctl.SyclDevice.image_3d_max_height
-.. autoattribute:: dpctl.SyclDevice.image_3d_max_width
-.. autoattribute:: dpctl.SyclDevice.is_accelerator
-.. autoattribute:: dpctl.SyclDevice.is_cpu
-.. autoattribute:: dpctl.SyclDevice.is_gpu
-.. autoattribute:: dpctl.SyclDevice.is_host
-.. autoattribute:: dpctl.SyclDevice.max_compute_units
-.. autoattribute:: dpctl.SyclDevice.max_num_sub_groups
-.. autoattribute:: dpctl.SyclDevice.max_read_image_args
-.. autoattribute:: dpctl.SyclDevice.max_work_group_size
-.. autoattribute:: dpctl.SyclDevice.max_work_item_dims
-.. autoattribute:: dpctl.SyclDevice.max_work_item_sizes
-.. autoattribute:: dpctl.SyclDevice.max_write_image_args
-.. autoattribute:: dpctl.SyclDevice.name
-.. autoattribute:: dpctl.SyclDevice.parent_device
-.. autoattribute:: dpctl.SyclDevice.preferred_vector_width_char
-.. autoattribute:: dpctl.SyclDevice.preferred_vector_width_double
-.. autoattribute:: dpctl.SyclDevice.preferred_vector_width_float
-.. autoattribute:: dpctl.SyclDevice.preferred_vector_width_half
-.. autoattribute:: dpctl.SyclDevice.preferred_vector_width_int
-.. autoattribute:: dpctl.SyclDevice.preferred_vector_width_long
-.. autoattribute:: dpctl.SyclDevice.preferred_vector_width_short
-.. autoattribute:: dpctl.SyclDevice.sub_group_independent_forward_progress
-.. autoattribute:: dpctl.SyclDevice.vendor
-
-Public methods
---------------
-
-.. autofunction:: dpctl.SyclDevice.addressof_ref
-.. autofunction:: dpctl.SyclDevice.create_sub_devices
-.. autofunction:: dpctl.SyclDevice.get_filter_string
-.. autofunction:: dpctl.SyclDevice.print_device_info
diff --git a/docs/docfiles/dpctl_pyapi/SyclEvent.rst b/docs/docfiles/dpctl_pyapi/SyclEvent.rst
deleted file mode 100644
index bf6dbdef62..0000000000
--- a/docs/docfiles/dpctl_pyapi/SyclEvent.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. _SyclEvent_api:
-
-###############
-dpctl.SyclEvent
-###############
-
-.. currentmodule:: dpctl
-
-.. autoclass:: dpctl.SyclEvent
-
- .. rubric:: Public methods:
-
- .. autoautosummary:: dpctl.SyclEvent
- :methods:
-
-Detail
-======
-
-.. autofunction:: dpctl.SyclEvent.addressof_ref
-.. autofunction:: dpctl.SyclEvent.wait
diff --git a/docs/docfiles/dpctl_pyapi/SyclPlatform.rst b/docs/docfiles/dpctl_pyapi/SyclPlatform.rst
deleted file mode 100644
index fa5dafe93d..0000000000
--- a/docs/docfiles/dpctl_pyapi/SyclPlatform.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-.. _SyclPlatform_api:
-
-##################
-dpctl.SyclPlatform
-##################
-
-.. currentmodule:: dpctl
-
-.. autoclass:: dpctl.SyclPlatform
-
- .. rubric:: Attributes:
-
- .. autoautosummary:: dpctl.SyclPlatform
- :attributes:
-
- .. rubric:: Public methods:
-
- .. autoautosummary:: dpctl.SyclPlatform
- :methods:
-
-Detail
-======
-
-Attributes
-----------
-
-.. autoattribute:: dpctl.SyclPlatform.backend
-.. autoattribute:: dpctl.SyclPlatform.name
-.. autoattribute:: dpctl.SyclPlatform.vendor
-.. autoattribute:: dpctl.SyclPlatform.version
-
-Public methods
---------------
-
-.. autofunction:: dpctl.SyclPlatform.print_platform_info
diff --git a/docs/docfiles/dpctl_pyapi/SyclQueue.rst b/docs/docfiles/dpctl_pyapi/SyclQueue.rst
deleted file mode 100644
index a48018d6e1..0000000000
--- a/docs/docfiles/dpctl_pyapi/SyclQueue.rst
+++ /dev/null
@@ -1,53 +0,0 @@
-.. _SyclQueue_api:
-
-###############
-dpctl.SyclQueue
-###############
-
-.. currentmodule:: dpctl
-
-.. autoclass:: dpctl.SyclQueue
-
- .. rubric:: Attributes:
-
- .. autoautosummary:: dpctl.SyclQueue
- :attributes:
-
- .. rubric:: Private methods:
-
- .. autoautosummary:: dpctl.SyclQueue
- :private_methods:
-
- .. rubric:: Public methods:
-
- .. autoautosummary:: dpctl.SyclQueue
- :methods:
-
-Detail
-======
-
-Attributes
-----------
-
-.. autoattribute:: dpctl.SyclQueue.is_in_order
-.. autoattribute:: dpctl.SyclQueue.sycl_context
-.. autoattribute:: dpctl.SyclQueue.sycl_device
-
-Private methods
----------------
-
-.. autofunction:: dpctl.SyclQueue._get_capsule
-
-
-Public methods
---------------
-
-.. autofunction:: dpctl.SyclQueue.addressof_ref
-.. autofunction:: dpctl.SyclQueue.get_sycl_backend
-.. autofunction:: dpctl.SyclQueue.get_sycl_context
-.. autofunction:: dpctl.SyclQueue.get_sycl_device
-.. autofunction:: dpctl.SyclQueue.mem_advise
-.. autofunction:: dpctl.SyclQueue.memcpy
-.. autofunction:: dpctl.SyclQueue.prefetch
-.. autofunction:: dpctl.SyclQueue.submit
-.. autofunction:: dpctl.SyclQueue.wait
diff --git a/docs/docfiles/intro.rst b/docs/docfiles/intro.rst
index 115749b3da..892e66af72 100644
--- a/docs/docfiles/intro.rst
+++ b/docs/docfiles/intro.rst
@@ -2,10 +2,9 @@ Welcome to Data-parallel Control (dpctl)'s documentation!
=========================================================
The data-parallel control (dpctl) library provides C and Python bindings for
-`SYCL 2020 `_.
-The SYCL 2020 features supported by dpctl are limited to those included by
-Intel's DPCPP compiler and specifically cover the SYCL runtime classes described
-in `Section 4.6 `_
+:sycl_spec_2020:`SYCL 2020 <>`. The SYCL 2020 features supported by dpctl are
+limited to those included by Intel's DPCPP compiler and specifically cover the
+SYCL runtime classes described in :sycl_runtime_classes:`Section 4.6 <>`
of the SYCL 2020 specification. Apart from the bindings for these runtime
classes, dpctl includes bindings for SYCL USM memory allocators and
deallocators. Dpctl's Python API provides classes that implement
diff --git a/docs/docfiles/urls.json b/docs/docfiles/urls.json
new file mode 100644
index 0000000000..3e0906fc41
--- /dev/null
+++ b/docs/docfiles/urls.json
@@ -0,0 +1,16 @@
+{
+ "dpcpp_envar": "https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md",
+ "numa_domain": "https://en.wikipedia.org/wiki/Non-uniform_memory_access",
+ "oneapi": "https://www.oneapi.io/",
+ "oneapi_filter_selection": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/FilterSelector/FilterSelector.adoc",
+ "sycl_aspects": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#table.device.aspect",
+ "sycl_context": "https://sycl.readthedocs.io/en/latest/iface/context.html",
+ "sycl_device": "https://sycl.readthedocs.io/en/latest/iface/device.html",
+ "sycl_device_info": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_device_information_descriptors",
+ "sycl_device_selector": "https://sycl.readthedocs.io/en/latest/iface/device-selector.html",
+ "sycl_event": "https://sycl.readthedocs.io/en/latest/iface/event.html",
+ "sycl_platform": "https://sycl.readthedocs.io/en/latest/iface/platform.html",
+ "sycl_queue": "https://sycl.readthedocs.io/en/latest/iface/queue.html",
+ "sycl_runtime_classes": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_sycl_runtime_classes",
+ "sycl_spec_2020": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html"
+}
diff --git a/docs/docfiles/user_guides/QuickStart.rst b/docs/docfiles/user_guides/QuickStart.rst
index 7b377a94b6..94b4953ca3 100644
--- a/docs/docfiles/user_guides/QuickStart.rst
+++ b/docs/docfiles/user_guides/QuickStart.rst
@@ -4,14 +4,8 @@
Quick Start Guide
#################
-
-.. contents:: Table of contents
- :local:
- :backlinks: none
- :depth: 3
-
Installing from oneAPI
-----------------------
+======================
Dpctl is available as part of the oneAPI Intel Distribution of Python (IDP).
Please follow `oneAPI installation guide`_ to install oneAPI. In this quick
@@ -50,13 +44,13 @@ On Windows
`GPU driver installation guide`_.
Install Wheel package from Pypi
--------------------------------
+===============================
Dpctl can also be istalled from Pypi.
.. code-block:: bash
- python -m pip install --index-url https://pypi.anaconda.org/intel/simple -extra-index-url https://pypi.org/simple dpctl
+ python -m pip install --index-url https://pypi.anaconda.org/intel/simple dpctl
.. note::
@@ -79,7 +73,7 @@ On Windows
set PATH=\bin;\Library\bin;%PATH%
Building from source
---------------------
+====================
To build dpctl from source, we need dpcpp and GPU drivers (and optionally CPU
OpenCL drivers). It is preferable to use the dpcpp compiler packaged as part of
@@ -87,12 +81,13 @@ oneAPI. However, it is possible to use a custom build of dpcpp to build dpctl,
especially if you want to enable CUDA support.
Building using oneAPI dpcpp
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------
As before, oneAPI and graphics drivers should be installed on the system prior
to proceeding further.
-**Activate oneAPI as follows**
+Activate oneAPI as follows
+~~~~~~~~~~~~~~~~~~~~~~~~~~
On Linux
@@ -106,7 +101,8 @@ On Windows
call "%ONEAPI_ROOT%\setvars.bat"
-**Build and install using conda-build**
+Build and install using conda-build
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The conda-recipe included with the sources can be used to build the dpctl
package. The advantage of this approach is that all dependencies are pulled in
@@ -136,52 +132,67 @@ After building the conda package you may install it by executing:
You could face issues with conda-build version 3.20. Use conda-build
3.18 instead.
-**Build and Install with setuptools**
-To build using Python ``setuptools``, the following packages should be
+Build and install with scikit-build
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To build using Python ``setuptools`` and ``scikit-build``, the following Python packages should be
installed:
- ``cython``
- ``numpy``
- ``cmake``
- - ``ninja`` (only on Windows)
+ - ``scikit-build``
+ - ``ninja``
- ``gtest`` (optional to run C API tests)
+ - ``gmock`` (optional to run C API tests)
- ``pytest`` (optional to run Python API tests)
-Once the prerequisites are installed, building using ``setuptools`` involves The
-usual steps
+Once the prerequisites are installed, building using ``scikit-build`` involves the usual steps, to build and install:
+
+.. code-block:: bash
+
+ python setup.py install -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON
-to build and install
+, and to develop:
.. code-block:: bash
- python setup.py install
+ python setup.py develop -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON
-, and to develop.
+On Windows, use ``icx`` for both C and CXX compilers.
+
+Developing on Linux can also be done using the driver script:
.. code-block:: bash
- python setup.py develop
+ python scripts/build_locally.py --oneapi
+
Building using custom dpcpp
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------
It is possible to build dpctl from source using .. _DPC++ toolchain: https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md
instead of the DPC++ compiler that comes with oneAPI. One reason for doing this
may be to enable support for CUDA devices.
-Following steps in :ref:`Build and Install with setuptools` use command line
-option :code:`--sycl-compiler-prefix`, for example:
+Following the steps in `Build and install with scikit-build`_, use command-line options to set the relevant cmake variables, for example:
.. code-block:: bash
- python setup.py develop --sycl-compiler-prefix=${DPCPP_ROOT}/llvm/build
+ python setup.py develop -- -G Ninja -DCMAKE_C_COMPILER:PATH=clang -DCMAKE_CXX_COMPILER:PATH=clang++ -DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON -DDPCTL_DPCPP_HOME_DIR=${DPCPP_ROOT}/llvm/build -DDPCTL_DPCPP_FROM_ONEAPI=OFF
+
+Alternatively, the driver script can be used:
+
+.. code-block:: bash
+
+ python scripts/build_locally.py --c-compiler=clang --cxx-compiler=clang++ --compiler-root=${DPCPP_ROOT}/llvm/build
Available options and their descriptions can be retrieved using option
:code:`--help`.
Using dpctl
------------
+===========
Dpctl requires a DPC++ runtime. When dpctl is installed via conda then it uses
the DPC++ runtime from ``dpcpp_cpp_rt`` package that is part of IDP. When using
@@ -190,10 +201,10 @@ the system. The easiest way to setup a DPC++ runtime will be by activating
oneAPI.
Running examples and tests
---------------------------
+==========================
Running the examples
-~~~~~~~~~~~~~~~~~~~~
+--------------------
After setting up dpctl you can try out the Python examples as follows:
@@ -213,7 +224,7 @@ located under *examples/cython*. Each example in the folder can be built using
examples.
Running the Python tests
-~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------
The dpctl Python test suite can be executed as follows:
@@ -222,14 +233,13 @@ The dpctl Python test suite can be executed as follows:
pytest --pyargs dpctl
-Building the C API shared library
----------------------------------
+Building the DPCTLSyclInterface library
+=======================================
-The dpctl C API is a shared library called libDPCTLSyclInterface and is built
-together when build the Python package. However, it is possible to only build
-the C API as a standalone library. To do so, you will need ``cmake``,
+The libDPCTLSyclInterface is a shared library used by the Python package.
+To build the library you will need ``DPC++`` toolchain, ``cmake``,
``ninja`` or ``make``, and optionally ``gtest 1.10`` if you wish to run the
-C API test suite.
+test suite.
For example, on Linux the following script can be used to build the C oneAPI
library.
diff --git a/docs/docfiles/user_guides/UserManual.rst b/docs/docfiles/user_guides/UserManual.rst
new file mode 100644
index 0000000000..9b955f1b0c
--- /dev/null
+++ b/docs/docfiles/user_guides/UserManual.rst
@@ -0,0 +1,10 @@
+.. _user_manual:
+
+###########
+User Manual
+###########
+
+.. toctree::
+ :maxdepth: 3
+
+ manual/dpctl/intro
diff --git a/docs/docfiles/user_guides/manual/dpctl/basic_concepts.rst b/docs/docfiles/user_guides/manual/dpctl/basic_concepts.rst
new file mode 100644
index 0000000000..fbda045899
--- /dev/null
+++ b/docs/docfiles/user_guides/manual/dpctl/basic_concepts.rst
@@ -0,0 +1,75 @@
+.. _basic_concepts:
+
+Basic Concepts
+==============
+
+This section introduces the basic concepts for XPU management used by dpctl.
+As dpctl is based on SYCL the concepts should be familiar to users with prior
+experience with SYCL. However, users of dpctl need not have any prior experience
+with SYCL and the concepts presented here should be self-sufficient. We do not
+go into all the SYCL-level details here and if needed readers should refer to a
+more topical SYCL reference such as the :sycl_spec_2020:`SYCL 2020 spec <>`.
+
+* **Heterogeneous computing**
+ Refers to using multiple devices in a program.
+
+* **Host**
+ Every program starts by running on a host, and most of the lines of code in
+ a program, in particular lines of code implementing the Python interpreter
+ itself, are usually for the host. Hosts are customarily CPUs.
+
+* **Device**
+ A device is an XPU connected to a host that is programmable with a specific
+ device driver. Different types of devices can have different architectures
+ (CPUs, GPUs, FPGA, ASICs, DSP), but are programmable using the same
+ :oneapi:`oneAPI <>` programming model.
+
+* **Platform**
+ A device driver installed on the system is termed as a platform. As multiple
+ devices of the same type can share the same device driver, a platform may
+ contain multiple devices. Note that the same physical hardware (say, a GPU)
+ may be reflected as two separate devices if they can be programmed by more
+ than one platform, *e.g.*, the same GPU hardware can be listed as an
+ OpenCL GPU device and a Level-Zero GPU device.
+
+* **Context**
+ A context holds the run-time information needed to operate on a device or a
+ group of devices from the same platform. Contexts are relatively expensive
+ to create and should be reused as much as possible.
+
+* **Queue**
+ A queue is needed to schedule execution of any computation, or data
+ copying on the device. Queue construction requires specifying a device
+ and a context targeting that device as well as additional properties,
+ such as whether profiling information should be collected or whether submitted
+ tasks are executed in the order in which they were submitted.
+
+* **Event**
+ An event holds information related to computation/data movement operation
+ scheduled for execution on a queue, such as its execution status as well
+ as profiling information if the queue the task was submitted to allowed
+ for collection of such information. Events can be used to specify task
+ dependencies as well as to synchronize host and devices.
+
+* **USM**
+ Unified Shared Memory (USM) refers to pointer based device memory management.
+ USM allocations are bound to context. In other words, a pointer representing
+ USM allocation can be unambiguously mapped to the data it represents only
+ if the associated context is known. USM allocations are accessible by
+ computational kernels that are executed on a device, provided that the
+ allocation is bound to the same context that was used to construct the queue
+ where the kernel was scheduled for execution.
+
+ Depending on the capability of the device, USM allocations can be a "device"
+ allocation, a "shared" allocation, or a "host" allocation. A "device"
+ allocation is not accessible from host, while "shared" or "host" allocations
+ are. "Host" allocation refers to an allocation in host memory that is
+ accessible from a device.
+
+ "Shared" allocations are accessible by both host and device. Runtime manages
+ synchronization of host's and device's view into shared allocations. Initial
+ placement of the shared allocations is not defined.
+
+* **Backend**
+ Refers to an implementation of :oneapi:`oneAPI <>` programming model exposed
+ by the underlying runtime.
diff --git a/docs/docfiles/user_guides/manual/dpctl/device_selection.rst b/docs/docfiles/user_guides/manual/dpctl/device_selection.rst
new file mode 100644
index 0000000000..b68f930834
--- /dev/null
+++ b/docs/docfiles/user_guides/manual/dpctl/device_selection.rst
@@ -0,0 +1,152 @@
+.. _device_selection:
+
+################
+Device Selection
+################
+
+Device selection refers to programmatically selecting a single device from
+the set of :ref:`devices <devices>` available on the system.
+
+Selecting a Specific Type of Device
+-----------------------------------
+
+If a user needs to select a specific type of device such as a GPU, they can
+directly use one of the helper functions included inside dpctl. Dpctl includes
+:ref:`helper functions ` for selecting
+a ``host``, a ``cpu``, a ``gpu``, an ``accelerator``, or the ``default`` device.
+These functions are analogous to SYCL's built-in
+:sycl_device_selector:`sycl::device_selector <>` classes. The scoring and
+selection of a specific device when multiple devices of the same type are
+available on a system is deferred to the underlying SYCL runtime.
+
+The example :ref:`fig-gpu-device-selection` shows the usage of the
+:func:`dpctl.select_gpu_device()` device selection function. In case multiple
+GPU devices are available, only one is returned based on the underlying scoring
+logic inside the SYCL runtime. If the selection function was unable to select a
+device a ``ValueError`` is raised.
+
+.. _fig-gpu-device-selection:
+
+.. literalinclude:: ../../../../../examples/python/device_selection.py
+ :language: python
+ :lines: 20-21, 38-52
+ :caption: Selecting a GPU Device
+ :linenos:
+
+A possible output for the example :ref:`fig-gpu-device-selection` may be:
+
+.. program-output:: python ../examples/python/device_selection.py -r create_gpu_device
+
+.. _sec-filter-selection:
+
+Selecting a Device Using a Filter String
+----------------------------------------
+
+Along with using the default device selection functions, a more explicit way of
+device selection involves the use of *filter strings* (refer to
+:oneapi_filter_selection:`oneAPI filter selection extension <>`). The example
+:ref:`fig-gpu-device-selection` also demonstrates the use of a filter string
+to create a GPU device directly. Using a filter string allows much more
+fine-grained control for selecting a device. The following example
+:ref:`fig-filter-selection` demonstrates usages of device selection using filter
+strings.
+
+.. _fig-filter-selection:
+
+.. literalinclude:: ../../../../../examples/python/filter_selection.py
+ :language: python
+ :lines: 20-21, 23-53
+ :caption: Device Creation With Filter Strings
+ :linenos:
+
+A possible output for the example :ref:`fig-filter-selection` may be:
+
+.. program-output:: python ../examples/python/filter_selection.py -r select_using_filter
+
+
+It is also possible to pass a list of devices using a filter string. The
+example :ref:`fig-adv-device-selection` demonstrates such a use case. The
+filter string ``gpu,cpu`` implies that a GPU should be selected if available,
+else a CPU device should be selected.
+
+.. _fig-adv-device-selection:
+
+.. literalinclude:: ../../../../../examples/python/device_selection.py
+ :language: python
+ :lines: 20-21, 55-67
+ :caption: Selecting a GPU Device if Available
+ :linenos:
+
+A possible output for the example :ref:`fig-adv-device-selection` may be:
+
+.. program-output:: python ../examples/python/device_selection.py -r create_gpu_device_if_present
+
+.. Note::
+ A **filter string** is a three-tuple that may specify the *backend*,
+ *device type*, and *device number* as a colon (:) separated string. The
+ backend specifies the type of device driver and can have a value such as
+ *host*, *opencl*, *level-zero*, or *cuda*. The device type can be *host*,
+ *gpu*, *cpu*, *accelerator*. And, the device number is a numeric value
+ specifying the ordinality of the device in the listing of devices as
+ determined by the SYCL runtime. Each of the backend, device type, and device
+ number value is optional, but at least one of them should be provided,
+ *i.e.*, ``opencl:gpu:0``, ``gpu:0``, ``gpu``, ``0``, and ``opencl:0`` are
+ all valid filter strings.
+
+ The device listing including the device number value remains stable for
+ a given system unless the driver configuration is changed or the SYCL
+ runtime setting is changed using the ``SYCL_DEVICE_FILTER`` environment
+ variable. Please refer to
+ :oneapi_filter_selection:`oneAPI filter selection extension <>` for more
+ detail.
+
+Advanced Device Selection
+-------------------------
+
+Till now we have discussed device selection using methods that defer the
+selection logic to the SYCL runtime. However, real-world applications may
+require more precise control over device selection. Dpctl offers a way for users
+to accomplish more advanced device selection.
+
+.. _fig-custom-device-selection:
+
+.. literalinclude:: ../../../../../examples/python/device_selection.py
+ :language: python
+ :lines: 20-21, 70-91
+ :caption: Custom Device Selection
+ :linenos:
+
+The example :ref:`fig-custom-device-selection` shows a way of selecting a device
+based off a specific hardware property. The :func:`dpctl.get_devices()` returns
+a list of all *root* devices on the system, out of that list the devices that
+support half-precision floating-point arithmetic are selected. Finally, a
+"score" computed using the SYCL runtime's default device scoring logic that is
+stored in :attr:`dpctl.SyclDevice.default_selector_score` is used to select a
+single device. Refer to the documentation of :class:`dpctl.SyclDevice` for a list
+of hardware properties that may be used for device selection.
+
+.. _RootDevice:
+
+.. Note::
+ A **root** device implies an unpartitioned device. A root device can be
+ partitioned into two or more :ref:`sub-devices <sec-devices-sub-devices>`
+ based on various criteria. For example, a CPU device with multiple NUMA
+ domains may be partitioned into multiple sub-devices, each representing a
+ single NUMA domain.
+
+A convenience function :func:`dpctl.select_device_with_aspects()` is available
+that makes it easy to select a device based on a set of specific aspects. The
+example :ref:`fig-select-device-with-aspects` selects a device that
+supports double precision arithmetic and SYCL USM shared memory allocation.
+
+.. _fig-select-device-with-aspects:
+
+.. literalinclude:: ../../../../../examples/python/device_selection.py
+ :language: python
+ :lines: 20-21, 94-103
+ :caption: Device Selection Using Aspects
+ :linenos:
+
+A possible output for the example :ref:`fig-select-device-with-aspects` may be:
+
+.. program-output:: python ../examples/python/device_selection.py -r create_device_with_aspects
diff --git a/docs/docfiles/user_guides/manual/dpctl/devices.rst b/docs/docfiles/user_guides/manual/dpctl/devices.rst
new file mode 100644
index 0000000000..ca2529b0f4
--- /dev/null
+++ b/docs/docfiles/user_guides/manual/dpctl/devices.rst
@@ -0,0 +1,141 @@
+.. _devices:
+
+######
+Device
+######
+
+A device is an abstract representation for an XPU. The :class:`dpctl.SyclDevice`
+class represents a device and is a wrapper over the
+:sycl_device:`sycl::device <>` SYCL runtime class.
+
+Creating Devices
+----------------
+
+We touched upon device creation under the :ref:`device_selection` section. The
+:class:`dpctl.SyclDevice` class includes a default constructor to create a
+"default" device that is selected by the SYCL runtime. Users can also use
+explicit :ref:`filter selector strings <sec-filter-selection>` to create a
+device.
+
+Listing Devices
+---------------
+
+:py:mod:`dpctl` provides the :func:`dpctl.get_devices` utility function to list
+the available devices on a user's system. The list of devices returned depends
+on available hardware, installed drivers, as well as on
+:dpcpp_envar:`environment variables <>` influencing SYCL runtime
+such as ``SYCL_DEVICE_FILTER`` or ``SYCL_DEVICE_ALLOWLIST``.
+
+.. _fig-listing-devices:
+
+.. literalinclude:: ../../../../../examples/python/device_selection.py
+ :language: python
+ :lines: 20-22, 107-131
+ :caption: Listing Available Devices
+ :linenos:
+
+A possible output for the example :ref:`fig-listing-devices` may be:
+
+.. program-output:: python ../examples/python/device_selection.py -r list_devices
+
+The example :ref:`fig-listing-devices` demonstrates the usage of
+:func:`dpctl.get_devices`. The list can be filtered based on
+:class:`dpctl.backend` and :class:`dpctl.device_type`. The 0-based ordinal
+position of a device in the output of :func:`dpctl.get_devices` corresponds to
+the ``device id`` value in the filter selector string corresponding to the
+device. For example, ``"opencl:cpu:0"`` refers to the first device in the list
+returned by ``dpctl.get_devices(backend="opencl", device_type="cpu")``. If such
+a list is empty, device construction call ``dpctl.SyclDevice("opencl:gpu:0")``
+will raise a ``ValueError``.
+
+.. Note::
+
+ Unless the system configuration changes, the list of devices returned by
+ :func:`dpctl.get_devices` and the relative ordering of devices in the list
+ is stable for every call to the function, even across different runs of an
+ application.
+
+Device Aspects and Information Descriptors
+------------------------------------------
+
+A device can have various *aspects* and *information descriptors* that describe
+its hardware characteristics. :sycl_aspects:`Aspects <>` are boolean
+characteristics of the device, whereas
+:sycl_device_info:`information descriptors <>` are non-boolean characteristics
+that provide more verbose information about the device.
+:class:`dpctl.SyclDevice` exposes various Python properties that describe a
+device's aspects and information descriptors. For example, the property
+``has_aspect_fp16`` returns a boolean value indicating whether a
+particular device has aspect ``"fp16"``, indicating whether it supports the
+IEEE-754 half-precision floating point type. Whereas, the ``name`` property is
+an information descriptor that returns a string with the name of the device.
+
+.. _fig-available-properties:
+
+.. code-block:: Python
+ :caption: Listing Available Device Aspects and Information Descriptors
+ :linenos:
+
+ import dpctl
+ import inspect
+
+ def get_properties(cls, prop_name):
+ "Get name of properties of a class known to have `prop_name`"
+ known_property_t = type(getattr(cls, prop_name))
+ return [n for n, o in inspect.getmembers(cls) if isinstance(o, known_property_t)]
+
+ print(len(get_properties(dpctl.SyclDevice, "name")))
+ # Output: 52
+
+The example :ref:`fig-available-properties` demonstrates a programmatic way of
+listing all the aspects and information descriptor properties in
+:class:`dpctl.SyclDevice`.
+
+.. _sec-devices-sub-devices:
+
+Sub-devices
+-----------
+
+It is possible for a device to be partitioned into "sub-devices". A sub-device
+represents a sub-set of the computational units within a device that are grouped
+based on some hardware criteria. For example, a two socket CPU device may be
+partitioned into two sub-devices, where each sub-device represents a separate
+:numa_domain:`NUMA domain <>`. Depending on the hardware characteristics and
+the capabilities of the SYCL runtime, a sub-device may be partitioned further.
+
+For devices that support partitioning, the
+:func:`dpctl.SyclDevice.create_sub_devices` can be used to create a list of
+sub-devices. The requested partitioning scheme is indicated with use of the
+required ``partition`` keyword. Several types of partitioning schemes are
+available:
+
+* **Count partitioning**
+ The partitioning scheme is specified as a list of positive integers
+ indicating a partitioning with each sub-device having the requested number
+ of parallel compute units, or as a single positive integer indicating
+ equal-counts partition.
+
+* **Affinity partitioning**
+ The partitioning scheme is specified as a string indicating an affinity
+ domain used to create sub-devices that share a common resource, such as
+ certain hardware cache levels.
+
+.. Note::
+
+ Use ``partition="next_partitionable"`` to partition along the next level of
+ architectural hierarchy.
+
+The following example shows an affinity-based partitioning of a CPU device
+into sub-devices based on the available NUMA domains.
+
+.. _fig-partition-cpu:
+
+.. literalinclude:: ../../../../../examples/python/subdevices.py
+ :language: python
+ :lines: 17, 62-76
+ :caption: Partitioning a CPU device
+ :linenos:
+
+A possible output for the example :ref:`fig-partition-cpu` may be:
+
+.. program-output:: python ../examples/python/subdevices.py -r subdivide_by_affinity
diff --git a/docs/docfiles/user_guides/manual/dpctl/intro.rst b/docs/docfiles/user_guides/manual/dpctl/intro.rst
new file mode 100644
index 0000000000..327178919e
--- /dev/null
+++ b/docs/docfiles/user_guides/manual/dpctl/intro.rst
@@ -0,0 +1,38 @@
+.. _intro:
+
+dpctl
+-----
+
+The Data Parallel Control (dpctl) package provides a Python runtime to access a
+data-parallel computing resource or *XPU* from another Python application or
+library, alleviating the need for the other Python packages to develop such a
+runtime themselves. The term XPU denotes a diverse range of compute
+architectures such as a CPU, GPU, FPGA, *etc.*, available to programmers on a
+modern heterogeneous system.
+
+The dpctl runtime is built on top of the C++ SYCL standard and is designed to be
+both vendor and architecture agnostic. If the underlying SYCL runtime supports
+a type of architecture, the dpctl runtime will allow accessing that architecture
+from Python.
+
+In its current form, dpctl relies on certain DPC++ extensions of SYCL standard.
+Moreover, the binary distribution of dpctl uses the proprietary Intel(R) oneAPI
+DPC++ runtime bundled as part of oneAPI and supports Intel XPU devices only.
+However, dpctl is compatible with the runtime of open-source DPC++ SYCL bundle
+that can be compiled to support a wide range of architectures including CUDA,
+AMD ROC, and HIP.
+
+The user guide introduces the core features of dpctl and the underlying
+concepts. The guide is meant primarily for users of the Python package. Library
+and native extension developers should refer to the programmer's guide.
+
+Table of contents
++++++++++++++++++
+
+.. toctree::
+ :maxdepth: 2
+
+ basic_concepts
+ device_selection
+ platforms
+ devices
diff --git a/docs/docfiles/user_guides/manual/dpctl/platforms.rst b/docs/docfiles/user_guides/manual/dpctl/platforms.rst
new file mode 100644
index 0000000000..bf9c0ed981
--- /dev/null
+++ b/docs/docfiles/user_guides/manual/dpctl/platforms.rst
@@ -0,0 +1,35 @@
+.. _querying_platforms:
+
+########
+Platform
+########
+
+A platform abstracts a device driver for one or more XPUs that are connected to
+a host. The :class:`dpctl.SyclPlatform` class represents a platform and
+abstracts the :sycl_platform:`sycl::platform <>` SYCL runtime class.
+
+Listing Available Platforms
+---------------------------
+
+The platforms available on a system can be queried using the
+:func:`dpctl.lsplatform` function. In addition, as illustrated in the following
+example it is possible to print out metadata about a platform.
+
+.. literalinclude:: ../../../../../examples/python/lsplatform.py
+ :language: python
+ :lines: 20-41
+ :linenos:
+
+The example can be executed as follows:
+
+.. code-block:: bash
+
+ python dpctl/examples/python/lsplatform.py -r all
+
+The possible output for the example may be:
+
+.. program-output:: python ../examples/python/lsplatform.py -r all
+
+.. Note::
+ The verbosity for the output can be controlled using the ``verbosity``
+ keyword argument. Refer to :func:`dpctl.lsplatform`.
diff --git a/docs/doxyrest-config.lua.in b/docs/doxyrest-config.lua.in
index 857260ddeb..42de9e7797 100644
--- a/docs/doxyrest-config.lua.in
+++ b/docs/doxyrest-config.lua.in
@@ -82,7 +82,7 @@ EXTRA_PAGE_LIST = {}
--! is not set (otherwise, the title of intro file will be used).
--!
-INDEX_TITLE = "dpctl C API"
+INDEX_TITLE = "libsyclinterface"
--!
--! File with project introduction (reStructuredText). When non-nil, this file
diff --git a/docs/extlinks_gen.py b/docs/extlinks_gen.py
new file mode 100644
index 0000000000..caa45a9e94
--- /dev/null
+++ b/docs/extlinks_gen.py
@@ -0,0 +1,36 @@
+# Data Parallel Control (dpctl)
+#
+# Copyright 2020-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+
+def create_extlinks():
+    """Build the Sphinx ``extlinks`` mapping from ``docfiles/urls.json``.
+
+    Every key of the JSON object becomes the name of an external-link role
+    and its value the base URL. ``"%s"`` is appended to the URL so that the
+    target given to the role is substituted at the end of it.
+
+    The file is opened relative to the current working directory.
+
+    Returns:
+        dict: Maps each role name to a ``(url + "%s", None)`` tuple, which is
+        the format understood by the ``sphinx.ext.extlinks`` extension.
+
+    """
+    # JSON text is UTF-8; do not depend on the default encoding of the locale.
+    with open("docfiles/urls.json", encoding="utf-8") as urls_json:
+        urls = json.load(urls_json)
+
+    return {name: (base_url + "%s", None) for name, base_url in urls.items()}
diff --git a/docs/generate_rst.py b/docs/generate_rst.py
new file mode 100644
index 0000000000..1c80f4bc4a
--- /dev/null
+++ b/docs/generate_rst.py
@@ -0,0 +1,735 @@
+# Data Parallel Control (dpctl)
+#
+# Copyright 2020-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" The module provides helper functions to generate API documentation for
+ dpctl and its members.
+"""
+
+import argparse
+import enum
+import inspect
+import io
+import os
+import sys
+from importlib import import_module
+from pkgutil import iter_modules
+
+import dpctl
+
+# known property in Cython extension class
+_getset_descriptor = type(dpctl.SyclDevice.name)
+# known method (defined using def in Cython extension class)
+_cython_method_type = type(dpctl.SyclDevice.get_filter_string)
+# known builtin method (defined using cpdef in Cython extension class)
+_cython_builtin_function_or_method_type = type(dpctl.SyclQueue.mro)
+
+# Dictionary mapping internal module names to a readable string, so that we
+# can use the module name to logically group functions.
+function_groups = {
+ "dpctl._device_selection": "Device Selection Functions",
+ "dpctl._sycl_queue_manager": "Queue Management Functions",
+ "dpctl.tensor._ctors": "Array Construction",
+ "dpctl.tensor._copy_utils": "Array Construction",
+ "dpctl.tensor._dlpack": "Array Construction",
+ "dpctl.tensor._reshape": "Array Manipulation",
+ "dpctl.memory._memory": "Functions",
+ "dpctl.program._program": "Functions",
+ "dpctl.utils._compute_follows_data": "Functions",
+}
+
+
+def _get_module(module):
+    """Return the module object that is registered under a module name.
+
+    Args:
+        module (str): The name of a module to be looked up in ``sys.modules``.
+
+    Raises:
+        ValueError: If ``sys.modules`` has no entry for ``module``.
+
+    Returns:
+        object: The module object stored in ``sys.modules``.
+    """
+    try:
+        return sys.modules[module]
+    except KeyError:
+        # The leading space keeps the module name apart from the rest of the
+        # message. The KeyError adds nothing for the caller, so it is not
+        # chained.
+        raise ValueError(
+            module + " is not a valid module name or it is not loaded"
+        ) from None
+
+
+def _write_line(output, s):
+ """Write a line to specified out stream.
+
+ Args:
+ output (``io.StringIO``): The string stream to be written.
+ s (str): The string that is to be written out as a line.
+ """
+ output.write(s)
+ output.write("\n")
+
+
+def _write_empty_line(output):
+ """Write an empty line to the output stream.
+
+ Args:
+ output: The stream that is handed on to ``_write_line``.
+ """
+ _write_line(output, "")
+
+
+def _write_marquee(o, s):
+ """Write a title with a line of ``#`` characters above and below it.
+
+ Args:
+ o: The stream that is handed on to ``_write_line``.
+ s (str): The title. Both ``#`` lines are as long as ``s``.
+ """
+ marquee = "#" * len(s)
+ _write_line(o, marquee)
+ _write_line(o, s)
+ _write_line(o, marquee)
+
+
+def _write_underlined(o, s, c):
+ """Write a title followed by an underline.
+
+ Args:
+ o: The stream that is handed on to ``_write_line``.
+ s (str): The title.
+ c (str): The string that is repeated ``len(s)`` times as the underline.
+ """
+ _write_line(o, s)
+ _write_line(o, c * len(s))
+
+
+def _write_hidden_toc(o, list_of_obj_names, prefix_str="", suffix_str=""):
+ """Write a hidden ``toctree`` directive with one entry for every name.
+
+ Nothing is written when ``list_of_obj_names`` is empty.
+
+ Args:
+ o: The stream that is handed on to ``_write_line``.
+ list_of_obj_names (list): The names to be listed as toctree entries.
+ prefix_str (str): Text put in front of every entry. Defaults to "".
+ suffix_str (str): Text put after every entry. Defaults to "".
+ """
+ if not list_of_obj_names:
+ return
+ _write_line(o, ".. toctree::")
+ _write_line(o, " :hidden:")
+ _write_empty_line(o)
+ for obj in list_of_obj_names:
+ _write_line(o, " " + prefix_str + obj + suffix_str)
+ _write_empty_line(o)
+
+
+def _get_public_class_name(cls):
+ """Return the qualified name of a class without private module components.
+
+ The components of ``cls.__module__`` that start with an underscore are
+ dropped and what is left is joined with ``cls.__qualname__``.
+
+ Args:
+ cls (type): The class whose public name is requested.
+
+ Raises:
+ TypeError: If ``cls`` is not a class.
+
+ Returns:
+ str: The dotted name, or only ``cls.__qualname__`` when no module
+ component is left.
+ """
+ if not inspect.isclass(cls):
+ raise TypeError("Expecting class, got {}".format(type(cls)))
+ modl = cls.__module__
+ if modl:
+ modl = ".".join(
+ [comp for comp in modl.split(".") if not comp.startswith("_")]
+ )
+ if modl:
+ res = ".".join([modl, cls.__qualname__])
+ else:
+ res = cls.__qualname__
+ return res
+
+
+def _is_class_property(o):
+ """Tell whether an object is a property of a class.
+
+ Args:
+ o (object): The object to be checked.
+
+ Returns:
+ bool: True if ``o`` is a Python ``property`` or if its type is the type
+ of ``dpctl.SyclDevice.name``, a known property of a Cython extension
+ class.
+ """
+ return isinstance(o, property) or (type(o) == _getset_descriptor)
+
+
+def _is_class_method(o):
+ """Tell whether an object is a method of a class.
+
+ Args:
+ o (object): The object to be checked.
+
+ Returns:
+ bool: True if ``inspect.ismethod`` accepts ``o`` or if the type of ``o``
+ is one of the two method types that are recorded at module level from
+ Cython extension classes.
+ """
+ return inspect.ismethod(o) or (
+ type(o)
+ in [_cython_method_type, _cython_builtin_function_or_method_type]
+ )
+
+
+def _get_filtered_names(cls, selector_func):
+ """Return the names of the members of a class accepted by a predicate.
+
+ Names that start with a double underscore are left out.
+
+ Args:
+ cls (type): The class whose members are inspected.
+ selector_func (callable): The predicate given to ``inspect.getmembers``.
+
+ Returns:
+ list: The names of the selected members.
+ """
+ return [
+ _name
+ for _name, _obj in inspect.getmembers(cls, selector_func)
+ if not _name.startswith("__")
+ ]
+
+
+def _group_functions(mod):
+    """Sort the functions of a module into named groups.
+
+    The group of a function is looked up in ``function_groups`` using the
+    ``__module__`` of the function. Functions of
+    ``dpctl._sycl_device_factory`` with ``select_`` in their name are put in
+    "Device Selection Functions". All functions that are left are put in
+    "Other Functions".
+
+    Args:
+        mod (object): A module whose functions will be grouped.
+
+    Returns:
+        dict: Maps the name of a group to the list of function objects in
+        that group.
+    """
+    groups = {}
+    for _, member in inspect.getmembers(mod):
+        if not (inspect.isbuiltin(member) or inspect.isfunction(member)):
+            continue
+        owner = member.__module__
+        if owner and owner in function_groups:
+            group_name = function_groups[owner]
+        elif (
+            owner == "dpctl._sycl_device_factory"
+            and "select_" in member.__name__
+        ):
+            # Special case for _sycl_device_factory
+            group_name = "Device Selection Functions"
+        else:
+            group_name = "Other Functions"
+        groups.setdefault(group_name, []).append(member)
+    return groups
+
+
+def _generate_class_rst(cls):
+ """Generate a rst file with the API documentation for a class.
+
+ Raises:
+ TypeError: When the input is not a Python class
+
+ Returns:
+ [str]: A string with rst nodes that can be written out to a file.
+ """
+ if not inspect.isclass(cls):
+ raise TypeError("Expecting class, got {}".format(type(cls)))
+
+ cls_qualname = _get_public_class_name(cls)
+ rst_header = cls_qualname.split(".")[-1]
+ rst_module = ".".join(cls_qualname.split(".")[:-1])
+ rst_header = "".join([".. _", rst_header, "_api:"])
+
+ def write_rubric(o, indent, rubric_display, rubric_tag, cls_qualname):
+ _write_line(o, indent + ".. rubric:: " + rubric_display)
+ _write_empty_line(o)
+ _write_line(o, indent + ".. autoautosummary:: " + cls_qualname)
+ _write_line(o, indent + indent + ":" + rubric_tag + ":")
+ _write_empty_line(o)
+
+ with io.StringIO() as output:
+ # Attributes
+ all_attributes = _get_filtered_names(cls, _is_class_property)
+ # Methods, separated into public/private
+ all_methods = _get_filtered_names(cls, _is_class_method)
+ all_public_methods = []
+ all_private_methods = []
+ for _name in all_methods:
+ if _name.startswith("_"):
+ all_private_methods.append(_name)
+ else:
+ all_public_methods.append(_name)
+
+ _write_line(output, rst_header)
+ _write_empty_line(output)
+ _write_marquee(output, cls_qualname)
+ _write_empty_line(output)
+
+ _write_line(output, ".. currentmodule:: " + rst_module)
+ _write_empty_line(output)
+
+ _write_line(output, ".. autoclass:: " + cls_qualname)
+ _write_empty_line(output)
+
+ indent = " "
+ attributes_header = "Attributes"
+ private_methods_header = "Private methods"
+ public_methods_header = "Public methods"
+
+ if all_attributes:
+ write_rubric(
+ output,
+ indent,
+ attributes_header + ":",
+ "attributes",
+ cls_qualname,
+ )
+ if all_public_methods:
+ write_rubric(
+ output,
+ indent,
+ public_methods_header + ":",
+ "methods",
+ cls_qualname,
+ )
+ if all_private_methods:
+ write_rubric(
+ output,
+ indent,
+ private_methods_header + ":",
+ "private_methods",
+ cls_qualname,
+ )
+
+ _write_empty_line(output)
+
+ if all_attributes:
+ _write_underlined(output, attributes_header, "-")
+ _write_empty_line(output)
+ for n in all_attributes:
+ _write_line(
+ output,
+ ".. autoattribute:: " + ".".join([cls_qualname, n]),
+ )
+ _write_empty_line(output)
+
+ if all_public_methods:
+ _write_underlined(output, public_methods_header, "-")
+ _write_empty_line(output)
+ for n in all_public_methods:
+ _write_line(
+ output,
+ ".. autofunction:: " + ".".join([cls_qualname, n]),
+ )
+ _write_empty_line(output)
+
+ # Private methods
+ if all_private_methods:
+ _write_underlined(output, private_methods_header, "-")
+ _write_empty_line(output)
+ for n in all_private_methods:
+ _write_line(
+ output,
+ ".. autofunction:: " + ".".join([cls_qualname, n]),
+ )
+ return output.getvalue()
+
+
+def _generate_module_summary_rst(module):
+    """Generate the rst text of the API summary page of a module.
+
+    The page has summary tables for the sub-packages, classes, function
+    groups, enums and exceptions that are found in the module. A table row
+    pairs a cross-reference with the summary taken from the docstring of the
+    referenced object.
+
+    Args:
+        module (str): Name of a module that is present in ``sys.modules``.
+
+    Raises:
+        ValueError: If the module is not found in ``sys.modules``.
+
+    Returns:
+        str: The rst text of the summary page.
+    """
+    rst_header = "".join([".. _", module, "_pyapi:"])
+    pagename = module
+    indent = " "
+
+    def _get_doc_summary(obj):
+        """Return the summary sentence of the docstring of ``obj``.
+
+        A ``[FIXME]`` message is returned when ``obj`` has no docstring or
+        when no summary is found in the docstring.
+        """
+        docstr = getattr(obj, "__doc__")
+        if not isinstance(docstr, str):
+            return f"[FIXME]: {type(obj)} does not have a docstring"
+        # Let us strip out any newlines, tabs, etc. at the start of the docstr
+        docstr = docstr.lstrip()
+        # Check if a signature line is provided. The check only looks for
+        # something like "SyclContext("
+        sig_start = obj.__name__ + "("
+        if docstr.startswith(sig_start):
+            # An apparent signature line was seen. Locate its end by
+            # matching the parenthesis opened by ``sig_start``.
+            st = len(sig_start)
+            stop = None
+            n_opens = 1
+            for i, c in enumerate(docstr[st:]):
+                if c == "(":
+                    n_opens += 1
+                elif c == ")":
+                    stop = i
+                    n_opens -= 1
+                    if n_opens == 0:
+                        break
+            # Strip out the signature in the docstring. When there is no
+            # ")" at all, no signature can be told apart and the text is
+            # kept as it is.
+            if stop is not None:
+                docstr = docstr[st + stop + 1 :]
+        # The hope is to find the first line (summary) from the docstring
+        # by searching for a period followed by a new line. Not foolproof, but
+        # a best-effort check.
+        end = docstr.find(".\n")
+        if end == -1:
+            # No sentence ends at a line break, e.g. in a one-line docstring
+            # without a trailing newline. Use the first paragraph instead
+            # of reporting a missing summary.
+            summary = docstr.strip().split("\n\n", 1)[0]
+        else:
+            summary = docstr[: end + 1]
+        summary = " ".join(summary.split())
+        if not summary:
+            return f"[FIXME]: {type(obj)} has a docstring with no summary"
+        return summary
+
+    def _write_table_header(o):
+        _write_line(o, ".. list-table::")
+        _write_line(o, indent + ":widths: 25,50")
+        _write_empty_line(o)
+
+    def _write_submodules_summary_table(o, mod):
+        submods = [
+            submod.name for submod in iter_modules(mod.__path__) if submod.ispkg
+        ]
+        if submods:
+            # Everything is written to the stream that was passed in.
+            _write_empty_line(o)
+            _write_underlined(o, "Sub-modules", "-")
+            _write_empty_line(o)
+            _write_hidden_toc(o, submods, mod.__name__ + ".", "_pyapi")
+            _write_table_header(o)
+            for submod in submods:
+                _write_line(
+                    o,
+                    indent
+                    + "* - :ref:`"
+                    + mod.__name__
+                    + "."
+                    + submod
+                    + "_pyapi`",
+                )
+                _submod = import_module(module + "." + submod, mod.__name__)
+                mod_summary = _get_doc_summary(_submod)
+                _write_line(o, indent + " - " + mod_summary)
+            _write_empty_line(o)
+
+    def _write_classes_summary_table(o, mod):
+        classes = []
+        class_names = []
+        for mem_tup in inspect.getmembers(mod):
+            cls = mem_tup[1]
+            if inspect.isclass(cls) and not (
+                issubclass(cls, enum.Enum) or issubclass(cls, Exception)
+            ):
+                classes.append(cls)
+                class_names.append(mem_tup[0])
+        if classes:
+            _write_line(o, ".. _" + mod.__name__.lower() + "_classes:")
+            _write_empty_line(o)
+            _write_underlined(o, "Classes", "-")
+            _write_empty_line(o)
+            _write_hidden_toc(o, class_names)
+            _write_table_header(o)
+            for cls in classes:
+                _write_line(o, indent + "* - :class:`" + cls.__name__ + "`")
+                # For classes, the first line of the docstring is the
+                # signature. So we skip that line to pick up the summary.
+                cls_summary = _get_doc_summary(cls)
+                _write_line(o, indent + " - " + cls_summary)
+            _write_empty_line(o)
+
+    def _write_enums_summary_table(o, mod):
+        enums = []
+        for mem_tup in inspect.getmembers(mod):
+            e = mem_tup[1]
+            if inspect.isclass(e) and (issubclass(e, enum.Enum)):
+                enums.append(e)
+        if enums:
+            _write_underlined(o, "Enums", "-")
+            _write_empty_line(o)
+            _write_table_header(o)
+            for e in enums:
+                _write_line(o, indent + "* - :class:`" + e.__name__ + "`")
+                enum_summary = _get_doc_summary(e)
+                _write_line(o, indent + " - " + enum_summary)
+            _write_empty_line(o)
+
+    def _write_exceptions_summary_table(o, mod):
+        exps = []
+        for mem_tup in inspect.getmembers(mod):
+            e = mem_tup[1]
+            if inspect.isclass(e) and (issubclass(e, Exception)):
+                exps.append(e)
+
+        if exps:
+            _write_underlined(o, "Exceptions", "-")
+            _write_empty_line(o)
+            _write_table_header(o)
+            for e in exps:
+                _write_line(o, indent + "* - :class:`" + e.__name__ + "`")
+                excep_summary = _get_doc_summary(e)
+                _write_line(o, indent + " - " + excep_summary)
+            _write_empty_line(o)
+
+    def _write_functions_summary_table(o, mod, fnobj_list):
+        _write_table_header(o)
+        for fnobj in fnobj_list:
+            _write_line(o, indent + "* - :func:`" + fnobj.__name__ + "()`")
+            # For functions, the first line of the docstring is the
+            # signature. So we skip that line to pick up the summary.
+            fn_summary = _get_doc_summary(fnobj)
+            _write_line(o, indent + " - " + fn_summary)
+        _write_empty_line(o)
+
+    def _write_function_groups_summary(o, mod, groups):
+
+        for group in groups:
+            if group != "Other Functions":
+                _write_line(
+                    o,
+                    ".. _"
+                    + mod.__name__.lower()
+                    + "_"
+                    + group.lower().replace(" ", "_")
+                    + ":",
+                )
+                _write_empty_line(o)
+                _write_underlined(o, group, "-")
+                _write_empty_line(o)
+                _write_functions_summary_table(o, mod, groups[group])
+
+        # We want to write "Other Functions" in the end always
+        if "Other Functions" in groups:
+            _write_line(
+                o,
+                ".. _" + mod.__name__.lower() + "_other_functions:",
+            )
+            _write_empty_line(o)
+            _write_underlined(o, "Other Functions", "-")
+            _write_empty_line(o)
+            _write_functions_summary_table(o, mod, groups["Other Functions"])
+
+    mod = _get_module(module)
+
+    with io.StringIO() as output:
+        _write_line(output, rst_header)
+        _write_empty_line(output)
+        _write_marquee(output, pagename)
+        _write_empty_line(output)
+        _write_line(output, ".. currentmodule:: " + module)
+        _write_empty_line(output)
+        _write_line(output, ".. automodule:: " + module)
+        _write_empty_line(output)
+        _write_submodules_summary_table(output, mod)
+        _write_empty_line(output)
+        _write_classes_summary_table(output, mod)
+        _write_empty_line(output)
+        _write_function_groups_summary(output, mod, _group_functions(mod))
+        _write_empty_line(output)
+        _write_enums_summary_table(output, mod)
+        _write_empty_line(output)
+        _write_exceptions_summary_table(output, mod)
+        _write_empty_line(output)
+
+        return output.getvalue()
+
+
+def _generate_rst_for_all_classes(module, outputpath):
+ """Generates rst API docs for all classes in a module and writes them to
+ given path.
+
+ Args:
+ module ([str]): Name of module that needs to be documented
+ outputpath ([str]): Path where the rst files are to be saved.
+ """
+ mod = _get_module(module)
+
+ if not os.path.exists(outputpath):
+ raise ValueError("Invalid output path provided")
+ for name, obj in inspect.getmembers(mod):
+ if inspect.isclass(obj) and not (
+ issubclass(obj, enum.Enum) or issubclass(obj, Exception)
+ ):
+ out = outputpath + "/" + name + ".rst"
+ with open(out, "w") as rst_file:
+ rst_file.write(_generate_class_rst(obj))
+
+
+def _generate_rst_for_all_functions(module, outputpath):
+    """Write the rst page with the autodoc entries of a module's functions.
+
+    The functions are grouped by ``_group_functions`` and the page is saved
+    as ``<module>_functions_api.rst`` in ``outputpath``.
+
+    Args:
+        module (str): Name of a module that is present in ``sys.modules``.
+        outputpath (str): Existing directory where the rst file is saved.
+
+    Raises:
+        ValueError: If ``outputpath`` does not exist, or if the module is
+            not found in ``sys.modules``.
+    """
+    mod = _get_module(module)
+    groups = _group_functions(mod)
+
+    rst_header = "".join([".. _", module, "_functions_api:"])
+    pagename = module + " Functions"
+
+    if not os.path.exists(outputpath):
+        raise ValueError("Invalid output path provided")
+
+    def _write_function_autodocs(o, groups):
+        for group, fnlist in groups.items():
+            _write_empty_line(o)
+            _write_underlined(o, group, "-")
+            _write_empty_line(o)
+            for fn in fnlist:
+                # Write to the stream that was passed in, not to the
+                # ``output`` variable of the enclosing function.
+                _write_line(o, ".. autofunction:: " + fn.__name__)
+
+    out = outputpath + "/" + module + "_functions_api.rst"
+    with open(out, "w") as rst_file:
+        with io.StringIO() as output:
+            _write_line(output, rst_header)
+            _write_empty_line(output)
+            _write_marquee(output, pagename)
+            _write_empty_line(output)
+            _write_empty_line(output)
+            _write_line(output, ".. currentmodule:: " + module)
+            _write_empty_line(output)
+            _write_function_autodocs(output, groups)
+            rst_file.write(output.getvalue())
+
+
+def _generate_rst_for_all_exceptions(module, outputpath):
+ """Write the rst page with an ``autoexception`` entry for every exception
+ class found in a module.
+
+ The page is saved as ``<module>_exception_api.rst`` in ``outputpath``.
+
+ Args:
+ module (str): Name of a module that is present in ``sys.modules``.
+ outputpath (str): Existing directory where the rst file is saved.
+
+ Raises:
+ ValueError: If ``outputpath`` does not exist, or if the module is not
+ found in ``sys.modules``.
+ """
+ mod = _get_module(module)
+ rst_header = "".join([".. _", module, "_exception_api:"])
+ pagename = module + " Exceptions"
+
+ if not os.path.exists(outputpath):
+ raise ValueError("Invalid output path provided")
+
+ out = outputpath + "/" + module + "_exception_api.rst"
+ with open(out, "w") as rst_file:
+ with io.StringIO() as output:
+ _write_line(output, rst_header)
+ _write_empty_line(output)
+ _write_marquee(output, pagename)
+ _write_empty_line(output)
+ _write_empty_line(output)
+ _write_line(output, ".. currentmodule:: " + module)
+ _write_empty_line(output)
+ for name, obj in inspect.getmembers(mod):
+ if inspect.isclass(obj) and issubclass(obj, Exception):
+ _write_line(output, ".. autoexception:: " + obj.__name__)
+
+ rst_file.write(output.getvalue())
+
+
+def _generate_rst_for_all_enums(module, outputpath):
+ """Write the rst page with an ``autoclass`` entry for every enum found in
+ a module.
+
+ The page is saved as ``<module>_enum_api.rst`` in ``outputpath``. Every
+ entry is given the ``:members:`` option.
+
+ Args:
+ module (str): Name of a module that is present in ``sys.modules``.
+ outputpath (str): Existing directory where the rst file is saved.
+
+ Raises:
+ ValueError: If ``outputpath`` does not exist, or if the module is not
+ found in ``sys.modules``.
+ """
+ mod = _get_module(module)
+ indent = " "
+ rst_header = "".join([".. _", module, "_enum_api:"])
+ pagename = module + " Enums"
+
+ if not os.path.exists(outputpath):
+ raise ValueError("Invalid output path provided")
+
+ out = outputpath + "/" + module + "_enum_api.rst"
+ with open(out, "w") as rst_file:
+ with io.StringIO() as output:
+ _write_line(output, rst_header)
+ _write_empty_line(output)
+ _write_marquee(output, pagename)
+ _write_empty_line(output)
+ _write_empty_line(output)
+ _write_line(output, ".. currentmodule:: " + module)
+ _write_empty_line(output)
+ for name, obj in inspect.getmembers(mod):
+ if inspect.isclass(obj) and issubclass(obj, enum.Enum):
+ _write_line(output, ".. autoclass:: " + obj.__name__)
+ _write_line(output, indent + ":members:")
+
+ rst_file.write(output.getvalue())
+
+
+def generate_all(module, outputpath):
+ """Recursively generate rst files for a root module and all its members.
+
+ Args:
+ module ([str]): Name of a Python module
+ outputpath ([str]): Output directory
+ """
+ mod = _get_module(module)
+ out = outputpath + "/" + module + "_pyapi.rst"
+ # Generate a summary page for the module's API
+ with open(out, "w") as rst_file:
+ rst_file.write(_generate_module_summary_rst(module))
+ # Generate supporting pages for the module
+ _generate_rst_for_all_classes(module, outputpath)
+ _generate_rst_for_all_enums(module, outputpath)
+ _generate_rst_for_all_exceptions(module, outputpath)
+ _generate_rst_for_all_functions(module, outputpath)
+
+ # Now recurse into any submodule and generate all for them too.
+ for submod in iter_modules(mod.__path__):
+ if submod.ispkg:
+ generate_all(module + "." + submod.name, outputpath)
+
+
+def main():
+    """Parse the command line and generate the rst files.
+
+    The options ``--dir`` (output directory) and ``--module`` (name of the
+    Python module) are both required.
+    """
+    # The text describes the program. Passed positionally it would be taken
+    # for ``prog`` and shown as the program name in the usage message.
+    parser = argparse.ArgumentParser(
+        description="Generate rst files for Python source files"
+    )
+    parser.add_argument("--dir", help="Output directory", required=True)
+    parser.add_argument("--module", help="Python module", required=True)
+
+    args = parser.parse_args()
+
+    # Run generate_all
+    generate_all(args.module, args.dir)
+
+
+# Only parse the command line when the file is run as a script, so that an
+# import of the module does not read the arguments of another program.
+if __name__ == "__main__":
+    main()
diff --git a/docs/index_doxyrest.rst.in b/docs/index_doxyrest.rst.in
index 51e55562f9..9e064ed047 100644
--- a/docs/index_doxyrest.rst.in
+++ b/docs/index_doxyrest.rst.in
@@ -1,16 +1,21 @@
.. include:: ./docfiles/intro.rst
+
+How-to Guides
+=============
+
.. toctree::
:maxdepth: 1
- :caption: User Guides
docfiles/user_guides/QuickStart
+ docfiles/user_guides/UserManual
+
.. toctree::
- :maxdepth: 1
- :caption: API Documentation
+ :maxdepth: 1
+ :caption: API Documentation
- docfiles/dpctl_pyapi
- @DOXYREST_OUTPUT_DIR_NAME@/index
+ docfiles/dpctl/dpctl_pyapi
+ @DOXYREST_OUTPUT_DIR_NAME@/index
.. include:: ./docfiles/boilerplate.rst
diff --git a/docs/index_no_doxyrest.rst.in b/docs/index_no_doxyrest.rst.in
index fffbf63cb4..ac57d680ae 100644
--- a/docs/index_no_doxyrest.rst.in
+++ b/docs/index_no_doxyrest.rst.in
@@ -1,15 +1,19 @@
.. include:: ./docfiles/intro.rst
+How-to Guides
+=============
+
.. toctree::
:maxdepth: 1
- :caption: User Guides
docfiles/user_guides/QuickStart
+ docfiles/user_guides/UserManual
+
.. toctree::
- :maxdepth: 1
- :caption: API Documentation
+ :maxdepth: 1
+ :caption: API Documentation
- docfiles/dpctl_pyapi
+ docfiles/dpctl/dpctl_pyapi
.. include:: ./docfiles/boilerplate.rst
diff --git a/dpctl-capi/helper/source/dpctl_async_error_handler.cpp b/dpctl-capi/helper/source/dpctl_async_error_handler.cpp
deleted file mode 100644
index 6b5d6db191..0000000000
--- a/dpctl-capi/helper/source/dpctl_async_error_handler.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-//===-- dpctl_async_error_handler.h - An async error handler -*-C++-*- ===//
-//
-// Data Parallel Control (dpctl)
-//
-// Copyright 2020-2021 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// A functor to use for passing an error handler callback function to sycl
-/// context and queue contructors.
-//===----------------------------------------------------------------------===//
-
-#include "dpctl_async_error_handler.h"
-
-void DPCTL_AsyncErrorHandler::operator()(
- const cl::sycl::exception_list &exceptions)
-{
- for (std::exception_ptr const &e : exceptions) {
- try {
- std::rethrow_exception(e);
- } catch (cl::sycl::exception const &e) {
- std::cerr << "Caught asynchronous SYCL exception:\n"
- << e.what() << std::endl;
- // FIXME: Change get_cl_code() to code() once DPCPP supports it.
- auto err_code = e.get_cl_code();
- handler_(err_code);
- }
- }
-}
diff --git a/dpctl-capi/tests/dpcpp_kernels.cpp b/dpctl-capi/tests/dpcpp_kernels.cpp
deleted file mode 100644
index 897aa86de4..0000000000
--- a/dpctl-capi/tests/dpcpp_kernels.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-#include "dpcpp_kernels.hpp"
-#include
-#include
-
-template sycl::kernel
-dpcpp_kernels::get_fill_kernel(sycl::queue &, size_t, int *, int);
-
-template sycl::kernel
-dpcpp_kernels::get_fill_kernel(sycl::queue &,
- size_t,
- unsigned int *,
- unsigned int);
-
-template sycl::kernel
-dpcpp_kernels::get_fill_kernel(sycl::queue &, size_t, double *, double);
-
-template sycl::kernel
-dpcpp_kernels::get_fill_kernel(sycl::queue &, size_t, float *, float);
-
-template sycl::kernel
-dpcpp_kernels::get_range_kernel(sycl::queue &, size_t, int *);
-
-template sycl::kernel
-dpcpp_kernels::get_range_kernel(sycl::queue &,
- size_t,
- unsigned int *);
-
-template sycl::kernel
-dpcpp_kernels::get_range_kernel(sycl::queue &, size_t, float *);
-
-template sycl::kernel
-dpcpp_kernels::get_range_kernel(sycl::queue &, size_t, double *);
-
-template sycl::kernel dpcpp_kernels::get_mad_kernel(sycl::queue &,
- size_t,
- int *,
- int *,
- int *,
- int);
-
-template sycl::kernel
-dpcpp_kernels::get_mad_kernel(sycl::queue &,
- size_t,
- unsigned int *,
- unsigned int *,
- unsigned int *,
- unsigned int);
-
-template sycl::kernel dpcpp_kernels::get_local_sort_kernel(sycl::queue &,
- size_t,
- size_t,
- int *,
- size_t);
-
-template sycl::kernel
-dpcpp_kernels::get_local_count_exceedance_kernel(sycl::queue &,
- size_t,
- size_t,
- int *,
- size_t,
- int,
- int *);
-
-template sycl::kernel
-dpcpp_kernels::get_local_count_exceedance_kernel(sycl::queue &,
- size_t,
- size_t,
- unsigned int *,
- size_t,
- unsigned int,
- int *);
-
-template sycl::kernel
-dpcpp_kernels::get_local_count_exceedance_kernel(sycl::queue &,
- size_t,
- size_t,
- float *,
- size_t,
- float,
- int *);
-
-template sycl::kernel
-dpcpp_kernels::get_local_count_exceedance_kernel(sycl::queue &,
- size_t,
- size_t,
- double *,
- size_t,
- double,
- int *);
diff --git a/dpctl/.gitignore b/dpctl/.gitignore
index 0c03f1ed6a..3e23a8af25 100644
--- a/dpctl/.gitignore
+++ b/dpctl/.gitignore
@@ -1,5 +1,6 @@
*.so
*.cpp
+*.cxx
*.c
*.h
memory/*.h
diff --git a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt
new file mode 100644
index 0000000000..541ef7d5e8
--- /dev/null
+++ b/dpctl/CMakeLists.txt
@@ -0,0 +1,192 @@
+
+find_package(PythonExtensions REQUIRED)
+find_package(NumPy REQUIRED)
+
+set(CYTHON_FLAGS "-t -w ${CMAKE_SOURCE_DIR}")
+find_package(Cython REQUIRED)
+
+if(WIN32)
+ string(CONCAT WARNING_FLAGS
+ "-Wall "
+ "-Wextra "
+ "-Winit-self "
+ "-Wunused-function "
+ "-Wuninitialized "
+ "-Wmissing-declarations "
+ "-Wno-unused-parameter "
+ )
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Ox ${WARNING_FLAGS}")
+ set(CMAKE_C_FLAGS_DEBUG
+ "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} -ggdb3 -DDEBUG"
+ )
+ set(CMAKE_CXX_FLAGS_DEBUG
+ "${CMAKE_CXX_FLAGS_DEBUG} ${WARNING_FLAGS} -ggdb3 -DDEBUG"
+ )
+ set(DPCTL_LDFLAGS "/NXCompat /DynamicBase")
+elseif(UNIX)
+ string(CONCAT WARNING_FLAGS
+ "-Wall "
+ "-Wextra "
+ "-Winit-self "
+ "-Wunused-function "
+ "-Wuninitialized "
+ "-Wmissing-declarations "
+ "-fdiagnostics-color=auto "
+ "-Wno-deprecated-declarations "
+ )
+ string(CONCAT SDL_FLAGS
+ "-fstack-protector "
+ "-fstack-protector-all "
+ "-fpic "
+ "-fPIC "
+ "-D_FORTIFY_SOURCE=2 "
+ "-Wformat "
+ "-Wformat-security "
+ "-fno-strict-overflow "
+ "-fno-delete-null-pointer-checks "
+ )
+ string(CONCAT CFLAGS
+ "${WARNING_FLAGS}"
+ "${SDL_FLAGS}"
+ )
+ string(CONCAT CXXFLAGS
+ "${WARNING_FLAGS}"
+ "${SDL_FLAGS}"
+ "-fsycl "
+ )
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${CFLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 ${CXXFLAGS}")
+ set(CMAKE_C_FLAGS_DEBUG
+ "${CMAKE_C_FLAGS_DEBUG} ${CFLAGS} -ggdb3 -DDEBUG"
+ )
+ set(CMAKE_CXX_FLAGS_DEBUG
+ "${CMAKE_CXX_FLAGS_DEBUG} ${CXXFLAGS} -ggdb3 -DDEBUG"
+ )
+ set(DPCTL_LDFLAGS "-z,noexecstack,-z,relro,-z,now")
+else()
+ message(FATAL_ERROR "Unsupported system.")
+endif()
+
+# at build time create include/ directory and copy header files over
+set(DPCTL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
+add_custom_target(_build_time_create_dpctl_include ALL
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${DPCTL_INCLUDE_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${DPCTL_INCLUDE_DIR}/syclinterface
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${DPCTL_INCLUDE_DIR}/syclinterface/Support
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${DPCTL_INCLUDE_DIR}/syclinterface/Config
+ DEPENDS DPCTLSyclInterface
+)
+
+set(_copied_header_files)
+file(GLOB _syclinterface_h ${CMAKE_SOURCE_DIR}/libsyclinterface/include/*.h)
+foreach(hf ${_syclinterface_h})
+ get_filename_component(_header_name ${hf} NAME)
+ set(_target_header_file ${DPCTL_INCLUDE_DIR}/syclinterface/${_header_name})
+ list(APPEND _copied_header_files ${_target_header_file})
+ add_custom_command(OUTPUT ${_target_header_file}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${hf} ${_target_header_file}
+ DEPENDS ${hf} _build_time_create_dpctl_include
+ VERBATIM
+ )
+endforeach()
+
+file(GLOB _syclinterface_Support_h ${CMAKE_SOURCE_DIR}/libsyclinterface/include/Support/*.h)
+foreach(hf ${_syclinterface_Support_h})
+ get_filename_component(_header_name ${hf} NAME)
+ set(_target_header_file ${DPCTL_INCLUDE_DIR}/syclinterface/Support/${_header_name})
+ list(APPEND _copied_header_files ${_target_header_file})
+ add_custom_command(OUTPUT ${_target_header_file}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${hf} ${_target_header_file}
+ DEPENDS ${hf} _build_time_create_dpctl_include
+ )
+endforeach()
+
+file(GLOB _syclinterface_Config_h ${CMAKE_SOURCE_DIR}/libsyclinterface/include/Config/*.h)
+foreach(hf ${_syclinterface_Config_h})
+ get_filename_component(_header_name ${hf} NAME)
+ set(_target_header_file ${DPCTL_INCLUDE_DIR}/syclinterface/Config/${_header_name})
+ list(APPEND _copied_header_files ${_target_header_file})
+ add_custom_command(OUTPUT ${_target_header_file}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${hf} ${_target_header_file}
+ DEPENDS ${hf} _build_time_create_dpctl_include
+ )
+endforeach()
+
+file(GLOB _apis_h ${CMAKE_CURRENT_SOURCE_DIR}/apis/include/*)
+foreach(hf ${_apis_h})
+ get_filename_component(_header_name ${hf} NAME)
+ set(_target_header_file ${DPCTL_INCLUDE_DIR}/${_header_name})
+ list(APPEND _copied_header_files ${_target_header_file})
+ add_custom_command(OUTPUT ${_target_header_file}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${hf} ${_target_header_file}
+ DEPENDS ${hf} _build_time_create_dpctl_include
+ )
+endforeach()
+
+add_custom_target(
+ _build_time_create_dpctl_include_copy ALL
+ DEPENDS ${_copied_header_files}
+)
+
+set(CMAKE_INSTALL_RPATH "$ORIGIN")
+
+function(build_dpctl_ext _trgt _src _dest)
+ add_cython_target(${_trgt} ${_src} CXX OUTPUT_VAR _generated_src)
+ add_library(${_trgt} MODULE ${_generated_src})
+ target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR})
+ add_dependencies(${_trgt} _build_time_create_dpctl_include_copy)
+ if (DPCTL_GENERATE_COVERAGE)
+ target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1)
+ target_compile_options(${_trgt} PRIVATE -fno-sycl-use-footer)
+ endif()
+ target_link_libraries(${_trgt} DPCTLSyclInterface)
+ target_link_options(${_trgt} PRIVATE "LINKER:${DPCTL_LDFLAGS}")
+ python_extension_module(${_trgt})
+ get_filename_component(_name_wle ${_generated_src} NAME_WLE)
+ get_filename_component(_generated_src_dir ${_generated_src} DIRECTORY)
+ set(_generated_public_h "${_generated_src_dir}/${_name_wle}.h")
+ set(_generated_api_h "${_generated_src_dir}/${_name_wle}_api.h")
+ set(_copy_trgt "${_trgt}_copy_capi_include")
+ add_custom_target(
+ ${_copy_trgt} ALL
+ COMMAND ${CMAKE_COMMAND}
+ -DSOURCE_FILE=${_generated_public_h}
+ -DDEST=${CMAKE_CURRENT_SOURCE_DIR}
+ -P ${CMAKE_SOURCE_DIR}/dpctl/cmake/copy_existing.cmake
+ COMMAND ${CMAKE_COMMAND}
+ -DSOURCE_FILE=${_generated_api_h}
+ -DDEST=${CMAKE_CURRENT_SOURCE_DIR}
+ -P ${CMAKE_SOURCE_DIR}/dpctl/cmake/copy_existing.cmake
+ DEPENDS ${_trgt}
+ VERBATIM
+ COMMENT "Copying Cython-generated headers to dpctl"
+ )
+ if (DPCTL_GENERATE_COVERAGE)
+ set(_copy_cxx_trgt "${_trgt}_copy_cxx")
+ add_custom_target(
+ ${_copy_cxx_trgt} ALL
+ COMMAND ${CMAKE_COMMAND}
+ -DSOURCE_FILE=${_generated_src}
+ -DDEST=${CMAKE_CURRENT_SOURCE_DIR}
+ -P ${CMAKE_SOURCE_DIR}/dpctl/cmake/copy_existing.cmake
+ DEPENDS ${_trgt}
+ VERBATIM
+ COMMENT "Copying Cython-generated source to dpctl"
+ )
+ endif()
+ install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
+endfunction()
+
+file(GLOB _cython_sources *.pyx)
+foreach(_cy_file ${_cython_sources})
+ get_filename_component(_trgt ${_cy_file} NAME_WLE)
+ build_dpctl_ext(${_trgt} ${_cy_file} "dpctl")
+endforeach()
+
+target_include_directories(_sycl_queue PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+
+add_subdirectory(program)
+add_subdirectory(memory)
+add_subdirectory(tensor)
+add_subdirectory(utils)
diff --git a/dpctl/__init__.py b/dpctl/__init__.py
index efd93f3630..ca4ee9796f 100644
--- a/dpctl/__init__.py
+++ b/dpctl/__init__.py
@@ -18,11 +18,12 @@
**Data Parallel Control (dpctl)** is a Python abstraction layer over SYCL.
Dpctl implements a subset of SYCL's API providing wrappers for the
- SYCL runtime classes described in `Section 4.6`_ of the `SYCL 2020 spec`_.
- Note that the SYCL ``device_selector`` class is not implemented, instead
- there are device selection helper functions that can be used to simulate
- the same behavior. Dpctl implements the ``ONEPI::filter_selector`` extension
- that is included in Intel's DPC++ SYCL compiler.
+ SYCL runtime classes described in :sycl_runtime_classes:`Section 4.6 <>` of
+ the :sycl_spec_2020:`SYCL 2020 spec <>`. Note that the SYCL
+ ``device_selector`` class is not implemented, instead there are device
+ selection helper functions that can be used to simulate the same behavior.
+ Dpctl implements the ``ONEPI::filter_selector`` extension that is included
+ in Intel's DPC++ SYCL compiler.
The module also includes a global SYCL queue manager. The queue manager
provides convenience functions to create a global instance of
@@ -123,11 +124,18 @@
__all__ += [
"get_include",
]
+# add submodules
+__all__ += [
+ "memory",
+ "program",
+ "tensor",
+ "utils",
+]
def get_include():
- """
- Return the directory that contains the dpctl *.h header files.
+ r"""
+ Return the directory that contains the dpctl \*.h header files.
Extension modules that need to be compiled against dpctl should use
this function to locate the appropriate include directory.
diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd
index eefdd4da85..8a666b045b 100644
--- a/dpctl/_backend.pxd
+++ b/dpctl/_backend.pxd
@@ -25,15 +25,15 @@ from libc.stdint cimport int64_t, uint32_t
from libcpp cimport bool
-cdef extern from "dpctl_error_handler_type.h":
+cdef extern from "syclinterface/dpctl_error_handler_type.h":
ctypedef void error_handler_callback(int err_code)
-cdef extern from "dpctl_utils.h":
+cdef extern from "syclinterface/dpctl_utils.h":
cdef void DPCTLCString_Delete(const char *str)
cdef void DPCTLSize_t_Array_Delete(size_t *arr)
-cdef extern from "dpctl_sycl_enum_types.h":
+cdef extern from "syclinterface/dpctl_sycl_enum_types.h":
ctypedef enum _backend_type 'DPCTLSyclBackendType':
_ALL_BACKENDS 'DPCTL_ALL_BACKENDS'
_CUDA 'DPCTL_CUDA'
@@ -93,7 +93,7 @@ cdef extern from "dpctl_sycl_enum_types.h":
_usm_host_allocations 'usm_host_allocations',
_usm_shared_allocations 'usm_shared_allocations',
_usm_restricted_shared_allocations 'usm_restricted_shared_allocations',
- _usm_system_allocator 'usm_system_allocator'
+ _usm_system_allocations 'usm_system_allocations'
ctypedef enum _partition_affinity_domain_type 'DPCTLPartitionAffinityDomainType':
_not_applicable 'not_applicable',
@@ -111,7 +111,7 @@ cdef extern from "dpctl_sycl_enum_types.h":
_COMPLETE 'DPCTL_COMPLETE'
-cdef extern from "dpctl_sycl_types.h":
+cdef extern from "syclinterface/dpctl_sycl_types.h":
cdef struct DPCTLOpaqueSyclContext
cdef struct DPCTLOpaqueSyclDevice
cdef struct DPCTLOpaqueSyclDeviceSelector
@@ -133,12 +133,12 @@ cdef extern from "dpctl_sycl_types.h":
ctypedef DPCTLOpaqueSyclUSM *DPCTLSyclUSMRef
-cdef extern from "dpctl_sycl_device_manager.h":
+cdef extern from "syclinterface/dpctl_sycl_device_manager.h":
cdef struct DPCTLDeviceVector
ctypedef DPCTLDeviceVector *DPCTLDeviceVectorRef
-cdef extern from "dpctl_sycl_device_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_device_interface.h":
cdef bool DPCTLDevice_AreEq(const DPCTLSyclDeviceRef DRef1,
const DPCTLSyclDeviceRef DRef2)
cdef DPCTLSyclDeviceRef DPCTLDevice_Copy(const DPCTLSyclDeviceRef DRef)
@@ -192,7 +192,7 @@ cdef extern from "dpctl_sycl_device_interface.h":
cdef DPCTLSyclDeviceRef DPCTLDevice_GetParentDevice(const DPCTLSyclDeviceRef DRef)
-cdef extern from "dpctl_sycl_device_manager.h":
+cdef extern from "syclinterface/dpctl_sycl_device_manager.h":
cdef DPCTLDeviceVectorRef DPCTLDeviceVector_CreateFromArray(
size_t nelems,
DPCTLSyclDeviceRef *elems)
@@ -213,7 +213,7 @@ cdef extern from "dpctl_sycl_device_manager.h":
cdef int64_t DPCTLDeviceMgr_GetRelativeId(const DPCTLSyclDeviceRef DRef)
-cdef extern from "dpctl_sycl_device_selector_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_device_selector_interface.h":
DPCTLSyclDeviceSelectorRef DPCTLAcceleratorSelector_Create()
DPCTLSyclDeviceSelectorRef DPCTLDefaultSelector_Create()
DPCTLSyclDeviceSelectorRef DPCTLCPUSelector_Create()
@@ -224,11 +224,11 @@ cdef extern from "dpctl_sycl_device_selector_interface.h":
int DPCTLDeviceSelector_Score(DPCTLSyclDeviceSelectorRef, DPCTLSyclDeviceRef)
-cdef extern from "dpctl_sycl_event_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_event_interface.h":
cdef DPCTLSyclEventRef DPCTLEvent_Create()
cdef DPCTLSyclEventRef DPCTLEvent_Copy(const DPCTLSyclEventRef ERef)
- cdef void DPCTLEvent_Wait(DPCTLSyclEventRef ERef)
- cdef void DPCTLEvent_WaitAndThrow(DPCTLSyclEventRef ERef)
+ cdef void DPCTLEvent_Wait(DPCTLSyclEventRef ERef) nogil
+ cdef void DPCTLEvent_WaitAndThrow(DPCTLSyclEventRef ERef) nogil
cdef void DPCTLEvent_Delete(DPCTLSyclEventRef ERef)
cdef _event_status_type DPCTLEvent_GetCommandExecutionStatus(DPCTLSyclEventRef ERef)
cdef _backend_type DPCTLEvent_GetBackend(DPCTLSyclEventRef ERef)
@@ -246,13 +246,13 @@ cdef extern from "dpctl_sycl_event_interface.h":
cdef size_t DPCTLEvent_GetProfilingInfoEnd(DPCTLSyclEventRef ERef)
-cdef extern from "dpctl_sycl_kernel_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_kernel_interface.h":
cdef const char* DPCTLKernel_GetFunctionName(const DPCTLSyclKernelRef KRef)
cdef size_t DPCTLKernel_GetNumArgs(const DPCTLSyclKernelRef KRef)
cdef void DPCTLKernel_Delete(DPCTLSyclKernelRef KRef)
-cdef extern from "dpctl_sycl_platform_manager.h":
+cdef extern from "syclinterface/dpctl_sycl_platform_manager.h":
cdef struct DPCTLPlatformVector
ctypedef DPCTLPlatformVector *DPCTLPlatformVectorRef
@@ -265,7 +265,7 @@ cdef extern from "dpctl_sycl_platform_manager.h":
cdef void DPCTLPlatformMgr_PrintInfo(const DPCTLSyclPlatformRef, size_t)
-cdef extern from "dpctl_sycl_platform_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_platform_interface.h":
cdef DPCTLSyclPlatformRef DPCTLPlatform_Copy(const DPCTLSyclPlatformRef)
cdef DPCTLSyclPlatformRef DPCTLPlatform_Create()
cdef DPCTLSyclPlatformRef DPCTLPlatform_CreateFromSelector(
@@ -278,14 +278,14 @@ cdef extern from "dpctl_sycl_platform_interface.h":
cdef DPCTLPlatformVectorRef DPCTLPlatform_GetPlatforms()
-cdef extern from "dpctl_sycl_context_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_context_interface.h":
cdef DPCTLSyclContextRef DPCTLContext_Create(
const DPCTLSyclDeviceRef DRef,
- error_handler_callback *error_handler,
+ error_handler_callback *handler,
int properties)
cdef DPCTLSyclContextRef DPCTLContext_CreateFromDevices(
const DPCTLDeviceVectorRef DVRef,
- error_handler_callback *error_handler,
+ error_handler_callback *handler,
int properties)
cdef DPCTLSyclContextRef DPCTLContext_Copy(
const DPCTLSyclContextRef CRef)
@@ -299,7 +299,7 @@ cdef extern from "dpctl_sycl_context_interface.h":
cdef void DPCTLContext_Delete(DPCTLSyclContextRef CtxRef)
-cdef extern from "dpctl_sycl_program_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_program_interface.h":
cdef DPCTLSyclProgramRef DPCTLProgram_CreateFromSpirv(
const DPCTLSyclContextRef Ctx,
const void *IL,
@@ -317,13 +317,13 @@ cdef extern from "dpctl_sycl_program_interface.h":
cdef void DPCTLProgram_Delete(DPCTLSyclProgramRef PRef)
-cdef extern from "dpctl_sycl_queue_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_queue_interface.h":
cdef bool DPCTLQueue_AreEq(const DPCTLSyclQueueRef QRef1,
const DPCTLSyclQueueRef QRef2)
cdef DPCTLSyclQueueRef DPCTLQueue_Create(
const DPCTLSyclContextRef CRef,
const DPCTLSyclDeviceRef DRef,
- error_handler_callback *error_handler,
+ error_handler_callback *handler,
int properties)
cdef DPCTLSyclQueueRef DPCTLQueue_CreateForDevice(
const DPCTLSyclDeviceRef dRef,
@@ -356,7 +356,7 @@ cdef extern from "dpctl_sycl_queue_interface.h":
size_t NDims,
const DPCTLSyclEventRef *DepEvents,
size_t NDepEvents)
- cdef void DPCTLQueue_Wait(const DPCTLSyclQueueRef QRef)
+ cdef void DPCTLQueue_Wait(const DPCTLSyclQueueRef QRef) nogil
cdef DPCTLSyclEventRef DPCTLQueue_Memcpy(
const DPCTLSyclQueueRef Q,
void *Dest,
@@ -381,7 +381,7 @@ cdef extern from "dpctl_sycl_queue_interface.h":
cdef bool DPCTLQueue_HasEnableProfiling(const DPCTLSyclQueueRef QRef)
-cdef extern from "dpctl_sycl_queue_manager.h":
+cdef extern from "syclinterface/dpctl_sycl_queue_manager.h":
cdef DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue()
cdef bool DPCTLQueueMgr_GlobalQueueIsCurrent()
cdef bool DPCTLQueueMgr_IsCurrentQueue(const DPCTLSyclQueueRef QRef)
@@ -391,26 +391,28 @@ cdef extern from "dpctl_sycl_queue_manager.h":
cdef size_t DPCTLQueueMgr_GetQueueStackSize()
-cdef extern from "dpctl_sycl_usm_interface.h":
+cdef extern from "syclinterface/dpctl_sycl_usm_interface.h":
cdef DPCTLSyclUSMRef DPCTLmalloc_shared(
size_t size,
- DPCTLSyclQueueRef QRef)
+ DPCTLSyclQueueRef QRef) nogil
cdef DPCTLSyclUSMRef DPCTLmalloc_host(
size_t size,
- DPCTLSyclQueueRef QRef)
- cdef DPCTLSyclUSMRef DPCTLmalloc_device(size_t size, DPCTLSyclQueueRef QRef)
+ DPCTLSyclQueueRef QRef) nogil
+ cdef DPCTLSyclUSMRef DPCTLmalloc_device(
+ size_t size,
+ DPCTLSyclQueueRef QRef) nogil
cdef DPCTLSyclUSMRef DPCTLaligned_alloc_shared(
size_t alignment,
size_t size,
- DPCTLSyclQueueRef QRef)
+ DPCTLSyclQueueRef QRef) nogil
cdef DPCTLSyclUSMRef DPCTLaligned_alloc_host(
size_t alignment,
size_t size,
- DPCTLSyclQueueRef QRef)
+ DPCTLSyclQueueRef QRef) nogil
cdef DPCTLSyclUSMRef DPCTLaligned_alloc_device(
size_t alignment,
size_t size,
- DPCTLSyclQueueRef QRef)
+ DPCTLSyclQueueRef QRef) nogil
cdef void DPCTLfree_with_queue(
DPCTLSyclUSMRef MRef,
DPCTLSyclQueueRef QRef)
diff --git a/dpctl/_diagnostics.pyx b/dpctl/_diagnostics.pyx
new file mode 100644
index 0000000000..dc98cb29db
--- /dev/null
+++ b/dpctl/_diagnostics.pyx
@@ -0,0 +1,80 @@
+# Data Parallel Control (dpctl)
+#
+# Copyright 2020-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+""" Implements developer utilities.
+"""
+import contextlib
+import os
+
+
+cdef extern from "syclinterface/dpctl_service.h":
+ cdef void DPCTLService_InitLogger(const char *, const char *)  # called by _init_logger as (app_name, log_dir)
+ cdef void DPCTLService_ShutdownLogger()  # called by _shutdown_logger
+
+
+def _init_logger(log_dir=None):
+    """Initialize logger to use given directory to save logs.
+
+    Raises ValueError if a truthy ``log_dir`` does not exist; a falsy one is
+    passed on as NULL. No effect if `dpctl` was not built to use logger.
+    """
+    cdef bytes p = b""
+    cdef const char *app_name = "dpctl"
+    cdef const char *ld_cstr = NULL
+    if log_dir:
+        if not os.path.exists(log_dir):
+            raise ValueError(f"Path {log_dir} does not exist")
+        # fsencode handles str, bytes and os.PathLike alike and uses the
+        # filesystem encoding rather than a hard-coded UTF-8
+        p = os.fsencode(log_dir)
+        ld_cstr = p  # borrows p's buffer, so p must stay alive for the call
+    DPCTLService_InitLogger(app_name, ld_cstr)
+
+
+def _shutdown_logger():
+ """Finalize the logger by calling ``DPCTLService_ShutdownLogger``.
+
+ The call has no effect if `dpctl` was not built to use logger.
+ """
+ DPCTLService_ShutdownLogger()
+
+
+@contextlib.contextmanager
+def syclinterface_diagnostics(verbosity="warning", log_dir=None):
+    """Context manager that activates verbosity of DPCTLSyclInterface
+    function calls; ``verbosity`` must be "warning" or "error" (ValueError
+    otherwise) and ``log_dir`` is passed on to the logger initializer."""
+    _allowed_verbosity = ["warning", "error"]
+    if verbosity not in _allowed_verbosity:
+        raise ValueError(
+            f"Verbosity argument not understood. "
+            f"Permitted values are {_allowed_verbosity}"
+        )
+    _init_logger(log_dir=log_dir)
+    _saved_verbosity = os.environ.get("DPCTL_VERBOSITY", None)
+    os.environ["DPCTL_VERBOSITY"] = verbosity
+    try:
+        yield
+    finally:
+        _shutdown_logger()
+        if _saved_verbosity is not None:  # restore even an empty value
+            os.environ["DPCTL_VERBOSITY"] = _saved_verbosity
+        else:  # pop() tolerates the managed block having unset it
+            os.environ.pop("DPCTL_VERBOSITY", None)
diff --git a/dpctl/_host_task_util.hpp b/dpctl/_host_task_util.hpp
new file mode 100644
index 0000000000..7871cdebfc
--- /dev/null
+++ b/dpctl/_host_task_util.hpp
@@ -0,0 +1,74 @@
+//===--- _host_task_util.hpp - Implements async DECREF =//
+//
+// Data Parallel Control (dpctl)
+//
+// Copyright 2020-2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a utility function to schedule host task to a sycl
+/// queue depending on given array of sycl events to decrement reference counts
+/// for the given array of Python objects.
+///
+/// N.B.: The host task attempts to acquire GIL, so queue wait, event wait and
+/// other synchronization mechanisms should be called after releasing the GIL to
+/// avoid deadlocks.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Python.h"
+#include "syclinterface/dpctl_data_types.h"
+#include <CL/sycl.hpp>
+
+/// Submits a host task to the queue behind QRef that depends on the nERefs
+/// events in ERefs and, if the interpreter is still initialized, acquires
+/// the GIL and decrements the reference count of each object in obj_array.
+/// Returns 0 on submission, 1 if QRef is null or submit throws.
+int async_dec_ref(DPCTLSyclQueueRef QRef,
+                  PyObject **obj_array,
+                  size_t obj_array_size,
+                  DPCTLSyclEventRef *ERefs,
+                  size_t nERefs)
+{
+    sycl::queue *q = reinterpret_cast<sycl::queue *>(QRef);
+    if (q == nullptr) {
+        return 1;
+    }
+    // copy the pointers so that the host task captures them by value
+    std::vector<PyObject *> obj_vec(obj_array, obj_array + obj_array_size);
+
+    try {
+        q->submit([&](sycl::handler &cgh) {
+            for (size_t ev_id = 0; ev_id < nERefs; ++ev_id) {
+                cgh.depends_on(
+                    *(reinterpret_cast<sycl::event *>(ERefs[ev_id])));
+            }
+            cgh.host_task([obj_vec]() {
+                // if the main thread has not finalized the interpreter yet
+                if (Py_IsInitialized()) {
+                    PyGILState_STATE gstate = PyGILState_Ensure();
+                    for (PyObject *obj : obj_vec) {
+                        Py_DECREF(obj);
+                    }
+                    PyGILState_Release(gstate);
+                }
+            });
+        });
+    } catch (const std::exception &) {
+        return 1;
+    }
+    return 0;
+}
diff --git a/dpctl/_sycl_context.pyx b/dpctl/_sycl_context.pyx
index 404f27c99b..247a5d0f68 100644
--- a/dpctl/_sycl_context.pyx
+++ b/dpctl/_sycl_context.pyx
@@ -86,8 +86,9 @@ cdef class _SyclContext:
cdef class SyclContext(_SyclContext):
"""
SyclContext(arg=None)
- Python class representing ``cl::sycl::context``. There are multiple
- ways to create a :class:`dpctl.SyclContext` object:
+ A Python wrapper for the :sycl_context:`sycl::context <>` C++ class.
+
+ There are multiple ways to create a :class:`dpctl.SyclContext` object:
- Invoking the constructor with no arguments creates a context using
the default selector.
@@ -479,9 +480,18 @@ cdef class SyclContext(_SyclContext):
&_context_capsule_deleter
)
-cdef api DPCTLSyclContextRef get_context_ref(SyclContext ctx):
+cdef api DPCTLSyclContextRef SyclContext_GetContextRef(SyclContext ctx):
"""
C-API function to get opaque context reference from
:class:`dpctl.SyclContext` instance.
"""
return ctx.get_context_ref()
+
+
+cdef api SyclContext SyclContext_Make(DPCTLSyclContextRef CRef):
+    """
+    C-API function that builds a :class:`dpctl.SyclContext` instance.
+    The instance is created from a copy of the opaque context reference
+    made with ``DPCTLContext_Copy``, not from ``CRef`` itself.
+    """
+    return SyclContext._create(DPCTLContext_Copy(CRef))
diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx
index 70302e9937..ee30002cce 100644
--- a/dpctl/_sycl_device.pyx
+++ b/dpctl/_sycl_device.pyx
@@ -175,13 +175,13 @@ cdef void _init_helper(_SyclDevice device, DPCTLSyclDeviceRef DRef):
cdef class SyclDevice(_SyclDevice):
""" SyclDevice(arg=None)
- Python equivalent for cl::sycl::device class.
+ A Python wrapper for the :sycl_device:`sycl::device <>` C++ class.
There are two ways of creating a SyclDevice instance:
- by directly passing in a filter string to the class
constructor. The filter string needs to conform to the
- `DPC++ filter selector SYCL extension `_.
+ :oneapi_filter_selection:`DPC++ filter selector SYCL extension <>`.
:Example:
.. code-block:: python
@@ -441,8 +441,8 @@ cdef class SyclDevice(_SyclDevice):
return DPCTLDevice_HasAspect(self._device_ref, AT)
@property
- def has_aspect_usm_system_allocator(self):
- cdef _aspect_type AT = _aspect_type._usm_system_allocator
+ def has_aspect_usm_system_allocations(self):
+ cdef _aspect_type AT = _aspect_type._usm_system_allocations
return DPCTLDevice_HasAspect(self._device_ref, AT)
@property
@@ -1130,9 +1130,18 @@ cdef class SyclDevice(_SyclDevice):
else:
return str(relId)
-cdef api DPCTLSyclDeviceRef get_device_ref(SyclDevice dev):
+cdef api DPCTLSyclDeviceRef SyclDevice_GetDeviceRef(SyclDevice dev):
"""
C-API function to get opaque device reference from
:class:`dpctl.SyclDevice` instance.
"""
return dev.get_device_ref()
+
+
+cdef api SyclDevice SyclDevice_Make(DPCTLSyclDeviceRef DRef):
+    """
+    C-API function that builds a :class:`dpctl.SyclDevice` instance.
+    The instance is created from a copy of the opaque device reference
+    made with ``DPCTLDevice_Copy``, not from ``DRef`` itself.
+    """
+    return SyclDevice._create(DPCTLDevice_Copy(DRef))
diff --git a/dpctl/_sycl_event.pyx b/dpctl/_sycl_event.pyx
index f78e19c326..c64478d3c6 100644
--- a/dpctl/_sycl_event.pyx
+++ b/dpctl/_sycl_event.pyx
@@ -57,13 +57,22 @@ __all__ = [
_logger = logging.getLogger(__name__)
-cdef api DPCTLSyclEventRef get_event_ref(SyclEvent ev):
+cdef api DPCTLSyclEventRef SyclEvent_GetEventRef(SyclEvent ev):
""" C-API function to access opaque event reference from
Python object of type :class:`dpctl.SyclEvent`.
"""
return ev.get_event_ref()
+cdef api SyclEvent SyclEvent_Make(DPCTLSyclEventRef ERef):
+    """
+    C-API function that builds a :class:`dpctl.SyclEvent` instance.
+    The instance is created from a copy of the opaque event reference
+    made with ``DPCTLEvent_Copy``, not from ``ERef`` itself.
+    """
+    return SyclEvent._create(DPCTLEvent_Copy(ERef))
+
+
cdef void _event_capsule_deleter(object o):
cdef DPCTLSyclEventRef ERef = NULL
if pycapsule.PyCapsule_IsValid(o, "SyclEventRef"):
@@ -89,7 +98,6 @@ cdef class _SyclEvent:
def __dealloc__(self):
if (self._event_ref):
- DPCTLEvent_Wait(self._event_ref)
DPCTLEvent_Delete(self._event_ref)
self._event_ref = NULL
self.args = None
@@ -215,7 +223,7 @@ cdef class SyclEvent(_SyclEvent):
@staticmethod
cdef void _wait(SyclEvent event):
- DPCTLEvent_WaitAndThrow(event._event_ref)
+ with nogil: DPCTLEvent_WaitAndThrow(event._event_ref)
@staticmethod
def wait_for(event):
@@ -250,13 +258,15 @@ cdef class SyclEvent(_SyclEvent):
**SyclEventRef**. The ownership of the pointer inside the capsule is
passed to the caller, and pointer is deleted when the capsule goes out
of scope.
+
Returns:
:class:`pycapsule`: A capsule object storing a copy of the
- ``cl::sycl::event`` pointer belonging to thus
+ ``cl::sycl::event`` pointer belonging to a
:class:`dpctl.SyclEvent` instance.
Raises:
ValueError: If the ``DPCTLEvent_Copy`` fails to copy the
``cl::sycl::event`` pointer.
+
"""
cdef DPCTLSyclEventRef ERef = NULL
ERef = DPCTLEvent_Copy(self._event_ref)
@@ -359,4 +369,4 @@ cdef class SyclEvent(_SyclEvent):
cpdef void wait(self):
"Synchronously wait for completion of this event."
- DPCTLEvent_Wait(self._event_ref)
+ with nogil: DPCTLEvent_Wait(self._event_ref)
diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx
index 1091bbd765..30a64fec4c 100644
--- a/dpctl/_sycl_queue.pyx
+++ b/dpctl/_sycl_queue.pyx
@@ -65,11 +65,17 @@ import ctypes
from .enum_types import backend_type
from cpython cimport pycapsule
+from cpython.ref cimport Py_DECREF, Py_INCREF, PyObject
from libc.stdlib cimport free, malloc
import collections.abc
import logging
+
+cdef extern from "_host_task_util.hpp":
+ int async_dec_ref(DPCTLSyclQueueRef, PyObject **, size_t, DPCTLSyclEventRef *, size_t) nogil
+
+
__all__ = [
"SyclQueue",
"SyclKernelInvalidRangeError",
@@ -714,12 +720,14 @@ cdef class SyclQueue(_SyclQueue):
cdef _arg_data_type *kargty = NULL
cdef DPCTLSyclEventRef *depEvents = NULL
cdef DPCTLSyclEventRef Eref = NULL
- cdef int ret
+ cdef int ret = 0
cdef size_t gRange[3]
cdef size_t lRange[3]
cdef size_t nGS = len(gS)
cdef size_t nLS = len(lS) if lS is not None else 0
cdef size_t nDE = len(dEvents) if dEvents is not None else 0
+ cdef PyObject **arg_objects = NULL
+ cdef ssize_t i = 0
# Allocate the arrays to be sent to DPCTLQueue_Submit
kargs = malloc(len(args) * sizeof(void*))
@@ -820,11 +828,27 @@ cdef class SyclQueue(_SyclQueue):
raise SyclKernelSubmitError(
"Kernel submission to Sycl queue failed."
)
+ # increment reference counts to each argument
+ arg_objects = malloc(len(args) * sizeof(PyObject *))
+ for i in range(len(args)):
+ arg_objects[i] = (args[i])
+ Py_INCREF(