diff --git a/.cirrus.yml b/.cirrus.yml new file mode 100644 index 0000000..3468577 --- /dev/null +++ b/.cirrus.yml @@ -0,0 +1,24 @@ +######################################################### +# Build arm64 wheels for OSX on Cirrus CI +######################################################### + +cirrus_wheels_macos_arm64_task: + name: Build macOS arm64 wheels. + trigger_type: manual + macos_instance: + image: ghcr.io/cirruslabs/macos-monterey-xcode:latest + env: + PATH: /opt/homebrew/opt/python@3.10/bin:$PATH + CIBW_ARCHS_MACOS: arm64 + install_pre_requirements_script: + - brew install python@3.10 + - ln -s python3 /opt/homebrew/opt/python@3.10/bin/python + - which python + - python --version + install_cibuildwheel_script: + - python -m pip install cibuildwheel + run_cibuildwheel_script: + - CIBW_BUILD_VERBOSITY=3 CIBW_SKIP="*-win32 *-manylinux_* *-musllinux_*" CIBW_ARCHS_MACOS="arm64" CIBW_BUILD="cp39-* cp310-* cp311-*" CIBW_BEFORE_ALL_MACOS="brew tap fbkarsdorp/homebrew-lamachine && brew install icu4c libxml2 frog" cibuildwheel --platform macos + wheels_artifacts: + path: "wheelhouse/*" + diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 9954741..a0b3a7e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,63 +6,81 @@ on: [workflow_dispatch] jobs: build_wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} + name: Build wheels for ${{matrix.python.cp}}-${{ matrix.buildplat.sys }} + runs-on: ${{ matrix.buildplat.runs_on }} strategy: matrix: - os: [ubuntu-20.04, macOS-12] + buildplat: + - { runs_on: ubuntu-22.04, sys: manylinux, arch: x86_64, benv: "" } + - { runs_on: ubuntu-22.04, sys: musllinux, arch: x86_64, benv: "" } + - { runs_on: macos-14, sys: macosx, arch: arm64, benv: "14.0" } + python: + - { cp: "cp38", rel: "3.8" } + - { cp: "cp39", rel: "3.9" } + - { cp: "cp310", rel: "3.10" } + - { cp: "cp311", rel: "3.11" } + - { cp: "cp312", rel: "3.12" } + - { cp: "cp313", rel:
"3.13" } + - { cp: "cp314", rel: "3.14" } steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4.1.1 # Used to host cibuildwheel - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" - name: Install cibuildwheel run: python -m pip install cibuildwheel - - name: Build wheels + - name: Build wheels (Linux glibc) + if: ${{ matrix.buildplat.sys == 'manylinux' }} run: python -m cibuildwheel --output-dir wheelhouse env: + CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* CIBW_ARCHS_LINUX: "x86_64" - CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_BEFORE_ALL_LINUX: > if command -v apt-get; then apt-get -y git libicu-dev libxml2-dev libexttextcat-dev libxslt1-dev libbz2-dev zlib1g-dev autoconf automake autoconf-archive libtool autotools-dev gcc g++ make elif command -v yum; then yum install -y git libicu-devel libxml2-devel libxslt-devel libexttextcat zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget #on CentOS 7 we also have libtar-devel libexttextcat-devel which will be installed by build-deps.sh, on 8 they are missing and will be installed from source or otherwise elif command -v apk; then apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync && rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ fi && ./build-deps.sh - CIBW_BEFORE_ALL_MACOS: > - brew install pkg-config autoconf-archive autoconf automake && - git clone https://github.com/tukaani-project/xz && - cd xz && - autoreconf --install && ./configure && make && make install && - cd ..
&& - git clone https://github.com/unicode-org/icu && - cd icu/icu4c/source && - ./configure && make && make install && ls -l /usr/local/lib/libicu* && - cd ../../../ && - git clone https://gitlab.gnome.org/GNOME/libxml2 && - cd libxml2 && - autoreconf --install && ./configure && make && make install && - cd ../ && - wget https://software.wise-guys.nl/download/libtextcat-2.2.tar.gz && - tar xzf libtextcat-2.2.tar.gz && - cd libtextcat-2.2 && autoreconf -i && ./configure && make && make install && - mkdir /usr/local/include/libtextcat/ && cp src/*.h /usr/local/include/libtextcat/ && - cd ../ && - git clone https://github.com/LanguageMachines/frog && - cd frog && - ./build-deps.sh && - ./bootstrap.sh && - ./configure && make && make install - CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2014_x86_64 + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_28_x86_64 CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - - uses: actions/upload-artifact@v3 + - name: Build wheels (Linux musl) + if: ${{ matrix.buildplat.sys == 'musllinux' && matrix.python.cp != 'cp314' }} + run: python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* + CIBW_ARCHS_LINUX: "x86_64" + CIBW_BEFORE_ALL_LINUX: > + apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync && + mkdir -p /usr/local/share/aclocal/ && rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ && + ./build-deps.sh + CIBW_MUSLLINUX_X86_64_IMAGE: quay.io/pypa/musllinux_1_1_x86_64 + CIBW_MUSLLINUX_AARCH64_IMAGE: quay.io/pypa/musllinux_1_1_aarch64 + CIBW_SKIP: "*-win* *-manylinux_i686 pp*" + + - name: Build wheels (macOS) + if: ${{ runner.os == 'macOS' && matrix.python.cp != 'cp38' }} + run: python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* + CIBW_ARCHS: ${{ matrix.buildplat.arch }} + CIBW_ENVIRONMENT:
"MACOSX_DEPLOYMENT_TARGET=${{ matrix.buildplat.benv }}" + CIBW_BEFORE_ALL_MACOS: > + brew tap fbkarsdorp/homebrew-lamachine && + brew install frog + + - uses: actions/upload-artifact@v4 + if: ${{ ! (runner.os == 'macOS' && matrix.python.cp == 'cp38') && ! (matrix.buildplat.sys == 'musllinux' && matrix.python.cp == 'cp314') }} with: + name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} path: ./wheelhouse/*.whl diff --git a/build-deps.sh b/build-deps.sh index 04fc8e4..0173d14 100755 --- a/build-deps.sh +++ b/build-deps.sh @@ -7,6 +7,8 @@ set -e . /etc/os-release +echo "OS: $ID">&2 +echo "VERSION: $VERSION_ID">&2 get_latest_version() { #Finds the latest git tag or falls back to returning the git default branch (usually master or main) @@ -27,24 +29,27 @@ if [ "$ID" = "almalinux" ] || [ "$ID" = "centos" ] || [ "$ID" = "rhel" ]; then #needed for manylinux_2_28 container which ships custom autoconf, possibly others too? export ACLOCAL_PATH=/usr/share/aclocal fi - if [ "$VERSION_ID" = "7" ]; then - yum install -y libexttextcat-devel - if [ -d /opt/rh/devtoolset-10/root/usr/lib ]; then - #we are running in the manylinux2014 image - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/opt/rh/devtoolset-10/root/usr/lib - #libxml2 is out of date, compile and install a new one - yum install -y xz - wget https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.tar.xz - unxz libxml2-2.9.14.tar.xz - tar -xf libxml2-2.9.14.tar - cd libxml2-2.9.14 && ./configure --prefix=$PREFIX --without-python && make && make install - cd .. - fi - elif [ "$VERSION_ID" = "8" ]; then - #they forgot to package libexttextcat-devel?
grab one manually: - wget https://github.com/proycon/LaMachine/raw/master/deps/centos8/libexttextcat-devel-3.4.5-2.el8.x86_64.rpm - yum install -y libexttextcat-devel-3.4.5-2.el8.x86_64.rpm - fi + case $VERSION_ID in + 7*) + yum install -y libexttextcat-devel + if [ -d /opt/rh/devtoolset-10/root/usr/lib ]; then + #we are running in the manylinux2014 image + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/opt/rh/devtoolset-10/root/usr/lib + #libxml2 is out of date, compile and install a new one + yum install -y xz + wget https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.tar.xz + unxz libxml2-2.9.14.tar.xz + tar -xf libxml2-2.9.14.tar + cd libxml2-2.9.14 && ./configure --prefix=$PREFIX --without-python && make && make install + cd .. + fi + ;; + 8*) + #they forgot to package libexttextcat-devel? grab one manually: + wget https://github.com/proycon/LaMachine/raw/master/deps/centos8/libexttextcat-devel-3.4.5-2.el8.x86_64.rpm + yum install -y libexttextcat-devel-3.4.5-2.el8.x86_64.rpm + ;; + esac fi PWD="$(pwd)" @@ -77,5 +82,4 @@ for PACKAGE in LanguageMachines/ticcutils LanguageMachines/libfolia LanguageMach done cd $PWD [ -n "$BUILDDIR" ] && rm -Rf "$BUILDDIR" - echo "Dependencies installed" >&2 diff --git a/codemeta-harvest.json b/codemeta-harvest.json index a770f0e..767e19e 100644 --- a/codemeta-harvest.json +++ b/codemeta-harvest.json @@ -15,5 +15,5 @@ }, "contIntegration": "https://github.com/proycon/python-frog/actions/workflows/python-frog.yml", "applicationCategory": [ "https://vocabs.dariah.eu/tadirah/annotating", "https://vocabs.dariah.eu/tadirah/tagging", "https://vocabs.dariah.eu/tadirah/namedEntityRecognition", "https://vocabs.dariah.eu/tadirah/posTagging", "https://vocabs.dariah.eu/tadirah/segmenting", "https://vocabs.dariah.eu/tadirah/treeTagging", "https://vocabs.dariah.eu/tadirah/contextualizing" , "https://w3id.org/nwo-research-fields#Linguistics", "https://w3id.org/nwo-research-fields#TextualAndContentAnalysis" ], -
"developmentStatus": [ "https://www.repostatus.org/#active", "https://w3id.org/research-technology-readiness-levels#Level9Proven" ], + "developmentStatus": [ "https://www.repostatus.org/#active", "https://w3id.org/research-technology-readiness-levels#Level9Proven" ] } diff --git a/frog_wrapper.pyx b/frog_wrapper.pyx index 167d2f4..8da1948 100644 --- a/frog_wrapper.pyx +++ b/frog_wrapper.pyx @@ -20,7 +20,7 @@ import sys cimport libfolia_classes cimport frog_classes -FROGDATAVERSION = "0.21" +FROGDATAVERSION = "0.22" UCTODATAVERSION = "0.9.1" try: @@ -122,9 +122,15 @@ cdef class FrogOptions: elif key.lower() in ('debug','debugflag'): if value: self.capi.insert(b"d", b"1", False) elif key.lower() in ('docid','id'): - self.capi.insert(b"id", value) + if isinstance(value, bytes): + self.capi.insert(b"id", value) + else: + self.capi.insert(b"id", value.encode('utf-8')) elif key.lower() in ('numthreads','threads'): - self.capi.insert(b"threads",value) + if isinstance(value, bytes): + self.capi.insert(b"threads",value) + else: + self.capi.insert(b"threads",str(value).encode('utf-8')) else: if key == 'x': self.shadow['xmlin'] = True diff --git a/setup.py b/setup.py index 8b0b8f8..feb491c 100755 --- a/setup.py +++ b/setup.py @@ -4,13 +4,16 @@ from Cython.Build import cythonize import platform import os +import sys includedirs = [] libdirs = [] +print(f"system={platform.system()} machine={platform.machine()}", file=sys.stderr) if platform.system() == "Darwin": #we are running on Mac OS X (with homebrew hopefully), stuff is in specific locations: if platform.machine().lower() == "arm64": + print("(macos arm64 detected)", file=sys.stderr) libdirs.append("/opt/homebrew/lib") includedirs.append("/opt/homebrew/include") libdirs.append("/opt/homebrew/icu4c/lib") @@ -18,6 +21,10 @@ libdirs.append("/opt/homebrew/libxml2/lib") includedirs.append("/opt/homebrew/libxml2/include") includedirs.append("/opt/homebrew/libxml2/include/libxml2") + libdirs.append("/opt/homebrew/opt/icu4c/lib") +
includedirs.append("/opt/homebrew/opt/icu4c/include") + libdirs.append("/opt/homebrew/opt/libxml2/lib") + includedirs.append("/opt/homebrew/opt/libxml2/include") else: #we are running on Mac OS X with homebrew, stuff is in specific locations: libdirs.append("/usr/local/opt/icu4c/lib") @@ -42,6 +49,8 @@ else: extra_options = ['-D U_USING_ICU_NAMESPACE=1'] +print(f"include_dirs={' '.join(includedirs)} library_dirs={' '.join(libdirs)} extra_options={' '.join(extra_options)}", file=sys.stderr) + extensions = cythonize([ Extension("frog", [ "frog_wrapper.pyx"], @@ -49,7 +58,7 @@ include_dirs=includedirs, library_dirs=libdirs, libraries=['frog','ucto','folia'], - extra_compile_args=['--std=c++0x'] + extra_options) + extra_compile_args=['--std=c++17'] + extra_options) ], compiler_directives={"language_level": "3"} ) @@ -57,11 +66,11 @@ setup( name = 'python-frog', - version = '0.6.7', #also ensure UCTODATAVERSION and FROGDATAVERSION are good in frog_wrapper.pyx + version = '0.6.12', #also ensure UCTODATAVERSION and FROGDATAVERSION are good in frog_wrapper.pyx author = "Maarten van Gompel", author_email = "proycon@anaproy.nl", description = ("Python binding to Frog, an NLP suite for Dutch doing part-of-speech tagging, lemmatisation, morphological analysis, named-entity recognition, shallow parsing, and dependency parsing."), - license = "GPLv3", + license = "GPL-3.0-only", keywords = "nlp computational_linguistics dutch pos lemmatizer", url = "https://github.com/proycon/python-frog", ext_modules = extensions, @@ -77,6 +86,5 @@ "Operating System :: POSIX", "Intended Audience :: Developers", "Intended Audience :: Science/Research", - "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", ], )