diff --git a/.circleci/config.yml b/.circleci/config.yml index 988f321d..9530d5c7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,13 +15,16 @@ orbs: # See: https://circleci.com/docs/2.0/configuration-reference/#jobs jobs: build-and-test: # This is the name of the job, feel free to change it to better match what you're trying to do! + parameters: + python-version: + type: string # These next lines defines a Docker executors: https://circleci.com/docs/2.0/executor-types/ # You can specify an image from Dockerhub or use one of the convenience images from CircleCI's Developer Hub # A list of available CircleCI Docker convenience images are available here: https://circleci.com/developer/images/image/cimg/python # The executor is the environment in which the steps below will be executed - below will use a python 3.10.2 container # Change the version below to your required version of python docker: - - image: cimg/python:3.9 + - image: cimg/python:<< parameters.python-version >> # Checkout the code as the first step. This is a dedicated CircleCI step. # The python orb's install-packages step will install the dependencies from a Pipfile via Pipenv by default. # Here we're making sure we use just use the system-wide pip. By default it uses the project root's requirements.txt. @@ -31,15 +34,12 @@ jobs: - checkout - python/install-packages: pkg-manager: pip - # app-dir: ~/project/package-directory/ # If you're requirements.txt isn't in the root directory. - # pip-dependency-file: test-requirements.txt # if you have a different name for your requirements file, maybe one that combines your runtime and test requirements. 
- run: name: Install Udapi command: pip install ".[test]" - run: mkdir -p test-results - run: name: Run pytest tests - # This assumes pytest is installed via the install-package step above command: pytest --junitxml=test-results/junit.xml -o junit_family=legacy - store_test_results: path: test-results @@ -54,7 +54,9 @@ jobs: # Invoke jobs via workflows # See: https://circleci.com/docs/2.0/configuration-reference/#workflows workflows: - sample: # This is the name of the workflow, feel free to change it to better match your workflow. - # Inside the workflow, you define the jobs you want to run. + test-matrix: jobs: - - build-and-test + - build-and-test: + matrix: + parameters: + python-version: ["3.9", "3.11", "3.13"] diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 5b0975b4..0285eddb 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -68,23 +68,3 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: dist/ - - testpypi-publish: - runs-on: ubuntu-latest - needs: - - release-build - permissions: - id-token: write - environment: - name: testpypi - url: https://test.pypi.org/p/udapi - steps: - - name: Retrieve release distributions - uses: actions/download-artifact@v4 - with: - name: release-dists - path: dist/ - - name: Publish release distributions to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - packages-dir: dist/ diff --git a/CHANGES.txt b/CHANGES.txt index 3cc3d6db..98e26605 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,9 @@ Udapi Change Log ---------------- See https://github.com/udapi/udapi-python/commits/master for details. 
+0.5.1 2025-11-05 + - make udapy compatible with Python 3.13 + 0.5.0 2025-10-18 - added mwt.feats - added root.prev_tree and root.next_tree diff --git a/setup.cfg b/setup.cfg index e089eac2..fdbae292 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = udapi -version = 0.5.0 +version = 0.5.1 author = Martin Popel author_email = popel@ufal.mff.cuni.cz description = Python framework for processing Universal Dependencies data diff --git a/udapi/block/ud/addmwt.py b/udapi/block/ud/addmwt.py index 996f4dc9..e7eb3989 100644 --- a/udapi/block/ud/addmwt.py +++ b/udapi/block/ud/addmwt.py @@ -86,7 +86,7 @@ def process_node(self, node): mwt_words[mwt_words.index(node):mwt_words.index(node)+1] = nodes nodes = mwt_words - mwt = node.root.create_multiword_token(nodes, mwt_form, mwt_misc) + mwt = node.root.create_multiword_token(words=nodes, form=mwt_form, misc=mwt_misc) self.postprocess_mwt(mwt) def multiword_analysis(self, node): diff --git a/udapi/block/ud/ar/fixedeprels.py b/udapi/block/ud/ar/fixedeprels.py index db0dc600..a4b359ff 100644 --- a/udapi/block/ud/ar/fixedeprels.py +++ b/udapi/block/ud/ar/fixedeprels.py @@ -24,6 +24,36 @@ class FixEdeprels(Block): 'فَ': [] } + # Reduction and normalization of prepositions and conjunctions, including + # the derived and compound ones. The Latin transliterations are not really + # needed in the process. We include them here as documentation, but also + # to help the poor editor with rendering the lines. Ideally, each line + # should have left-to-right text at both the beginning and end. 
+ substitution = [ + {'target': ('min:gen', 'مِن:gen'), + 'sources': + [('ibtida min', 'اِبتِدَاء_مِن')] + }, + {'target': ('ʾiṯra:gen', 'إِثرَ:gen'), # ʾiṯra = right after + 'sources': + [('ʾiṯra', 'إِثرَ')] + }, + {'target': ('ʾaṯnāʾa:gen', 'أَثنَاءَ:gen'), # ʾaṯnāʾa = during + 'sources': + [('ʾaṯnāʾa', 'أَثنَاءَ')] + }, + {'target': ('ʾiḏ', 'إِذ'), # ʾiḏ = because + 'sources': + [('ʾiḏ', 'إِذ'), + ('ʾiḏ ʾanna', 'إِذ_أَنَّ')] + }, + {'target': ('ʾiḏā', 'إِذَا'), # ʾiḏā = if + 'sources': + [('ʾiḏā', 'إِذَا'), + ('ʾiḏā', 'إِذًا')] + }, + ] + # Secondary prepositions sometimes have the lemma of the original part of # speech. We want the grammaticalized form instead. List even those that # will have the same lexical form, as we also want to check the morphological @@ -218,6 +248,7 @@ class FixEdeprels(Block): 'حَوَالَى_مِن': 'مِن:gen', # hawala min = from around X 'حَولَ': 'حَولَ:gen', # ḥawla = about 'حولما_إِذَا': 'إِذَا', + 'حَولَ_مَا_إِذَا': 'إِذَا', 'حِيَالَ': 'حِيَالَ:gen', # ḥiyāla = concerning 'حَيثُ': 'حَيثُ', # remove morphological case; ḥayṯu = where (SCONJ, not ADV) 'حِينَمَا': 'فِي_حِينِ', # during @@ -480,6 +511,7 @@ class FixEdeprels(Block): 'ما_دَام': 'مِمَّا', 'مادامت': 'مِمَّا', 'مَالَم': 'مَالَم', # mālam = unless + 'مَا_إِذَا': 'إِذَا', 'مِثلَ': 'مِثلَ', # remove morphological case; miṯla = like 'مِثلَمَا': 'مِثلَ', # miṯla = like 'مَعَ': 'مَعَ:gen', # maʿa = with diff --git a/udapi/block/ud/id/fixgsd.py b/udapi/block/ud/id/fixgsd.py index d328212d..4ea23d06 100644 --- a/udapi/block/ud/id/fixgsd.py +++ b/udapi/block/ud/id/fixgsd.py @@ -242,7 +242,7 @@ def merge_reduplication(self, node): hyph.remove() node.remove() first.misc['SpaceAfter'] = '' - mwt = root.create_multiword_token([first, second], first.form + second.form, mwtmisc) + mwt = root.create_multiword_token([first, second], form=first.form + second.form, misc=mwtmisc) else: first.form = first.form + '-' + node.form if node.no_space_after: @@ -288,7 +288,7 @@ def merge_reduplication(self, 
node): prefix.remove() hyph.remove() stem.misc['SpaceAfter'] = '' - mwt = root.create_multiword_token([stem, second], stem.form + second.form, mwtmisc) + mwt = root.create_multiword_token([stem, second], form=stem.form + second.form, misc=mwtmisc) else: stem.form = prefix.form + '-' + stem.form prefix.remove() @@ -345,7 +345,7 @@ def fix_satu_satunya(self, node): if mwt: mwtmisc = mwt.misc.copy() mwt.remove() - mwt = root.create_multiword_token([satu0, nya], satu0.form + nya.form, mwtmisc) + mwt = root.create_multiword_token([satu0, nya], form=satu0.form + nya.form, misc=mwtmisc) satu0.misc['SpaceAfter'] = '' root.text = root.compute_text() if node.multiword_token and node.no_space_after: diff --git a/udapi/block/ud/joinasmwt.py b/udapi/block/ud/joinasmwt.py index 8cedec68..be93bd3c 100644 --- a/udapi/block/ud/joinasmwt.py +++ b/udapi/block/ud/joinasmwt.py @@ -34,8 +34,8 @@ def should_join(self, node, next_node): def create_mwt(self, mwt_nodes): mwt_form = ''.join([n.form for n in mwt_nodes]) - mwt = node.root.create_multiword_token(mwt_nodes, mwt_form) - if node.misc['SpaceAfter'] == 'No': + mwt = mwt_nodes[0].root.create_multiword_token(words=mwt_nodes, form=mwt_form) + if mwt_nodes[-1].misc['SpaceAfter'] == 'No': mwt.misc['SpaceAfter'] = 'No' for mwt_node in mwt_nodes: del mwt_node.misc['SpaceAfter'] diff --git a/udapi/block/ud/sk/fixedeprels.py b/udapi/block/ud/sk/fixedeprels.py index 7208b6ef..7de53881 100644 --- a/udapi/block/ud/sk/fixedeprels.py +++ b/udapi/block/ud/sk/fixedeprels.py @@ -1,6 +1,5 @@ """Block to fix case-enhanced dependency relations in Slovak.""" from udapi.core.block import Block -import logging import re class FixEdeprels(Block): @@ -14,9 +13,11 @@ class FixEdeprels(Block): 'a_hoci': 'hoci', 'ako': 'ako', # remove morphological case 'ako_na': 'ako', + 'ako_z': 'ako', 'akoby_z': 'z:gen', 'akže': 'ak', 'ani_keby': 'keby', + 'ani_keď': 'keď', 'až_keď': 'keď', 'do': 'do:gen', 'k': 'k:dat', diff --git a/udapi/core/run.py b/udapi/core/run.py 
index 8ac63e1e..6453641c 100644 --- a/udapi/core/run.py +++ b/udapi/core/run.py @@ -99,9 +99,9 @@ def _import_blocks(block_names, block_args): :param block_args: A list of block arguments to be passed to block constructor. :return: A list of initialized objects. :rtype: list - """ blocks = [] + namespace = {} # Create a namespace dictionary to store imported classes for (block_id, block_name) in enumerate(block_names): # Importing module dynamically. @@ -115,7 +115,7 @@ def _import_blocks(block_names, block_args): try: command = "from " + module + " import " + class_name + " as b" + str(block_id) logging.debug("Trying to run command: %s", command) - exec(command) # pylint: disable=exec-used + exec(command, namespace) # Pass namespace as globals except ModuleNotFoundError as err: package_name = ".".join(module.split(".")[:-1]) package_blocks = _blocks_in_a_package(package_name) @@ -130,10 +130,11 @@ def _import_blocks(block_names, block_args): raise # Run the imported module. - kwargs = block_args[block_id] # pylint: disable=unused-variable + kwargs = block_args[block_id] + namespace['kwargs'] = kwargs # Add kwargs to the namespace command = "b%s(**kwargs)" % block_id logging.debug("Trying to evaluate this: %s", command) - new_block_instance = eval(command) # pylint: disable=eval-used + new_block_instance = eval(command, namespace) # Pass namespace as globals args = ' '.join(f"{k}={v}" for k,v in kwargs.items()) blocks.append((block_name, new_block_instance, args))