diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36959b0..37e11a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,6 @@ name: CI -# Run on push only for dev/sandbox +# Run on push only for ci/staging # Otherwise it may trigger concurrently `push & pull_request` on PRs. on: push: @@ -14,12 +14,18 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] + python: + - "3.10" + - "3.11" + - "3.12" + - "3.13" + - "3.14" + - "pypy3.11" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index c12b80a..55119d6 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -14,12 +14,18 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] + python: + - "3.10" + - "3.11" + - "3.12" + - "3.13" + - "3.14" + - "pypy3.11" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index eb66dc5..ba50793 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,42 +1,33 @@ name: Main -# Run on push only for dev/sandbox -# Otherwise it may trigger concurrently `push & pull_request` on PRs. 
on: + pull_request: null push: branches: - master jobs: build: - name: Python ${{ matrix.python }} + name: Linux runs-on: ubuntu-latest - strategy: - matrix: - python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] - steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: - python-version: ${{ matrix.python }} + python-version: | + pypy3.11 + 3.10 + 3.11 + 3.12 + 3.13 + 3.14 - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install -e . - pip install coveralls --upgrade - - name: Run flake8 - run: | - pip install flake8 --upgrade - flake8 --exclude=build --ignore=E501,F403,F401,E241,E225,E128 . - - name: Run pycodestyle - run: | - pip install pycodestyle --upgrade - pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py + python -m pip install coveralls tox tox-uv - name: Run test run: | - coverage run --source=slugify test.py + tox - name: Coveralls run: coveralls --service=github env: diff --git a/CHANGELOG.md b/CHANGELOG.md index 395e538..537460e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,25 @@ -## Work in progress +## Unreleased -- Added typing to API and expose `py.typed`. -- Formally support 3.12 +- Support Python 3.14. +- Drop support for Python 3.9 and lower. +- Use tox for local test runs and in CI. +- Test the project against both `unidecode` and `text_unidecode`. +- Fix type annotation issues identified by mypy. +- Run CI against pull requests. +- Fix package build warnings. + +## 8.0.4 + +- Properly handle uppercase special characters (@mib1185 - thx) + +## 8.0.3 + +- Drop compatibility for unsupported Python Version (@Viicos - thx) +- Fix pattern types. + +## 8.0.2 + +- Normalize text before converting to unicode. 
(@chuckyblack - thx) ## 8.0.1 diff --git a/README.md b/README.md index 0dfbd92..e5123f1 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,11 @@ However, there is an alternative decoding package called [Unidecode](https://git # How to install - easy_install python-slugify |OR| easy_install python-slugify[unidecode] - -- OR -- - pip install python-slugify |OR| pip install python-slugify[unidecode] + pip install python-slugify + + # OR + + pip install python-slugify[unidecode] # Options @@ -56,7 +58,7 @@ def slugify( :param hexadecimal (bool): converts html hexadecimal to unicode (&#x17D; -> Ž -> z) :param max_length (int): output string length :param word_boundary (bool): truncates to end of full words (length may be shorter than max_length) - :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order + :param save_order (bool): when set, does not include shorter subsequent words even if they fit :param separator (str): separator between words :param stopwords (iterable): words to discount :param regex_pattern (str): regex pattern for disallowed characters @@ -108,9 +110,13 @@ txt = 'jaja---lol-méméméoo--a' r = slugify(txt, max_length=20, word_boundary=True, separator=".") self.assertEqual(r, "jaja.lol.mememeoo.a") -txt = 'one two three four five' -r = slugify(txt, max_length=13, word_boundary=True, save_order=True) -self.assertEqual(r, "one-two-three") +txt = 'one two three four' +r = slugify(txt, max_length=12, word_boundary=True, save_order=False) +self.assertEqual(r, "one-two-four") + +txt = 'one two three four' +r = slugify(txt, max_length=12, word_boundary=True, save_order=True) +self.assertEqual(r, "one-two") txt = 'the quick brown fox jumps over the lazy dog' r = slugify(txt, stopwords=['the']) @@ -205,7 +211,7 @@ X.Y.Z Version `MINOR` version -- when you add functionality in a backwards-compatible manner, and `PATCH` version -- when you make backwards-compatible bug fixes.
-[status-image]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml/badge.svg +[status-image]: https://github.com/un33k/python-slugify/actions/workflows/main.yml/badge.svg -[status-link]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml +[status-link]: https://github.com/un33k/python-slugify/actions/workflows/main.yml [version-image]: https://img.shields.io/pypi/v/python-slugify.svg [version-link]: https://pypi.python.org/pypi/python-slugify diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1c02bfe --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["setuptools>=61.2"] +build-backend = "setuptools.build_meta" + + +# coverage +# -------- + +[tool.coverage.run] +relative_files = true +parallel = true +branch = true +source = [ + "slugify", + "test", +] + +[tool.coverage.paths] +source = [ + "src", + "*/site-packages", +] + +[tool.coverage.report] +skip_covered = true +fail_under = 97 + + +# mypy +# ---- + +[tool.mypy] +packages = "slugify" +strict = true +sqlite_cache = true + + +# pytest +# ------ + +[tool.pytest.ini_options] +testpaths = ["test.py"] +addopts = "--color=yes" +filterwarnings = [ + "error", +] diff --git a/setup.py b/setup.py index 9661638..32f44dd 100755 --- a/setup.py +++ b/setup.py @@ -3,24 +3,22 @@ import os import sys -from codecs import open from shutil import rmtree from setuptools import setup package = 'slugify' -python_requires = ">=3.7" +python_requires = ">=3.10" here = os.path.abspath(os.path.dirname(__file__)) install_requires = ['text-unidecode>=1.3'] extras_requires = {'unidecode': ['Unidecode>=1.1.1']} -test_requires = [] about = {} -with open(os.path.join(here, package, '__version__.py'), 'r', 'utf-8') as f: +with open(os.path.join(here, package, '__version__.py'), 'r', encoding='utf-8') as f: exec(f.read(), about) -with open('README.md', 'r', 'utf-8') as f: +with open('README.md', 'r', encoding='utf-8') as f: readme = f.read() @@ -66,7 +64,6 @@ def status(s): include_package_data=True, python_requires=python_requires,
install_requires=install_requires, - tests_require=test_requires, extras_require=extras_requires, zip_safe=False, cmdclass={}, @@ -75,15 +72,13 @@ def status(s): 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Natural Language :: English', - 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', ], entry_points={'console_scripts': ['slugify=slugify.__main__:main']}, ) diff --git a/slugify/__main__.py b/slugify/__main__.py index d31a6bb..4e6b3d9 100644 --- a/slugify/__main__.py +++ b/slugify/__main__.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, annotations, print_function +from __future__ import annotations import argparse import sys @@ -47,7 +47,7 @@ def parse_args(argv: list[str]) -> argparse.Namespace: parser.error("Input strings and --stdin cannot work together") if args.replacements: - def split_check(repl): + def split_check(repl: str) -> list[str]: SEP = '->' if SEP not in repl: parser.error("Replacements must be of the form: ORIGINAL{SEP}REPLACED".format(SEP=SEP)) @@ -82,7 +82,7 @@ def slugify_params(args: argparse.Namespace) -> dict[str, Any]: ) -def main(argv: list[str] | None = None): # pragma: no cover +def main(argv: list[str] | None = None) -> None: """ Run this program """ if argv is None: argv = sys.argv @@ -94,5 +94,5 @@ def main(argv: list[str] | None = None): # pragma: no cover sys.exit(-1) -if __name__ == '__main__': # pragma: no cover +if __name__ == '__main__': main() diff --git a/slugify/__version__.py b/slugify/__version__.py index a558d9b..a9cd778 100644 --- a/slugify/__version__.py +++ 
b/slugify/__version__.py @@ -3,6 +3,6 @@ __author_email__ = 'info@neekware.com' __description__ = 'A Python slugify application that also handles Unicode' __url__ = 'https://github.com/un33k/python-slugify' -__license__ = 'MIT' +__license__ = 'SPDX-License-Identifier: MIT' __copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.' -__version__ = '8.0.1' +__version__ = '8.0.4' diff --git a/slugify/slugify.py b/slugify/slugify.py index 21bdaeb..9b5f27f 100644 --- a/slugify/slugify.py +++ b/slugify/slugify.py @@ -1,7 +1,6 @@ from __future__ import annotations import re -import sys import unicodedata from collections.abc import Iterable from html.entities import name2codepoint @@ -9,7 +8,7 @@ try: import unidecode except ImportError: - import text_unidecode as unidecode + import text_unidecode as unidecode # type: ignore[import-untyped, no-redef] __all__ = ['slugify', 'smart_truncate'] @@ -68,7 +67,7 @@ def smart_truncate( else: if save_order: break - if not truncated: # pragma: no cover + if not truncated: truncated = string[:max_length] return truncated.strip(separator) @@ -83,7 +82,7 @@ def slugify( separator: str = DEFAULT_SEPARATOR, save_order: bool = False, stopwords: Iterable[str] = (), - regex_pattern: str | None = None, + regex_pattern: re.Pattern[str] | str | None = None, lowercase: bool = True, replacements: Iterable[Iterable[str]] = (), allow_unicode: bool = False, @@ -96,7 +95,7 @@ def slugify( :param hexadecimal (bool): converts html hexadecimal to unicode :param max_length (int): output string length :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length - :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order + :param save_order (bool): when set, does not include shorter subsequent words even if they fit :param separator (str): separator between words :param stopwords (iterable): words to discount :param regex_pattern (str): regex pattern for disallowed 
characters @@ -118,8 +117,11 @@ def slugify( # replace quotes with dashes - pre-process text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text) - # decode unicode - if not allow_unicode: + # normalize text, convert to unicode if required + if allow_unicode: + text = unicodedata.normalize('NFKC', text) + else: + text = unicodedata.normalize('NFKD', text) text = unidecode.unidecode(text) # ensure text is still in unicode @@ -144,15 +146,12 @@ def slugify( except Exception: pass - # translate + # re normalize text if allow_unicode: text = unicodedata.normalize('NFKC', text) else: text = unicodedata.normalize('NFKD', text) - if sys.version_info < (3,): - text = text.encode('ascii', 'ignore') - # make the text lowercase (optional) if lowercase: text = text.lower() diff --git a/slugify/special.py b/slugify/special.py index 0b602cf..918cb2a 100644 --- a/slugify/special.py +++ b/slugify/special.py @@ -9,7 +9,6 @@ def add_uppercase_char(char_list: list[tuple[str, str]]) -> list[tuple[str, str] upper_dict = char.upper(), xlate.capitalize() if upper_dict not in char_list and char != upper_dict[0]: char_list.insert(0, upper_dict) - return char_list return char_list diff --git a/tea.yaml b/tea.yaml new file mode 100644 index 0000000..dda3df9 --- /dev/null +++ b/tea.yaml @@ -0,0 +1,7 @@ +# https://tea.xyz/what-is-this-file +--- +version: 1.0.0 +codeOwners: + - '0xaC8Bb28685BD43FD784DC902E132829c6C6DafA2' +quorum: 1 + diff --git a/test.py b/test.py index 931f38f..fcec4b6 100644 --- a/test.py +++ b/test.py @@ -4,6 +4,7 @@ import unittest from contextlib import contextmanager +from slugify import PRE_TRANSLATIONS from slugify import slugify from slugify import smart_truncate from slugify.__main__ import slugify_params, parse_args @@ -36,6 +37,10 @@ def test_phonetic_conversion_of_eastern_scripts(self): self.assertEqual(r, "ying-shi-ma") def test_accented_text(self): + txt = '𝐚́́𝕒́àáâäãąā' + r = slugify(txt) + self.assertEqual(r, "aaaaaaaaa") + txt = 'C\'est déjà l\'été.' 
r = slugify(txt) self.assertEqual(r, "c-est-deja-l-ete") @@ -232,9 +237,11 @@ def test_replacements_german_umlaut_custom(self): r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']]) self.assertEqual(r, "ueber-ueber-german-umlaut") + def test_pre_translation(self): + self.assertEqual(PRE_TRANSLATIONS, [('Ю', 'U'), ('Щ', 'Sch'), ('У', 'Y'), ('Х', 'H'), ('Я', 'Ya'), ('Ё', 'E'), ('ё', 'e'), ('я', 'ya'), ('х', 'h'), ('у', 'y'), ('щ', 'sch'), ('ю', 'u'), ('Ü', 'Ue'), ('Ö', 'Oe'), ('Ä', 'Ae'), ('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('Ϋ́', 'Y'), ('Ϋ', 'Y'), ('Ύ', 'Y'), ('Υ', 'Y'), ('Χ', 'Ch'), ('χ', 'ch'), ('Ξ', 'X'), ('ϒ', 'Y'), ('υ', 'y'), ('ύ', 'y'), ('ϋ', 'y'), ('ΰ', 'y')]) -class TestSlugifyUnicode(unittest.TestCase): +class TestSlugifyUnicode(unittest.TestCase): def test_extraneous_seperators(self): txt = "This is a test ---" @@ -646,5 +653,5 @@ def test_multivalued_options_with_text(self): self.assertEqual(params['stopwords'], ['the', 'in', 'a', 'hurry']) -if __name__ == '__main__': +if __name__ == '__main__': # pragma: nocover unittest.main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..0c16f5e --- /dev/null +++ b/tox.ini @@ -0,0 +1,69 @@ +[tox] +env_list = + coverage-erase + py{3.10, 3.11, 3.12, 3.13, 3.14}-{unidecode, text_unidecode} + pypy{3.11}-{unidecode, text_unidecode} + coverage-report + coverage-html + mypy + pycodestyle + +[testenv] +depends = + py{3.10, 3.11, 3.12, 3.13, 3.14}-{unidecode, text_unidecode}: coverage-erase + pypy{3.11}-{unidecode, text_unidecode}: coverage-erase +deps = + coverage[toml] + pytest + unidecode: pip + unidecode: unidecode +commands_pre: + # If testing unidecode, ensure text_unidecode is unavailable. 
+ unidecode: pip uninstall --yes text_unidecode +commands = + coverage run -m pytest test.py + +[testenv:coverage_base] +deps = + coverage[toml] + +[testenv:coverage-erase] +base = coverage_base +commands = + coverage erase + +[testenv:coverage-report] +base = coverage_base +depends = + py{3.10, 3.11, 3.12, 3.13, 3.14}-{unidecode, text_unidecode} + pypy{3.11}-{unidecode, text_unidecode} +commands_pre = + - coverage combine +commands = + coverage report + +[testenv:coverage-html] +base = coverage_base +depends = + coverage-report +commands = + coverage html --fail-under=0 + +[testenv:mypy] +deps = + mypy + unidecode +commands = + mypy + +[testenv:pycodestyle] +deps = + pycodestyle +commands = + pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py + +[testenv:flake8] +deps = + flake8 +commands = + flake8 --ignore=E501,F403,F401,E241,E225,E128 slugify/ setup.py test.py