diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 70e56e09..ede12d90 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,39 +1,96 @@ -name: Publish +name: Publish Python distribution 📦 to PyPI + on: push: tags: - "v*" + jobs: build: + name: Build distribution 📦 runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 with: - python-version: 3.11 + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ - - run: | - pip install poetry - poetry build + publish-to-pypi: + name: >- + Publish Python distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/ftfy + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing - - uses: actions/upload-artifact@v3 + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 with: - path: ./dist - - pypi-publish: - needs: ["build"] - environment: "publish" + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 - name: upload release to PyPI + github-release: + name: >- + Sign the Python distribution 📦 with Sigstore + and upload them to GitHub Release + needs: + - publish-to-pypi runs-on: ubuntu-latest + permissions: - # IMPORTANT: this permission is mandatory for trusted publishing - id-token: write - steps: - - uses: actions/download-artifact@v3 + contents: write # IMPORTANT: mandatory for making GitHub Releases + 
id-token: write # IMPORTANT: mandatory for sigstore - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v3.0.0 with: - packages_dir: artifact/ + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "" + - name: Upload artifact signatures to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + # Upload to GitHub Release using the `gh` CLI. + # `dist/` contains the built packages, and the + # sigstore-produced signatures and certificates. + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b6d16ee9..6fe1099d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,15 +10,14 @@ build: os: ubuntu-24.04 tools: python: "3.11" - jobs: - post_create_environment: - # Install poetry - # https://python-poetry.org/docs/#installing-manually - - python -m pip install poetry - post_install: - # Install only dependencies - # https://python-poetry.org/docs/managing-dependencies/#dependency-groups - - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install + commands: + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + - uv venv + - uv sync + - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs $READTHEDOCS_OUTPUT/html + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 96d4ea8c..28f6c207 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,19 @@ +## Version 6.3.1 (October 25, 2024) + +- Fixed 
`license` metadata field in pyproject.toml. +- Removed extraneous files from the `hatchling` sdist output. + +## Version 6.3.0 (October 8, 2024) + +- Switched packaging from poetry to uv. +- Uses modern Python packaging exclusively (no setup.py). +- Added support for mojibake in Windows-1257 (Baltic). +- Detects mojibake for "Ü" in an uppercase word, such as "ZURÜCK". +- Expanded a heuristic that notices improbable punctuation. +- Fixed a false positive involving two concatenated strings, one of which began with the § sign. +- Rewrote `chardata.py` to be more human-readable and debuggable, instead of being full of + keysmash-like character sets. + ## Version 6.2.3 (August 5, 2024) - Updated PyPI metadata. diff --git a/README.md b/README.md index 00df49c0..a138cfc6 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,7 @@ ``` -The full documentation of ftfy is available at [ftfy.readthedocs.org](https://ftfy.readthedocs.org). The documentation covers a lot more than this README, so here are -some links into it: +The full documentation of ftfy is available at [ftfy.readthedocs.org](https://ftfy.readthedocs.org). 
The documentation covers a lot more than this README, so here are some links into it: - [Fixing problems and getting explanations](https://ftfy.readthedocs.io/en/latest/explain.html) - [Configuring ftfy](https://ftfy.readthedocs.io/en/latest/config.html) @@ -36,8 +35,6 @@ some links into it: — Brennan Young - “I have no idea when I’m gonna need this, but I’m definitely bookmarking it.” — [/u/ocrow](https://reddit.com/u/ocrow) -- “9.2/10” - — [pylint](https://bitbucket.org/logilab/pylint/) ## What it does @@ -83,7 +80,7 @@ The following text could be encoded in Windows-1252 and decoded in UTF-8, and it ## Installing -ftfy is a Python 3 package that can be installed using `pip`: +ftfy is a Python 3 package that can be installed using `pip` or `uv pip`: pip install ftfy @@ -93,14 +90,12 @@ If you use `poetry`, you can use ftfy as a dependency in the usual way (such as ### Local development -ftfy is developed using `poetry`. Its `setup.py` is vestigial and is not the recommended way to install it. - -[Install Poetry](https://python-poetry.org/docs/master/#installing-with-the-official-installer), check out this repository, and run `poetry install` to install ftfy for local development, such as experimenting with the heuristic or running tests. +ftfy is developed using [uv](https://github.com/astral-sh/uv). You can build a virtual environment with its local dependencies by running `uv venv`, and test it with `uv run pytest`. ## Who maintains ftfy? -I'm Robyn Speer, also known as Elia Robyn Lake. You can find me -[on GitHub](https://github.com/rspeer) or [Cohost](https://cohost.org/arborelia). +I'm Robyn Speer, also known as Elia Robyn Lake. You can find my projects +[on GitHub](https://github.com/rspeer) and my posts on [my own blog](https://posts.arborelia.net). ## Citing ftfy @@ -108,8 +103,7 @@ ftfy has been used as a crucial data processing step in major NLP research. It's important to give credit appropriately to everyone whose work you build on in research. 
This includes software, not just high-status contributions such as mathematical models. All I ask when you use ftfy for research is that you cite it. -ftfy has a citable record [on Zenodo](https://zenodo.org/record/2591652). -A citation of ftfy may look like this: +ftfy has a citable record [on Zenodo](https://zenodo.org/record/2591652). A citation of ftfy may look like this: Robyn Speer. (2019). ftfy (Version 5.5). Zenodo. http://doi.org/10.5281/zenodo.2591652 diff --git a/docs/conf.py b/docs/conf.py index bca94892..0aef57bd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # ftfy documentation build configuration file, created by # sphinx-quickstart on Wed Aug 28 03:18:27 2013. @@ -47,9 +46,9 @@ # built documents. # # The short X.Y version. -version = "6.2" +version = "6.3" # The full version, including alpha/beta/rc tags. -release = "6.2.3" +release = "6.3.1" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/encodings.rst b/docs/encodings.rst index 13a892f4..b0513a78 100644 --- a/docs/encodings.rst +++ b/docs/encodings.rst @@ -6,14 +6,15 @@ ftfy can't fix all possible mix-ups. 
Its goal is to cover the most common encodi ftfy can understand text that was decoded as any of these single-byte encodings: - Latin-1 (ISO-8859-1) -- Windows-1252 (cp1252 -- used in Microsoft products) -- Windows-1251 (cp1251 -- the Russian version of cp1252) -- Windows-1250 (cp1250 -- the Eastern European version of cp1252) -- Windows-1253 (cp1253 -- the Greek version of cp1252) -- Windows-1254 (cp1254 -- the Turkish version of cp1252) +- Windows-1250 (cp1250 -- used in Microsoft products in Eastern Europe) +- Windows-1251 (cp1251 -- used in Microsoft products in Russia) +- Windows-1252 (cp1252 -- used in Microsoft products in Western Europe and the Americas) +- Windows-1253 (cp1253 -- used in Microsoft products in Greece) +- Windows-1254 (cp1254 -- used in Microsoft products in Türkiye) +- Windows-1257 (cp1257 -- used in Microsoft products in Baltic countries) - ISO-8859-2 (which is not quite the same as Windows-1250) - MacRoman (used on Mac OS 9 and earlier) -- cp437 (used in MS-DOS and some versions of the Windows command prompt) +- cp437 (it's the "text mode" in your video card firmware) when it was actually intended to be decoded as one of these variable-length encodings: @@ -26,6 +27,8 @@ However, ftfy cannot understand other mixups between single-byte encodings, beca We also can't handle the legacy encodings used for Chinese, Japanese, and Korean, such as ``shift-jis`` and ``gb18030``. See `issue #34`_ for why this is so hard. -.. _`issue #34`: https://github.com/LuminosoInsight/python-ftfy/issues/34 +I tried adding support for cp850, the cp437-workalike that supported European languages, but I couldn't find any real examples that it fixed, and it introduced some false positives. + +.. _`issue #34`: https://github.com/rspeer/python-ftfy/issues/34 Remember that the input to ftfy is Unicode, so it handles actual CJK *text* just fine. It just can't discover that a CJK *encoding* introduced mojibake into the text. 
diff --git a/docs/index.rst b/docs/index.rst index 7f339d29..1ba7ed00 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,12 @@ ftfy: fixes text for you ======================== -*Version 6.2* +*Version 6.3* + +“Assume all external input is the result of (a series of) bugs.” +— `RFC 9225`_: Software Defects Considered Harmful + +.. _`RFC 9225`: https://www.rfc-editor.org/rfc/rfc9225.html **ftfy** fixes Unicode that's broken in various ways. diff --git a/ftfy/__init__.py b/ftfy/__init__.py index 6b97b585..fb666989 100644 --- a/ftfy/__init__.py +++ b/ftfy/__init__.py @@ -10,18 +10,13 @@ import unicodedata import warnings from typing import ( + TYPE_CHECKING, Any, BinaryIO, Callable, - Dict, - Iterator, - List, Literal, NamedTuple, - Optional, TextIO, - Tuple, - Union, cast, ) @@ -29,7 +24,10 @@ from ftfy.badness import is_bad from ftfy.formatting import display_ljust -__version__ = "6.2.3" +if TYPE_CHECKING: + from collections.abc import Iterator + +__version__ = "6.3.1" # Though this function does nothing, it lets linters know that we're using @@ -73,11 +71,11 @@ class ExplainedText(NamedTuple): """ text: str - explanation: Optional[List[ExplanationStep]] + explanation: list[ExplanationStep] | None # Functions that can be applied using `apply_plan`. -FIXERS: Dict[str, Callable] = { # type: ignore[type-arg] +FIXERS: dict[str, Callable] = { # type: ignore[type-arg] "unescape_html": fixes.unescape_html, "remove_terminal_escapes": fixes.remove_terminal_escapes, "restore_byte_a0": fixes.restore_byte_a0, @@ -214,7 +212,7 @@ class TextFixerConfig(NamedTuple): will automatically set `explain` to False. 
""" - unescape_html: Union[str, bool] = "auto" + unescape_html: str | bool = "auto" remove_terminal_escapes: bool = True fix_encoding: bool = True restore_byte_a0: bool = True @@ -227,14 +225,12 @@ class TextFixerConfig(NamedTuple): fix_line_breaks: bool = True fix_surrogates: bool = True remove_control_chars: bool = True - normalization: Optional[Literal["NFC", "NFD", "NFKC", "NFKD"]] = "NFC" + normalization: Literal["NFC", "NFD", "NFKC", "NFKD"] | None = "NFC" max_decode_length: int = 1000000 explain: bool = True -def _config_from_kwargs( - config: TextFixerConfig, kwargs: Dict[str, Any] -) -> TextFixerConfig: +def _config_from_kwargs(config: TextFixerConfig, kwargs: dict[str, Any]) -> TextFixerConfig: """ Handle parameters provided as keyword arguments to ftfy's top-level functions, converting them into a TextFixerConfig. @@ -248,8 +244,7 @@ def _config_from_kwargs( kwargs = kwargs.copy() kwargs["unescape_html"] = kwargs["fix_entities"] del kwargs["fix_entities"] - config = config._replace(**kwargs) - return config + return config._replace(**kwargs) BYTES_ERROR_TEXT = """Hey wait, this isn't Unicode. @@ -276,7 +271,7 @@ def _try_fix( fixer_name: str, text: str, config: TextFixerConfig, - steps: Optional[List[ExplanationStep]], + steps: list[ExplanationStep] | None, ) -> str: """ A helper function used across several 'fixer' steps, deciding whether to @@ -292,7 +287,7 @@ def _try_fix( return text -def fix_text(text: str, config: Optional[TextFixerConfig] = None, **kwargs: Any) -> str: +def fix_text(text: str, config: TextFixerConfig | None = None, **kwargs: Any) -> str: r""" Given Unicode text as input, fix inconsistencies and glitches in it, such as mojibake (text that was decoded in the wrong encoding). 
@@ -367,7 +362,7 @@ def fix_text(text: str, config: Optional[TextFixerConfig] = None, **kwargs: Any) def fix_and_explain( - text: str, config: Optional[TextFixerConfig] = None, **kwargs: Any + text: str, config: TextFixerConfig | None = None, **kwargs: Any ) -> ExplainedText: """ Fix text as a single segment, returning the fixed text and an explanation @@ -386,7 +381,7 @@ def fix_and_explain( config = config._replace(unescape_html=False) if config.explain: - steps: Optional[List[ExplanationStep]] = [] + steps: list[ExplanationStep] | None = [] else: # If explanations aren't desired, `steps` will be None steps = None @@ -427,7 +422,7 @@ def fix_and_explain( def fix_encoding_and_explain( - text: str, config: Optional[TextFixerConfig] = None, **kwargs: Any + text: str, config: TextFixerConfig | None = None, **kwargs: Any ) -> ExplainedText: """ Apply the steps of ftfy that detect mojibake and fix it. Returns the fixed @@ -460,7 +455,7 @@ def fix_encoding_and_explain( # fixing the encoding return ExplainedText(text, []) - plan_so_far: List[ExplanationStep] = [] + plan_so_far: list[ExplanationStep] = [] while True: prevtext = text text, plan = _fix_encoding_one_step_and_explain(text, config) @@ -470,9 +465,7 @@ def fix_encoding_and_explain( return ExplainedText(text, plan_so_far) -def _fix_encoding_one_step_and_explain( - text: str, config: TextFixerConfig -) -> ExplainedText: +def _fix_encoding_one_step_and_explain(text: str, config: TextFixerConfig) -> ExplainedText: """ Perform one step of fixing the encoding of text. 
""" @@ -518,9 +511,7 @@ def _fix_encoding_one_step_and_explain( ): replaced_bytes = fixes.restore_byte_a0(encoded_bytes) if replaced_bytes != encoded_bytes: - transcode_steps.append( - ExplanationStep("transcode", "restore_byte_a0") - ) + transcode_steps.append(ExplanationStep("transcode", "restore_byte_a0")) encoded_bytes = replaced_bytes # Replace sequences where information has been lost @@ -536,7 +527,7 @@ def _fix_encoding_one_step_and_explain( decoding = "utf-8-variants" decode_step = ExplanationStep("decode", decoding) - steps = [encode_step] + transcode_steps + [decode_step] + steps = [encode_step, *transcode_steps, decode_step] fixed = encoded_bytes.decode(decoding) return ExplainedText(fixed, steps) @@ -588,9 +579,7 @@ def _fix_encoding_one_step_and_explain( return ExplainedText(text, []) -def fix_encoding( - text: str, config: Optional[TextFixerConfig] = None, **kwargs: Any -) -> str: +def fix_encoding(text: str, config: TextFixerConfig | None = None, **kwargs: Any) -> str: """ Apply just the encoding-fixing steps of ftfy to this text. Returns the fixed text, discarding the explanation. @@ -611,9 +600,7 @@ def fix_encoding( ftfy = fix_text -def fix_text_segment( - text: str, config: Optional[TextFixerConfig] = None, **kwargs: Any -) -> str: +def fix_text_segment(text: str, config: TextFixerConfig | None = None, **kwargs: Any) -> str: """ Fix text as a single segment, with a consistent sequence of steps that are applied to fix the text. Discard the explanation. 
@@ -627,8 +614,8 @@ def fix_text_segment( def fix_file( input_file: TextIO | BinaryIO, - encoding: Optional[str] = None, - config: Optional[TextFixerConfig] = None, + encoding: str | None = None, + config: TextFixerConfig | None = None, **kwargs: Any, ) -> Iterator[str]: """ @@ -658,7 +645,7 @@ def fix_file( yield fixed_line -def guess_bytes(bstring: bytes) -> Tuple[str, str]: +def guess_bytes(bstring: bytes) -> tuple[str, str]: """ NOTE: Using `guess_bytes` is not the recommended way of using ftfy. ftfy is not designed to be an encoding detector. @@ -684,12 +671,13 @@ def guess_bytes(bstring: bytes) -> Tuple[str, str]: single-byte encoding. """ if isinstance(bstring, str): - raise UnicodeError( + msg = ( "This string was already decoded as Unicode. You should pass " "bytes to guess_bytes, not Unicode." ) + raise UnicodeError(msg) - if bstring.startswith(b"\xfe\xff") or bstring.startswith(b"\xff\xfe"): + if bstring.startswith((b"\xfe\xff", b"\xff\xfe")): return bstring.decode("utf-16"), "utf-16" byteset = set(bstring) @@ -729,7 +717,7 @@ def guess_bytes(bstring: bytes) -> Tuple[str, str]: return bstring.decode("sloppy-windows-1252"), "sloppy-windows-1252" -def apply_plan(text: str, plan: List[Tuple[str, str]]) -> str: +def apply_plan(text: str, plan: list[tuple[str, str]]) -> str: """ Apply a plan for fixing the encoding of text. 
@@ -763,9 +751,11 @@ def apply_plan(text: str, plan: List[Tuple[str, str]]) -> str: if encoding in FIXERS: obj = FIXERS[encoding](obj) else: - raise ValueError("Unknown function to apply: %s" % encoding) + msg = f"Unknown function to apply: {encoding}" + raise ValueError(msg) else: - raise ValueError("Unknown plan step: %s" % operation) + msg = f"Unknown plan step: {operation}" + raise ValueError(msg) return obj diff --git a/ftfy/bad_codecs/__init__.py b/ftfy/bad_codecs/__init__.py index bb71136a..a449a38e 100644 --- a/ftfy/bad_codecs/__init__.py +++ b/ftfy/bad_codecs/__init__.py @@ -32,9 +32,9 @@ import codecs from encodings import normalize_encoding -from typing import Dict, Optional +from typing import Optional -_CACHE: Dict[str, codecs.CodecInfo] = {} +_CACHE: dict[str, codecs.CodecInfo] = {} # Define some aliases for 'utf-8-variants'. All hyphens get turned into # underscores, because of `normalize_encoding`. diff --git a/ftfy/bad_codecs/sloppy.py b/ftfy/bad_codecs/sloppy.py index 6f63b1e8..8c65e4fe 100644 --- a/ftfy/bad_codecs/sloppy.py +++ b/ftfy/bad_codecs/sloppy.py @@ -76,7 +76,6 @@ import codecs from encodings import normalize_encoding -from typing import Optional, Tuple REPLACEMENT_CHAR = "\ufffd" @@ -121,14 +120,10 @@ def make_sloppy_codec(encoding: str) -> codecs.CodecInfo: # `encodings.cp1252` for comparison; this is almost exactly the same, # except I made it follow pep8. 
class Codec(codecs.Codec): - def encode( - self, input: str, errors: Optional[str] = "strict" - ) -> Tuple[bytes, int]: + def encode(self, input: str, errors: str | None = "strict") -> tuple[bytes, int]: return codecs.charmap_encode(input, errors, encoding_table) - def decode( - self, input: bytes, errors: Optional[str] = "strict" - ) -> Tuple[str, int]: + def decode(self, input: bytes, errors: str | None = "strict") -> tuple[str, int]: return codecs.charmap_decode(input, errors, decoding_table) # type: ignore[arg-type] class IncrementalEncoder(codecs.IncrementalEncoder): @@ -148,7 +143,7 @@ class StreamReader(Codec, codecs.StreamReader): return codecs.CodecInfo( name="sloppy-" + encoding, encode=Codec().encode, - decode=Codec().decode, + decode=Codec().decode, # type: ignore[arg-type] incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, @@ -160,9 +155,9 @@ class StreamReader(Codec, codecs.StreamReader): # can be used by the main module of ftfy.bad_codecs. 
CODECS = {} INCOMPLETE_ENCODINGS = ( - ["windows-%s" % num for num in range(1250, 1259)] - + ["iso-8859-%s" % num for num in (3, 6, 7, 8, 11)] - + ["cp%s" % num for num in range(1250, 1259)] + [f"windows-{num}" for num in range(1250, 1259)] + + [f"iso-8859-{num}" for num in (3, 6, 7, 8, 11)] + + [f"cp{num}" for num in range(1250, 1259)] + ["cp874"] ) diff --git a/ftfy/bad_codecs/utf8_variants.py b/ftfy/bad_codecs/utf8_variants.py index 28366a54..eaac3c14 100644 --- a/ftfy/bad_codecs/utf8_variants.py +++ b/ftfy/bad_codecs/utf8_variants.py @@ -47,7 +47,7 @@ from encodings.utf_8 import ( IncrementalEncoder as UTF8IncrementalEncoder, ) -from typing import Callable, Optional, Tuple +from typing import Callable, Optional NAME = "utf-8-variants" @@ -95,7 +95,7 @@ class IncrementalDecoder(UTF8IncrementalDecoder): @staticmethod def _buffer_decode( # type: ignore[override] input: bytes, errors: Optional[str], final: bool - ) -> Tuple[str, int]: + ) -> tuple[str, int]: """ Decode bytes that may be arriving in a stream, following the Codecs API. @@ -137,9 +137,7 @@ def _buffer_decode( # type: ignore[override] return "".join(decoded_segments), position @staticmethod - def _buffer_decode_step( - input: bytes, errors: Optional[str], final: bool - ) -> Tuple[str, int]: + def _buffer_decode_step(input: bytes, errors: Optional[str], final: bool) -> tuple[str, int]: """ There are three possibilities for each decoding step: @@ -168,27 +166,22 @@ def _buffer_decode_step( if len(input) > 1: # Decode the two-byte sequence 0xc0 0x80. return "\u0000", 2 - else: - if final: - # We hit the end of the stream. Let the superclass method - # handle it. - return sup(input, errors, True) - else: - # Wait to see another byte. - return "", 0 - else: - # Decode a possible six-byte sequence starting with 0xed. - return IncrementalDecoder._buffer_decode_surrogates( - sup, input, errors, final - ) + if final: + # We hit the end of the stream. Let the superclass method + # handle it. 
+ return sup(input, errors, True) + # Wait to see another byte. + return "", 0 + # Decode a possible six-byte sequence starting with 0xed. + return IncrementalDecoder._buffer_decode_surrogates(sup, input, errors, final) @staticmethod def _buffer_decode_surrogates( - sup: Callable[[bytes, Optional[str], bool], Tuple[str, int]], + sup: Callable[[bytes, Optional[str], bool], tuple[str, int]], input: bytes, errors: Optional[str], final: bool, - ) -> Tuple[str, int]: + ) -> tuple[str, int]: """ When we have improperly encoded surrogates, we can still see the bits that they were meant to represent. @@ -209,28 +202,25 @@ def _buffer_decode_surrogates( # handle it as normal UTF-8. It might be a Hangul character # or an error. return sup(input, errors, final) - else: - # We found a surrogate, the stream isn't over yet, and we don't - # know enough of the following bytes to decode anything, so - # consume zero bytes and wait. - return "", 0 - else: - if CESU8_RE.match(input): - # Given this is a CESU-8 sequence, do some math to pull out - # the intended 20-bit value, and consume six bytes. - codepoint = ( - ((input[1] & 0x0F) << 16) - + ((input[2] & 0x3F) << 10) - + ((input[4] & 0x0F) << 6) - + (input[5] & 0x3F) - + 0x10000 - ) - return chr(codepoint), 6 - else: - # This looked like a CESU-8 sequence, but it wasn't one. - # 0xed indicates the start of a three-byte sequence, so give - # three bytes to the superclass to decode as usual. - return sup(input[:3], errors, False) + # We found a surrogate, the stream isn't over yet, and we don't + # know enough of the following bytes to decode anything, so + # consume zero bytes and wait. + return "", 0 + if CESU8_RE.match(input): + # Given this is a CESU-8 sequence, do some math to pull out + # the intended 20-bit value, and consume six bytes. 
+ codepoint = ( + ((input[1] & 0x0F) << 16) + + ((input[2] & 0x3F) << 10) + + ((input[4] & 0x0F) << 6) + + (input[5] & 0x3F) + + 0x10000 + ) + return chr(codepoint), 6 + # This looked like a CESU-8 sequence, but it wasn't one. + # 0xed indicates the start of a three-byte sequence, so give + # three bytes to the superclass to decode as usual. + return sup(input[:3], errors, False) # The encoder is identical to UTF-8. @@ -239,20 +229,20 @@ def _buffer_decode_surrogates( class StreamWriter(codecs.StreamWriter): @staticmethod - def encode(input: str, errors: str = "strict") -> Tuple[bytes, int]: + def encode(input: str, errors: str = "strict") -> tuple[bytes, int]: return IncrementalEncoder(errors).encode(input, final=True), len(input) class StreamReader(codecs.StreamReader): @staticmethod - def decode(input: bytes, errors: str = "strict") -> Tuple[str, int]: + def decode(input: bytes, errors: str = "strict") -> tuple[str, int]: return IncrementalDecoder(errors).decode(input, final=True), len(input) CODEC_INFO = codecs.CodecInfo( name=NAME, encode=StreamWriter.encode, - decode=StreamReader.decode, + decode=StreamReader.decode, # type: ignore[arg-type] incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, diff --git a/ftfy/badness.py b/ftfy/badness.py index 81c52744..38ec1f44 100644 --- a/ftfy/badness.py +++ b/ftfy/badness.py @@ -16,7 +16,7 @@ import re -# There are only 403 characters that occur in known UTF-8 mojibake, and we can +# There are only a few hundred characters that occur in known UTF-8 mojibake, and we can # characterize them: MOJIBAKE_CATEGORIES = { @@ -42,8 +42,6 @@ "\N{DIAERESIS}" "\N{NOT SIGN}" "\N{MACRON}" - "\N{PILCROW SIGN}" - "\N{SECTION SIGN}" "\N{CEDILLA}" "\N{LATIN SMALL LETTER F WITH HOOK}" "\N{MODIFIER LETTER CIRCUMFLEX ACCENT}" # it's not a modifier @@ -62,6 +60,11 @@ "\N{FEMININE ORDINAL INDICATOR}" "\N{MASCULINE ORDINAL INDICATOR}" ), + # Characters used in legalese + "law": ( + "\N{PILCROW 
SIGN}" + "\N{SECTION SIGN}" + ), "currency": ( "\N{CENT SIGN}" "\N{POUND SIGN}" @@ -132,6 +135,9 @@ "ò-ö" "ø-ü" "\N{LATIN CAPITAL LETTER O WITH DOUBLE ACUTE}" + "\N{LATIN CAPITAL LETTER O WITH MACRON}" + "\N{LATIN CAPITAL LETTER U WITH MACRON}" + "\N{LATIN CAPITAL LETTER U WITH OGONEK}" "\N{DEGREE SIGN}" ), "upper_accented": ( @@ -143,6 +149,7 @@ "\N{LATIN CAPITAL LETTER U WITH DIAERESIS}" "\N{LATIN CAPITAL LETTER Y WITH ACUTE}" "\N{LATIN CAPITAL LETTER A WITH BREVE}" + "\N{LATIN CAPITAL LETTER A WITH MACRON}" "\N{LATIN CAPITAL LETTER A WITH OGONEK}" "\N{LATIN CAPITAL LETTER C WITH ACUTE}" "\N{LATIN CAPITAL LETTER C WITH CARON}" @@ -150,13 +157,20 @@ "\N{LATIN CAPITAL LETTER D WITH STROKE}" "\N{LATIN CAPITAL LETTER E WITH OGONEK}" "\N{LATIN CAPITAL LETTER E WITH CARON}" + "\N{LATIN CAPITAL LETTER E WITH MACRON}" + "\N{LATIN CAPITAL LETTER E WITH DOT ABOVE}" "\N{LATIN CAPITAL LETTER G WITH BREVE}" + "\N{LATIN CAPITAL LETTER G WITH CEDILLA}" "\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}" + "\N{LATIN CAPITAL LETTER I WITH MACRON}" + "\N{LATIN CAPITAL LETTER K WITH CEDILLA}" "\N{LATIN CAPITAL LETTER L WITH ACUTE}" "\N{LATIN CAPITAL LETTER L WITH CARON}" "\N{LATIN CAPITAL LETTER L WITH STROKE}" + "\N{LATIN CAPITAL LETTER L WITH CEDILLA}" "\N{LATIN CAPITAL LETTER N WITH ACUTE}" "\N{LATIN CAPITAL LETTER N WITH CARON}" + "\N{LATIN CAPITAL LETTER N WITH CEDILLA}" "\N{LATIN CAPITAL LIGATURE OE}" "\N{LATIN CAPITAL LETTER R WITH CARON}" "\N{LATIN CAPITAL LETTER S WITH ACUTE}" @@ -179,16 +193,24 @@ # skip o's and u's that could be used in kaomoji "\N{LATIN SMALL LETTER A WITH BREVE}" "\N{LATIN SMALL LETTER A WITH OGONEK}" + "\N{LATIN SMALL LETTER A WITH MACRON}" "\N{LATIN SMALL LETTER C WITH ACUTE}" "\N{LATIN SMALL LETTER C WITH CARON}" "\N{LATIN SMALL LETTER D WITH CARON}" "\N{LATIN SMALL LETTER D WITH STROKE}" "\N{LATIN SMALL LETTER E WITH OGONEK}" "\N{LATIN SMALL LETTER E WITH CARON}" + "\N{LATIN SMALL LETTER E WITH MACRON}" + "\N{LATIN SMALL LETTER E WITH DOT ABOVE}" "\N{LATIN 
SMALL LETTER G WITH BREVE}" + "\N{LATIN SMALL LETTER G WITH CEDILLA}" + "\N{LATIN SMALL LETTER I WITH OGONEK}" + "\N{LATIN SMALL LETTER I WITH MACRON}" + "\N{LATIN SMALL LETTER K WITH CEDILLA}" "\N{LATIN SMALL LETTER L WITH ACUTE}" "\N{LATIN SMALL LETTER L WITH CARON}" "\N{LATIN SMALL LETTER L WITH STROKE}" + "\N{LATIN SMALL LETTER L WITH CEDILLA}" "\N{LATIN SMALL LIGATURE OE}" "\N{LATIN SMALL LETTER R WITH ACUTE}" "\N{LATIN SMALL LETTER S WITH ACUTE}" @@ -253,11 +275,11 @@ r""" [{c1}] | - [{bad}{lower_accented}{upper_accented}{box}{start_punctuation}{end_punctuation}{currency}{numeric}] [{bad}] + [{bad}{lower_accented}{upper_accented}{box}{start_punctuation}{end_punctuation}{currency}{numeric}{law}] [{bad}] | [a-zA-Z] [{lower_common}{upper_common}] [{bad}] | - [{bad}] [{lower_accented}{upper_accented}{box}{start_punctuation}{end_punctuation}{currency}{numeric}] + [{bad}] [{lower_accented}{upper_accented}{box}{start_punctuation}{end_punctuation}{currency}{numeric}{law}] | [{lower_accented}{lower_common}{box}{end_punctuation}{currency}{numeric}] [{upper_accented}] | @@ -267,11 +289,11 @@ | \s [{upper_accented}] [{currency}] | - [{upper_accented}{box}] [{numeric}] + [{upper_accented}{box}] [{numeric}{law}] | [{lower_accented}{upper_accented}{box}{currency}{end_punctuation}] [{start_punctuation}] [{numeric}] | - [{lower_accented}{upper_accented}{currency}{numeric}{box}] [{end_punctuation}] [{start_punctuation}] + [{lower_accented}{upper_accented}{currency}{numeric}{box}{law}] [{end_punctuation}] [{start_punctuation}] | [{currency}{numeric}{box}] [{start_punctuation}] | @@ -279,19 +301,23 @@ | [{box}] [{kaomoji}] | - [{lower_accented}{upper_accented}{currency}{numeric}{start_punctuation}{end_punctuation}] [{box}] + [{lower_accented}{upper_accented}{currency}{numeric}{start_punctuation}{end_punctuation}{law}] [{box}] | [{box}] [{end_punctuation}] | - [{lower_accented}{upper_accented}] [{end_punctuation}] \w + [{lower_accented}{upper_accented}] 
[{start_punctuation}{end_punctuation}] \w | # The ligature œ when not followed by an unaccented Latin letter [Œœ][^A-Za-z] | + # Degree signs after capital letters + [{upper_accented}]° + | + # Common Windows-1252 2-character mojibake that isn't covered by the cases above - [ÂÃÎÐ][€Šš¢£Ÿž\xa0\xad®©°·»{start_punctuation}{end_punctuation}–—´] + [ÂÃÎÐ][€œŠš¢£Ÿž\xa0\xad®©°·»{start_punctuation}{end_punctuation}–—´] | × [²³] | @@ -350,7 +376,13 @@ # Windows-1253 mojibake of Latin-1 characters and/or the Greek alphabet [ΒΓΞΟ][{c1}{bad}{start_punctuation}{end_punctuation}{currency}°][ΒΓΞΟ] -""".format(**MOJIBAKE_CATEGORIES), + | + + # Windows-1257 mojibake of characters in the U+2000 range + †+ """.format( + **MOJIBAKE_CATEGORIES + ), re.VERBOSE, ) diff --git a/ftfy/chardata.py b/ftfy/chardata.py index 2e0e82a8..43d117c6 100644 --- a/ftfy/chardata.py +++ b/ftfy/chardata.py @@ -9,7 +9,6 @@ import itertools import re import unicodedata -from typing import Dict # These are the encodings we will try to fix in ftfy, in the # order that they should be tried. @@ -20,6 +19,7 @@ "sloppy-windows-1250", "sloppy-windows-1253", "sloppy-windows-1254", + "sloppy-windows-1257", "iso-8859-2", "macroman", "cp437", @@ -29,7 +29,7 @@ DOUBLE_QUOTE_RE = re.compile("[\u201c-\u201f]") -def _build_regexes() -> Dict[str, re.Pattern[str]]: +def _build_regexes() -> dict[str, re.Pattern[str]]: """ ENCODING_REGEXES contain reasonably fast ways to detect if we could represent a given string in a given encoding. The simplest one is @@ -43,7 +43,7 @@ def _build_regexes() -> Dict[str, re.Pattern[str]]: # Make a sequence of characters that bytes \x80 to \xFF decode to # in each encoding, as well as byte \x1A, which is used to represent # the replacement character � in the sloppy-* encodings. 
- byte_range = bytes(list(range(0x80, 0x100)) + [0x1A]) + byte_range = bytes([*range(0x80, 0x100), 0x1A]) charlist = byte_range.decode(encoding) # The rest of the ASCII bytes -- bytes \x00 to \x19 and \x1B @@ -51,7 +51,7 @@ def _build_regexes() -> Dict[str, re.Pattern[str]]: # support, so we can just include them as ranges. This also lets us # not worry about escaping regex special characters, because all of # them are in the \x1B to \x7F range. - regex = "^[\x00-\x19\x1b-\x7f{0}]*$".format(charlist) + regex = f"^[\x00-\x19\x1b-\x7f{charlist}]*$" encoding_regexes[encoding] = re.compile(regex) return encoding_regexes @@ -59,7 +59,7 @@ def _build_regexes() -> Dict[str, re.Pattern[str]]: ENCODING_REGEXES = _build_regexes() -def _build_html_entities() -> Dict[str, str]: +def _build_html_entities() -> dict[str, str]: entities = {} # Create a dictionary based on the built-in HTML5 entity dictionary. # Add a limited set of HTML entities that we'll also decode if they've @@ -94,13 +94,13 @@ def possible_encoding(text: str, encoding: str) -> bool: return bool(ENCODING_REGEXES[encoding].match(text)) -def _build_control_char_mapping() -> Dict[int, None]: +def _build_control_char_mapping() -> dict[int, None]: """ Build a translate mapping that strips likely-unintended control characters. See :func:`ftfy.fixes.remove_control_chars` for a description of these codepoint ranges and why they should be removed. """ - control_chars: Dict[int, None] = {} + control_chars: dict[int, None] = {} for i in itertools.chain( range(0x00, 0x09), @@ -230,7 +230,7 @@ def _build_control_char_mapping() -> Dict[int, None]: } -def _build_width_map() -> Dict[int, str]: +def _build_width_map() -> dict[int, str]: """ Build a translate mapping that replaces halfwidth and fullwidth forms with their standard-width forms. @@ -252,34 +252,410 @@ def _build_width_map() -> Dict[int, str]: # Character classes that help us pinpoint embedded mojibake. 
These can # include common characters, because we'll also check them for 'badness'. -UTF8_CLUES = { +# +# Though they go on for many lines, the members of this dictionary are +# single concatenated strings. +# +# This code is generated using scripts/char_data_table.py. +UTF8_CLUES: dict[str, str] = { # Letters that decode to 0xC2 - 0xDF in a Latin-1-like encoding "utf8_first_of_2": ( - "ÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßĂĆČĎĐĘĚĞİĹŃŇŐŘŞŢŮŰ" - "ΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ" + "\N{LATIN CAPITAL LETTER A WITH BREVE}" # windows-1250:C3 + "\N{LATIN CAPITAL LETTER A WITH CIRCUMFLEX}" # latin-1:C2 + "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}" # latin-1:C4 + "\N{LATIN CAPITAL LETTER A WITH MACRON}" # windows-1257:C2 + "\N{LATIN CAPITAL LETTER A WITH RING ABOVE}" # latin-1:C5 + "\N{LATIN CAPITAL LETTER A WITH TILDE}" # latin-1:C3 + "\N{LATIN CAPITAL LETTER AE}" # latin-1:C6 + "\N{LATIN CAPITAL LETTER C WITH ACUTE}" # windows-1250:C6 + "\N{LATIN CAPITAL LETTER C WITH CARON}" # windows-1250:C8 + "\N{LATIN CAPITAL LETTER C WITH CEDILLA}" # latin-1:C7 + "\N{LATIN CAPITAL LETTER D WITH CARON}" # windows-1250:CF + "\N{LATIN CAPITAL LETTER D WITH STROKE}" # windows-1250:D0 + "\N{LATIN CAPITAL LETTER E WITH ACUTE}" # latin-1:C9 + "\N{LATIN CAPITAL LETTER E WITH CARON}" # windows-1250:CC + "\N{LATIN CAPITAL LETTER E WITH CIRCUMFLEX}" # latin-1:CA + "\N{LATIN CAPITAL LETTER E WITH DIAERESIS}" # latin-1:CB + "\N{LATIN CAPITAL LETTER E WITH DOT ABOVE}" # windows-1257:CB + "\N{LATIN CAPITAL LETTER E WITH GRAVE}" # latin-1:C8 + "\N{LATIN CAPITAL LETTER E WITH MACRON}" # windows-1257:C7 + "\N{LATIN CAPITAL LETTER E WITH OGONEK}" # windows-1250:CA + "\N{LATIN CAPITAL LETTER ETH}" # latin-1:D0 + "\N{LATIN CAPITAL LETTER G WITH BREVE}" # windows-1254:D0 + "\N{LATIN CAPITAL LETTER G WITH CEDILLA}" # windows-1257:CC + "\N{LATIN CAPITAL LETTER I WITH ACUTE}" # latin-1:CD + "\N{LATIN CAPITAL LETTER I WITH CIRCUMFLEX}" # latin-1:CE + "\N{LATIN CAPITAL LETTER I WITH 
DIAERESIS}" # latin-1:CF + "\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}" # windows-1254:DD + "\N{LATIN CAPITAL LETTER I WITH GRAVE}" # latin-1:CC + "\N{LATIN CAPITAL LETTER I WITH MACRON}" # windows-1257:CE + "\N{LATIN CAPITAL LETTER K WITH CEDILLA}" # windows-1257:CD + "\N{LATIN CAPITAL LETTER L WITH ACUTE}" # windows-1250:C5 + "\N{LATIN CAPITAL LETTER L WITH CEDILLA}" # windows-1257:CF + "\N{LATIN CAPITAL LETTER L WITH STROKE}" # windows-1257:D9 + "\N{LATIN CAPITAL LETTER N WITH ACUTE}" # windows-1250:D1 + "\N{LATIN CAPITAL LETTER N WITH CARON}" # windows-1250:D2 + "\N{LATIN CAPITAL LETTER N WITH CEDILLA}" # windows-1257:D2 + "\N{LATIN CAPITAL LETTER N WITH TILDE}" # latin-1:D1 + "\N{LATIN CAPITAL LETTER O WITH ACUTE}" # latin-1:D3 + "\N{LATIN CAPITAL LETTER O WITH CIRCUMFLEX}" # latin-1:D4 + "\N{LATIN CAPITAL LETTER O WITH DIAERESIS}" # latin-1:D6 + "\N{LATIN CAPITAL LETTER O WITH DOUBLE ACUTE}" # windows-1250:D5 + "\N{LATIN CAPITAL LETTER O WITH GRAVE}" # latin-1:D2 + "\N{LATIN CAPITAL LETTER O WITH MACRON}" # windows-1257:D4 + "\N{LATIN CAPITAL LETTER O WITH STROKE}" # latin-1:D8 + "\N{LATIN CAPITAL LETTER O WITH TILDE}" # latin-1:D5 + "\N{LATIN CAPITAL LETTER R WITH CARON}" # windows-1250:D8 + "\N{LATIN CAPITAL LETTER S WITH ACUTE}" # windows-1257:DA + "\N{LATIN CAPITAL LETTER S WITH CARON}" # windows-1257:D0 + "\N{LATIN CAPITAL LETTER S WITH CEDILLA}" # windows-1254:DE + "\N{LATIN CAPITAL LETTER T WITH CEDILLA}" # windows-1250:DE + "\N{LATIN CAPITAL LETTER THORN}" # latin-1:DE + "\N{LATIN CAPITAL LETTER U WITH ACUTE}" # latin-1:DA + "\N{LATIN CAPITAL LETTER U WITH CIRCUMFLEX}" # latin-1:DB + "\N{LATIN CAPITAL LETTER U WITH DIAERESIS}" # latin-1:DC + "\N{LATIN CAPITAL LETTER U WITH DOUBLE ACUTE}" # windows-1250:DB + "\N{LATIN CAPITAL LETTER U WITH GRAVE}" # latin-1:D9 + "\N{LATIN CAPITAL LETTER U WITH MACRON}" # windows-1257:DB + "\N{LATIN CAPITAL LETTER U WITH OGONEK}" # windows-1257:D8 + "\N{LATIN CAPITAL LETTER U WITH RING ABOVE}" # windows-1250:D9 + 
"\N{LATIN CAPITAL LETTER Y WITH ACUTE}" # latin-1:DD + "\N{LATIN CAPITAL LETTER Z WITH ACUTE}" # windows-1257:CA + "\N{LATIN CAPITAL LETTER Z WITH CARON}" # windows-1257:DE + "\N{LATIN CAPITAL LETTER Z WITH DOT ABOVE}" # windows-1257:DD + "\N{LATIN SMALL LETTER SHARP S}" # latin-1:DF + "\N{MULTIPLICATION SIGN}" # latin-1:D7 + "\N{GREEK CAPITAL LETTER BETA}" # windows-1253:C2 + "\N{GREEK CAPITAL LETTER GAMMA}" # windows-1253:C3 + "\N{GREEK CAPITAL LETTER DELTA}" # windows-1253:C4 + "\N{GREEK CAPITAL LETTER EPSILON}" # windows-1253:C5 + "\N{GREEK CAPITAL LETTER ZETA}" # windows-1253:C6 + "\N{GREEK CAPITAL LETTER ETA}" # windows-1253:C7 + "\N{GREEK CAPITAL LETTER THETA}" # windows-1253:C8 + "\N{GREEK CAPITAL LETTER IOTA}" # windows-1253:C9 + "\N{GREEK CAPITAL LETTER KAPPA}" # windows-1253:CA + "\N{GREEK CAPITAL LETTER LAMDA}" # windows-1253:CB + "\N{GREEK CAPITAL LETTER MU}" # windows-1253:CC + "\N{GREEK CAPITAL LETTER NU}" # windows-1253:CD + "\N{GREEK CAPITAL LETTER XI}" # windows-1253:CE + "\N{GREEK CAPITAL LETTER OMICRON}" # windows-1253:CF + "\N{GREEK CAPITAL LETTER PI}" # windows-1253:D0 + "\N{GREEK CAPITAL LETTER RHO}" # windows-1253:D1 + "\N{GREEK CAPITAL LETTER SIGMA}" # windows-1253:D3 + "\N{GREEK CAPITAL LETTER TAU}" # windows-1253:D4 + "\N{GREEK CAPITAL LETTER UPSILON}" # windows-1253:D5 + "\N{GREEK CAPITAL LETTER PHI}" # windows-1253:D6 + "\N{GREEK CAPITAL LETTER CHI}" # windows-1253:D7 + "\N{GREEK CAPITAL LETTER PSI}" # windows-1253:D8 + "\N{GREEK CAPITAL LETTER OMEGA}" # windows-1253:D9 + "\N{GREEK CAPITAL LETTER IOTA WITH DIALYTIKA}" # windows-1253:DA + "\N{GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA}" # windows-1253:DB + "\N{GREEK SMALL LETTER ALPHA WITH TONOS}" # windows-1253:DC + "\N{GREEK SMALL LETTER EPSILON WITH TONOS}" # windows-1253:DD + "\N{GREEK SMALL LETTER ETA WITH TONOS}" # windows-1253:DE + "\N{GREEK SMALL LETTER IOTA WITH TONOS}" # windows-1253:DF + "\N{CYRILLIC CAPITAL LETTER VE}" # windows-1251:C2 + "\N{CYRILLIC CAPITAL LETTER GHE}" # 
windows-1251:C3 + "\N{CYRILLIC CAPITAL LETTER DE}" # windows-1251:C4 + "\N{CYRILLIC CAPITAL LETTER IE}" # windows-1251:C5 + "\N{CYRILLIC CAPITAL LETTER ZHE}" # windows-1251:C6 + "\N{CYRILLIC CAPITAL LETTER ZE}" # windows-1251:C7 + "\N{CYRILLIC CAPITAL LETTER I}" # windows-1251:C8 + "\N{CYRILLIC CAPITAL LETTER SHORT I}" # windows-1251:C9 + "\N{CYRILLIC CAPITAL LETTER KA}" # windows-1251:CA + "\N{CYRILLIC CAPITAL LETTER EL}" # windows-1251:CB + "\N{CYRILLIC CAPITAL LETTER EM}" # windows-1251:CC + "\N{CYRILLIC CAPITAL LETTER EN}" # windows-1251:CD + "\N{CYRILLIC CAPITAL LETTER O}" # windows-1251:CE + "\N{CYRILLIC CAPITAL LETTER PE}" # windows-1251:CF + "\N{CYRILLIC CAPITAL LETTER ER}" # windows-1251:D0 + "\N{CYRILLIC CAPITAL LETTER ES}" # windows-1251:D1 + "\N{CYRILLIC CAPITAL LETTER TE}" # windows-1251:D2 + "\N{CYRILLIC CAPITAL LETTER U}" # windows-1251:D3 + "\N{CYRILLIC CAPITAL LETTER EF}" # windows-1251:D4 + "\N{CYRILLIC CAPITAL LETTER HA}" # windows-1251:D5 + "\N{CYRILLIC CAPITAL LETTER TSE}" # windows-1251:D6 + "\N{CYRILLIC CAPITAL LETTER CHE}" # windows-1251:D7 + "\N{CYRILLIC CAPITAL LETTER SHA}" # windows-1251:D8 + "\N{CYRILLIC CAPITAL LETTER SHCHA}" # windows-1251:D9 + "\N{CYRILLIC CAPITAL LETTER HARD SIGN}" # windows-1251:DA + "\N{CYRILLIC CAPITAL LETTER YERU}" # windows-1251:DB + "\N{CYRILLIC CAPITAL LETTER SOFT SIGN}" # windows-1251:DC + "\N{CYRILLIC CAPITAL LETTER E}" # windows-1251:DD + "\N{CYRILLIC CAPITAL LETTER YU}" # windows-1251:DE + "\N{CYRILLIC CAPITAL LETTER YA}" # windows-1251:DF ), # Letters that decode to 0xE0 - 0xEF in a Latin-1-like encoding - "utf8_first_of_3": ("àáâãäåæçèéêëìíîïăćčďęěĺŕΰαβγδεζηθικλμνξοабвгдежзийклмноп"), + "utf8_first_of_3": ( + "\N{LATIN SMALL LETTER A WITH ACUTE}" # latin-1:E1 + "\N{LATIN SMALL LETTER A WITH BREVE}" # windows-1250:E3 + "\N{LATIN SMALL LETTER A WITH CIRCUMFLEX}" # latin-1:E2 + "\N{LATIN SMALL LETTER A WITH DIAERESIS}" # latin-1:E4 + "\N{LATIN SMALL LETTER A WITH GRAVE}" # latin-1:E0 + "\N{LATIN SMALL 
LETTER A WITH MACRON}" # windows-1257:E2 + "\N{LATIN SMALL LETTER A WITH OGONEK}" # windows-1257:E0 + "\N{LATIN SMALL LETTER A WITH RING ABOVE}" # latin-1:E5 + "\N{LATIN SMALL LETTER A WITH TILDE}" # latin-1:E3 + "\N{LATIN SMALL LETTER AE}" # latin-1:E6 + "\N{LATIN SMALL LETTER C WITH ACUTE}" # windows-1250:E6 + "\N{LATIN SMALL LETTER C WITH CARON}" # windows-1250:E8 + "\N{LATIN SMALL LETTER C WITH CEDILLA}" # latin-1:E7 + "\N{LATIN SMALL LETTER D WITH CARON}" # windows-1250:EF + "\N{LATIN SMALL LETTER E WITH ACUTE}" # latin-1:E9 + "\N{LATIN SMALL LETTER E WITH CARON}" # windows-1250:EC + "\N{LATIN SMALL LETTER E WITH CIRCUMFLEX}" # latin-1:EA + "\N{LATIN SMALL LETTER E WITH DIAERESIS}" # latin-1:EB + "\N{LATIN SMALL LETTER E WITH DOT ABOVE}" # windows-1257:EB + "\N{LATIN SMALL LETTER E WITH GRAVE}" # latin-1:E8 + "\N{LATIN SMALL LETTER E WITH MACRON}" # windows-1257:E7 + "\N{LATIN SMALL LETTER E WITH OGONEK}" # windows-1250:EA + "\N{LATIN SMALL LETTER E WITH OGONEK}" # windows-1250:EA + "\N{LATIN SMALL LETTER G WITH CEDILLA}" # windows-1257:EC + "\N{LATIN SMALL LETTER I WITH ACUTE}" # latin-1:ED + "\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}" # latin-1:EE + "\N{LATIN SMALL LETTER I WITH DIAERESIS}" # latin-1:EF + "\N{LATIN SMALL LETTER I WITH GRAVE}" # latin-1:EC + "\N{LATIN SMALL LETTER I WITH MACRON}" # windows-1257:EE + "\N{LATIN SMALL LETTER I WITH OGONEK}" # windows-1257:E1 + "\N{LATIN SMALL LETTER K WITH CEDILLA}" # windows-1257:ED + "\N{LATIN SMALL LETTER L WITH ACUTE}" # windows-1250:E5 + "\N{LATIN SMALL LETTER L WITH CEDILLA}" # windows-1257:EF + "\N{LATIN SMALL LETTER R WITH ACUTE}" # windows-1250:E0 + "\N{LATIN SMALL LETTER Z WITH ACUTE}" # windows-1257:EA + "\N{GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS}" # windows-1253:E0 + "\N{GREEK SMALL LETTER ALPHA}" # windows-1253:E1 + "\N{GREEK SMALL LETTER BETA}" # windows-1253:E2 + "\N{GREEK SMALL LETTER GAMMA}" # windows-1253:E3 + "\N{GREEK SMALL LETTER DELTA}" # windows-1253:E4 + "\N{GREEK SMALL LETTER 
EPSILON}" # windows-1253:E5 + "\N{GREEK SMALL LETTER ZETA}" # windows-1253:E6 + "\N{GREEK SMALL LETTER ETA}" # windows-1253:E7 + "\N{GREEK SMALL LETTER THETA}" # windows-1253:E8 + "\N{GREEK SMALL LETTER IOTA}" # windows-1253:E9 + "\N{GREEK SMALL LETTER KAPPA}" # windows-1253:EA + "\N{GREEK SMALL LETTER LAMDA}" # windows-1253:EB + "\N{GREEK SMALL LETTER MU}" # windows-1253:EC + "\N{GREEK SMALL LETTER NU}" # windows-1253:ED + "\N{GREEK SMALL LETTER XI}" # windows-1253:EE + "\N{GREEK SMALL LETTER OMICRON}" # windows-1253:EF + "\N{CYRILLIC SMALL LETTER A}" # windows-1251:E0 + "\N{CYRILLIC SMALL LETTER BE}" # windows-1251:E1 + "\N{CYRILLIC SMALL LETTER VE}" # windows-1251:E2 + "\N{CYRILLIC SMALL LETTER GHE}" # windows-1251:E3 + "\N{CYRILLIC SMALL LETTER DE}" # windows-1251:E4 + "\N{CYRILLIC SMALL LETTER IE}" # windows-1251:E5 + "\N{CYRILLIC SMALL LETTER ZHE}" # windows-1251:E6 + "\N{CYRILLIC SMALL LETTER ZE}" # windows-1251:E7 + "\N{CYRILLIC SMALL LETTER I}" # windows-1251:E8 + "\N{CYRILLIC SMALL LETTER SHORT I}" # windows-1251:E9 + "\N{CYRILLIC SMALL LETTER KA}" # windows-1251:EA + "\N{CYRILLIC SMALL LETTER EL}" # windows-1251:EB + "\N{CYRILLIC SMALL LETTER EM}" # windows-1251:EC + "\N{CYRILLIC SMALL LETTER EN}" # windows-1251:ED + "\N{CYRILLIC SMALL LETTER O}" # windows-1251:EE + "\N{CYRILLIC SMALL LETTER PE}" # windows-1251:EF + ), # Letters that decode to 0xF0 or 0xF3 in a Latin-1-like encoding. 
# (Other leading bytes correspond only to unassigned codepoints) - "utf8_first_of_4": ("ðóđğπσру"), + "utf8_first_of_4": ( + "\N{LATIN SMALL LETTER D WITH STROKE}" # windows-1250:F0 + "\N{LATIN SMALL LETTER ETH}" # latin-1:F0 + "\N{LATIN SMALL LETTER G WITH BREVE}" # windows-1254:F0 + "\N{LATIN SMALL LETTER O WITH ACUTE}" # latin-1:F3 + "\N{LATIN SMALL LETTER S WITH CARON}" # windows-1257:F0 + "\N{GREEK SMALL LETTER PI}" # windows-1253:F0 + "\N{GREEK SMALL LETTER SIGMA}" # windows-1253:F3 + "\N{CYRILLIC SMALL LETTER ER}" # windows-1251:F0 + "\N{CYRILLIC SMALL LETTER U}" # windows-1251:F3 + ), # Letters that decode to 0x80 - 0xBF in a Latin-1-like encoding, # including a space standing in for 0xA0 "utf8_continuation": ( "\x80-\xbf" - "ĄąĽľŁłŒœŚśŞşŠšŤťŸŹźŻżŽžƒˆˇ˘˛˜˝΄΅" - "ΆΈΉΊΌΎΏЁЂЃЄЅІЇЈЉЊЋЌЎЏёђѓєѕіїјљњћќўџҐґ" - "–—―‘’‚“”„†‡•…‰‹›€№™" - " " + "\N{SPACE}" # modification of latin-1:A0, NO-BREAK SPACE + "\N{LATIN CAPITAL LETTER A WITH OGONEK}" # windows-1250:A5 + "\N{LATIN CAPITAL LETTER AE}" # windows-1257:AF + "\N{LATIN CAPITAL LETTER L WITH CARON}" # windows-1250:BC + "\N{LATIN CAPITAL LETTER L WITH STROKE}" # windows-1250:A3 + "\N{LATIN CAPITAL LETTER O WITH STROKE}" # windows-1257:A8 + "\N{LATIN CAPITAL LETTER R WITH CEDILLA}" # windows-1257:AA + "\N{LATIN CAPITAL LETTER S WITH ACUTE}" # windows-1250:8C + "\N{LATIN CAPITAL LETTER S WITH CARON}" # windows-1252:8A + "\N{LATIN CAPITAL LETTER S WITH CEDILLA}" # windows-1250:AA + "\N{LATIN CAPITAL LETTER T WITH CARON}" # windows-1250:8D + "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}" # windows-1252:9F + "\N{LATIN CAPITAL LETTER Z WITH ACUTE}" # windows-1250:8F + "\N{LATIN CAPITAL LETTER Z WITH CARON}" # windows-1252:8E + "\N{LATIN CAPITAL LETTER Z WITH DOT ABOVE}" # windows-1250:AF + "\N{LATIN CAPITAL LIGATURE OE}" # windows-1252:8C + "\N{LATIN SMALL LETTER A WITH OGONEK}" # windows-1250:B9 + "\N{LATIN SMALL LETTER AE}" # windows-1257:BF + "\N{LATIN SMALL LETTER F WITH HOOK}" # windows-1252:83 + "\N{LATIN SMALL LETTER L 
WITH CARON}" # windows-1250:BE + "\N{LATIN SMALL LETTER L WITH STROKE}" # windows-1250:B3 + "\N{LATIN SMALL LETTER O WITH STROKE}" # windows-1257:B8 + "\N{LATIN SMALL LETTER R WITH CEDILLA}" # windows-1257:BA + "\N{LATIN SMALL LETTER S WITH ACUTE}" # windows-1250:9C + "\N{LATIN SMALL LETTER S WITH CARON}" # windows-1252:9A + "\N{LATIN SMALL LETTER S WITH CEDILLA}" # windows-1250:BA + "\N{LATIN SMALL LETTER T WITH CARON}" # windows-1250:9D + "\N{LATIN SMALL LETTER Z WITH ACUTE}" # windows-1250:9F + "\N{LATIN SMALL LETTER Z WITH CARON}" # windows-1252:9E + "\N{LATIN SMALL LETTER Z WITH DOT ABOVE}" # windows-1250:BF + "\N{LATIN SMALL LIGATURE OE}" # windows-1252:9C + "\N{MODIFIER LETTER CIRCUMFLEX ACCENT}" # windows-1252:88 + "\N{CARON}" # windows-1250:A1 + "\N{BREVE}" # windows-1250:A2 + "\N{OGONEK}" # windows-1250:B2 + "\N{SMALL TILDE}" # windows-1252:98 + "\N{DOUBLE ACUTE ACCENT}" # windows-1250:BD + "\N{GREEK TONOS}" # windows-1253:B4 + "\N{GREEK DIALYTIKA TONOS}" # windows-1253:A1 + "\N{GREEK CAPITAL LETTER ALPHA WITH TONOS}" # windows-1253:A2 + "\N{GREEK CAPITAL LETTER EPSILON WITH TONOS}" # windows-1253:B8 + "\N{GREEK CAPITAL LETTER ETA WITH TONOS}" # windows-1253:B9 + "\N{GREEK CAPITAL LETTER IOTA WITH TONOS}" # windows-1253:BA + "\N{GREEK CAPITAL LETTER OMICRON WITH TONOS}" # windows-1253:BC + "\N{GREEK CAPITAL LETTER UPSILON WITH TONOS}" # windows-1253:BE + "\N{GREEK CAPITAL LETTER OMEGA WITH TONOS}" # windows-1253:BF + "\N{CYRILLIC CAPITAL LETTER IO}" # windows-1251:A8 + "\N{CYRILLIC CAPITAL LETTER DJE}" # windows-1251:80 + "\N{CYRILLIC CAPITAL LETTER GJE}" # windows-1251:81 + "\N{CYRILLIC CAPITAL LETTER UKRAINIAN IE}" # windows-1251:AA + "\N{CYRILLIC CAPITAL LETTER DZE}" # windows-1251:BD + "\N{CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I}" # windows-1251:B2 + "\N{CYRILLIC CAPITAL LETTER YI}" # windows-1251:AF + "\N{CYRILLIC CAPITAL LETTER JE}" # windows-1251:A3 + "\N{CYRILLIC CAPITAL LETTER LJE}" # windows-1251:8A + "\N{CYRILLIC CAPITAL LETTER NJE}" # 
windows-1251:8C + "\N{CYRILLIC CAPITAL LETTER TSHE}" # windows-1251:8E + "\N{CYRILLIC CAPITAL LETTER KJE}" # windows-1251:8D + "\N{CYRILLIC CAPITAL LETTER SHORT U}" # windows-1251:A1 + "\N{CYRILLIC CAPITAL LETTER DZHE}" # windows-1251:8F + "\N{CYRILLIC SMALL LETTER IO}" # windows-1251:B8 + "\N{CYRILLIC SMALL LETTER DJE}" # windows-1251:90 + "\N{CYRILLIC SMALL LETTER GJE}" # windows-1251:83 + "\N{CYRILLIC SMALL LETTER UKRAINIAN IE}" # windows-1251:BA + "\N{CYRILLIC SMALL LETTER DZE}" # windows-1251:BE + "\N{CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I}" # windows-1251:B3 + "\N{CYRILLIC SMALL LETTER YI}" # windows-1251:BF + "\N{CYRILLIC SMALL LETTER JE}" # windows-1251:BC + "\N{CYRILLIC SMALL LETTER LJE}" # windows-1251:9A + "\N{CYRILLIC SMALL LETTER NJE}" # windows-1251:9C + "\N{CYRILLIC SMALL LETTER TSHE}" # windows-1251:9E + "\N{CYRILLIC SMALL LETTER KJE}" # windows-1251:9D + "\N{CYRILLIC SMALL LETTER SHORT U}" # windows-1251:A2 + "\N{CYRILLIC SMALL LETTER DZHE}" # windows-1251:9F + "\N{CYRILLIC CAPITAL LETTER GHE WITH UPTURN}" # windows-1251:A5 + "\N{CYRILLIC SMALL LETTER GHE WITH UPTURN}" # windows-1251:B4 + "\N{EN DASH}" # windows-1252:96 + "\N{EM DASH}" # windows-1252:97 + "\N{HORIZONTAL BAR}" # windows-1253:AF + "\N{LEFT SINGLE QUOTATION MARK}" # windows-1252:91 + "\N{RIGHT SINGLE QUOTATION MARK}" # windows-1252:92 + "\N{SINGLE LOW-9 QUOTATION MARK}" # windows-1252:82 + "\N{LEFT DOUBLE QUOTATION MARK}" # windows-1252:93 + "\N{RIGHT DOUBLE QUOTATION MARK}" # windows-1252:94 + "\N{DOUBLE LOW-9 QUOTATION MARK}" # windows-1252:84 + "\N{DAGGER}" # windows-1252:86 + "\N{DOUBLE DAGGER}" # windows-1252:87 + "\N{BULLET}" # windows-1252:95 + "\N{HORIZONTAL ELLIPSIS}" # windows-1252:85 + "\N{PER MILLE SIGN}" # windows-1252:89 + "\N{SINGLE LEFT-POINTING ANGLE QUOTATION MARK}" # windows-1252:8B + "\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}" # windows-1252:9B + "\N{EURO SIGN}" # windows-1252:80 + "\N{NUMERO SIGN}" # windows-1251:B9 + "\N{TRADE MARK SIGN}" # 
windows-1252:99 ), # Letters that decode to 0x80 - 0xBF in a Latin-1-like encoding, # and don't usually stand for themselves when adjacent to mojibake. - # This excludes spaces, dashes, quotation marks, and ellipses. + # This excludes spaces, dashes, 'bullet', quotation marks, and ellipses. "utf8_continuation_strict": ( "\x80-\xbf" - "ĄąĽľŁłŒœŚśŞşŠšŤťŸŹźŻżŽžƒˆˇ˘˛˜˝΄΅" - "ΆΈΉΊΌΎΏЁЂЃЄЅІЇЈЉЊЋЌЎЏёђѓєѕіїјљњћќўџҐґ" - "†‡•‰‹›€№™" + "\N{LATIN CAPITAL LETTER A WITH OGONEK}" # windows-1250:A5 + "\N{LATIN CAPITAL LETTER AE}" # windows-1257:AF + "\N{LATIN CAPITAL LETTER L WITH CARON}" # windows-1250:BC + "\N{LATIN CAPITAL LETTER L WITH STROKE}" # windows-1250:A3 + "\N{LATIN CAPITAL LETTER O WITH STROKE}" # windows-1257:A8 + "\N{LATIN CAPITAL LETTER R WITH CEDILLA}" # windows-1257:AA + "\N{LATIN CAPITAL LETTER S WITH ACUTE}" # windows-1250:8C + "\N{LATIN CAPITAL LETTER S WITH CARON}" # windows-1252:8A + "\N{LATIN CAPITAL LETTER S WITH CEDILLA}" # windows-1250:AA + "\N{LATIN CAPITAL LETTER T WITH CARON}" # windows-1250:8D + "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}" # windows-1252:9F + "\N{LATIN CAPITAL LETTER Z WITH ACUTE}" # windows-1250:8F + "\N{LATIN CAPITAL LETTER Z WITH CARON}" # windows-1252:8E + "\N{LATIN CAPITAL LETTER Z WITH DOT ABOVE}" # windows-1250:AF + "\N{LATIN CAPITAL LIGATURE OE}" # windows-1252:8C + "\N{LATIN SMALL LETTER A WITH OGONEK}" # windows-1250:B9 + "\N{LATIN SMALL LETTER AE}" # windows-1257:BF + "\N{LATIN SMALL LETTER F WITH HOOK}" # windows-1252:83 + "\N{LATIN SMALL LETTER L WITH CARON}" # windows-1250:BE + "\N{LATIN SMALL LETTER L WITH STROKE}" # windows-1250:B3 + "\N{LATIN SMALL LETTER O WITH STROKE}" # windows-1257:B8 + "\N{LATIN SMALL LETTER R WITH CEDILLA}" # windows-1257:BA + "\N{LATIN SMALL LETTER S WITH ACUTE}" # windows-1250:9C + "\N{LATIN SMALL LETTER S WITH CARON}" # windows-1252:9A + "\N{LATIN SMALL LETTER S WITH CEDILLA}" # windows-1250:BA + "\N{LATIN SMALL LETTER T WITH CARON}" # windows-1250:9D + "\N{LATIN SMALL LETTER Z WITH ACUTE}" # 
windows-1250:9F + "\N{LATIN SMALL LETTER Z WITH CARON}" # windows-1252:9E + "\N{LATIN SMALL LETTER Z WITH DOT ABOVE}" # windows-1250:BF + "\N{LATIN SMALL LIGATURE OE}" # windows-1252:9C + "\N{MODIFIER LETTER CIRCUMFLEX ACCENT}" # windows-1252:88 + "\N{CARON}" # windows-1250:A1 + "\N{BREVE}" # windows-1250:A2 + "\N{OGONEK}" # windows-1250:B2 + "\N{SMALL TILDE}" # windows-1252:98 + "\N{DOUBLE ACUTE ACCENT}" # windows-1250:BD + "\N{GREEK TONOS}" # windows-1253:B4 + "\N{GREEK DIALYTIKA TONOS}" # windows-1253:A1 + "\N{GREEK CAPITAL LETTER ALPHA WITH TONOS}" # windows-1253:A2 + "\N{GREEK CAPITAL LETTER EPSILON WITH TONOS}" # windows-1253:B8 + "\N{GREEK CAPITAL LETTER ETA WITH TONOS}" # windows-1253:B9 + "\N{GREEK CAPITAL LETTER IOTA WITH TONOS}" # windows-1253:BA + "\N{GREEK CAPITAL LETTER OMICRON WITH TONOS}" # windows-1253:BC + "\N{GREEK CAPITAL LETTER UPSILON WITH TONOS}" # windows-1253:BE + "\N{GREEK CAPITAL LETTER OMEGA WITH TONOS}" # windows-1253:BF + "\N{CYRILLIC CAPITAL LETTER IO}" # windows-1251:A8 + "\N{CYRILLIC CAPITAL LETTER DJE}" # windows-1251:80 + "\N{CYRILLIC CAPITAL LETTER GJE}" # windows-1251:81 + "\N{CYRILLIC CAPITAL LETTER UKRAINIAN IE}" # windows-1251:AA + "\N{CYRILLIC CAPITAL LETTER DZE}" # windows-1251:BD + "\N{CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I}" # windows-1251:B2 + "\N{CYRILLIC CAPITAL LETTER YI}" # windows-1251:AF + "\N{CYRILLIC CAPITAL LETTER JE}" # windows-1251:A3 + "\N{CYRILLIC CAPITAL LETTER LJE}" # windows-1251:8A + "\N{CYRILLIC CAPITAL LETTER NJE}" # windows-1251:8C + "\N{CYRILLIC CAPITAL LETTER TSHE}" # windows-1251:8E + "\N{CYRILLIC CAPITAL LETTER KJE}" # windows-1251:8D + "\N{CYRILLIC CAPITAL LETTER SHORT U}" # windows-1251:A1 + "\N{CYRILLIC CAPITAL LETTER DZHE}" # windows-1251:8F + "\N{CYRILLIC SMALL LETTER IO}" # windows-1251:B8 + "\N{CYRILLIC SMALL LETTER DJE}" # windows-1251:90 + "\N{CYRILLIC SMALL LETTER GJE}" # windows-1251:83 + "\N{CYRILLIC SMALL LETTER UKRAINIAN IE}" # windows-1251:BA + "\N{CYRILLIC SMALL LETTER 
DZE}" # windows-1251:BE + "\N{CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I}" # windows-1251:B3 + "\N{CYRILLIC SMALL LETTER YI}" # windows-1251:BF + "\N{CYRILLIC SMALL LETTER JE}" # windows-1251:BC + "\N{CYRILLIC SMALL LETTER LJE}" # windows-1251:9A + "\N{CYRILLIC SMALL LETTER NJE}" # windows-1251:9C + "\N{CYRILLIC SMALL LETTER TSHE}" # windows-1251:9E + "\N{CYRILLIC SMALL LETTER KJE}" # windows-1251:9D + "\N{CYRILLIC SMALL LETTER SHORT U}" # windows-1251:A2 + "\N{CYRILLIC SMALL LETTER DZHE}" # windows-1251:9F + "\N{CYRILLIC CAPITAL LETTER GHE WITH UPTURN}" # windows-1251:A5 + "\N{CYRILLIC SMALL LETTER GHE WITH UPTURN}" # windows-1251:B4 + "\N{DAGGER}" # windows-1252:86 + "\N{DOUBLE DAGGER}" # windows-1252:87 + "\N{PER MILLE SIGN}" # windows-1252:89 + "\N{SINGLE LEFT-POINTING ANGLE QUOTATION MARK}" # windows-1252:8B + "\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}" # windows-1252:9B + "\N{EURO SIGN}" # windows-1252:80 + "\N{NUMERO SIGN}" # windows-1251:B9 + "\N{TRADE MARK SIGN}" # windows-1252:99 ), } @@ -310,8 +686,6 @@ def _build_width_map() -> Dict[int, str]: | [{utf8_first_of_4}] [{utf8_continuation}]{{3}} )+ -""".format( - **UTF8_CLUES - ), + """.format(**UTF8_CLUES), re.VERBOSE, ) diff --git a/ftfy/cli.py b/ftfy/cli.py index dfb2e935..16f32967 100644 --- a/ftfy/cli.py +++ b/ftfy/cli.py @@ -4,6 +4,7 @@ import os import sys +from pathlib import Path from typing import Union from ftfy import TextFixerConfig, __version__, fix_file @@ -48,28 +49,26 @@ def main() -> None: import argparse parser = argparse.ArgumentParser( - description="ftfy (fixes text for you), version %s" % __version__ + description=f"ftfy (fixes text for you), version {__version__}" ) parser.add_argument( "filename", default="-", nargs="?", - help="The file whose Unicode is to be fixed. Defaults " - "to -, meaning standard input.", + help="The file whose Unicode is to be fixed. 
Defaults to -, meaning standard input.", ) parser.add_argument( "-o", "--output", type=str, default="-", - help="The file to output to. Defaults to -, meaning " "standard output.", + help="The file to output to. Defaults to -, meaning standard output.", ) parser.add_argument( "-g", "--guess", action="store_true", - help="Ask ftfy to guess the encoding of your input. " - "This is risky. Overrides -e.", + help="Ask ftfy to guess the encoding of your input. This is risky. Overrides -e.", ) parser.add_argument( "-e", @@ -83,15 +82,13 @@ def main() -> None: "--normalization", type=str, default="NFC", - help="The normalization of Unicode to apply. " - 'Defaults to NFC. Can be "none".', + help='The normalization of Unicode to apply. Defaults to NFC. Can be "none".', ) parser.add_argument( "--preserve-entities", action="store_true", help="Leave HTML entities as they are. The default " - "is to decode them, as long as no HTML tags " - "have appeared in the file.", + "is to decode them, as long as no HTML tags have appeared in the file.", ) args = parser.parse_args() @@ -105,7 +102,7 @@ def main() -> None: # whatever encoding is necessary. 
file = sys.stdin.buffer else: - file = open(args.filename, "rb") + file = Path(args.filename).open("rb") if args.output == "-": outfile = sys.stdout @@ -113,17 +110,14 @@ def main() -> None: if os.path.realpath(args.output) == os.path.realpath(args.filename): sys.stderr.write(SAME_FILE_ERROR_TEXT) sys.exit(1) - outfile = open(args.output, "w", encoding="utf-8") + outfile = Path(args.output).open("w", encoding="utf-8") normalization = args.normalization if normalization.lower() == "none": normalization = None unescape_html: Union[str, bool] - if args.preserve_entities: - unescape_html = False - else: - unescape_html = "auto" + unescape_html = False if args.preserve_entities else "auto" config = TextFixerConfig(unescape_html=unescape_html, normalization=normalization) diff --git a/ftfy/fixes.py b/ftfy/fixes.py index a248bc83..41d3c2f8 100644 --- a/ftfy/fixes.py +++ b/ftfy/fixes.py @@ -14,7 +14,8 @@ import html import re import warnings -from typing import Any, List, Match, Tuple +from re import Match +from typing import Any import ftfy from ftfy.badness import is_bad @@ -57,7 +58,7 @@ def fix_encoding(text: str) -> str: return ftfy.fix_encoding(text) -def apply_plan(text: str, plan: List[Tuple[str, str]]) -> str: +def apply_plan(text: str, plan: list[tuple[str, str]]) -> str: """ Deprecated copy of `ftfy.apply_plan()`. """ @@ -474,7 +475,7 @@ def replace_lossy_sequences(byts: bytes) -> bytes: This is used as a transcoder within `fix_encoding`. """ - return LOSSY_UTF8_RE.sub("\ufffd".encode("utf-8"), byts) + return LOSSY_UTF8_RE.sub("\ufffd".encode(), byts) def decode_inconsistent_utf8(text: str) -> str: diff --git a/ftfy/formatting.py b/ftfy/formatting.py index 18df64b0..42955588 100644 --- a/ftfy/formatting.py +++ b/ftfy/formatting.py @@ -99,7 +99,8 @@ def display_ljust(text: str, width: int, fillchar: str = " ") -> str: correct if you're viewing this code or documentation in a Web browser. 
""" if character_width(fillchar) != 1: - raise ValueError("The padding character must have display width 1") + msg = "The padding character must have display width 1" + raise ValueError(msg) text_width = monospaced_width(text) if text_width == -1: @@ -129,7 +130,8 @@ def display_rjust(text: str, width: int, fillchar: str = " ") -> str: ▒▒▒▒▒▒▒▒ちゃぶ台返し """ if character_width(fillchar) != 1: - raise ValueError("The padding character must have display width 1") + msg = "The padding character must have display width 1" + raise ValueError(msg) text_width = monospaced_width(text) if text_width == -1: @@ -154,7 +156,8 @@ def display_center(text: str, width: int, fillchar: str = " ") -> str: ▒▒▒▒ちゃぶ台返し▒▒▒▒ """ if character_width(fillchar) != 1: - raise ValueError("The padding character must have display width 1") + msg = "The padding character must have display width 1" + raise ValueError(msg) text_width = monospaced_width(text) if text_width == -1: diff --git a/mypy.ini b/mypy.ini index 43839a28..278ee780 100644 --- a/mypy.ini +++ b/mypy.ini @@ -14,7 +14,7 @@ warn_redundant_casts = True warn_return_any = True warn_unused_configs = True warn_unused_ignores = True -python_version = 3.8 +python_version = 3.9 [mypy-wcwidth] ignore_missing_imports = True diff --git a/notes/mysteries.txt b/notes/mysteries.txt new file mode 100644 index 00000000..23e4a9ed --- /dev/null +++ b/notes/mysteries.txt @@ -0,0 +1,10 @@ +on https://www.nipette.com/article-6358031.html, a comment is signed 'MÃ\x83©Ã\x82¬Ã\x82¡nie'. +This happens to be triple-UTF-8 for 'M鬡nie', but that's probably not the name they meant. + +What exactly did https://www.horoskopy-horoskop.cz/clanek/431-numerologicky-vyznam-jmena-jaromir +mean when they said 'TadeÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂáÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂá' ? + +https://mtlurb.com/tags/arbres/ +'montrã©al' probably isn't in cp850, but what is it? 
+ + diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index 800fc215..00000000 --- a/poetry.lock +++ /dev/null @@ -1,763 +0,0 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. - -[[package]] -name = "alabaster" -version = "0.7.13" -description = "A configurable sidebar-enabled Sphinx theme" -optional = false -python-versions = ">=3.6" -files = [ - {file = "alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3"}, - {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"}, -] - -[[package]] -name = "babel" -version = "2.15.0" -description = "Internationalization utilities" -optional = false -python-versions = ">=3.8" -files = [ - {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, - {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, -] - -[package.dependencies] -pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} - -[package.extras] -dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] - -[[package]] -name = "beautifulsoup4" -version = "4.12.3" -description = "Screen-scraping library" -optional = false -python-versions = ">=3.6.0" -files = [ - {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, - {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, -] - -[package.dependencies] -soupsieve = ">1.2" - -[package.extras] -cchardet = ["cchardet"] -chardet = ["chardet"] -charset-normalizer = ["charset-normalizer"] -html5lib = ["html5lib"] -lxml = ["lxml"] - -[[package]] -name = "certifi" -version = "2024.7.4" -description = "Python package for providing Mozilla's CA Bundle." 
-optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, - {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, -] - -[[package]] -name = "charset-normalizer" -version = "3.3.2" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -optional = false -python-versions = ">=3.7.0" -files = [ - {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, - 
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, - {file = 
"charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = 
"sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, - {file = 
"charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, - {file = 
"charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash 
= "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, - {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, -] - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." 
-optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "docutils" -version = "0.20.1" -description = "Docutils -- Python Documentation Utilities" -optional = false -python-versions = ">=3.7" -files = [ - {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, - {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.2.2" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, - {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, -] - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "furo" -version = "2024.7.18" -description = "A clean customisable Sphinx documentation theme." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "furo-2024.7.18-py3-none-any.whl", hash = "sha256:b192c7c1f59805494c8ed606d9375fdac6e6ba8178e747e72bc116745fb7e13f"}, - {file = "furo-2024.7.18.tar.gz", hash = "sha256:37b08c5fccc95d46d8712c8be97acd46043963895edde05b0f4f135d58325c83"}, -] - -[package.dependencies] -beautifulsoup4 = "*" -pygments = ">=2.7" -sphinx = ">=6.0,<8.0" -sphinx-basic-ng = ">=1.0.0.beta2" - -[[package]] -name = "idna" -version = "3.7" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.5" -files = [ - {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, - {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, -] - -[[package]] -name = "imagesize" -version = "1.4.1" -description = "Getting image size from png/jpeg/jpeg2000/gif file" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, - {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, -] - -[[package]] -name = "importlib-metadata" -version = "8.2.0" -description = "Read metadata from Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "importlib_metadata-8.2.0-py3-none-any.whl", hash = "sha256:11901fa0c2f97919b288679932bb64febaeacf289d18ac84dd68cb2e74213369"}, - {file = "importlib_metadata-8.2.0.tar.gz", hash = "sha256:72e8d4399996132204f9a16dcc751af254a48f8d1b20b9ff0f98d4a8f901e73d"}, -] - -[package.dependencies] -zipp = ">=0.5" - -[package.extras] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -perf = ["ipython"] -test = ["flufl.flake8", 
"importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] - -[[package]] -name = "iniconfig" -version = "2.0.0" -description = "brain-dead simple config-ini parsing" -optional = false -python-versions = ">=3.7" -files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] - -[[package]] -name = "jinja2" -version = "3.1.4" -description = "A very fast and expressive template engine." -optional = false -python-versions = ">=3.7" -files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "markupsafe" -version = "2.1.5" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = 
"MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, - {file = 
"MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, -] - -[[package]] -name = "mypy" -version = "1.11.1" -description = "Optional static typing for Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "mypy-1.11.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a32fc80b63de4b5b3e65f4be82b4cfa362a46702672aa6a0f443b4689af7008c"}, - {file = "mypy-1.11.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:c1952f5ea8a5a959b05ed5f16452fddadbaae48b5d39235ab4c3fc444d5fd411"}, - {file = "mypy-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1e30dc3bfa4e157e53c1d17a0dad20f89dc433393e7702b813c10e200843b03"}, - {file = "mypy-1.11.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c63350af88f43a66d3dfeeeb8d77af34a4f07d760b9eb3a8697f0386c7590b4"}, - {file = "mypy-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:a831671bad47186603872a3abc19634f3011d7f83b083762c942442d51c58d58"}, - {file = "mypy-1.11.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7b6343d338390bb946d449677726edf60102a1c96079b4f002dedff375953fc5"}, - {file = "mypy-1.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4fe9f4e5e521b458d8feb52547f4bade7ef8c93238dfb5bbc790d9ff2d770ca"}, - {file = "mypy-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:886c9dbecc87b9516eff294541bf7f3655722bf22bb898ee06985cd7269898de"}, - {file = "mypy-1.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca4a60e1dd9fd0193ae0067eaeeb962f2d79e0d9f0f66223a0682f26ffcc809"}, - {file = "mypy-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:0bd53faf56de9643336aeea1c925012837432b5faf1701ccca7fde70166ccf72"}, - {file = "mypy-1.11.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f39918a50f74dc5969807dcfaecafa804fa7f90c9d60506835036cc1bc891dc8"}, - {file = "mypy-1.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bc71d1fb27a428139dd78621953effe0d208aed9857cb08d002280b0422003a"}, - {file = "mypy-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b868d3bcff720dd7217c383474008ddabaf048fad8d78ed948bb4b624870a417"}, - {file = "mypy-1.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a707ec1527ffcdd1c784d0924bf5cb15cd7f22683b919668a04d2b9c34549d2e"}, - {file = "mypy-1.11.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:64f4a90e3ea07f590c5bcf9029035cf0efeae5ba8be511a8caada1a4893f5525"}, - {file = "mypy-1.11.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:749fd3213916f1751fff995fccf20c6195cae941dc968f3aaadf9bb4e430e5a2"}, - {file = "mypy-1.11.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b639dce63a0b19085213ec5fdd8cffd1d81988f47a2dec7100e93564f3e8fb3b"}, - {file = "mypy-1.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c956b49c5d865394d62941b109728c5c596a415e9c5b2be663dd26a1ff07bc0"}, - {file = "mypy-1.11.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45df906e8b6804ef4b666af29a87ad9f5921aad091c79cc38e12198e220beabd"}, - {file = "mypy-1.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:d44be7551689d9d47b7abc27c71257adfdb53f03880841a5db15ddb22dc63edb"}, - {file = "mypy-1.11.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2684d3f693073ab89d76da8e3921883019ea8a3ec20fa5d8ecca6a2db4c54bbe"}, - {file = "mypy-1.11.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:79c07eb282cb457473add5052b63925e5cc97dfab9812ee65a7c7ab5e3cb551c"}, - {file = "mypy-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11965c2f571ded6239977b14deebd3f4c3abd9a92398712d6da3a772974fad69"}, - {file = "mypy-1.11.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a2b43895a0f8154df6519706d9bca8280cda52d3d9d1514b2d9c3e26792a0b74"}, - {file = "mypy-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:1a81cf05975fd61aec5ae16501a091cfb9f605dc3e3c878c0da32f250b74760b"}, - {file = "mypy-1.11.1-py3-none-any.whl", hash = "sha256:0624bdb940255d2dd24e829d99a13cfeb72e4e9031f9492148f410ed30bcab54"}, - {file = "mypy-1.11.1.tar.gz", hash = "sha256:f404a0b069709f18bbdb702eb3dcfe51910602995de00bd39cea3050b5772d08"}, -] - -[package.dependencies] -mypy-extensions = ">=1.0.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = ">=4.6.0" - -[package.extras] -dmypy = ["psutil 
(>=4.0)"] -install-types = ["pip"] -mypyc = ["setuptools (>=50)"] -reports = ["lxml"] - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." -optional = false -python-versions = ">=3.5" -files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, -] - -[[package]] -name = "packaging" -version = "24.1" -description = "Core utilities for Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, -] - -[[package]] -name = "pluggy" -version = "1.5.0" -description = "plugin and hook calling mechanisms for python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, - {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, -] - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "pygments" -version = "2.18.0" -description = "Pygments is a syntax highlighting package written in Python." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, - {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, -] - -[package.extras] -windows-terminal = ["colorama (>=0.4.6)"] - -[[package]] -name = "pytest" -version = "8.3.2" -description = "pytest: simple powerful testing with Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"}, - {file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=1.5,<2" -tomli = {version = ">=1", markers = "python_version < \"3.11\""} - -[package.extras] -dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] - -[[package]] -name = "pytz" -version = "2024.1" -description = "World timezone definitions, modern and historical" -optional = false -python-versions = "*" -files = [ - {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, - {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, -] - -[[package]] -name = "requests" -version = "2.32.3" -description = "Python HTTP for Humans." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, - {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "ruff" -version = "0.5.6" -description = "An extremely fast Python linter and code formatter, written in Rust." -optional = false -python-versions = ">=3.7" -files = [ - {file = "ruff-0.5.6-py3-none-linux_armv6l.whl", hash = "sha256:a0ef5930799a05522985b9cec8290b185952f3fcd86c1772c3bdbd732667fdcd"}, - {file = "ruff-0.5.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b652dc14f6ef5d1552821e006f747802cc32d98d5509349e168f6bf0ee9f8f42"}, - {file = "ruff-0.5.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:80521b88d26a45e871f31e4b88938fd87db7011bb961d8afd2664982dfc3641a"}, - {file = "ruff-0.5.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9bc8f328a9f1309ae80e4d392836e7dbc77303b38ed4a7112699e63d3b066ab"}, - {file = "ruff-0.5.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4d394940f61f7720ad371ddedf14722ee1d6250fd8d020f5ea5a86e7be217daf"}, - {file = "ruff-0.5.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:111a99cdb02f69ddb2571e2756e017a1496c2c3a2aeefe7b988ddab38b416d36"}, - {file = "ruff-0.5.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e395daba77a79f6dc0d07311f94cc0560375ca20c06f354c7c99af3bf4560c5d"}, - {file = "ruff-0.5.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c476acb43c3c51e3c614a2e878ee1589655fa02dab19fe2db0423a06d6a5b1b6"}, - {file = 
"ruff-0.5.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e2ff8003f5252fd68425fd53d27c1f08b201d7ed714bb31a55c9ac1d4c13e2eb"}, - {file = "ruff-0.5.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c94e084ba3eaa80c2172918c2ca2eb2230c3f15925f4ed8b6297260c6ef179ad"}, - {file = "ruff-0.5.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1f77c1c3aa0669fb230b06fb24ffa3e879391a3ba3f15e3d633a752da5a3e670"}, - {file = "ruff-0.5.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f908148c93c02873210a52cad75a6eda856b2cbb72250370ce3afef6fb99b1ed"}, - {file = "ruff-0.5.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:563a7ae61ad284187d3071d9041c08019975693ff655438d8d4be26e492760bd"}, - {file = "ruff-0.5.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:94fe60869bfbf0521e04fd62b74cbca21cbc5beb67cbb75ab33fe8c174f54414"}, - {file = "ruff-0.5.6-py3-none-win32.whl", hash = "sha256:e6a584c1de6f8591c2570e171cc7ce482bb983d49c70ddf014393cd39e9dfaed"}, - {file = "ruff-0.5.6-py3-none-win_amd64.whl", hash = "sha256:d7fe7dccb1a89dc66785d7aa0ac283b2269712d8ed19c63af908fdccca5ccc1a"}, - {file = "ruff-0.5.6-py3-none-win_arm64.whl", hash = "sha256:57c6c0dd997b31b536bff49b9eee5ed3194d60605a4427f735eeb1f9c1b8d264"}, - {file = "ruff-0.5.6.tar.gz", hash = "sha256:07c9e3c2a8e1fe377dd460371c3462671a728c981c3205a5217291422209f642"}, -] - -[[package]] -name = "snowballstemmer" -version = "2.2.0" -description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." -optional = false -python-versions = "*" -files = [ - {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, - {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, -] - -[[package]] -name = "soupsieve" -version = "2.5" -description = "A modern CSS selector implementation for Beautiful Soup." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, - {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, -] - -[[package]] -name = "sphinx" -version = "7.1.2" -description = "Python documentation generator" -optional = false -python-versions = ">=3.8" -files = [ - {file = "sphinx-7.1.2-py3-none-any.whl", hash = "sha256:d170a81825b2fcacb6dfd5a0d7f578a053e45d3f2b153fecc948c37344eb4cbe"}, - {file = "sphinx-7.1.2.tar.gz", hash = "sha256:780f4d32f1d7d1126576e0e5ecc19dc32ab76cd24e950228dcf7b1f6d3d9e22f"}, -] - -[package.dependencies] -alabaster = ">=0.7,<0.8" -babel = ">=2.9" -colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -docutils = ">=0.18.1,<0.21" -imagesize = ">=1.3" -importlib-metadata = {version = ">=4.8", markers = "python_version < \"3.10\""} -Jinja2 = ">=3.0" -packaging = ">=21.0" -Pygments = ">=2.13" -requests = ">=2.25.0" -snowballstemmer = ">=2.0" -sphinxcontrib-applehelp = "*" -sphinxcontrib-devhelp = "*" -sphinxcontrib-htmlhelp = ">=2.0.0" -sphinxcontrib-jsmath = "*" -sphinxcontrib-qthelp = "*" -sphinxcontrib-serializinghtml = ">=1.1.5" - -[package.extras] -docs = ["sphinxcontrib-websupport"] -lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"] -test = ["cython", "filelock", "html5lib", "pytest (>=4.6)"] - -[[package]] -name = "sphinx-basic-ng" -version = "1.0.0b2" -description = "A modern skeleton for Sphinx themes." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b"}, - {file = "sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9"}, -] - -[package.dependencies] -sphinx = ">=4.0" - -[package.extras] -docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"] - -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.4" -description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" -optional = false -python-versions = ">=3.8" -files = [ - {file = "sphinxcontrib-applehelp-1.0.4.tar.gz", hash = "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e"}, - {file = "sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.2" -description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." 
-optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"}, - {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.1" -description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" -optional = false -python-versions = ">=3.8" -files = [ - {file = "sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff"}, - {file = "sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["html5lib", "pytest"] - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -description = "A sphinx extension which renders display math in HTML via JavaScript" -optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, - {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, -] - -[package.extras] -test = ["flake8", "mypy", "pytest"] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.3" -description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." 
-optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"}, - {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "1.1.5" -description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)." -optional = false -python-versions = ">=3.5" -files = [ - {file = "sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"}, - {file = "sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.7" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[[package]] -name = "urllib3" -version = "2.2.2" -description = "HTTP library with thread-safe 
connection pooling, file post, and more." -optional = false -python-versions = ">=3.8" -files = [ - {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, - {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "wcwidth" -version = "0.2.13" -description = "Measures the displayed width of unicode strings in a terminal" -optional = false -python-versions = "*" -files = [ - {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, - {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, -] - -[[package]] -name = "zipp" -version = "3.19.2" -description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false -python-versions = ">=3.8" -files = [ - {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, - {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, -] - -[package.extras] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] - -[metadata] -lock-version = "2.0" -python-versions = ">=3.8.1,<4" -content-hash = "d3d8b680e9511dc48a9ce073ee651541f50b4904b4b1a410266242a83059b98f" diff --git a/poetry.toml b/poetry.toml deleted file 
mode 100644 index 53b35d37..00000000 --- a/poetry.toml +++ /dev/null @@ -1,3 +0,0 @@ -[virtualenvs] -create = true -in-project = true diff --git a/pyproject.toml b/pyproject.toml index 1149606e..130dec28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,50 +1,53 @@ -[tool.poetry] +[project] name = "ftfy" -version = "6.2.3" +version = "6.3.1" description = "Fixes mojibake and other problems with Unicode, after the fact" -homepage = "https://ftfy.readthedocs.io/en/latest/" -documentation = "https://ftfy.readthedocs.io/en/latest/" -repository = "https://github.com/rspeer/python-ftfy" -authors = ["Robyn Speer "] -license = "Apache-2.0" -include = [ - { path = "README.md", format = "sdist" }, - { path = "CHANGELOG.md", format = "sdist" }, - { path = "tests", format = "sdist" }, -] +authors = [{ name = "Robyn Speer", email = "rspeer@arborelia.net" }] +license = { text = "Apache-2.0" } readme = "README.md" +dependencies = ["wcwidth"] +requires-python = ">=3.9" -[tool.poetry.dependencies] -python = ">=3.8.1,<4" -wcwidth = "^0.2.12" - -[tool.poetry.group.dev.dependencies] -mypy = "^1.7.0" -Sphinx = ">=7, <8" -furo = ">=2024.7.18" -pytest = "^8.3.2" -ruff = "^0.5.6" - -[tool.poetry.scripts] +[project.scripts] ftfy = "ftfy.cli:main" -[tool.poetry.urls] +[project.urls] +Homepage = "https://ftfy.readthedocs.io/en/latest/" +Documentation = "https://ftfy.readthedocs.io/en/latest/" +Repository = "https://github.com/rspeer/python-ftfy" Issues = "https://github.com/rspeer/python-ftfy/issues/" Changelog = "https://github.com/rspeer/python-ftfy/blob/main/CHANGELOG.md" -Cohost = "https://cohost.org/arborelia" +Blog = "https://posts.arborelia.net" [build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.sdist] +exclude = ["^.github/", "scripts/", ".readthedocs.yaml", "notes/", "notebook/"] + +[tool.uv] +dev-dependencies = [ + "Sphinx >=7, <8", + "furo >= 
2024.7.18", + "pytest >= 8.3.2, < 9", + "ruff", +] [tool.ruff] -exclude = ["badness.py"] +exclude = ["badness.py", "notebook"] line-length = 100 -target-version = "py38" +target-version = "py39" [tool.ruff.lint] -select = ["B", "F", "I", "N", "ANN"] -ignore = ["ANN101", "ANN401"] +select = ["B", "F", "I", "N", "ANN", "UP", "RUF", "C4", "EM", "PIE", "RSE", "TCH", "PTH", "FURB"] +ignore = [ + "ANN101", + "ANN401", + "RUF001", # complains about Unicode characters that belong in my docstrings + "RUF002", # complains about Unicode characters that belong in my docstrings + "PIE808", # explicitly starting ranges at 0 sometimes helps with readability +] [tool.ruff.lint.per-file-ignores] "tests/*" = ["ANN"] diff --git a/scripts/char_data_table.py b/scripts/char_data_table.py new file mode 100644 index 00000000..d063d1ac --- /dev/null +++ b/scripts/char_data_table.py @@ -0,0 +1,78 @@ +""" +Used to regenerate character tables in ftfy/chardata.py with explanatory comments. +""" + +import unicodedata +from dataclasses import dataclass + +from ftfy.chardata import UTF8_CLUES + + +@dataclass +class CharData: + name: str + codept: int + encodings: list[tuple[str, int]] + + def sort_key(self) -> tuple[int, str, int]: + if self.name.startswith("LATIN "): + return (0, self.name, self.codept) + return (1, "", self.codept) + + +SAFE_ENCODINGS = [ + "latin-1", + "windows-1252", + "windows-1251", + "windows-1250", + "windows-1253", + "windows-1254", + "windows-1257", +] + + +def show_char_table(chars: str, byte_min: int = 0, byte_max: int = 0xFF) -> None: + char_data: list[CharData] = [] + for char in chars: + name = unicodedata.name(char, "") + codept = ord(char) + encodings: list[tuple[str, int]] = [] + for encoding in SAFE_ENCODINGS: + try: + encoded: bytes = char.encode(encoding) + byte: int = encoded[0] + encodings.append((encoding, byte)) + except UnicodeEncodeError: + pass + if encodings: + char_data.append(CharData(name=name, codept=codept, encodings=encodings)) + else: + 
print(f"No relevant encoding for {codept=}, {name=}") + char_data.sort(key=CharData.sort_key) + for cd in char_data: + encoding_info: list[str] = [] + for encoding, byte in cd.encodings: + if byte_min <= byte <= byte_max: + info_str = f"{encoding}:{byte:X}" + encoding_info.append(info_str) + encoding_explanation = encoding_info[0] if encoding_info else "???" + print(f' "\\N{{{cd.name}}}" # {encoding_explanation}') + + +def run() -> None: + print("# utf8_first_of_2") + show_char_table(UTF8_CLUES["utf8_first_of_2"], 0xC2, 0xDF) + print("# utf8_first_of_3") + show_char_table(UTF8_CLUES["utf8_first_of_3"], 0xE0, 0xEF) + print("# utf8_first_of_4") + show_char_table(UTF8_CLUES["utf8_first_of_4"], 0xF0, 0xF3) + print("# utf8_continuation") + print(r' "\x80-\xbf"') + show_char_table(UTF8_CLUES["utf8_continuation"][3:], 0x80, 0xBF) + print("# utf8_continuation_strict") + print(r' "\x80-\xbf"') + show_char_table(UTF8_CLUES["utf8_continuation_strict"][3:], 0x80, 0xBF) + + +if __name__ == "__main__": + run() diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b7e47898..00000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[aliases] -test=pytest diff --git a/setup.py b/setup.py deleted file mode 100644 index 10789bf1..00000000 --- a/setup.py +++ /dev/null @@ -1,56 +0,0 @@ -import sys - -from setuptools import setup - -# Before we get to the rest of setup, with dependencies on setuptools and the -# Python 3 standard library, let's make sure we're not on Python 2 and provide -# a helpful message if we are. - -PY2_MESSAGE = "Python 2 is no longer supported. Please upgrade." 
- - -if sys.version_info[0] < 3: - print(PY2_MESSAGE) - readable_version = sys.version.split(" ")[0] - print("The version of Python you're running is: %s" % readable_version) - print("Python is running from: %r" % sys.executable) - sys.exit(1) - - -DESCRIPTION = open("README.md", encoding="utf-8").read() - -setup( - name="ftfy", - version="6.2.3", - maintainer="Robyn Speer", - maintainer_email="rspeer@arborelia.net", - license="Apache 2.0", - url="http://github.com/rspeer/python-ftfy", - platforms=["any"], - description="Fixes some problems with Unicode text after the fact", - long_description=DESCRIPTION, - long_description_content_type="text/markdown", - packages=["ftfy", "ftfy.bad_codecs"], - install_requires=["wcwidth"], - tests_require=["pytest"], - python_requires=">=3.8", - classifiers=[ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Text Processing :: Filters", - "Development Status :: 5 - Production/Stable", - ], - entry_points={"console_scripts": ["ftfy = ftfy.cli:main"]}, - extras_require={"docs": ["furo", "sphinx"]}, - project_urls={ - "Documentation": "http://ftfy.readthedocs.io", - }, -) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test-cases/README.md b/tests/test-cases/README.md new file mode 100644 index 00000000..673bd5f2 --- /dev/null +++ b/tests/test-cases/README.md @@ -0,0 +1,20 @@ +# ftfy test cases + +This directory contains JSON files with test cases for ftfy. 
Many of them are real mojibake found in the wild, such as by listening to the Twitter firehose (when that existed), searching through the OSCAR web crawl, or in issue reports from users. + +Cases labeled "synthetic" were not found in the wild, but were instead constructed to test a particular edge case. + +Cases labeled "negative" are not mojibake but look like they could be. We're testing that ftfy does not alter the text (except for its usual processing such as un-curling quotes). + +`known-failures.json` contains cases that we would do better at with an improved heuristic. Most of these are false negatives, where ftfy does not figure out how to fix the text. ftfy aims to have no false positives, but there is one synthetic false positive in `known-failures.json`. + +## Structure of a test case + +A test case contains the following fields: + +- `label`: A description of the test case, shown when pytest runs in verbose mode. +- `comment`: Further details on the test case because JSON doesn't have comments. +- `original`: The text to run through ftfy. +- `fixed-encoding` (optional): the expected result of `ftfy.fix_encoding(original)`. If unspecified, uses the value from `fixed`. +- `fixed`: the expected result of `ftfy.fix_text(original)`. +- `expect`: "pass" for test cases that should pass, or "fail" for known failures. 
\ No newline at end of file diff --git a/tests/test-cases/in-the-wild.json b/tests/test-cases/in-the-wild.json new file mode 100644 index 00000000..b40c838c --- /dev/null +++ b/tests/test-cases/in-the-wild.json @@ -0,0 +1,451 @@ +[ + { + "label": "Low-codepoint emoji", + "comment": "From the ancient era before widespread emoji support on Twitter", + "original": "He's Justinâ\u009d¤", + "fixed": "He's Justin❤", + "expect": "pass" + }, + { + "label": "UTF-8 / MacRoman mix-up about smurfs", + "original": "Le Schtroumpf Docteur conseille g√¢teaux et baies schtroumpfantes pour un r√©gime √©quilibr√©.", + "fixed": "Le Schtroumpf Docteur conseille gâteaux et baies schtroumpfantes pour un régime équilibré.", + "expect": "pass" + }, + { + "label": "Checkmark that almost looks okay as mojibake", + "original": "✔ No problems", + "fixed": "✔ No problems", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1251 Russian mixup about futbol", + "original": "РґРѕСЂРѕРіРµ Р\u0098Р·-РїРѕРґ #футбол", + "fixed": "дороге Из-под #футбол", + "expect": "pass" + }, + { + "label": "Latin-1 / Windows-1252 mixup in German", + "original": "\u0084Handwerk bringt dich überall hin\u0093: Von der YOU bis nach Monaco", + "fixed-encoding": "„Handwerk bringt dich überall hin“: Von der YOU bis nach Monaco", + "fixed": "\"Handwerk bringt dich überall hin\": Von der YOU bis nach Monaco", + "expect": "pass" + }, + { + "label": "Latin-1 / Windows-1252 mixup of the replacement character", + "original": "Some comments may be republished on the website or in the newspaper � email addresses will not be published.", + "fixed": "Some comments may be republished on the website or in the newspaper � email addresses will not be published.", + "expect": "pass" + }, + { + "label": "CESU-8 / Windows-1252 emoji", + "original": "Hi guys í ½í¸\u008d", + "fixed": "Hi guys 😍", + "expect": "pass" + }, + { + "label": "CESU-8 / Latin-1 emoji", + "original": "hihi RT username: â\u0098ºí ½í¸\u0098", + "fixed": "hihi RT 
username: ☺😘", + "expect": "pass" + }, + { + "label": "Latin-1 / Windows-1252 mixup in Turkish", + "original": "Beta Haber: Hırsızı Büyü Korkuttu", + "fixed": "Beta Haber: Hırsızı Büyü Korkuttu", + "expect": "pass" + }, + { + "label": "Latin-1 / Windows-1252 mixup in İstanbul (issue #192)", + "original": "İstanbul", + "fixed": "İstanbul", + "expect": "pass" + }, + { + "label": "Latin-1 / Windows-1252 mixup in German (issue #188)", + "original": "RUF MICH ZURÜCK", + "fixed": "RUF MICH ZURÜCK", + "expect": "pass" + }, + { + "label": "Latin-1 / Windows-1252 mixup in Rīga (issue #192)", + "original": "RÄ«ga", + "fixed": "Rīga", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1251 mixed up twice in Russian", + "original": "приятности. РІСњВ¤", + "fixed": "приятности. ❤", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixed up twice in Malay", + "original": "Kayanya laptopku error deh, soalnya tiap mau ngetik deket-deket kamu font yg keluar selalu Times New “ Romanceâ€Â\u009d.", + "fixed-encoding": "Kayanya laptopku error deh, soalnya tiap mau ngetik deket-deket kamu font yg keluar selalu Times New “ Romance”.", + "fixed": "Kayanya laptopku error deh, soalnya tiap mau ngetik deket-deket kamu font yg keluar selalu Times New \" Romance\".", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixed up twice in naming Iggy Pop", + "original": "Iggy Pop (né Jim Osterberg)", + "fixed": "Iggy Pop (né Jim Osterberg)", + "expect": "pass" + }, + { + "label": "Left quote is UTF-8, right quote is Latin-1, both encoded in Windows-1252", + "original": "Direzione Pd, ok â\u0080\u009csenza modifiche\u0094 all'Italicum.", + "fixed-encoding": "Direzione Pd, ok “senza modifiche” all'Italicum.", + "fixed": "Direzione Pd, ok \"senza modifiche\" all'Italicum.", + "expect": "pass" + }, + { + "label": "UTF-8 / sloppy Windows-1252 mixed up twice in a triumphant emoticon", + "original": "selamat berpuasa sob (Ã\u00a0¸‡'̀⌣'ÃŒÂ\u0081)Ã\u00a0¸‡", + "fixed": "selamat 
berpuasa sob (ง'̀⌣'́)ง", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixed up three times", + "original": "The Mona Lisa doesn’t have eyebrows.", + "fixed-encoding": "The Mona Lisa doesn’t have eyebrows.", + "fixed": "The Mona Lisa doesn't have eyebrows.", + "expect": "pass" + }, + { + "label": "UTF-8 / Codepage 437 mixup in Russian", + "original": "#╨┐╤Ç╨░╨▓╨╕╨╗╤î╨╜╨╛╨╡╨┐╨╕╤é╨░╨╜╨╕╨╡", + "fixed": "#правильноепитание", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixup in French", + "original": "Hôtel de Police", + "fixed": "Hôtel de Police", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1250 mixup in French", + "original": "Liège Avenue de l'HĂ´pital", + "fixed": "Liège Avenue de l'Hôpital", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixup in Vietnamese", + "original": "Tại sao giá hạt sầu riêng lại lên giá?", + "fixed": "Tại sao giá hạt sầu riêng lại lên giá?", + "expect": "pass" + }, + { + "label": "Science! Mid-word Greek letter gets fixed correctly", + "original": "Humanized HLA-DR4.RagKO.IL2RγcKO.NOD (DRAG) mice sustain the complex vertebrate life cycle of Plasmodium falciparum malaria.", + "fixed": "Humanized HLA-DR4.RagKO.IL2RγcKO.NOD (DRAG) mice sustain the complex vertebrate life cycle of Plasmodium falciparum malaria.", + "expect": "pass" + }, + { + "label": "For goodness' sake. We can come close to fixing this, but fail in the last step", + "original": "ItÃ?¢â?¬â?¢s classic. ItÃ?¢â?¬â?¢s epic. ItÃ?¢â?¬â?¢s ELIZABETH BENNET for goodnessÃ?¢â?¬â?¢ sake!", + "fixed": "It�¢��s classic. It�¢��s epic. 
It�¢��s ELIZABETH BENNET for goodness�¢�� sake!", + "expect": "pass" + }, + { + "label": "lossy UTF-8 / Windows-1250 mixup in Spanish", + "original": "Europa, Asia, Ă�frica, Norte, AmĂ©rica Central y del Sur, Australia y OceanĂ­a", + "fixed": "Europa, Asia, �frica, Norte, América Central y del Sur, Australia y Oceanía", + "expect": "pass" + }, + { + "label": "UTF-8 / sloppy Windows-1250 mixup in English", + "original": "It was named „scars´ stones“ after the rock-climbers who got hurt while climbing on it.", + "fixed-encoding": "It was named\u00a0„scars´ stones“ after the rock-climbers who got hurt while climbing on it.", + "fixed": "It was named\u00a0\"scars´ stones\" after the rock-climbers who got hurt while climbing on it.", + "expect": "pass" + }, + { + "label": "The same text as above, but as a UTF-8 / ISO-8859-2 mixup", + "original": "It was namedÂ\u00a0â\u0080\u009escars´ stonesâ\u0080\u009c after the rock-climbers who got hurt while climbing on it.", + "fixed-encoding": "It was named\u00a0„scars´ stones“ after the rock-climbers who got hurt while climbing on it.", + "fixed": "It was named\u00a0\"scars´ stones\" after the rock-climbers who got hurt while climbing on it.", + "expect": "pass" + }, + { + "label": "UTF-8 / ISO-8859-2 mixup in Czech", + "comment": "This says 'I've had enough of the third millennium', which is great because it involves software decisions made in the second", + "original": "MĂĄm dost tĹ\u0099etĂ\u00adho tisĂ\u00adciletĂ\u00ad", + "fixed": "Mám dost třetího tisíciletí", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixup in mixed French and Arabic", + "comment": "A difficult test case that can depend on the order that steps are applied", + "original": "À tous mes frères et soeurs dans la syrienneté comme dans l’humanité, sans discrimination aucune, je vous souhaite bonne fête عيد سعيد.Que la paix, la liberté, l’égalité, la fraternité et la dignité soient avec vous.Pardonnez ce ton un peu ecclésiastique.", + 
"fixed-encoding": "À tous mes frères et soeurs dans la syrienneté comme dans l’humanité, sans discrimination aucune, je vous souhaite bonne fête عيد سعيد.Que la paix, la liberté, l’égalité, la fraternité et la dignité soient avec vous.Pardonnez ce ton un peu ecclésiastique.", + "fixed": "À tous mes frères et soeurs dans la syrienneté comme dans l'humanité, sans discrimination aucune, je vous souhaite bonne fête عيد سعيد.Que la paix, la liberté, l'égalité, la fraternité et la dignité soient avec vous.Pardonnez ce ton un peu ecclésiastique.", + "expect": "pass" + }, + { + "label": "UTF-8 / sloppy Windows-1250 mixup in Romanian", + "original": "vedere Ă®nceĹŁoĹźatÄ\u0083", + "fixed": "vedere înceţoşată", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1250 mixup in Slovak", + "original": "NapĂ\u00adšte nám !", + "fixed": "Napíšte nám !", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixup in Spanish", + "original": "DOS AÑOS", + "fixed": "DOS AÑOS", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 followed by UTF-8 / Windows-1251", + "original": "a bigger-than-expected £5.8bn rights issue to satisfy the new banking regulator", + "fixed": "a bigger-than-expected £5.8bn rights issue to satisfy the new banking regulator", + "expect": "pass" + }, + { + "label": "fancy Unicode crossing-out, but mojibaked", + "original": "hotel $49 $̶6̶3̶ updated 2018", + "fixed": "hotel $49 $̶6̶3̶ updated 2018", + "expect": "pass" + }, + { + "label": "A face with UTF-8 / sloppy Windows-1252 mixed up twice", + "original": "ââ€\u009d’(⌣˛⌣)ââ€\u009dŽ", + "fixed": "┒(⌣˛⌣)┎", + "expect": "pass" + }, + { + "label": "We can mostly decode the face above when we lose the character U+009D", + "original": "ââ€�’(⌣˛⌣)ââ€�Ž", + "fixed": "�(⌣˛⌣)�", + "expect": "pass" + }, + { + "label": "Lossy decoding can have plain ASCII question marks, as well", + "original": "The ICR has been upgraded to “bb+â€? 
from “bbâ€?", + "fixed-encoding": "The ICR has been upgraded to “bb+� from “bb�", + "fixed": "The ICR has been upgraded to \"bb+� from \"bb�", + "expect": "pass" + }, + { + "label": "CESU-8 / Latin-1 mixup over several emoji", + "comment": "You tried", + "original": "I just figured out how to tweet emojis! â\u009a½í\u00a0½í¸\u0080í\u00a0½í¸\u0081í\u00a0½í¸\u0082í\u00a0½í¸\u0086í\u00a0½í¸\u008eí\u00a0½í¸\u008eí\u00a0½í¸\u008eí\u00a0½í¸\u008e", + "fixed": "I just figured out how to tweet emojis! ⚽😀😁😂😆😎😎😎😎", + "expect": "pass" + }, + { + "label": "An absolutely hopeless garble", + "comment": "If we try too hard to decode this, we'll recursively apply `decode_inconsistent_utf8` until the characters turn into random Han and katakana characters.", + "original": "ã†â€™ãƒâ€ ã¢â‚¬â„¢ãƒæ’ã‚â¢ãƒâ¢ã¢â‚¬å¡ã‚â¬ãƒâ€šã‚â", + "fixed-encoding": "ã†â€™ãƒâ€ ã¢â‚¬â„¢ãƒæ’ã‚â¢ãƒâ¢ã¢â‚¬å¡ã‚â¬ãƒâ€šã‚â", + "fixed": "ã†â€™ãƒâ€ ã¢â'¬â\"¢ãƒæ'ã'â¢ãƒâ¢ã¢â'¬å¡ã'â¬ãƒâ€šã'â", + "expect": "pass" + }, + { + "label": "Inconsistent UTF-8 / Latin-1 mojibake", + "original": "Ecuadorâ\u0080\u0099s â\u0080\u0098purely political decision on Assangeâ\u0080\u0099 is likely result of â\u0080\u0098US pressureâ\u0080\u0099\u0085", + "fixed-encoding": "Ecuador’s ‘purely political decision on Assange’ is likely result of ‘US pressure’…", + "fixed": "Ecuador's 'purely political decision on Assange' is likely result of 'US pressure'…", + "expect": "pass" + }, + { + "label": "Inconsistent UTF-8 / Latin-1 mojibake with an ellipsis from the Windows-1252 character set", + "original": "Ecuadorâ\u0080\u0099s â\u0080\u0098purely political decision on Assangeâ\u0080\u0099 is likely result of â\u0080\u0098US pressureâ\u0080\u0099…", + "fixed-encoding": "Ecuador’s ‘purely political decision on Assange’ is likely result of ‘US pressure’…", + "fixed": "Ecuador's 'purely political decision on Assange' is likely result of 'US pressure'…", + "expect": "pass" + }, + { + "label": "Inconsistent mojibake in Portuguese", + "original": 
"Campeonatos > III Divisão - Série F > Jornadas Classificação", + "fixed": "Campeonatos > III Divisão - Série F > Jornadas Classificação", + "expect": "pass" + }, + { + "label": "Handle Afrikaans 'n character", + "original": "ʼn Chloroplas is ʼn organel wat in fotosinterende plante voorkom.", + "fixed-encoding": "ʼn Chloroplas is ʼn organel wat in fotosinterende plante voorkom.", + "fixed": "'n Chloroplas is 'n organel wat in fotosinterende plante voorkom.", + "expect": "pass" + }, + { + "label": "Handle Croatian single-codepoint digraphs", + "original": "izum „bootstrap load“ koji je korištenjem polisilicijskog sloja proizveo dovoljno dobre kondenzatore na čipu", + "fixed-encoding": "izum „bootstrap load“ koji je korištenjem polisilicijskog sloja proizveo dovoljno dobre kondenzatore na čipu", + "fixed": "izum \"bootstrap load\" koji je korištenjem polisilicijskog sloja proizveo dovoljno dobre kondenzatore na čipu", + "expect": "pass" + }, + { + "label": "A with an acute accent, in isolation", + "original": "Nicolás", + "fixed": "Nicolás", + "expect": "pass" + }, + { + "label": "sharp S, in isolation, via MacRoman encoding", + "comment": "regression reported in issue #186", + "original": "wei√ü", + "fixed": "weiß", + "expect": "pass" + }, + { + "label": "French example containing non-breaking spaces", + "original": "ART TRIP Ã\u00a0 l'office de tourisme", + "fixed": "ART TRIP à l'office de tourisme", + "expect": "pass" + }, + { + "label": "English example in UTF-8 / Windows-1251 with a ligature", + "original": "This is signiп¬Ѓcantly lower than the respective share", + "fixed-encoding": "This is significantly lower than the respective share", + "fixed": "This is significantly lower than the respective share", + "expect": "pass" + }, + { + "label": "'à' remains its own word, even if spaces after it get coalesced into one", + "original": "à perturber la réflexion des théologiens jusqu'à nos jours", + "fixed": "à perturber la réflexion des théologiens jusqu'à nos 
jours", + "expect": "pass" + }, + { + "label": "Fix 'à' in inconsistent mojibake", + "original": "Le barème forfaitaire permet l’évaluation des frais de déplacement relatifs à l’utilisation", + "fixed-encoding": "Le barème forfaitaire permet l’évaluation des frais de déplacement relatifs à l’utilisation", + "fixed": "Le barème forfaitaire permet l'évaluation des frais de déplacement relatifs à l'utilisation", + "expect": "pass" + }, + { + "label": "The Portuguese word 'às' does not become 'à s' due to the French fix", + "original": "com especial atenção à s crianças", + "fixed": "com especial atenção às crianças", + "expect": "pass" + }, + { + "label": "This is why we require a space after the 's' in 'às'", + "original": "Troisième édition pour ce festival qui persiste et signe à s'éloigner des grands axes pour prendre les contre-allées en 16 concerts dans 7 villes de 2 pays voisins.", + "fixed": "Troisième édition pour ce festival qui persiste et signe à s'éloigner des grands axes pour prendre les contre-allées en 16 concerts dans 7 villes de 2 pays voisins.", + "expect": "pass" + }, + { + "label": "We can fix 'à' in windows-1251 sometimes as well", + "original": "La rГ©gion de Dnepropetrovsk se trouve Г l’ouest de l’Ukraine", + "fixed-encoding": "La région de Dnepropetrovsk se trouve à l’ouest de l’Ukraine", + "fixed": "La région de Dnepropetrovsk se trouve à l'ouest de l'Ukraine", + "expect": "pass" + }, + { + "label": "'à quele' is the Portuguese word 'àquele', not 'à quele'", + "original": "eliminado o antígeno e mantidos os níveis de anticorpos, surgem as condições necessárias ao estabelecimento do granuloma, semelhante à quele observado nas lesões por imunocomplexo em excesso de anticorpos", + "fixed": "eliminado o antígeno e mantidos os níveis de anticorpos, surgem as condições necessárias ao estabelecimento do granuloma, semelhante àquele observado nas lesões por imunocomplexo em excesso de anticorpos", + "expect": "pass" + }, + { + "label": "A complex, 
lossy pile-up of mojibake in Portuguese", + "original": "â € ðŸ“� Regulamento: â € âš ï¸� As pessoas que marcarem nos comentários perfis empresariais e/ou de marcas, personalidades ou fake serão desclassificadas. âš ï¸� Podem participar pessoas residentes em Petrolina/PE ou Juazeiro/BA, desde que se comprometam a retirar o prêmio em nosso endereço. Funcionários estão vetados. âš ï¸� Serão válidos os comentários postados até 16h, do dia 31/03/2018. E o resultado será divulgado até à s 19h do mesmo dia em uma nova publicação em nosso instagram. â € Boa sorte!!! 😀ðŸ�°", + "fixed": "⠀ �\u00a0Regulamento: ⠀ ⚠� As pessoas que marcarem nos comentários perfis empresariais e/ou de marcas, personalidades ou fake serão desclassificadas. ⚠� Podem participar pessoas residentes em Petrolina/PE ou Juazeiro/BA, desde que se comprometam a retirar o prêmio em nosso endereço. Funcionários estão vetados. ⚠� Serão válidos os comentários postados até 16h, do dia 31/03/2018. E o resultado será divulgado até às 19h do mesmo dia em uma nova publicação em nosso instagram. 
⠀ Boa sorte!!!\u00a0😀�", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1252 mixup in Gaelic involving non-breaking spaces", + "original": "CÃ\u00a0nan nan GÃ\u00a0idheal", + "fixed": "Cànan nan Gàidheal", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1251 mixup in tweet spam", + "original": "Blog Traffic Tip 2 – Broadcast Email Your Blog", + "fixed": "Blog Traffic Tip 2 – Broadcast Email Your Blog", + "expect": "pass" + }, + { + "label": "UTF-8 / Windows-1251 mixup", + "original": "S&P Confirms Ukrsotsbank’s “B-“ Rating", + "fixed-encoding": "S&P Confirms Ukrsotsbank’s “B-“ Rating", + "fixed": "S&P Confirms Ukrsotsbank's \"B-\" Rating", + "expect": "pass" + }, + { + "label": "Dutch example with ë", + "comment": "from issue reported by MicroJackson", + "original": "ongeëvenaard", + "fixed-encoding": "ongeëvenaard", + "fixed": "ongeëvenaard", + "expect": "pass" + }, + { + "label": "HTML entity on top of UTF-8 / Latin-1", + "original": "10μs", + "fixed-encoding": "10μs", + "fixed": "10μs", + "expect": "pass" + }, + { + "label": "Three layers of UTF-8 / MacRoman mixup in French", + "comment": "You're welcome", + "original": "Merci de t‚Äö√†√∂¬¨¬©l‚Äö√†√∂¬¨¬©charger le plug-in Flash Player 8", + "fixed": "Merci de télécharger le plug-in Flash Player 8", + "expect": "pass" + }, + { + "label": "UTF-8 / MacRoman mixup in French", + "original": "Merci de bien vouloir activiter le Javascript dans votre navigateur web afin d'en profiter‚Ķ", + "fixed": "Merci de bien vouloir activiter le Javascript dans votre navigateur web afin d'en profiter…", + "expect": "pass" + }, + { + "label": "Italian UTF-8 / MacRoman example with ò", + "original": "Le Vigne di Zam√≤", + "fixed": "Le Vigne di Zamò", + "expect": "pass" + }, + { + "label": "Punctuation pile-up should actually be musical notes", + "original": "Engkau masih yg terindah, indah di dalam hatiku♫~", + "fixed": "Engkau masih yg terindah, indah di dalam hatiku♫~", + "expect": "pass" + }, + { + "label": 
"Latvian UTF-8 / Windows-1257 mojibake", + "original": "Å veices baņķieri gaida konkrÄ“tus investÄ«ciju projektus", + "fixed": "Šveices baņķieri gaida konkrētus investīciju projektus", + "expect": "pass" + }, + { + "label": "Latvian UTF-8 / MacRoman mojibake", + "original": "SaeimƒÅ ievƒìlƒìtƒÅs partijas \"Progresƒ´vie\" lƒ´dzvadƒ´tƒÅja Anto≈Üina ≈Öena≈°eva atbild uz ≈æurnƒÅlistu jautƒÅjumiem pƒìc partijas tik≈°anƒÅs ar Valsts prezidentu Rƒ´gas pilƒ´,", + "fixed": "Saeimā ievēlētās partijas \"Progresīvie\" līdzvadītāja Antoņina Ņenaševa atbild uz žurnālistu jautājumiem pēc partijas tikšanās ar Valsts prezidentu Rīgas pilī,", + "expect": "pass" + }, + { + "label": "Lithuanian UTF-8 / Windows-1257 mojibake", + "original": "Å iaip ÄÆdomu, kaip ÄÆsivaizduoji. Visų pirma tam reikia laiko.", + "fixed": "Šiaip įdomu, kaip įsivaizduoji. Visų pirma tam reikia laiko.", + "expect": "pass" + }, + { + "label": "Lithuanian UTF-8 / Windows-1250 mojibake", + "original": "Lietuva pagrÄŻstai gali paklausti: Ĺ˝inoma, kad ne.", + "fixed": "Lietuva pagrįstai gali paklausti: Žinoma, kad ne.", + "expect": "pass" + }, + { + "label": "Hebrew UTF-8 / Windows-1252 mojibake", + "comment": "reported by SuperIRabbit as issue #158", + "original": "בהודעה", + "fixed": "בהודעה", + "expect": "pass" + }, + { + "label": "Wide comma in UTF-8 / Windows-1252", + "original": "Ningbo,China", + "fixed-encoding": "Ningbo,China", + "fixed": "Ningbo,China", + "expect": "pass" + } +] \ No newline at end of file diff --git a/tests/test-cases/known-failures.json b/tests/test-cases/known-failures.json new file mode 100644 index 00000000..2663d9f7 --- /dev/null +++ b/tests/test-cases/known-failures.json @@ -0,0 +1,70 @@ +[ + { + "label": "Misleading mix-up in Spanish", + "comment": "The original text has mojibake, but the sequence 'á \u0093' can decode as U+1813 MONGOLIAN DIGIT THREE, when the whole string should really just decode as a Latin-1/Windows-1252 mixup", + "original": "tiene demora y está \u0093próximo 
a resolverse\u0094", + "fixed": "tiene demora y está \"próximo a resolverse\"", + "expect": "fail" + }, + { + "label": "Two levels of inconsistent mojibake", + "comment": "The en-dash was mojibaked in UTF-8 / Windows-1252 as three characters, two of which were mojibaked again as Windows-1252 / Latin-1, and the third of which was mojibaked as UTF-8 / Latin-1. Unfortunately, if we fix this, we leave ourselves room to greedily 'decode' random Han characters in complex Latin-alphabet mojibake", + "original": "Arsenal v Wolfsburg: pre-season friendly â\u0080â\u0080\u009c live!", + "fixed": "Arsenal v Wolfsburg: pre-season friendly – live!", + "expect": "fail" + }, + { + "label": "A-with-grave in Vietnamese", + "comment": "Currently adds extra spaces that shouldn't be there", + "original": "Xem clip hĂ i, phim hĂ i má»›i hay nhất", + "fixed": "Xem clip hài, phim hài mới hay nhất", + "expect": "fail" + }, + { + "label": "Latin-1 / MacRoman mixup in Spanish", + "comment": "Requires something like encoding detection", + "original": "Deja dos heridos hundimiento de barco tur\u0092stico en Acapulco.", + "fixed": "Deja dos heridos hundimiento de barco turístico en Acapulco.", + "expect": "fail" + }, + { + "label": "subtle UTF-8 / codepage 437 mixup in Spanish", + "original": "┬┐que diferencia hay?", + "fixed": "¿que diferencia hay?", + "expect": "fail" + }, + { + "label": "Latin-1 / MacRoman mixup in Spanish, 2 characters", + "comment": "Requires something like encoding detection", + "original": "Habitantes de Coatl\u0087n conf\u0092an en proyecto de edil electo independiente", + "fixed": "Habitantes de Coatlán confían en proyecto de edil electo independiente", + "expect": "fail" + }, + { + "label": "An example with 'à' in windows-1251 where we need our heuristic to be bolder", + "original": "faites attention Г bien vous renseigner avant sur le mГ©dicament", + "fixed": "faites attention à bien vous renseigner avant sur le médicament", + "expect": "fail" + }, + { + "label": 
"Italian UTF-8 / MacRoman mojibake that looks like math", + "comment": "False negative: 'pi√π' is a bit too reasonable to fix", + "original": "Sarai ricontattato dal nostro Esperto al pi√π presto.", + "fixed": "Sarai ricontattato dal nostro Esperto al più presto.", + "expect": "fail" + }, + { + "label": "Synthetic: Incomplete UTF-8 / Windows-1252 mixup in Arabic", + "comment": "I find text like this in OSCAR a fair amount, but couldn't isolate a good example that tested digits. The intended text means 'more than 100 countries'.", + "original": "أكثر من Ù Ù Ù¡ بلد", + "fixed": "أكثر من ٠٠١ بلد", + "expect": "fail" + }, + { + "label": "Synthetic, false positive: the title of a manga, in weird capitalized romaji, with a non-breaking space", + "comment": "Testing tells me I should worry about cases like this, though I haven't seen a real example. Searching for similar real text yields a lot of examples that actually come out fine.", + "original": "MISUTÂ\u00a0AJIKKO", + "fixed": "MISUTÂ\u00a0AJIKKO", + "expect": "fail" + } +] \ No newline at end of file diff --git a/tests/test-cases/language-names.json b/tests/test-cases/language-names.json new file mode 100644 index 00000000..cdb82418 --- /dev/null +++ b/tests/test-cases/language-names.json @@ -0,0 +1,127 @@ +[ + { + "label": "Messy language names: Czech", + "comment": "This and several following examples came from the same language selector", + "original": "ÄŒeÅ¡tina", + "fixed": "Čeština", + "expect": "pass" + }, + { + "label": "Messy language names: Gaelic", + "comment": "note that if U+A0 is replaced by a space, it comes out slightly incorrectly as 'Gà idhlig'", + "original": "GÃ\u00a0idhlig", + "fixed": "Gàidhlig", + "expect": "pass" + }, + { + "label": "Messy language names: Lithuanian", + "original": "Lietuvių", + "fixed": "Lietuvių", + "expect": "pass" + }, + { + "label": "Messy language names: Slovak", + "original": "SlovenÄ�ina", + "fixed": "Sloven�ina", + "expect": "pass" + }, + { + "label": "Messy language 
names: Vietnamese", + "original": "Tiếng Việt", + "fixed": "Tiếng Việt", + "expect": "pass" + }, + { + "label": "Messy language names: Greek", + "original": "Ελληνικά", + "fixed": "Ελληνικά", + "expect": "pass" + }, + { + "label": "Messy language names: Bulgarian", + "original": "българÑ�ки език", + "fixed": "българ�ки език", + "expect": "pass" + }, + { + "label": "Messy language names: Russian", + "original": "РуÑ�Ñ�кий", + "fixed": "Ру��кий", + "expect": "pass" + }, + { + "label": "Messy language names: Serbian [Cyrillic]", + "original": "CрпÑ�ки [ћирилицом]", + "fixed": "Cрп�ки [ћирилицом]", + "expect": "pass" + }, + { + "label": "Messy language names: Hebrew", + "original": "עברית", + "fixed": "עברית", + "expect": "pass" + }, + { + "label": "Messy language names: Russian", + "original": "РуÑ�Ñ�кий", + "fixed": "Ру��кий", + "expect": "pass" + }, + { + "label": "Messy language names: Hindi", + "comment": "My terminal has difficulty rendering the mostly-fixed text", + "original": "हिनà¥�दी", + "fixed": "\u0939\u093f\u0928\ufffd\u0926\u0940", + "expect": "pass" + }, + { + "label": "Messy language names: Tamil", + "comment": "My terminal has difficulty rendering the mostly-fixed text", + "original": "தமிழà¯�", + "fixed": "\u0ba4\u0bae\u0bbf\u0bb4\ufffd", + "expect": "pass" + }, + { + "label": "Messy language names: Thai", + "original": "ภาษาไทย", + "fixed": "ภาษาไทย", + "expect": "pass" + }, + { + "label": "Messy language names: Simplified Chinese", + "original": "简体ä¸\u00adæ–‡", + "fixed": "简体中文", + "expect": "pass" + }, + { + "label": "Messy language names: Traditional Chinese", + "original": "æ\u00ad£é«”ä¸\u00adæ–‡", + "fixed": "正體中文", + "expect": "pass" + }, + { + "label": "Messy language names: Japanese", + "original": "日本語", + "fixed": "日本語", + "expect": "pass" + }, + { + "label": "Messy language names: Korean", + "original": "한êµ\u00adì–´", + "fixed": "한국어", + "expect": "pass" + }, + { + "label": "Messy language name in cp437: Czech", + "comment": "A 
synthetic example, I suppose, but goes with the other language name tests", + "original": "─îe┼ítina", + "fixed": "Čeština", + "expect": "pass" + }, + { + "label": "Messy language name in cp437: Vietnamese", + "original": "Tiß║┐ng Viß╗çt", + "fixed": "Tiếng Việt", + "expect": "pass" + } +] \ No newline at end of file diff --git a/tests/test-cases/negative.json b/tests/test-cases/negative.json new file mode 100644 index 00000000..dc1e36b1 --- /dev/null +++ b/tests/test-cases/negative.json @@ -0,0 +1,216 @@ +[ + { + "label": "Negative: Using diaereses as quotation marks in Greek", + "comment": "Examples in this file might be detected as mojibake-like, but should not be changed", + "original": "Η ¨ανατροφή¨ δυστυχώς από τους προπονητές", + "fixed": "Η ¨ανατροφή¨ δυστυχώς από τους προπονητές", + "expect": "pass" + }, + { + "label": "Negative: Don't fix a multiplication symbol in quotes", + "original": "higher values (“+” and “×” curves) in the superficial region", + "fixed-encoding": "higher values (“+” and “×” curves) in the superficial region", + "fixed": "higher values (\"+\" and \"×\" curves) in the superficial region", + "expect": "pass" + }, + { + "label": "Sort of negative: this inconsistent mojibake could be Latin-1 or MacRoman, and it was meant to be Latin-1, but it's safest to not decode it as either", + "comment": "issue #202", + "original": "Bremer/Mccoy – DrÃ¥ber", + "fixed": "Bremer/Mccoy – DrÃ¥ber", + "expect": "pass" + }, + { + "label": "Negative: 'è' preceded by a non-breaking space is not a small capital Y", + "original": "Con il corpo e lo spirito ammaccato,\u00a0è come se nel cuore avessi un vetro conficcato.", + "fixed": "Con il corpo e lo spirito ammaccato,\u00a0è come se nel cuore avessi un vetro conficcato.", + "expect": "pass" + }, + { + "label": "Negative: multiplication sign and ellipsis", + "comment": "Should not turn into a dot below", + "original": "4288×…", + "fixed": "4288×…", + "expect": "pass" + }, + { + "label": "Negative: accents are 
sometimes used as quotes", + "comment": "Under a previous heuristic, this tested the CESU-8 decoder, which would try to decode it and fail when it hit the end of the string", + "original": "``toda produzida pronta pra assa aí´´", + "fixed": "``toda produzida pronta pra assa aí´´", + "expect": "pass" + }, + { + "label": "Negative: 'Õ' followed by an ellipsis", + "comment": "Should not turn into the Armenian letter Յ", + "original": "HUHLL Õ…", + "fixed": "HUHLL Õ…", + "expect": "pass" + }, + { + "label": "Negative: 'Ê' followed by an ellipsis", + "comment": "Should not turn into a squat reversed esh", + "original": "RETWEET SE VOCÊ…", + "fixed": "RETWEET SE VOCÊ…", + "expect": "pass" + }, + { + "label": "Negative: 'É' followed by an ellipsis", + "comment": "Should not turn into 'MARQUɅ'", + "original": "PARCE QUE SUR LEURS PLAQUES IL Y MARQUÉ…", + "fixed": "PARCE QUE SUR LEURS PLAQUES IL Y MARQUÉ…", + "expect": "pass" + }, + { + "label": "Negative: 'Ó' followed by an ellipsis", + "comment": "Should not turn into 'SӅ'", + "original": "TEM QUE SEGUIR, SDV SÓ…", + "fixed": "TEM QUE SEGUIR, SDV SÓ…", + "expect": "pass" + }, + { + "label": "Negative: 'É' followed by a curly apostrophe", + "comment": "Should not turn into 'ZZAJɒs'", + "original": "Join ZZAJÉ’s Official Fan List and receive news, events, and more!", + "fixed-encoding": "Join ZZAJÉ’s Official Fan List and receive news, events, and more!", + "fixed": "Join ZZAJÉ's Official Fan List and receive news, events, and more!", + "expect": "pass" + }, + { + "label": "Negative: 'é' preceded by curly apostrophe", + "comment": "Should not turn into 'LՎpisode'", + "original": "L’épisode 8 est trop fou ouahh", + "fixed-encoding": "L’épisode 8 est trop fou ouahh", + "fixed": "L'épisode 8 est trop fou ouahh", + "expect": "pass" + }, + { + "label": "Negative: three raised eyebrows or something?", + "comment": "Should not turn into private use character U+F659", + "original": "Ôôô VIDA MINHA", + "fixed": "Ôôô VIDA MINHA", + 
"expect": "pass" + }, + { + "label": "Negative: copyright sign preceded by non-breaking space", + "comment": "Should not turn into 'ʩ'", + "original": "[x]\u00a0©", + "fixed": "[x]\u00a0©", + "expect": "pass" + }, + { + "label": "Negative: en dash and infinity sign", + "comment": "Should not turn into '2012Ѱ'", + "original": "2012—∞", + "fixed": "2012—∞", + "expect": "pass" + }, + { + "label": "Negative: This Е is a Ukrainian letter, but nothing else is wrong", + "original": "SENSЕ - Oleg Tsedryk", + "fixed": "SENSЕ - Oleg Tsedryk", + "expect": "pass" + }, + { + "label": "Negative: angry face", + "comment": "The face should not turn into '`«'", + "original": "OK??:( `¬´ ):", + "fixed": "OK??:( `¬´ ):", + "expect": "pass" + }, + { + "label": "Negative, synthetic: face with glasses and a raised eyebrow", + "original": "( o¬ô )", + "fixed": "( o¬ô )", + "expect": "pass" + }, + { + "label": "Negative: triangle and degree sign", + "comment": "I'm not really sure what it *is* supposed to be, but it's not 'ơ'", + "original": "∆°", + "fixed": "∆°", + "expect": "pass" + }, + { + "label": "Negative: Portuguese with inverted question mark", + "comment": "Former false positive - it should not turn into 'QUEM ɿ'", + "original": "ESSE CARA AI QUEM É¿", + "fixed": "ESSE CARA AI QUEM É¿", + "expect": "pass" + }, + { + "label": "Negative: Portuguese with acute accents as quotation marks", + "comment": "Former false positive - the end should not turn into a superscript H", + "original": "``hogwarts nao existe, voce nao vai pegar o trem pra lá´´", + "fixed": "``hogwarts nao existe, voce nao vai pegar o trem pra lá´´", + "expect": "pass" + }, + { + "label": "Negative: Finnish Ä followed by a non-breaking space", + "comment": "Former false positive - should not become a G with a dot", + "original": "SELKÄ\u00a0EDELLÄ\u00a0MAAHAN via @YouTube", + "fixed": "SELKÄ\u00a0EDELLÄ\u00a0MAAHAN via @YouTube", + "expect": "pass" + }, + { + "label": "Negative: multiplying by currency", + 
"comment": "Former false positive - should not become the Hebrew letter 'final pe'", + "original": "Offering 5×£35 pin ups", + "fixed": "Offering 5×£35 pin ups", + "expect": "pass" + }, + { + "label": "Negative: registered chocolate brand name", + "comment": "Former false positive - should not become the IPA letter 'lezh'", + "original": "NESTLÉ® requiere contratar personal para diferentes areas a nivel nacional e internacional", + "fixed": "NESTLÉ® requiere contratar personal para diferentes areas a nivel nacional e internacional", + "expect": "pass" + }, + { + "label": "Negative: it looks like Windows-1257 mojibake but someone writes their name this way", + "comment": "Should not become a cedilla", + "original": "Connect with Āø on Facebook", + "fixed": "Connect with Āø on Facebook", + "expect": "pass" + }, + { + "label": "Mostly negative: we only need to fix C1 control characters", + "comment": "We should not decode 'é\u0085 ' as '酠'", + "original": "C'est vrai que nous n'en avons pas encore beaucoup parlé\u0085 Tu sais, ça fait de nombreuses années", + "fixed": "C'est vrai que nous n'en avons pas encore beaucoup parlé… Tu sais, ça fait de nombreuses années", + "expect": "pass" + }, + { + "label": "Negative: We don't fix à in all contexts", + "original": "C O N C L U S à O", + "fixed": "C O N C L U S à O", + "expect": "pass" + }, + { + "label": "Negative: Two concatenated strings", + "comment": "Should not turn into 'fratarak᧠141'", + "original": "Oborzos, per. Vahbarz, frataraká§ 141", + "fixed": "Oborzos, per. 
Vahbarz, frataraká§ 141", + "expect": "pass" + }, + { + "label": "Negative: Indonesian leetspeak", + "original": "MÄ£ÄM ÌÑÌ Q £ÄGÌ GÄLÄW ÑÍCH SÖÄ£ ÑÝÄ $ÚÄMÌ Q £ÄGÌ GÄK ÉÑÄK BÄDÄÑ....?????????, ......JÄDÍ...", + "fixed": "MÄ£ÄM ÌÑÌ Q £ÄGÌ GÄLÄW ÑÍCH SÖÄ£ ÑÝÄ $ÚÄMÌ Q £ÄGÌ GÄK ÉÑÄK BÄDÄÑ....?????????, ......JÄDÍ...", + "expect": "pass" + }, + { + "label": "Negative: math in Unicode", + "comment": "This isn't mojibake, it's an actual equation", + "original": "(-1/2)! = √π", + "fixed": "(-1/2)! = √π", + "expect": "pass" + }, + { + "label": "Negative: Leet line-art", + "comment": "The heuristic before v6 loved to 'fix' this and decode it as 'ôaſaſaſaſa'", + "original": "├┤a┼┐a┼┐a┼┐a┼┐a", + "fixed": "├┤a┼┐a┼┐a┼┐a┼┐a", + "expect": "pass" + } +] \ No newline at end of file diff --git a/tests/test-cases/synthetic.json b/tests/test-cases/synthetic.json new file mode 100644 index 00000000..a9393111 --- /dev/null +++ b/tests/test-cases/synthetic.json @@ -0,0 +1,208 @@ +[ + { + "label": "Synthetic: we can recognize à in some cases when it's the only mojibake", + "comment": "Examples in this file were made up to test something, instead of found in the wild", + "original": "voilà le travail", + "fixed": "voilà le travail", + "expect": "pass" + }, + { + "label": "Synthetic: we can recognize à at the end of a word when it absorbs a following space", + "original": "voilà le travail", + "fixed": "voilà le travail", + "expect": "pass" + }, + { + "label": "Synthetic: Hebrew UTF-8 / Windows-1250 mojibake", + "original": "בהודעה", + "fixed": "בהודעה", + "expect": "pass" + }, + { + "label": "Synthetic: Hebrew UTF-8 / MacRoman mojibake", + "original": "◊ë◊î◊ï◊ì◊¢◊î", + "fixed": "בהודעה", + "expect": "pass" + }, + { + "label": "Synthetic: Hebrew UTF-8 / Latin-1 mojibake", + "comment": "This example uses low-numbered codepoints to spell 'ABBA' in Hebrew, so that it falls into the range where Latin-1 is different from Windows-1252. 
As a bonus, this example looks right even if your RTL text rendering isn't working.", + "original": "×\u0090×\u0091×\u0091×\u0090", + "fixed": "אבבא", + "expect": "pass" + }, + { + "label": "Synthetic: Arabic UTF-8 / Windows-1252 mojibake", + "original": "رسالة", + "fixed": "رسالة", + "expect": "pass" + }, + { + "label": "Synthetic: Arabic UTF-8 / Windows-1250 mojibake", + "original": "رسالة", + "fixed": "رسالة", + "expect": "pass" + }, + { + "label": "Synthetic: Arabic UTF-8 / MacRoman mojibake", + "original": "ÿ±ÿ≥ÿߟÑÿ©", + "fixed": "رسالة", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Brontë's name does not end with a Korean syllable", + "comment": "The original example of why ftfy needs heuristics", + "original": "I'm not such a fan of Charlotte Brontë…”", + "fixed-encoding": "I'm not such a fan of Charlotte Brontë…”", + "fixed": "I'm not such a fan of Charlotte Brontë…\"", + "expect": "pass" + }, + { + "label": "Synthetic, negative: hypothetical Swedish product name", + "comment": "This used to be a constructed example of a false positive, until you added another symbol", + "original": "AHÅ™, the new sofa from IKEA", + "fixed": "AHÅ™, the new sofa from IKEA", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Ukrainian capital letters", + "comment": "We need to fix Windows-1251 conservatively, or else this decodes as '²ʲ'", + "original": "ВІКІ is Ukrainian for WIKI", + "fixed": "ВІКІ is Ukrainian for WIKI", + "expect": "pass" + }, + { + "label": "Synthetic, negative: don't leak our internal use of byte 0x1A", + "comment": "We use byte 0x1A internally as an encoding of U+FFFD, but literal occurrences of U+1A are just ASCII control characters", + "original": "These control characters \u001a are apparently intentional \u0081", + "fixed-encoding": "These control characters \u001a are apparently intentional \u0081", + "fixed": "These control characters are apparently intentional \u0081", + "expect": "pass" + }, + { + "label": "Synthetic, 
negative: U+1A on its own", + "comment": "We use byte 0x1A internally as an encoding of U+FFFD, but literal occurrences of U+1A are just ASCII control characters", + "original": "Here's a control character: \u001a", + "fixed-encoding": "Here's a control character: \u001a", + "fixed": "Here's a control character: ", + "expect": "pass" + }, + { + "label": "Synthetic, negative: A-with-circle as an Angstrom sign", + "comment": "Should not turn into '10 ŗ'", + "original": "a radius of 10 Å—", + "fixed": "a radius of 10 Å—", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Spanish with exclamation points on the wrong sides", + "original": "!YO SÉ¡", + "fixed": "!YO SÉ¡", + "expect": "pass" + }, + { + "label": "Synthetic: fix text with backslashes in it", + "comment": "Tests for a regression on a long-ago bug", + "original": "<40\\% vs \u00e2\u0089\u00a540\\%", + "fixed": "<40\\% vs ≥40\\%", + "expect": "pass" + }, + { + "label": "Synthetic: curly quotes with mismatched encoding glitches in Latin-1", + "original": "\u00e2\u0080\u009cmismatched quotes\u0085\u0094", + "fixed-encoding": "“mismatched quotes…”", + "fixed": "\"mismatched quotes…\"", + "expect": "pass" + }, + { + "label": "Synthetic: curly quotes with mismatched encoding glitches in Windows-1252", + "original": "“mismatched quotes…”", + "fixed-encoding": "“mismatched quotes…”", + "fixed": "\"mismatched quotes…\"", + "expect": "pass" + }, + { + "label": "Synthetic: lossy decoding in sloppy-windows-1252", + "original": "“lossy decodingâ€�", + "fixed-encoding": "“lossy decoding�", + "fixed": "\"lossy decoding�", + "expect": "pass" + }, + { + "label": "Synthetic: French word for August in windows-1252", + "original": "août", + "fixed-encoding": "août", + "fixed": "août", + "expect": "pass" + }, + { + "label": "Synthetic: French word for hotel in all-caps windows-1252", + "original": "HÔTEL", + "fixed-encoding": "HÔTEL", + "fixed": "HÔTEL", + "expect": "pass" + }, + { + "label": "Synthetic: Scottish 
Gaelic word for 'subject' in all-caps windows-1252", + "original": "CÙIS", + "fixed-encoding": "CÙIS", + "fixed": "CÙIS", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Romanian word before a non-breaking space", + "comment": "The word literally means 'not even once', which might be a good recommendation about fixing Romanian mojibake", + "original": "NICIODATĂ\u00a0", + "fixed": "NICIODATĂ\u00a0", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Be careful around curly apostrophes", + "comment": "It shouldn't end up saying 'a lot of Òs'", + "original": "There are a lot of Ã’s in mojibake text", + "fixed-encoding": "There are a lot of Ã’s in mojibake text", + "fixed": "There are a lot of Ã's in mojibake text", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Romanian word before a trademark sign", + "comment": "We would change 'DATÙ' to 'DATÙ' if it passed the badness heuristic", + "original": "NICIODATĂ™", + "fixed": "NICIODATĂ™", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Lithuanian word before a trademark sign", + "comment": "Similar to the above example. Shouldn't turn into U+0619 ARABIC SMALL DAMMA", + "original": "TRANSFORMATORIŲ™", + "fixed": "TRANSFORMATORIŲ™", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Norwegian capitalized nonsense", + "comment": "We're shouting that the island of Håøya is gullible. It should not turn into 'HŨYA ER BLŨYD'.", + "original": "HÅØYA ER BLÅØYD", + "fixed": "HÅØYA ER BLÅØYD", + "expect": "pass" + }, + { + "label": "Synthetic, negative: raised eyebrow kaomoji", + "original": "Ō¬o", + "fixed": "Ō¬o", + "expect": "pass" + }, + { + "label": "Synthetic, negative: Camel-cased Serbian that looks like a UTF-8 / Windows-1251 mixup", + "comment": "I made this text up, but it seems like it means 'HelloDevil'. 
Could be a username or something.", + "original": "ПоздравЂаво", + "fixed": "ПоздравЂаво", + "expect": "pass" + }, + { + "label": "Synthetic: mojibake with trademark sign at the end of a word", + "comment": "I recall the correct version of this text from a sign in the movie Amélie. Now we can help her twin Amélie, who makes mojibaked signs.", + "original": "OÙ ET QUAND?", + "fixed": "OÙ ET QUAND?", + "expect": "pass" + } +] \ No newline at end of file diff --git a/tests/test_cases.json b/tests/test_cases.json deleted file mode 100644 index 342b1d42..00000000 --- a/tests/test_cases.json +++ /dev/null @@ -1,951 +0,0 @@ -[ - { - "label": "Messy language names: Czech", - "comment": "This and several following examples came from the same language selector", - "original": "ÄŒeÅ¡tina", - "fixed": "Čeština", - "expect": "pass" - }, - { - "label": "Messy language names: Gaelic", - "comment": "note that if U+A0 is replaced by a space, it comes out slightly incorrectly as 'Gà idhlig'", - "original": "GÃ\u00a0idhlig", - "fixed": "Gàidhlig", - "expect": "pass" - }, - { - "label": "Messy language names: Lithuanian", - "original": "Lietuvių", - "fixed": "Lietuvių", - "expect": "pass" - }, - { - "label": "Messy language names: Slovak", - "original": "SlovenÄ�ina", - "fixed": "Sloven�ina", - "expect": "pass" - }, - { - "label": "Messy language names: Vietnamese", - "original": "Tiếng Việt", - "fixed": "Tiếng Việt", - "expect": "pass" - }, - { - "label": "Messy language names: Greek", - "original": "Ελληνικά", - "fixed": "Ελληνικά", - "expect": "pass" - }, - { - "label": "Messy language names: Bulgarian", - "original": "българÑ�ки език", - "fixed": "българ�ки език", - "expect": "pass" - }, - { - "label": "Messy language names: Russian", - "original": "РуÑ�Ñ�кий", - "fixed": "Ру��кий", - "expect": "pass" - }, - { - "label": "Messy language names: Serbian [Cyrillic]", - "original": "CрпÑ�ки [ћирилицом]", - "fixed": "Cрп�ки [ћирилицом]", - "expect": "pass" - }, - { - "label": "Messy 
language names: Hebrew", - "original": "עברית", - "fixed": "עברית", - "expect": "pass" - }, - { - "label": "Messy language names: Russian", - "original": "РуÑ�Ñ�кий", - "fixed": "Ру��кий", - "expect": "pass" - }, - { - "label": "Messy language names: Hindi", - "comment": "My terminal has difficulty rendering the mostly-fixed text", - "original": "हिनà¥�दी", - "fixed": "\u0939\u093f\u0928\ufffd\u0926\u0940", - "expect": "pass" - }, - { - "label": "Messy language names: Tamil", - "comment": "My terminal has difficulty rendering the mostly-fixed text", - "original": "தமிழà¯�", - "fixed": "\u0ba4\u0bae\u0bbf\u0bb4\ufffd", - "expect": "pass" - }, - { - "label": "Messy language names: Thai", - "original": "ภาษาไทย", - "fixed": "ภาษาไทย", - "expect": "pass" - }, - { - "label": "Messy language names: Simplified Chinese", - "original": "简体ä¸\u00adæ–‡", - "fixed": "简体中文", - "expect": "pass" - }, - { - "label": "Messy language names: Traditional Chinese", - "original": "æ\u00ad£é«”ä¸\u00adæ–‡", - "fixed": "正體中文", - "expect": "pass" - }, - { - "label": "Messy language names: Japanese", - "original": "日本語", - "fixed": "日本語", - "expect": "pass" - }, - { - "label": "Messy language names: Korean", - "original": "한êµ\u00adì–´", - "fixed": "한국어", - "expect": "pass" - }, - { - "label": "Low-codepoint emoji", - "original": "He's Justinâ\u009d¤", - "fixed": "He's Justin❤", - "expect": "pass" - }, - { - "label": "UTF-8 / MacRoman mix-up about smurfs", - "original": "Le Schtroumpf Docteur conseille g√¢teaux et baies schtroumpfantes pour un r√©gime √©quilibr√©.", - "fixed": "Le Schtroumpf Docteur conseille gâteaux et baies schtroumpfantes pour un régime équilibré.", - "expect": "pass" - }, - { - "label": "Checkmark that almost looks okay as mojibake", - "original": "✔ No problems", - "fixed": "✔ No problems", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1251 Russian mixup about futbol", - "original": "РґРѕСЂРѕРіРµ Р\u0098Р·-РїРѕРґ #футбол", - "fixed": "дороге Из-под #футбол", - 
"expect": "pass" - }, - { - "label": "Latin-1 / Windows-1252 mixup in German", - "original": "\u0084Handwerk bringt dich überall hin\u0093: Von der YOU bis nach Monaco", - "fixed-encoding": "„Handwerk bringt dich überall hin“: Von der YOU bis nach Monaco", - "fixed": "\"Handwerk bringt dich überall hin\": Von der YOU bis nach Monaco", - "expect": "pass" - }, - { - "label": "Latin-1 / Windows-1252 mixup of the replacement character", - "original": "Some comments may be republished on the website or in the newspaper � email addresses will not be published.", - "fixed": "Some comments may be republished on the website or in the newspaper � email addresses will not be published.", - "expect": "pass" - }, - { - "label": "CESU-8 / Windows-1252 emoji", - "original": "Hi guys í ½í¸\u008d", - "fixed": "Hi guys 😍", - "expect": "pass" - }, - { - "label": "CESU-8 / Latin-1 emoji", - "original": "hihi RT username: â\u0098ºí ½í¸\u0098", - "fixed": "hihi RT username: ☺😘", - "expect": "pass" - }, - { - "label": "Latin-1 / Windows-1252 mixup in Turkish", - "original": "Beta Haber: Hırsızı Büyü Korkuttu", - "fixed": "Beta Haber: Hırsızı Büyü Korkuttu", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1251 mixed up twice in Russian", - "original": "приятности. РІСњВ¤", - "fixed": "приятности. 
❤", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 mixed up twice in Malay", - "original": "Kayanya laptopku error deh, soalnya tiap mau ngetik deket-deket kamu font yg keluar selalu Times New “ Romanceâ€Â\u009d.", - "fixed-encoding": "Kayanya laptopku error deh, soalnya tiap mau ngetik deket-deket kamu font yg keluar selalu Times New “ Romance”.", - "fixed": "Kayanya laptopku error deh, soalnya tiap mau ngetik deket-deket kamu font yg keluar selalu Times New \" Romance\".", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 mixed up twice in naming Iggy Pop", - "original": "Iggy Pop (né Jim Osterberg)", - "fixed": "Iggy Pop (né Jim Osterberg)", - "expect": "pass" - }, - { - "label": "Left quote is UTF-8, right quote is Latin-1, both encoded in Windows-1252", - "original": "Direzione Pd, ok â\u0080\u009csenza modifiche\u0094 all'Italicum.", - "fixed-encoding": "Direzione Pd, ok “senza modifiche” all'Italicum.", - "fixed": "Direzione Pd, ok \"senza modifiche\" all'Italicum.", - "expect": "pass" - }, - { - "label": "UTF-8 / sloppy Windows-1252 mixed up twice in a triumphant emoticon", - "original": "selamat berpuasa sob (Ã\u00a0¸‡'̀⌣'ÃŒÂ\u0081)Ã\u00a0¸‡", - "fixed": "selamat berpuasa sob (ง'̀⌣'́)ง", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 mixed up three times", - "original": "The Mona Lisa doesn’t have eyebrows.", - "fixed-encoding": "The Mona Lisa doesn’t have eyebrows.", - "fixed": "The Mona Lisa doesn't have eyebrows.", - "expect": "pass" - }, - { - "label": "UTF-8 / Codepage 437 mixup in Russian", - "original": "#╨┐╤Ç╨░╨▓╨╕╨╗╤î╨╜╨╛╨╡╨┐╨╕╤é╨░╨╜╨╕╨╡", - "fixed": "#правильноепитание", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 mixup in French", - "original": "Hôtel de Police", - "fixed": "Hôtel de Police", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1250 mixup in French", - "original": "Liège Avenue de l'HĂ´pital", - "fixed": "Liège Avenue de l'Hôpital", - "expect": "pass" - }, - { - "label": 
"UTF-8 / Windows-1252 mixup in Vietnamese", - "original": "Tại sao giá hạt sầu riêng lại lên giá?", - "fixed": "Tại sao giá hạt sầu riêng lại lên giá?", - "expect": "pass" - }, - { - "label": "Negative: using diaereses as quotation marks in Greek", - "original": "Η ¨ανατροφή¨ δυστυχώς από τους προπονητές", - "fixed": "Η ¨ανατροφή¨ δυστυχώς από τους προπονητές", - "expect": "pass" - }, - { - "label": "Science! Mid-word Greek letter gets fixed correctly", - "original": "Humanized HLA-DR4.RagKO.IL2RγcKO.NOD (DRAG) mice sustain the complex vertebrate life cycle of Plasmodium falciparum malaria.", - "fixed": "Humanized HLA-DR4.RagKO.IL2RγcKO.NOD (DRAG) mice sustain the complex vertebrate life cycle of Plasmodium falciparum malaria.", - "expect": "pass" - }, - { - "label": "Negative: More science! Don't fix a multiplication symbol in quotes", - "original": "higher values (“+” and “×” curves) in the superficial region", - "fixed-encoding": "higher values (“+” and “×” curves) in the superficial region", - "fixed": "higher values (\"+\" and \"×\" curves) in the superficial region", - "expect": "pass" - }, - { - "label": "For goodness' sake. We can come close to fixing this, but fail in the last step", - "original": "ItÃ?¢â?¬â?¢s classic. ItÃ?¢â?¬â?¢s epic. ItÃ?¢â?¬â?¢s ELIZABETH BENNET for goodnessÃ?¢â?¬â?¢ sake!", - "fixed": "It�¢��s classic. It�¢��s epic. 
It�¢��s ELIZABETH BENNET for goodness�¢�� sake!", - "expect": "pass" - }, - { - "label": "lossy UTF-8 / Windows-1250 mixup in Spanish", - "original": "Europa, Asia, Ă�frica, Norte, AmĂ©rica Central y del Sur, Australia y OceanĂ­a", - "fixed": "Europa, Asia, �frica, Norte, América Central y del Sur, Australia y Oceanía", - "expect": "pass" - }, - { - "label": "UTF-8 / sloppy Windows-1250 mixup in English", - "original": "It was named „scars´ stones“ after the rock-climbers who got hurt while climbing on it.", - "fixed-encoding": "It was named\u00a0„scars´ stones“ after the rock-climbers who got hurt while climbing on it.", - "fixed": "It was named\u00a0\"scars´ stones\" after the rock-climbers who got hurt while climbing on it.", - "expect": "pass" - }, - { - "label": "The same text as above, but as a UTF-8 / ISO-8859-2 mixup", - "original": "It was namedÂ\u00a0â\u0080\u009escars´ stonesâ\u0080\u009c after the rock-climbers who got hurt while climbing on it.", - "fixed-encoding": "It was named\u00a0„scars´ stones“ after the rock-climbers who got hurt while climbing on it.", - "fixed": "It was named\u00a0\"scars´ stones\" after the rock-climbers who got hurt while climbing on it.", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 mixup in mixed French and Arabic", - "comment": "A difficult test case that can depend on the order that steps are applied", - "original": "À tous mes frères et soeurs dans la syrienneté comme dans l’humanité, sans discrimination aucune, je vous souhaite bonne fête عيد سعيد.Que la paix, la liberté, l’égalité, la fraternité et la dignité soient avec vous.Pardonnez ce ton un peu ecclésiastique.", - "fixed-encoding": "À tous mes frères et soeurs dans la syrienneté comme dans l’humanité, sans discrimination aucune, je vous souhaite bonne fête عيد سعيد.Que la paix, la liberté, l’égalité, la fraternité et la dignité soient avec vous.Pardonnez ce ton un peu ecclésiastique.", - "fixed": "À tous mes frères et soeurs dans la syrienneté 
comme dans l'humanité, sans discrimination aucune, je vous souhaite bonne fête عيد سعيد.Que la paix, la liberté, l'égalité, la fraternité et la dignité soient avec vous.Pardonnez ce ton un peu ecclésiastique.", - "expect": "pass" - }, - { - "label": "Synthetic: Incomplete UTF-8 / Windows-1252 mixup in Arabic", - "comment": "I find text like this in OSCAR a fair amount, but couldn't isolate a good example that tested digits. The intended text means 'more than 100 countries'.", - "original": "أكثر من Ù Ù Ù¡ بلد", - "fixed": "أكثر من ٠٠١ بلد", - "expect": "fail" - }, - { - "label": "UTF-8 / sloppy Windows-1250 mixup in Romanian", - "original": "vedere Ă®nceĹŁoĹźatÄ\u0083", - "fixed": "vedere înceţoşată", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1250 mixup in Slovak", - "original": "NapĂ\u00adšte nám !", - "fixed": "Napíšte nám !", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 mixup in Spanish", - "original": "DOS AÑOS", - "fixed": "DOS AÑOS", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 followed by UTF-8 / Windows-1251", - "original": "a bigger-than-expected £5.8bn rights issue to satisfy the new banking regulator", - "fixed": "a bigger-than-expected £5.8bn rights issue to satisfy the new banking regulator", - "expect": "pass" - }, - { - "label": "fancy Unicode crossing-out, but mojibaked", - "original": "hotel $49 $̶6̶3̶ updated 2018", - "fixed": "hotel $49 $̶6̶3̶ updated 2018", - "expect": "pass" - }, - { - "label": "A face with UTF-8 / sloppy Windows-1252 mixed up twice", - "original": "ââ€\u009d’(⌣˛⌣)ââ€\u009dŽ", - "fixed": "┒(⌣˛⌣)┎", - "expect": "pass" - }, - { - "label": "We can mostly decode the face above when we lose the character U+009D", - "original": "ââ€�’(⌣˛⌣)ââ€�Ž", - "fixed": "�(⌣˛⌣)�", - "expect": "pass" - }, - { - "label": "Lossy decoding can have plain ASCII question marks, as well", - "original": "The ICR has been upgraded to “bb+â€? 
from “bbâ€?", - "fixed-encoding": "The ICR has been upgraded to “bb+� from “bb�", - "fixed": "The ICR has been upgraded to \"bb+� from \"bb�", - "expect": "pass" - }, - { - "label": "CESU-8 / Latin-1 mixup over several emoji", - "comment": "You tried", - "original": "I just figured out how to tweet emojis! â\u009a½í\u00a0½í¸\u0080í\u00a0½í¸\u0081í\u00a0½í¸\u0082í\u00a0½í¸\u0086í\u00a0½í¸\u008eí\u00a0½í¸\u008eí\u00a0½í¸\u008eí\u00a0½í¸\u008e", - "fixed": "I just figured out how to tweet emojis! ⚽😀😁😂😆😎😎😎😎", - "expect": "pass" - }, - { - "label": "Two levels of inconsistent mojibake", - "comment": "The en-dash was mojibaked in UTF-8 / Windows-1252 as three characters, two of which were mojibaked again as Windows-1252 / Latin-1, and the third of which was mojibaked as UTF-8 / Latin-1. Unfortunately, if we fix this, we leave ourselves room to greedily 'decode' random Han characters in complex Latin-alphabet mojibake", - "original": "Arsenal v Wolfsburg: pre-season friendly â\u0080â\u0080\u009c live!", - "fixed": "Arsenal v Wolfsburg: pre-season friendly – live!", - "expect": "fail" - }, - { - "label": "An absolutely hopeless garble", - "comment": "If we try too hard to decode this, we'll recursively apply `decode_inconsistent_utf8` until the characters turn into random Han and katakana characters.", - "original": "ã†â€™ãƒâ€ ã¢â‚¬â„¢ãƒæ’ã‚â¢ãƒâ¢ã¢â‚¬å¡ã‚â¬ãƒâ€šã‚â", - "fixed-encoding": "ã†â€™ãƒâ€ ã¢â‚¬â„¢ãƒæ’ã‚â¢ãƒâ¢ã¢â‚¬å¡ã‚â¬ãƒâ€šã‚â", - "fixed": "ã†â€™ãƒâ€ ã¢â'¬â\"¢ãƒæ'ã'â¢ãƒâ¢ã¢â'¬å¡ã'â¬ãƒâ€šã'â", - "expect": "pass" - }, - { - "label": "Inconsistent UTF-8 / Latin-1 mojibake", - "original": "Ecuadorâ\u0080\u0099s â\u0080\u0098purely political decision on Assangeâ\u0080\u0099 is likely result of â\u0080\u0098US pressureâ\u0080\u0099\u0085", - "fixed-encoding": "Ecuador’s ‘purely political decision on Assange’ is likely result of ‘US pressure’…", - "fixed": "Ecuador's 'purely political decision on Assange' is likely result of 'US pressure'…", - "expect": "pass" - }, - { 
- "label": "Inconsistent UTF-8 / Latin-1 mojibake with an ellipsis from the Windows-1252 character set", - "original": "Ecuadorâ\u0080\u0099s â\u0080\u0098purely political decision on Assangeâ\u0080\u0099 is likely result of â\u0080\u0098US pressureâ\u0080\u0099…", - "fixed-encoding": "Ecuador’s ‘purely political decision on Assange’ is likely result of ‘US pressure’…", - "fixed": "Ecuador's 'purely political decision on Assange' is likely result of 'US pressure'…", - "expect": "pass" - }, - { - "label": "Inconsistent mojibake in Portuguese", - "original": "Campeonatos > III Divisão - Série F > Jornadas Classificação", - "fixed": "Campeonatos > III Divisão - Série F > Jornadas Classificação", - "expect": "pass" - }, - { - "label": "Handle Afrikaans 'n character", - "original": "ʼn Chloroplas is ʼn organel wat in fotosinterende plante voorkom.", - "fixed-encoding": "ʼn Chloroplas is ʼn organel wat in fotosinterende plante voorkom.", - "fixed": "'n Chloroplas is 'n organel wat in fotosinterende plante voorkom.", - "expect": "pass" - }, - { - "label": "Handle Croatian single-codepoint digraphs", - "original": "izum „bootstrap load“ koji je korištenjem polisilicijskog sloja proizveo dovoljno dobre kondenzatore na čipu", - "fixed-encoding": "izum „bootstrap load“ koji je korištenjem polisilicijskog sloja proizveo dovoljno dobre kondenzatore na čipu", - "fixed": "izum \"bootstrap load\" koji je korištenjem polisilicijskog sloja proizveo dovoljno dobre kondenzatore na čipu", - "expect": "pass" - }, - { - "label": "A with an acute accent, in isolation", - "original": "Nicolás", - "fixed": "Nicolás", - "expect": "pass" - }, - { - "label": "sharp S, in isolation, via MacRoman encoding", - "comment": "regression reported in issue #186", - "original": "wei√ü", - "fixed": "weiß", - "expect": "pass" - }, - { - "label": "Sort of negative: this inconsistent mojibake could be Latin-1 or MacRoman, and it was meant to be Latin-1, but it's safest to not decode it as either", - 
"comment": "issue #202", - "original": "Bremer/Mccoy – DrÃ¥ber", - "fixed": "Bremer/Mccoy – DrÃ¥ber", - "expect": "pass" - }, - { - "label": "Negative: 'è' preceded by a non-breaking space is not a small capital Y", - "original": "Con il corpo e lo spirito ammaccato,\u00a0è come se nel cuore avessi un vetro conficcato.", - "fixed": "Con il corpo e lo spirito ammaccato,\u00a0è come se nel cuore avessi un vetro conficcato.", - "expect": "pass" - }, - { - "label": "Negative: multiplication sign and ellipsis", - "comment": "Should not turn into a dot below", - "original": "4288×…", - "fixed": "4288×…", - "expect": "pass" - }, - { - "label": "Negative: accents are sometimes used as quotes", - "comment": "Under a previous heuristic, this tested the CESU-8 decoder, which would try to decode it and fail when it hit the end of the string", - "original": "``toda produzida pronta pra assa aí´´", - "fixed": "``toda produzida pronta pra assa aí´´", - "expect": "pass" - }, - { - "label": "Negative: 'Õ' followed by an ellipsis", - "comment": "Should not turn into the Armenian letter Յ", - "original": "HUHLL Õ…", - "fixed": "HUHLL Õ…", - "expect": "pass" - }, - { - "label": "Negative: 'Ê' followed by an ellipsis", - "comment": "Should not turn into a squat reversed esh", - "original": "RETWEET SE VOCÊ…", - "fixed": "RETWEET SE VOCÊ…", - "expect": "pass" - }, - { - "label": "Negative: 'É' followed by an ellipsis", - "comment": "Should not turn into 'MARQUɅ'", - "original": "PARCE QUE SUR LEURS PLAQUES IL Y MARQUÉ…", - "fixed": "PARCE QUE SUR LEURS PLAQUES IL Y MARQUÉ…", - "expect": "pass" - }, - { - "label": "Negative: 'Ó' followed by an ellipsis", - "comment": "Should not turn into 'SӅ'", - "original": "TEM QUE SEGUIR, SDV SÓ…", - "fixed": "TEM QUE SEGUIR, SDV SÓ…", - "expect": "pass" - }, - { - "label": "Negative: 'É' followed by a curly apostrophe", - "comment": "Should not turn into 'ZZAJɒs'", - "original": "Join ZZAJÉ’s Official Fan List and receive news, events, and more!", - 
"fixed-encoding": "Join ZZAJÉ’s Official Fan List and receive news, events, and more!", - "fixed": "Join ZZAJÉ's Official Fan List and receive news, events, and more!", - "expect": "pass" - }, - { - "label": "Negative: 'é' preceded by curly apostrophe", - "comment": "Should not turn into 'LՎpisode'", - "original": "L’épisode 8 est trop fou ouahh", - "fixed-encoding": "L’épisode 8 est trop fou ouahh", - "fixed": "L'épisode 8 est trop fou ouahh", - "expect": "pass" - }, - { - "label": "Negative: three raised eyebrows or something?", - "comment": "Should not turn into private use character U+F659", - "original": "Ôôô VIDA MINHA", - "fixed": "Ôôô VIDA MINHA", - "expect": "pass" - }, - { - "label": "Negative: copyright sign preceded by non-breaking space", - "comment": "Should not turn into 'ʩ'", - "original": "[x]\u00a0©", - "fixed": "[x]\u00a0©", - "expect": "pass" - }, - { - "label": "Negative: en dash and infinity sign", - "comment": "Should not turn into '2012Ѱ'", - "original": "2012—∞", - "fixed": "2012—∞", - "expect": "pass" - }, - { - "label": "Negative: This Е is a Ukrainian letter, but nothing else is wrong", - "original": "SENSЕ - Oleg Tsedryk", - "fixed": "SENSЕ - Oleg Tsedryk", - "expect": "pass" - }, - { - "label": "Negative: angry face", - "comment": "The face should not turn into '`«'", - "original": "OK??:( `¬´ ):", - "fixed": "OK??:( `¬´ ):", - "expect": "pass" - }, - { - "label": "Negative, synthetic: face with glasses and a raised eyebrow", - "original": "( o¬ô )", - "fixed": "( o¬ô )", - "expect": "pass" - }, - { - "label": "Negative: triangle and degree sign", - "comment": "I'm not really sure what it *is* supposed to be, but it's not 'ơ'", - "original": "∆°", - "fixed": "∆°", - "expect": "pass" - }, - { - "label": "Negative: Portuguese with inverted question mark", - "comment": "Former false positive - it should not turn into 'QUEM ɿ'", - "original": "ESSE CARA AI QUEM É¿", - "fixed": "ESSE CARA AI QUEM É¿", - "expect": "pass" - }, - { - "label": 
"Negative: Portuguese with acute accents as quotation marks", - "comment": "Former false positive - the end should not turn into a superscript H", - "original": "``hogwarts nao existe, voce nao vai pegar o trem pra lá´´", - "fixed": "``hogwarts nao existe, voce nao vai pegar o trem pra lá´´", - "expect": "pass" - }, - { - "label": "Negative: Finnish Ä followed by a non-breaking space", - "comment": "Former false positive - should not become a G with a dot", - "original": "SELKÄ\u00a0EDELLÄ\u00a0MAAHAN via @YouTube", - "fixed": "SELKÄ\u00a0EDELLÄ\u00a0MAAHAN via @YouTube", - "expect": "pass" - }, - { - "label": "Negative: multiplying by currency", - "comment": "Former false positive - should not become the Hebrew letter 'final pe'", - "original": "Offering 5×£35 pin ups", - "fixed": "Offering 5×£35 pin ups", - "expect": "pass" - }, - { - "label": "Negative: registered chocolate brand name", - "comment": "Former false positive - should not become the IPA letter 'lezh'", - "original": "NESTLÉ® requiere contratar personal para diferentes areas a nivel nacional e internacional", - "fixed": "NESTLÉ® requiere contratar personal para diferentes areas a nivel nacional e internacional", - "expect": "pass" - }, - { - "label": "Mostly negative: we only need to fix C1 control characters", - "comment": "We should not decode 'é\u0085 ' as '酠'", - "original": "C'est vrai que nous n'en avons pas encore beaucoup parlé\u0085 Tu sais, ça fait de nombreuses années", - "fixed": "C'est vrai que nous n'en avons pas encore beaucoup parlé… Tu sais, ça fait de nombreuses années", - "expect": "pass" - }, - { - "label": "French example containing non-breaking spaces", - "original": "ART TRIP Ã\u00a0 l'office de tourisme", - "fixed": "ART TRIP à l'office de tourisme", - "expect": "pass" - }, - { - "label": "English example in UTF-8 / Windows-1251 with a ligature", - "original": "This is signiп¬Ѓcantly lower than the respective share", - "fixed-encoding": "This is significantly lower than the 
respective share", - "fixed": "This is significantly lower than the respective share", - "expect": "pass" - }, - { - "label": "Synthetic: we can recognize à in some cases when it's the only mojibake", - "original": "voilà le travail", - "fixed": "voilà le travail", - "expect": "pass" - }, - { - "label": "Synthetic: we can recognize à at the end of a word when it absorbs a following space", - "original": "voilà le travail", - "fixed": "voilà le travail", - "expect": "pass" - }, - { - "label": "Negative: We don't fix à in all contexts", - "original": "C O N C L U S à O", - "fixed": "C O N C L U S à O", - "expect": "pass" - }, - { - "label": "'à' remains its own word, even if spaces after it get coalesced into one", - "original": "à perturber la réflexion des théologiens jusqu'à nos jours", - "fixed": "à perturber la réflexion des théologiens jusqu'à nos jours", - "expect": "pass" - }, - { - "label": "Fix 'à' in inconsistent mojibake", - "original": "Le barème forfaitaire permet l’évaluation des frais de déplacement relatifs à l’utilisation", - "fixed-encoding": "Le barème forfaitaire permet l’évaluation des frais de déplacement relatifs à l’utilisation", - "fixed": "Le barème forfaitaire permet l'évaluation des frais de déplacement relatifs à l'utilisation", - "expect": "pass" - }, - { - "label": "The Portuguese word 'às' does not become 'à s' due to the French fix", - "original": "com especial atenção à s crianças", - "fixed": "com especial atenção às crianças", - "expect": "pass" - }, - { - "label": "This is why we require a space after the 's' in 'às'", - "original": "Troisième édition pour ce festival qui persiste et signe à s'éloigner des grands axes pour prendre les contre-allées en 16 concerts dans 7 villes de 2 pays voisins.", - "fixed": "Troisième édition pour ce festival qui persiste et signe à s'éloigner des grands axes pour prendre les contre-allées en 16 concerts dans 7 villes de 2 pays voisins.", - "expect": "pass" - }, - { - "label": "We can fix 'à' in 
windows-1251 sometimes as well", - "original": "La rГ©gion de Dnepropetrovsk se trouve Г l’ouest de l’Ukraine", - "fixed-encoding": "La région de Dnepropetrovsk se trouve à l’ouest de l’Ukraine", - "fixed": "La région de Dnepropetrovsk se trouve à l'ouest de l'Ukraine", - "expect": "pass" - }, - { - "label": "'à quele' is the Portuguese word 'àquele', not 'à quele'", - "original": "eliminado o antígeno e mantidos os níveis de anticorpos, surgem as condições necessárias ao estabelecimento do granuloma, semelhante à quele observado nas lesões por imunocomplexo em excesso de anticorpos", - "fixed": "eliminado o antígeno e mantidos os níveis de anticorpos, surgem as condições necessárias ao estabelecimento do granuloma, semelhante àquele observado nas lesões por imunocomplexo em excesso de anticorpos", - "expect": "pass" - }, - { - "label": "A complex, lossy pile-up of mojibake in Portuguese", - "original": "â € ðŸ“� Regulamento: â € âš ï¸� As pessoas que marcarem nos comentários perfis empresariais e/ou de marcas, personalidades ou fake serão desclassificadas. âš ï¸� Podem participar pessoas residentes em Petrolina/PE ou Juazeiro/BA, desde que se comprometam a retirar o prêmio em nosso endereço. Funcionários estão vetados. âš ï¸� Serão válidos os comentários postados até 16h, do dia 31/03/2018. E o resultado será divulgado até à s 19h do mesmo dia em uma nova publicação em nosso instagram. â € Boa sorte!!! 😀ðŸ�°", - "fixed": "⠀ �\u00a0Regulamento: ⠀ ⚠� As pessoas que marcarem nos comentários perfis empresariais e/ou de marcas, personalidades ou fake serão desclassificadas. ⚠� Podem participar pessoas residentes em Petrolina/PE ou Juazeiro/BA, desde que se comprometam a retirar o prêmio em nosso endereço. Funcionários estão vetados. ⚠� Serão válidos os comentários postados até 16h, do dia 31/03/2018. E o resultado será divulgado até às 19h do mesmo dia em uma nova publicação em nosso instagram. 
⠀ Boa sorte!!!\u00a0😀�", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1252 mixup in Gaelic involving non-breaking spaces", - "original": "CÃ\u00a0nan nan GÃ\u00a0idheal", - "fixed": "Cànan nan Gàidheal", - "expect": "pass" - }, - { - "label": "Misleading mix-up in Spanish", - "comment": "The original text has mojibake, but the sequence 'á \u0093' can decode as U+1813 MONGOLIAN DIGIT THREE, when the whole string should really just decode as a Latin-1/Windows-1252 mixup", - "original": "tiene demora y está \u0093próximo a resolverse\u0094", - "fixed": "tiene demora y está \"próximo a resolverse\"", - "expect": "fail" - }, - { - "label": "A-with-grave in Vietnamese", - "comment": "Currently adds extra spaces that shouldn't be there", - "original": "Xem clip hĂ i, phim hĂ i má»›i hay nhất", - "fixed": "Xem clip hài, phim hài mới hay nhất", - "expect": "fail" - }, - { - "label": "Punctuation pile-up should actually be musical notes", - "original": "Engkau masih yg terindah, indah di dalam hatiku♫~", - "fixed": "Engkau masih yg terindah, indah di dalam hatiku♫~", - "expect": "pass" - }, - { - "label": "Latin-1 / MacRoman mixup in Spanish", - "comment": "Requires something like encoding detection", - "original": "Deja dos heridos hundimiento de barco tur\u0092stico en Acapulco.", - "fixed": "Deja dos heridos hundimiento de barco turístico en Acapulco.", - "expect": "fail" - }, - { - "label": "subtle UTF-8 / codepage 437 mixup in Spanish", - "original": "┬┐que diferencia hay?", - "fixed": "¿que diferencia hay?", - "expect": "fail" - }, - { - "label": "Latin-1 / MacRoman mixup in Spanish, 2 characters", - "comment": "Requires something like encoding detection", - "original": "Habitantes de Coatl\u0087n conf\u0092an en proyecto de edil electo independiente", - "fixed": "Habitantes de Coatlán confían en proyecto de edil electo independiente", - "expect": "fail" - }, - { - "label": "An example with 'à' in windows-1251 where we need our heuristic to be bolder", - 
"original": "faites attention Г bien vous renseigner avant sur le mГ©dicament", - "fixed": "faites attention à bien vous renseigner avant sur le médicament", - "expect": "fail" - }, - { - "label": "UTF-8 / Windows-1251 mixup in tweet spam", - "original": "Blog Traffic Tip 2 – Broadcast Email Your Blog", - "fixed": "Blog Traffic Tip 2 – Broadcast Email Your Blog", - "expect": "pass" - }, - { - "label": "UTF-8 / Windows-1251 mixup", - "original": "S&P Confirms Ukrsotsbank’s “B-“ Rating", - "fixed-encoding": "S&P Confirms Ukrsotsbank’s “B-“ Rating", - "fixed": "S&P Confirms Ukrsotsbank's \"B-\" Rating", - "expect": "pass" - }, - { - "label": "Dutch example with ë", - "comment": "from issue reported by MicroJackson", - "original": "ongeëvenaard", - "fixed-encoding": "ongeëvenaard", - "fixed": "ongeëvenaard", - "expect": "pass" - }, - { - "label": "Negative: Indonesian leetspeak", - "original": "MÄ£ÄM ÌÑÌ Q £ÄGÌ GÄLÄW ÑÍCH SÖÄ£ ÑÝÄ $ÚÄMÌ Q £ÄGÌ GÄK ÉÑÄK BÄDÄÑ....?????????, ......JÄDÍ...", - "fixed": "MÄ£ÄM ÌÑÌ Q £ÄGÌ GÄLÄW ÑÍCH SÖÄ£ ÑÝÄ $ÚÄMÌ Q £ÄGÌ GÄK ÉÑÄK BÄDÄÑ....?????????, ......JÄDÍ...", - "expect": "pass" - }, - { - "label": "Three layers of UTF-8 / MacRoman mixup in French", - "comment": "You're welcome", - "original": "Merci de t‚Äö√†√∂¬¨¬©l‚Äö√†√∂¬¨¬©charger le plug-in Flash Player 8", - "fixed": "Merci de télécharger le plug-in Flash Player 8", - "expect": "pass" - }, - { - "label": "UTF-8 / MacRoman mixup in French", - "original": "Merci de bien vouloir activiter le Javascript dans votre navigateur web afin d'en profiter‚Ķ", - "fixed": "Merci de bien vouloir activiter le Javascript dans votre navigateur web afin d'en profiter…", - "expect": "pass" - }, - { - "label": "Italian UTF-8 / MacRoman example with ò", - "original": "Le Vigne di Zam√≤", - "fixed": "Le Vigne di Zamò", - "expect": "pass" - }, - { - "label": "Italian UTF-8 / MacRoman mojibake that looks like math", - "comment": "False negative: 'pi√π' is a bit too reasonable to fix", - "original": "Sarai 
ricontattato dal nostro Esperto al pi√π presto.", - "fixed": "Sarai ricontattato dal nostro Esperto al più presto.", - "expect": "fail" - }, - { - "label": "Hebrew UTF-8 / Windows-1252 mojibake", - "comment": "reported by SuperIRabbit as issue #158", - "original": "בהודעה", - "fixed": "בהודעה", - "expect": "pass" - }, - { - "label": "Synthetic: Hebrew UTF-8 / Windows-1250 mojibake", - "original": "בהודעה", - "fixed": "בהודעה", - "expect": "pass" - }, - { - "label": "Synthetic: Hebrew UTF-8 / MacRoman mojibake", - "original": "◊ë◊î◊ï◊ì◊¢◊î", - "fixed": "בהודעה", - "expect": "pass" - }, - { - "label": "Synthetic: Hebrew UTF-8 / Latin-1 mojibake", - "comment": "This example uses low-numbered codepoints to spell 'ABBA' in Hebrew, so that it falls into the range where Latin-1 is different from Windows-1252. As a bonus, this example looks right even if your RTL text rendering isn't working.", - "original": "×\u0090×\u0091×\u0091×\u0090", - "fixed": "אבבא", - "expect": "pass" - }, - { - "label": "Synthetic: Arabic UTF-8 / Windows-1252 mojibake", - "original": "رسالة", - "fixed": "رسالة", - "expect": "pass" - }, - { - "label": "Synthetic: Arabic UTF-8 / Windows-1250 mojibake", - "original": "رسالة", - "fixed": "رسالة", - "expect": "pass" - }, - { - "label": "Synthetic: Arabic UTF-8 / MacRoman mojibake", - "original": "ÿ±ÿ≥ÿߟÑÿ©", - "fixed": "رسالة", - "expect": "pass" - }, - { - "label": "Negative: math in Unicode", - "comment": "This isn't mojibake, it's an actual equation", - "original": "(-1/2)! = √π", - "fixed": "(-1/2)! 
= √π", - "expect": "pass" - }, - { - "label": "Negative: Leet line-art", - "comment": "The heuristic before v6 loved to 'fix' this and decode it as 'ôaſaſaſaſa'", - "original": "├┤a┼┐a┼┐a┼┐a┼┐a", - "fixed": "├┤a┼┐a┼┐a┼┐a┼┐a", - "expect": "pass" - }, - { - "label": "Synthetic, negative: Brontë's name does not end with a Korean syllable", - "comment": "The original example of why ftfy needs heuristics", - "original": "I'm not such a fan of Charlotte Brontë…”", - "fixed-encoding": "I'm not such a fan of Charlotte Brontë…”", - "fixed": "I'm not such a fan of Charlotte Brontë…\"", - "expect": "pass" - }, - { - "label": "Synthetic, negative: hypothetical Swedish product name", - "comment": "This used to be a constructed example of a false positive, until you added another symbol", - "original": "AHÅ™, the new sofa from IKEA", - "fixed": "AHÅ™, the new sofa from IKEA", - "expect": "pass" - }, - { - "label": "Synthetic, negative: Ukrainian capital letters", - "comment": "We need to fix Windows-1251 conservatively, or else this decodes as '²ʲ'", - "original": "ВІКІ is Ukrainian for WIKI", - "fixed": "ВІКІ is Ukrainian for WIKI", - "expect": "pass" - }, - { - "label": "Synthetic, negative: don't leak our internal use of byte 0x1A", - "comment": "We use byte 0x1A internally as an encoding of U+FFFD, but literal occurrences of U+1A are just ASCII control characters", - "original": "These control characters \u001a are apparently intentional \u0081", - "fixed-encoding": "These control characters \u001a are apparently intentional \u0081", - "fixed": "These control characters are apparently intentional \u0081", - "expect": "pass" - }, - { - "label": "Synthetic, negative: U+1A on its own", - "comment": "We use byte 0x1A internally as an encoding of U+FFFD, but literal occurrences of U+1A are just ASCII control characters", - "original": "Here's a control character: \u001a", - "fixed-encoding": "Here's a control character: \u001a", - "fixed": "Here's a control character: ", - 
"expect": "pass" - }, - { - "label": "Synthetic, negative: A-with-circle as an Angstrom sign", - "comment": "Should not turn into '10 ŗ'", - "original": "a radius of 10 Å—", - "fixed": "a radius of 10 Å—", - "expect": "pass" - }, - { - "label": "Synthetic, negative: Spanish with exclamation points on the wrong sides", - "original": "!YO SÉ¡", - "fixed": "!YO SÉ¡", - "expect": "pass" - }, - { - "label": "Synthetic: fix text with backslashes in it", - "comment": "Tests for a regression on a long-ago bug", - "original": "<40\\% vs \u00e2\u0089\u00a540\\%", - "fixed": "<40\\% vs ≥40\\%", - "expect": "pass" - }, - { - "label": "Synthetic: curly quotes with mismatched encoding glitches in Latin-1", - "original": "\u00e2\u0080\u009cmismatched quotes\u0085\u0094", - "fixed-encoding": "“mismatched quotes…”", - "fixed": "\"mismatched quotes…\"", - "expect": "pass" - }, - { - "label": "Synthetic: curly quotes with mismatched encoding glitches in Windows-1252", - "original": "“mismatched quotes…”", - "fixed-encoding": "“mismatched quotes…”", - "fixed": "\"mismatched quotes…\"", - "expect": "pass" - }, - { - "label": "Synthetic: lossy decoding in sloppy-windows-1252", - "original": "“lossy decodingâ€�", - "fixed-encoding": "“lossy decoding�", - "fixed": "\"lossy decoding�", - "expect": "pass" - }, - { - "label": "Synthetic: French word for August in windows-1252", - "original": "août", - "fixed-encoding": "août", - "fixed": "août", - "expect": "pass" - }, - { - "label": "Synthetic: French word for hotel in all-caps windows-1252", - "original": "HÔTEL", - "fixed-encoding": "HÔTEL", - "fixed": "HÔTEL", - "expect": "pass" - }, - { - "label": "Synthetic: Scottish Gaelic word for 'subject' in all-caps windows-1252", - "original": "CÙIS", - "fixed-encoding": "CÙIS", - "fixed": "CÙIS", - "expect": "pass" - }, - { - "label": "Synthetic, negative: Romanian word before a non-breaking space", - "comment": "The word literally means 'not even once', which might be a good recommendation 
about fixing Romanian mojibake", - "original": "NICIODATĂ\u00a0", - "fixed": "NICIODATĂ\u00a0", - "expect": "pass" - }, - { - "label": "Synthetic, negative: Be careful around curly apostrophes", - "comment": "It shouldn't end up saying 'a lot of Òs'", - "original": "There are a lot of Ã’s in mojibake text", - "fixed-encoding": "There are a lot of Ã’s in mojibake text", - "fixed": "There are a lot of Ã's in mojibake text", - "expect": "pass" - }, - { - "label": "Synthetic, negative: Romanian word before a trademark sign", - "comment": "We would change 'DATÙ' to 'DATÙ' if it passed the badness heuristic", - "original": "NICIODATĂ™", - "fixed": "NICIODATĂ™", - "expect": "pass" - }, - { - "label": "Synthetic, false positive: the title of a manga, in weird capitalized romaji, with a non-breaking space", - "comment": "Testing tells me I should worry about cases like this, though I haven't seen a real example. Searching for similar real text yields a lot of examples that actually come out fine.", - "original": "MISUTÂ\u00a0AJIKKO", - "fixed": "MISUTÂ\u00a0AJIKKO", - "expect": "fail" - }, - { - "label": "Synthetic, negative: Camel-cased Serbian that looks like a UTF-8 / Windows-1251 mixup", - "comment": "I made this text up, but it seems like it means 'HelloDevil'. Could be a username or something.", - "original": "ПоздравЂаво", - "fixed": "ПоздравЂаво", - "expect": "pass" - }, - { - "label": "Synthetic: mojibake with trademark sign at the end of a word", - "comment": "I recall the correct version of this text from a sign in the movie Amélie. 
Now we can help her twin Amélie, who makes mojibaked signs.", - "original": "OÙ ET QUAND?", - "fixed": "OÙ ET QUAND?", - "expect": "pass" - } -] \ No newline at end of file diff --git a/tests/test_cli.py b/tests/test_cli.py index 0b3d107f..a862e31d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,11 +1,12 @@ import os import subprocess +from pathlib import Path import pytest # Get the filename of 'face.txt', an example of mojibake -THIS_DIR = os.path.dirname(__file__) -TEST_FILENAME = os.path.join(THIS_DIR, "face.txt") +THIS_DIR = Path(__file__).parent +TEST_FILENAME = THIS_DIR / "face.txt" CORRECT_OUTPUT = os.linesep.join(["┒(⌣˛⌣)┎", ""]) FAILED_OUTPUT = os.linesep.join( [ @@ -61,6 +62,6 @@ def test_same_file(): def test_stdin(): - with open(TEST_FILENAME, "rb") as infile: + with TEST_FILENAME.open("rb") as infile: output = get_command_output(["ftfy"], stdin=infile) assert output == CORRECT_OUTPUT diff --git a/tests/test_encodings.py b/tests/test_encodings.py index 037404c0..c3c9c2e4 100644 --- a/tests/test_encodings.py +++ b/tests/test_encodings.py @@ -6,9 +6,7 @@ def test_cesu8(): cls2 = bad_codecs.search_function("cesu-8").__class__ assert cls1 == cls2 - test_bytes = ( - b"\xed\xa6\x9d\xed\xbd\xb7 is an unassigned character, and \xc0\x80 is null" - ) + test_bytes = b"\xed\xa6\x9d\xed\xbd\xb7 is an unassigned character, and \xc0\x80 is null" test_text = "\U00077777 is an unassigned character, and \x00 is null" assert test_bytes.decode("cesu8") == test_text diff --git a/tests/test_examples_in_json.py b/tests/test_examples_in_json.py index cf99e27b..2be9eb4e 100644 --- a/tests/test_examples_in_json.py +++ b/tests/test_examples_in_json.py @@ -25,15 +25,24 @@ """ import json -import os +from pathlib import Path import pytest from ftfy import apply_plan, fix_and_explain, fix_encoding_and_explain, fix_text -THIS_DIR = os.path.dirname(__file__) -TEST_FILENAME = os.path.join(THIS_DIR, "test_cases.json") -TEST_DATA = json.load(open(TEST_FILENAME, 
encoding="utf-8")) +THIS_DIR = Path(__file__).parent +TEST_CASE_DIR = THIS_DIR / "test-cases" + + +def load_test_data() -> list[dict]: + test_data = [] + for filepath in sorted(TEST_CASE_DIR.glob("*.json")): + test_data.extend(json.loads(filepath.read_text(encoding="utf-8"))) + return test_data + + +TEST_DATA = load_test_data() TESTS_THAT_PASS = [test for test in TEST_DATA if test["expect"] == "pass"] TESTS_THAT_FAIL = [test for test in TEST_DATA if test["expect"] == "fail"] diff --git a/tox.ini b/tox.ini index 1f6e83c1..ec356b7c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py38, py39, py310, py311, py312, py313 +envlist = py39, py310, py311, py312, py313 [testenv] deps = diff --git a/uv.lock b/uv.lock new file mode 100644 index 00000000..438359fb --- /dev/null +++ b/uv.lock @@ -0,0 +1,546 @@ +version = 1 +requires-python = ">=3.9" + +[[package]] +name = "alabaster" +version = "0.7.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/3e/13dd8e5ed9094e734ac430b5d0eb4f2bb001708a8b7856cbf8e084e001ba/alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", size = 23776 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92", size = 13511 }, +] + +[[package]] +name = "babel" +version = "2.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash =
"sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/ca/824b1195773ce6166d388573fc106ce56d4a805bd7427b624e063596ec58/beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", size = 581181 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, +] + +[[package]] +name = "certifi" +version = "2024.8.30" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/ee/9b19140fe824b367c04c5e1b369942dd754c4c5462d5674002f75c4dedc1/certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9", size = 168507 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/90/3c9ff0512038035f59d279fddeb79f5f1eccd8859f06d6163c58798b9487/certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", size = 167321 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/4f/e1808dc01273379acc506d18f1504eb2d299bd4131743b9fc54d7be4df1e/charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e", size = 106620 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/8b/825cc84cf13a28bfbcba7c416ec22bf85a9584971be15b21dd8300c65b7f/charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6", size = 196363 }, + { url = "https://files.pythonhosted.org/packages/23/81/d7eef6a99e42c77f444fdd7bc894b0ceca6c3a95c51239e74a722039521c/charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b", size = 125639 }, + { url = "https://files.pythonhosted.org/packages/21/67/b4564d81f48042f520c948abac7079356e94b30cb8ffb22e747532cf469d/charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99", size = 120451 }, + { url = "https://files.pythonhosted.org/packages/c2/72/12a7f0943dd71fb5b4e7b55c41327ac0a1663046a868ee4d0d8e9c369b85/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca", size = 140041 }, + { url = "https://files.pythonhosted.org/packages/67/56/fa28c2c3e31217c4c52158537a2cf5d98a6c1e89d31faf476c89391cd16b/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d", size = 150333 }, + { url = "https://files.pythonhosted.org/packages/f9/d2/466a9be1f32d89eb1554cf84073a5ed9262047acee1ab39cbaefc19635d2/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7", size = 142921 }, + { url = "https://files.pythonhosted.org/packages/f8/01/344ec40cf5d85c1da3c1f57566c59e0c9b56bcc5566c08804a95a6cc8257/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3", size = 144785 }, + { url = 
"https://files.pythonhosted.org/packages/73/8b/2102692cb6d7e9f03b9a33a710e0164cadfce312872e3efc7cfe22ed26b4/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907", size = 146631 }, + { url = "https://files.pythonhosted.org/packages/d8/96/cc2c1b5d994119ce9f088a9a0c3ebd489d360a2eb058e2c8049f27092847/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b", size = 140867 }, + { url = "https://files.pythonhosted.org/packages/c9/27/cde291783715b8ec30a61c810d0120411844bc4c23b50189b81188b273db/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912", size = 149273 }, + { url = "https://files.pythonhosted.org/packages/3a/a4/8633b0fc1a2d1834d5393dafecce4a1cc56727bfd82b4dc18fc92f0d3cc3/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95", size = 152437 }, + { url = "https://files.pythonhosted.org/packages/64/ea/69af161062166b5975ccbb0961fd2384853190c70786f288684490913bf5/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e", size = 150087 }, + { url = "https://files.pythonhosted.org/packages/3b/fd/e60a9d9fd967f4ad5a92810138192f825d77b4fa2a557990fd575a47695b/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe", size = 145142 }, + { url = "https://files.pythonhosted.org/packages/6d/02/8cb0988a1e49ac9ce2eed1e07b77ff118f2923e9ebd0ede41ba85f2dcb04/charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc", size = 94701 }, + { 
url = "https://files.pythonhosted.org/packages/d6/20/f1d4670a8a723c46be695dff449d86d6092916f9e99c53051954ee33a1bc/charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749", size = 102191 }, + { url = "https://files.pythonhosted.org/packages/9c/61/73589dcc7a719582bf56aae309b6103d2762b526bffe189d635a7fcfd998/charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c", size = 193339 }, + { url = "https://files.pythonhosted.org/packages/77/d5/8c982d58144de49f59571f940e329ad6e8615e1e82ef84584c5eeb5e1d72/charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944", size = 124366 }, + { url = "https://files.pythonhosted.org/packages/bf/19/411a64f01ee971bed3231111b69eb56f9331a769072de479eae7de52296d/charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee", size = 118874 }, + { url = "https://files.pythonhosted.org/packages/4c/92/97509850f0d00e9f14a46bc751daabd0ad7765cff29cdfb66c68b6dad57f/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c", size = 138243 }, + { url = "https://files.pythonhosted.org/packages/e2/29/d227805bff72ed6d6cb1ce08eec707f7cfbd9868044893617eb331f16295/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6", size = 148676 }, + { url = "https://files.pythonhosted.org/packages/13/bc/87c2c9f2c144bedfa62f894c3007cd4530ba4b5351acb10dc786428a50f0/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea", 
size = 141289 }, + { url = "https://files.pythonhosted.org/packages/eb/5b/6f10bad0f6461fa272bfbbdf5d0023b5fb9bc6217c92bf068fa5a99820f5/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc", size = 142585 }, + { url = "https://files.pythonhosted.org/packages/3b/a0/a68980ab8a1f45a36d9745d35049c1af57d27255eff8c907e3add84cf68f/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5", size = 144408 }, + { url = "https://files.pythonhosted.org/packages/d7/a1/493919799446464ed0299c8eef3c3fad0daf1c3cd48bff9263c731b0d9e2/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594", size = 139076 }, + { url = "https://files.pythonhosted.org/packages/fb/9d/9c13753a5a6e0db4a0a6edb1cef7aee39859177b64e1a1e748a6e3ba62c2/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c", size = 146874 }, + { url = "https://files.pythonhosted.org/packages/75/d2/0ab54463d3410709c09266dfb416d032a08f97fd7d60e94b8c6ef54ae14b/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365", size = 150871 }, + { url = "https://files.pythonhosted.org/packages/8d/c9/27e41d481557be53d51e60750b85aa40eaf52b841946b3cdeff363105737/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129", size = 148546 }, + { url = "https://files.pythonhosted.org/packages/ee/44/4f62042ca8cdc0cabf87c0fc00ae27cd8b53ab68be3605ba6d071f742ad3/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236", size = 143048 }, + { url = "https://files.pythonhosted.org/packages/01/f8/38842422988b795220eb8038745d27a675ce066e2ada79516c118f291f07/charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99", size = 94389 }, + { url = "https://files.pythonhosted.org/packages/0b/6e/b13bd47fa9023b3699e94abf565b5a2f0b0be6e9ddac9812182596ee62e4/charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27", size = 101752 }, + { url = "https://files.pythonhosted.org/packages/d3/0b/4b7a70987abf9b8196845806198975b6aab4ce016632f817ad758a5aa056/charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6", size = 194445 }, + { url = "https://files.pythonhosted.org/packages/50/89/354cc56cf4dd2449715bc9a0f54f3aef3dc700d2d62d1fa5bbea53b13426/charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf", size = 125275 }, + { url = "https://files.pythonhosted.org/packages/fa/44/b730e2a2580110ced837ac083d8ad222343c96bb6b66e9e4e706e4d0b6df/charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db", size = 119020 }, + { url = "https://files.pythonhosted.org/packages/9d/e4/9263b8240ed9472a2ae7ddc3e516e71ef46617fe40eaa51221ccd4ad9a27/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1", size = 139128 }, + { url = "https://files.pythonhosted.org/packages/6b/e3/9f73e779315a54334240353eaea75854a9a690f3f580e4bd85d977cb2204/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03", size = 149277 }, + { url = "https://files.pythonhosted.org/packages/1a/cf/f1f50c2f295312edb8a548d3fa56a5c923b146cd3f24114d5adb7e7be558/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284", size = 142174 }, + { url = "https://files.pythonhosted.org/packages/16/92/92a76dc2ff3a12e69ba94e7e05168d37d0345fa08c87e1fe24d0c2a42223/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15", size = 143838 }, + { url = "https://files.pythonhosted.org/packages/a4/01/2117ff2b1dfc61695daf2babe4a874bca328489afa85952440b59819e9d7/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8", size = 146149 }, + { url = "https://files.pythonhosted.org/packages/f6/9b/93a332b8d25b347f6839ca0a61b7f0287b0930216994e8bf67a75d050255/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2", size = 140043 }, + { url = "https://files.pythonhosted.org/packages/ab/f6/7ac4a01adcdecbc7a7587767c776d53d369b8b971382b91211489535acf0/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719", size = 148229 }, + { url = "https://files.pythonhosted.org/packages/9d/be/5708ad18161dee7dc6a0f7e6cf3a88ea6279c3e8484844c0590e50e803ef/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631", size = 151556 }, + { url = 
"https://files.pythonhosted.org/packages/5a/bb/3d8bc22bacb9eb89785e83e6723f9888265f3a0de3b9ce724d66bd49884e/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b", size = 149772 }, + { url = "https://files.pythonhosted.org/packages/f7/fa/d3fc622de05a86f30beea5fc4e9ac46aead4731e73fd9055496732bcc0a4/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565", size = 144800 }, + { url = "https://files.pythonhosted.org/packages/9a/65/bdb9bc496d7d190d725e96816e20e2ae3a6fa42a5cac99c3c3d6ff884118/charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7", size = 94836 }, + { url = "https://files.pythonhosted.org/packages/3e/67/7b72b69d25b89c0b3cea583ee372c43aa24df15f0e0f8d3982c57804984b/charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9", size = 102187 }, + { url = "https://files.pythonhosted.org/packages/f3/89/68a4c86f1a0002810a27f12e9a7b22feb198c59b2f05231349fbce5c06f4/charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114", size = 194617 }, + { url = "https://files.pythonhosted.org/packages/4f/cd/8947fe425e2ab0aa57aceb7807af13a0e4162cd21eee42ef5b053447edf5/charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed", size = 125310 }, + { url = "https://files.pythonhosted.org/packages/5b/f0/b5263e8668a4ee9becc2b451ed909e9c27058337fda5b8c49588183c267a/charset_normalizer-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250", size = 119126 }, + { url = 
"https://files.pythonhosted.org/packages/ff/6e/e445afe4f7fda27a533f3234b627b3e515a1b9429bc981c9a5e2aa5d97b6/charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920", size = 139342 }, + { url = "https://files.pythonhosted.org/packages/a1/b2/4af9993b532d93270538ad4926c8e37dc29f2111c36f9c629840c57cd9b3/charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64", size = 149383 }, + { url = "https://files.pythonhosted.org/packages/fb/6f/4e78c3b97686b871db9be6f31d64e9264e889f8c9d7ab33c771f847f79b7/charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23", size = 142214 }, + { url = "https://files.pythonhosted.org/packages/2b/c9/1c8fe3ce05d30c87eff498592c89015b19fade13df42850aafae09e94f35/charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc", size = 144104 }, + { url = "https://files.pythonhosted.org/packages/ee/68/efad5dcb306bf37db7db338338e7bb8ebd8cf38ee5bbd5ceaaaa46f257e6/charset_normalizer-3.4.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d", size = 146255 }, + { url = "https://files.pythonhosted.org/packages/0c/75/1ed813c3ffd200b1f3e71121c95da3f79e6d2a96120163443b3ad1057505/charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88", size = 140251 }, + { url = "https://files.pythonhosted.org/packages/7d/0d/6f32255c1979653b448d3c709583557a4d24ff97ac4f3a5be156b2e6a210/charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", 
hash = "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90", size = 148474 }, + { url = "https://files.pythonhosted.org/packages/ac/a0/c1b5298de4670d997101fef95b97ac440e8c8d8b4efa5a4d1ef44af82f0d/charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b", size = 151849 }, + { url = "https://files.pythonhosted.org/packages/04/4f/b3961ba0c664989ba63e30595a3ed0875d6790ff26671e2aae2fdc28a399/charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d", size = 149781 }, + { url = "https://files.pythonhosted.org/packages/d8/90/6af4cd042066a4adad58ae25648a12c09c879efa4849c705719ba1b23d8c/charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482", size = 144970 }, + { url = "https://files.pythonhosted.org/packages/cc/67/e5e7e0cbfefc4ca79025238b43cdf8a2037854195b37d6417f3d0895c4c2/charset_normalizer-3.4.0-cp313-cp313-win32.whl", hash = "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67", size = 94973 }, + { url = "https://files.pythonhosted.org/packages/65/97/fc9bbc54ee13d33dc54a7fcf17b26368b18505500fc01e228c27b5222d80/charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b", size = 102308 }, + { url = "https://files.pythonhosted.org/packages/54/2f/28659eee7f5d003e0f5a3b572765bf76d6e0fe6601ab1f1b1dd4cba7e4f1/charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa", size = 196326 }, + { url = "https://files.pythonhosted.org/packages/d1/18/92869d5c0057baa973a3ee2af71573be7b084b3c3d428fe6463ce71167f8/charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a", size = 125614 }, + { url = "https://files.pythonhosted.org/packages/d6/27/327904c5a54a7796bb9f36810ec4173d2df5d88b401d2b95ef53111d214e/charset_normalizer-3.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0", size = 120450 }, + { url = "https://files.pythonhosted.org/packages/a4/23/65af317914a0308495133b2d654cf67b11bbd6ca16637c4e8a38f80a5a69/charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a", size = 140135 }, + { url = "https://files.pythonhosted.org/packages/f2/41/6190102ad521a8aa888519bb014a74251ac4586cde9b38e790901684f9ab/charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242", size = 150413 }, + { url = "https://files.pythonhosted.org/packages/7b/ab/f47b0159a69eab9bd915591106859f49670c75f9a19082505ff16f50efc0/charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b", size = 142992 }, + { url = "https://files.pythonhosted.org/packages/28/89/60f51ad71f63aaaa7e51a2a2ad37919985a341a1d267070f212cdf6c2d22/charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62", size = 144871 }, + { url = "https://files.pythonhosted.org/packages/0c/48/0050550275fea585a6e24460b42465020b53375017d8596c96be57bfabca/charset_normalizer-3.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0", size = 146756 }, + { url = 
"https://files.pythonhosted.org/packages/dc/b5/47f8ee91455946f745e6c9ddbb0f8f50314d2416dd922b213e7d5551ad09/charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd", size = 141034 }, + { url = "https://files.pythonhosted.org/packages/84/79/5c731059ebab43e80bf61fa51666b9b18167974b82004f18c76378ed31a3/charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be", size = 149434 }, + { url = "https://files.pythonhosted.org/packages/ca/f3/0719cd09fc4dc42066f239cb3c48ced17fc3316afca3e2a30a4756fe49ab/charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d", size = 152443 }, + { url = "https://files.pythonhosted.org/packages/f7/0e/c6357297f1157c8e8227ff337e93fd0a90e498e3d6ab96b2782204ecae48/charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3", size = 150294 }, + { url = "https://files.pythonhosted.org/packages/54/9a/acfa96dc4ea8c928040b15822b59d0863d6e1757fba8bd7de3dc4f761c13/charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742", size = 145314 }, + { url = "https://files.pythonhosted.org/packages/73/1c/b10a63032eaebb8d7bcb8544f12f063f41f5f463778ac61da15d9985e8b6/charset_normalizer-3.4.0-cp39-cp39-win32.whl", hash = "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2", size = 94724 }, + { url = "https://files.pythonhosted.org/packages/c5/77/3a78bf28bfaa0863f9cfef278dbeadf55efe064eafff8c7c424ae3c4c1bf/charset_normalizer-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca", size = 102159 }, + { url = 
"https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "docutils" +version = "0.21.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, +] + +[[package]] +name = "ftfy" +version = "6.3.1" +source = { editable = "." } +dependencies = [ + { name = "wcwidth" }, +] + +[package.dev-dependencies] +dev = [ + { name = "furo" }, + { name = "pytest" }, + { name = "ruff" }, + { name = "sphinx" }, +] + +[package.metadata] +requires-dist = [{ name = "wcwidth" }] + +[package.metadata.requires-dev] +dev = [ + { name = "furo", specifier = ">=2024.7.18" }, + { name = "pytest", specifier = ">=8.3.2,<9" }, + { name = "ruff" }, + { name = "sphinx", specifier = ">=7,<8" }, +] + +[[package]] +name = "furo" +version = "2024.8.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "pygments" }, + { name = "sphinx" }, + { name = "sphinx-basic-ng" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a0/e2/d351d69a9a9e4badb4a5be062c2d0e87bd9e6c23b5e57337fef14bef34c8/furo-2024.8.6.tar.gz", hash = "sha256:b63e4cee8abfc3136d3bc03a3d45a76a850bada4d6374d24c1716b0e01394a01", size = 1661506 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/48/e791a7ed487dbb9729ef32bb5d1af16693d8925f4366befef54119b2e576/furo-2024.8.6-py3-none-any.whl", hash = "sha256:6cd97c58b47813d3619e63e9081169880fbe331f0ca883c871ff1f3f11814f5c", size = 341333 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "imagesize" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769 }, +] + +[[package]] +name = "importlib-metadata" +version = "8.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 }, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, +] + +[[package]] +name = "jinja2" +version = "3.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/55/39036716d19cab0747a5020fc7e907f362fbf48c984b14e62127f7e68e5d/jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", size = 240245 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 }, +] + +[[package]] +name = "markupsafe" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b4/d2/38ff920762f2247c3af5cbbbbc40756f575d9692d381d7c520f45deb9b8f/markupsafe-3.0.1.tar.gz", hash = "sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344", size = 20249 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/a2/0482d1a157f5f10f72fc4fe8c3be9ffa3651c1f7a12b60a3ab71b2635e13/MarkupSafe-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1", size = 14391 }, + { url = "https://files.pythonhosted.org/packages/3b/25/5ea6500d200fd2dc3ea25c765f69dea0a1a8d42ec80a38cd896ad47cb85d/MarkupSafe-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a", size = 12414 }, + { url = 
"https://files.pythonhosted.org/packages/92/41/cf5397dd6bb18895d148aa402cafa71018f2ffc5f6e9d6e90d85b523c741/MarkupSafe-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589", size = 21787 }, + { url = "https://files.pythonhosted.org/packages/2e/0d/5d91ef2b4f30afa87483a3a7c108c777d144b1c42d7113459296a8a2bfa0/MarkupSafe-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170", size = 20954 }, + { url = "https://files.pythonhosted.org/packages/f6/de/12a4110c2c7c7b502fe0e6f911367726dbb7a37e03e207495135d064bb48/MarkupSafe-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca", size = 21086 }, + { url = "https://files.pythonhosted.org/packages/96/55/59389babc6e8ed206849a9958de9da7c23f3a75d294f46e99624fa38fb79/MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea", size = 21685 }, + { url = "https://files.pythonhosted.org/packages/3d/cb/cbad5f093e12cd79ceea3e2957ba5bd4c2706810f333d0a3422ab2aef358/MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6", size = 21348 }, + { url = "https://files.pythonhosted.org/packages/8e/70/e19c4f39d68a52406012ee118667b57efb0bbe6e950be21187cd7a1b4b80/MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25", size = 21098 }, + { url = "https://files.pythonhosted.org/packages/30/95/ca809c01624428d427e9b3a4500f9068eca941e0c520328954ce84ad966a/MarkupSafe-3.0.1-cp310-cp310-win32.whl", hash = "sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97", size = 15075 }, + { url = 
"https://files.pythonhosted.org/packages/23/41/decb99ab07793656821a86f827a394700ce28402ebb02dc6d003210d9859/MarkupSafe-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9", size = 15535 }, + { url = "https://files.pythonhosted.org/packages/ce/af/2f5d88a7fc7226bd34c6e15f6061246ad8cff979da9f19d11bdd0addd8e2/MarkupSafe-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad", size = 14387 }, + { url = "https://files.pythonhosted.org/packages/8d/43/fd588ef5d192308c5e05974bac659bf6ae29c202b7ea2c4194bcf01eacee/MarkupSafe-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583", size = 12410 }, + { url = "https://files.pythonhosted.org/packages/58/26/78f161d602fb03804118905e5faacafc0ec592bbad71aaee62537529813a/MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7", size = 24006 }, + { url = "https://files.pythonhosted.org/packages/ae/1d/7d5ec8bcfd9c2db235d720fa51d818b7e2abc45250ce5f53dd6cb60409ca/MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b", size = 23303 }, + { url = "https://files.pythonhosted.org/packages/26/ce/703ca3b03a709e3bd1fbffa407789e56b9fa664456538092617dd665fc1d/MarkupSafe-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3", size = 23205 }, + { url = "https://files.pythonhosted.org/packages/88/60/40be0493decabc2344b12d3a709fd6ccdd15a5ebaee1e8d878315d107ad3/MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50", size = 23684 }, + { url = 
"https://files.pythonhosted.org/packages/6d/f8/8fd52a66e8f62a9add62b4a0b5a3ab4092027437f2ef027f812d94ae91cf/MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915", size = 23472 }, + { url = "https://files.pythonhosted.org/packages/d4/0b/998b17b9e06ea45ad1646fea586f1b83d02dfdb14d47dd2fd81fba5a08c9/MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91", size = 23388 }, + { url = "https://files.pythonhosted.org/packages/5a/57/b6b7aa23b2e26d68d601718f8ce3161fbdaf967b31752c7dec52bef828c9/MarkupSafe-3.0.1-cp311-cp311-win32.whl", hash = "sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635", size = 15106 }, + { url = "https://files.pythonhosted.org/packages/fc/b5/20cb1d714596acb553c810009c8004c809823947da63e13c19a7decfcb6c/MarkupSafe-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf", size = 15542 }, + { url = "https://files.pythonhosted.org/packages/45/6d/72ed58d42a12bd9fc288dbff6dd8d03ea973a232ac0538d7f88d105b5251/MarkupSafe-3.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4", size = 14322 }, + { url = "https://files.pythonhosted.org/packages/86/f5/241238f89cdd6461ac9f521af8389f9a48fab97e4f315c69e9e0d52bc919/MarkupSafe-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5", size = 12380 }, + { url = "https://files.pythonhosted.org/packages/27/94/79751928bca5841416d8ca02e22198672e021d5c7120338e2a6e3771f8fc/MarkupSafe-3.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346", size = 24099 }, + { url = 
"https://files.pythonhosted.org/packages/10/6e/1b8070bbfc467429c7983cd5ffd4ec57e1d501763d974c7caaa0a9a79f4c/MarkupSafe-3.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729", size = 23249 }, + { url = "https://files.pythonhosted.org/packages/66/50/9389ae6cdff78d7481a2a2641830b5eb1d1f62177550e73355a810a889c9/MarkupSafe-3.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc", size = 23149 }, + { url = "https://files.pythonhosted.org/packages/16/02/5dddff5366fde47133186efb847fa88bddef85914bbe623e25cfeccb3517/MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9", size = 23864 }, + { url = "https://files.pythonhosted.org/packages/f3/f1/700ee6655561cfda986e03f7afc309e3738918551afa7dedd99225586227/MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b", size = 23440 }, + { url = "https://files.pythonhosted.org/packages/fb/3e/d26623ac7f16709823b4c80e0b4a1c9196eeb46182a6c1d47b5e0c8434f4/MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38", size = 23610 }, + { url = "https://files.pythonhosted.org/packages/51/04/1f8da0810c39cb9fcff96b6baed62272c97065e9cf11471965a161439e20/MarkupSafe-3.0.1-cp312-cp312-win32.whl", hash = "sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa", size = 15113 }, + { url = "https://files.pythonhosted.org/packages/eb/24/a36dc37365bdd358b1e583cc40475593e36ab02cb7da6b3d0b9c05b0da7a/MarkupSafe-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f", size = 15611 }, + { url = 
"https://files.pythonhosted.org/packages/b1/60/4572a8aa1beccbc24b133aa0670781a5d2697f4fa3fecf0a87b46383174b/MarkupSafe-3.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772", size = 14325 }, + { url = "https://files.pythonhosted.org/packages/38/42/849915b99a765ec104bfd07ee933de5fc9c58fa9570efa7db81717f495d8/MarkupSafe-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da", size = 12373 }, + { url = "https://files.pythonhosted.org/packages/ef/82/4caaebd963c6d60b28e4445f38841d24f8b49bc10594a09956c9d73bfc08/MarkupSafe-3.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a", size = 24059 }, + { url = "https://files.pythonhosted.org/packages/20/15/6b319be2f79fcfa3173f479d69f4e950b5c9b642db4f22cf73ae5ade745f/MarkupSafe-3.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c", size = 23211 }, + { url = "https://files.pythonhosted.org/packages/9d/3f/8963bdf4962feb2154475acb7dc350f04217b5e0be7763a39b432291e229/MarkupSafe-3.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd", size = 23095 }, + { url = "https://files.pythonhosted.org/packages/af/93/f770bc70953d32de0c6ce4bcb76271512123a1ead91aaef625a020c5bfaf/MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7", size = 23901 }, + { url = "https://files.pythonhosted.org/packages/11/92/1e5a33aa0a1190161238628fb68eb1bc5e67b56a5c89f0636328704b463a/MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd", size = 23463 }, + { url 
= "https://files.pythonhosted.org/packages/0d/fe/657efdfe385d2a3a701f2c4fcc9577c63c438aeefdd642d0d956c4ecd225/MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5", size = 23569 }, + { url = "https://files.pythonhosted.org/packages/cf/24/587dea40304046ace60f846cedaebc0d33d967a3ce46c11395a10e7a78ba/MarkupSafe-3.0.1-cp313-cp313-win32.whl", hash = "sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c", size = 15117 }, + { url = "https://files.pythonhosted.org/packages/32/8f/d8961d633f26a011b4fe054f3bfff52f673423b8c431553268741dfb089e/MarkupSafe-3.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f", size = 15613 }, + { url = "https://files.pythonhosted.org/packages/9e/93/d6367ffbcd0c5c371370767f768eaa32af60bc411245b8517e383c6a2b12/MarkupSafe-3.0.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a", size = 14563 }, + { url = "https://files.pythonhosted.org/packages/4a/37/f813c3835747dec08fe19ac9b9eced01fdf93a4b3e626521675dc7f423a9/MarkupSafe-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d", size = 12505 }, + { url = "https://files.pythonhosted.org/packages/72/bf/800b4d1580298ca91ccd6c95915bbd147142dad1b8cf91d57b93b28670dd/MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396", size = 25358 }, + { url = "https://files.pythonhosted.org/packages/fd/78/26e209abc8f0a379f031f0acc151231974e5b153d7eda5759d17d8f329f2/MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453", size = 23797 }, + { url = 
"https://files.pythonhosted.org/packages/09/e1/918496a9390891756efee818880e71c1bbaf587f4dc8ede3f3852357310a/MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4", size = 23743 }, + { url = "https://files.pythonhosted.org/packages/cd/c6/26f576cd58d6c2decd9045e4e3f3c5dbc01ea6cb710916e7bbb6ebd95b6b/MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8", size = 25076 }, + { url = "https://files.pythonhosted.org/packages/b5/fa/10b24fb3b0e15fe5389dc88ecc6226ede08297e0ba7130610efbe0cdfb27/MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984", size = 24037 }, + { url = "https://files.pythonhosted.org/packages/c8/81/4b3f5537d9f6cc4f5c80d6c4b78af9a5247fd37b5aba95807b2cbc336b9a/MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a", size = 24015 }, + { url = "https://files.pythonhosted.org/packages/5f/07/8e8dcecd53216c5e01a51e84c32a2bce166690ed19c184774b38cd41921d/MarkupSafe-3.0.1-cp313-cp313t-win32.whl", hash = "sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b", size = 15213 }, + { url = "https://files.pythonhosted.org/packages/0d/87/4c364e0f109eea2402079abecbe33fef4f347b551a11423d1f4e187ea497/MarkupSafe-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295", size = 15741 }, + { url = "https://files.pythonhosted.org/packages/6f/4f/420741fb39fa3d40396fb1731a1ca78e6f9fbb225dcf15e5185b1fa954bc/MarkupSafe-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132", size = 14376 }, + { url = 
"https://files.pythonhosted.org/packages/91/71/0c4782b9ce7fb68b140b94e1eb9d2b6292990bda91dc3d3b5a34e8bd41f3/MarkupSafe-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a", size = 12408 }, + { url = "https://files.pythonhosted.org/packages/3e/3c/cbf30bf7ac1da2e013e3d338e1582db85fc3b27bf9f8863137423ad4b0b6/MarkupSafe-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8", size = 21654 }, + { url = "https://files.pythonhosted.org/packages/0b/28/229e797b8727427845b79cbd58019f598e478f974730fa705fa23904b18e/MarkupSafe-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6", size = 20817 }, + { url = "https://files.pythonhosted.org/packages/e8/b4/1121f3b2614de93cbb3deec7f44df283df44c2258ea9368bb1302b4a0b45/MarkupSafe-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b", size = 20956 }, + { url = "https://files.pythonhosted.org/packages/a8/8b/b4d57bafca01c8b1e1fbb037660869fa4f6725983c4105a02bd1242f0066/MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b", size = 21548 }, + { url = "https://files.pythonhosted.org/packages/83/87/04806f7096ba1d4f1b8c61f35c1d7c0b507c6a3cf7ed495393bf97eb5af7/MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd", size = 21222 }, + { url = "https://files.pythonhosted.org/packages/e9/96/1ecb2bb5ee7298e628cff95833beba7da6a774df7fe890a6d2f0ec460590/MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a", size = 20952 }, + { url = 
"https://files.pythonhosted.org/packages/fd/70/b937a12df7bbff14e1ca3385929f464c7af2ca72c8183c95dad26c3bf754/MarkupSafe-3.0.1-cp39-cp39-win32.whl", hash = "sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8", size = 15075 }, + { url = "https://files.pythonhosted.org/packages/e3/c4/262fac0328552da9a75a7786d7c0f43adaba4afb5f295979d33fa0f324c7/MarkupSafe-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b", size = 15527 }, +] + +[[package]] +name = "packaging" +version = "24.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/65/50db4dda066951078f0a96cf12f4b9ada6e4b811516bf0262c0f4f7064d4/packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", size = 148788 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985 }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + +[[package]] +name = "pygments" +version = "2.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/62/8336eff65bcbc8e4cb5d05b55faf041285951b6e80f33e2bff2024788f31/pygments-2.18.0.tar.gz", hash = 
"sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", size = 4891905 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a", size = 1205513 }, +] + +[[package]] +name = "pytest" +version = "8.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/6c/62bbd536103af674e227c41a8f3dcd022d591f6eed5facb5a0f31ee33bbc/pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", size = 1442487 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/77/7440a06a8ead44c7757a64362dd22df5760f9b12dc5f11b6188cd2fc27a0/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2", size = 342341 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 
64928 }, +] + +[[package]] +name = "ruff" +version = "0.6.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/0d/6148a48dab5662ca1d5a93b7c0d13c03abd3cc7e2f35db08410e47cef15d/ruff-0.6.9.tar.gz", hash = "sha256:b076ef717a8e5bc819514ee1d602bbdca5b4420ae13a9cf61a0c0a4f53a2baa2", size = 3095355 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/8f/f7a0a0ef1818662efb32ed6df16078c95da7a0a3248d64c2410c1e27799f/ruff-0.6.9-py3-none-linux_armv6l.whl", hash = "sha256:064df58d84ccc0ac0fcd63bc3090b251d90e2a372558c0f057c3f75ed73e1ccd", size = 10440526 }, + { url = "https://files.pythonhosted.org/packages/8b/69/b179a5faf936a9e2ab45bb412a668e4661eded964ccfa19d533f29463ef6/ruff-0.6.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:140d4b5c9f5fc7a7b074908a78ab8d384dd7f6510402267bc76c37195c02a7ec", size = 10034612 }, + { url = "https://files.pythonhosted.org/packages/c7/ef/fd1b4be979c579d191eeac37b5cfc0ec906de72c8bcd8595e2c81bb700c1/ruff-0.6.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:53fd8ca5e82bdee8da7f506d7b03a261f24cd43d090ea9db9a1dc59d9313914c", size = 9706197 }, + { url = "https://files.pythonhosted.org/packages/29/61/b376d775deb5851cb48d893c568b511a6d3625ef2c129ad5698b64fb523c/ruff-0.6.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645d7d8761f915e48a00d4ecc3686969761df69fb561dd914a773c1a8266e14e", size = 10751855 }, + { url = "https://files.pythonhosted.org/packages/13/d7/def9e5f446d75b9a9c19b24231a3a658c075d79163b08582e56fa5dcfa38/ruff-0.6.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eae02b700763e3847595b9d2891488989cac00214da7f845f4bcf2989007d577", size = 10200889 }, + { url = "https://files.pythonhosted.org/packages/6c/d6/7f34160818bcb6e84ce293a5966cba368d9112ff0289b273fbb689046047/ruff-0.6.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d5ccc9e58112441de8ad4b29dcb7a86dc25c5f770e3c06a9d57e0e5eba48829", 
size = 11038678 }, + { url = "https://files.pythonhosted.org/packages/13/34/a40ff8ae62fb1b26fb8e6fa7e64bc0e0a834b47317880de22edd6bfb54fb/ruff-0.6.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:417b81aa1c9b60b2f8edc463c58363075412866ae4e2b9ab0f690dc1e87ac1b5", size = 11808682 }, + { url = "https://files.pythonhosted.org/packages/2e/6d/25a4386ae4009fc798bd10ba48c942d1b0b3e459b5403028f1214b6dd161/ruff-0.6.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c866b631f5fbce896a74a6e4383407ba7507b815ccc52bcedabb6810fdb3ef7", size = 11330446 }, + { url = "https://files.pythonhosted.org/packages/f7/f6/bdf891a9200d692c94ebcd06ae5a2fa5894e522f2c66c2a12dd5d8cb2654/ruff-0.6.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b118afbb3202f5911486ad52da86d1d52305b59e7ef2031cea3425142b97d6f", size = 12483048 }, + { url = "https://files.pythonhosted.org/packages/a7/86/96f4252f41840e325b3fa6c48297e661abb9f564bd7dcc0572398c8daa42/ruff-0.6.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a67267654edc23c97335586774790cde402fb6bbdb3c2314f1fc087dee320bfa", size = 10936855 }, + { url = "https://files.pythonhosted.org/packages/45/87/801a52d26c8dbf73424238e9908b9ceac430d903c8ef35eab1b44fcfa2bd/ruff-0.6.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3ef0cc774b00fec123f635ce5c547dac263f6ee9fb9cc83437c5904183b55ceb", size = 10713007 }, + { url = "https://files.pythonhosted.org/packages/be/27/6f7161d90320a389695e32b6ebdbfbedde28ccbf52451e4b723d7ce744ad/ruff-0.6.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:12edd2af0c60fa61ff31cefb90aef4288ac4d372b4962c2864aeea3a1a2460c0", size = 10274594 }, + { url = "https://files.pythonhosted.org/packages/00/52/dc311775e7b5f5b19831563cb1572ecce63e62681bccc609867711fae317/ruff-0.6.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:55bb01caeaf3a60b2b2bba07308a02fca6ab56233302406ed5245180a05c5625", size = 10608024 }, + { url = 
"https://files.pythonhosted.org/packages/98/b6/be0a1ddcbac65a30c985cf7224c4fce786ba2c51e7efeb5178fe410ed3cf/ruff-0.6.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:925d26471fa24b0ce5a6cdfab1bb526fb4159952385f386bdcc643813d472039", size = 10982085 }, + { url = "https://files.pythonhosted.org/packages/bb/a4/c84bc13d0b573cf7bb7d17b16d6d29f84267c92d79b2f478d4ce322e8e72/ruff-0.6.9-py3-none-win32.whl", hash = "sha256:eb61ec9bdb2506cffd492e05ac40e5bc6284873aceb605503d8494180d6fc84d", size = 8522088 }, + { url = "https://files.pythonhosted.org/packages/74/be/fc352bd8ca40daae8740b54c1c3e905a7efe470d420a268cd62150248c91/ruff-0.6.9-py3-none-win_amd64.whl", hash = "sha256:785d31851c1ae91f45b3d8fe23b8ae4b5170089021fbb42402d811135f0b7117", size = 9359275 }, + { url = "https://files.pythonhosted.org/packages/3e/14/fd026bc74ded05e2351681545a5f626e78ef831f8edce064d61acd2e6ec7/ruff-0.6.9-py3-none-win_arm64.whl", hash = "sha256:a9641e31476d601f83cd602608739a0840e348bda93fec9f1ee816f8b6798b93", size = 8679879 }, +] + +[[package]] +name = "snowballstemmer" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/7b/af302bebf22c749c56c9c3e8ae13190b5b5db37a33d9068652e8f73b7089/snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", size = 86699 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a", size = 93002 }, +] + +[[package]] +name = "soupsieve" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 
101569 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, +] + +[[package]] +name = "sphinx" +version = "7.4.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "alabaster" }, + { name = "babel" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "docutils" }, + { name = "imagesize" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pygments" }, + { name = "requests" }, + { name = "snowballstemmer" }, + { name = "sphinxcontrib-applehelp" }, + { name = "sphinxcontrib-devhelp" }, + { name = "sphinxcontrib-htmlhelp" }, + { name = "sphinxcontrib-jsmath" }, + { name = "sphinxcontrib-qthelp" }, + { name = "sphinxcontrib-serializinghtml" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/be/50e50cb4f2eff47df05673d361095cafd95521d2a22521b920c67a372dcb/sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe", size = 8067911 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/ef/153f6803c5d5f8917dbb7f7fcf6d34a871ede3296fa89c2c703f5f8a6c8e/sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239", size = 3401624 }, +] + +[[package]] +name = "sphinx-basic-ng" +version = "1.0.0b2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/0b/a866924ded68efec7a1759587a4e478aec7559d8165fac8b2ad1c0e774d6/sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9", size = 20736 } +wheels = [ + 
{ url = "https://files.pythonhosted.org/packages/3c/dd/018ce05c532a22007ac58d4f45232514cd9d6dd0ee1dc374e309db830983/sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b", size = 22496 }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, +] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 }, +] + +[[package]] +name = "tomli" +version = "2.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/b9/de2a5c0144d7d75a57ff355c0c24054f965b2dc3036456ae03a51ea6264b/tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed", size = 16096 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/db/ce8eda256fa131af12e0a76d481711abe4681b6923c27efb9a255c9e4594/tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38", size = 13237 }, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, +] + +[[package]] +name = "zipp" +version = "3.20.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/bf/5c0000c44ebc80123ecbdddba1f5dcd94a5ada602a9c225d84b5aaa55e86/zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29", size = 24199 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/8b/5ba542fa83c90e09eac972fc9baca7a88e7e7ca4b221a89251954019308b/zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350", size = 9200 }, +]