diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a5ed97f22..11667d614 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,12 @@ name: CI -on: +# Since this same workflow file is used for both PRs and pushes, +# Zizmor wants to flag all uses of caches as potential cache poisoning vulnerabilities. +# I don't see a way to easily split this workflow into two separate files since +# we do want to build the package for PRs, and then publish it from that exact +# built artifact, if we're pushing to tags. + +on: # zizmor: ignore[cache-poisoning] push: branches: - master @@ -12,15 +18,34 @@ on: - master - '*-maint' +permissions: {} + jobs: lint: + name: lint + permissions: + contents: read runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: pre-commit/action@v3.0.1 + - uses: actions/checkout@v6 + with: + persist-credentials: false + - uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6 + with: + cache-dependency-glob: ".github/workflows/*.yml" + cache-suffix: pre-commit-uv + - run: uv tool install pre-commit --with pre-commit-uv --force-reinstall + - uses: actions/cache@v5 + with: + path: ~/.cache/pre-commit + key: pre-commit-uv-v1-${{ hashFiles('.pre-commit-config.yaml') }} + - run: pre-commit run --all-files --show-diff-on-failure --color=always env: RUFF_OUTPUT_FORMAT: github test: + name: test + permissions: + contents: read runs-on: ${{ matrix.os }} strategy: matrix: @@ -35,18 +60,22 @@ jobs: - "3.11" - "3.12" - "3.13" + - "3.14" - "pypy3.10" env: BABEL_CLDR_NO_DOWNLOAD_PROGRESS: "1" BABEL_CLDR_QUIET: "1" + PIP_DISABLE_PIP_VERSION_CHECK: "1" steps: - - uses: actions/checkout@v4 - - uses: actions/cache@v4 + - uses: actions/checkout@v6 + with: + persist-credentials: false + - uses: actions/cache@v5 with: path: cldr key: cldr-${{ hashFiles('scripts/*cldr*') }} - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: 
python-version: ${{ matrix.python-version }} allow-prereleases: true @@ -61,30 +90,36 @@ jobs: env: COVERAGE_XML_PATH: ${{ runner.temp }} BABEL_TOX_EXTRA_DEPS: pytest-github-actions-annotate-failures - - uses: codecov/codecov-action@v5 + - uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 with: directory: ${{ runner.temp }} flags: ${{ matrix.os }}-${{ matrix.python-version }} token: ${{ secrets.CODECOV_TOKEN }} verbose: true build: + name: build + permissions: + contents: read runs-on: ubuntu-24.04 needs: lint steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + with: + persist-credentials: false + - uses: actions/setup-python@v6 with: - python-version: "3.13" + python-version: "3.14" cache: "pip" cache-dependency-path: "**/setup.py" - run: pip install build -e . - run: make import-cldr - run: python -m build - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: dist path: dist publish: + name: publish if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') needs: - build @@ -93,14 +128,14 @@ jobs: name: release url: https://pypi.org/p/babel/ permissions: - id-token: write + id-token: write # Required for Trusted Publishing action steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v7 with: name: dist path: dist/ - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: verbose: true print-hash: true diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 000000000..e4f467e38 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,29 @@ +name: Security + +on: + push: + branches: + - master + - '*-maint' + tags: + - 'v*' + pull_request: + branches: + - master + - '*-maint' + +permissions: {} + +jobs: + zizmor: + name: Run zizmor + 
runs-on: ubuntu-latest + permissions: + security-events: write # via Zizmor example + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + persist-credentials: false + - name: Run zizmor + uses: zizmorcore/zizmor-action@e639db99335bc9038abc0e066dfcd72e23d26fb4 # v0.3.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a86ee871..04f385123 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,12 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.1 + rev: 5ba58aca0bd5bc7c0e1c0fc45af2e88d6a2bde83 # frozen: v0.14.10 hooks: - - id: ruff + - id: ruff-check args: - --fix - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0 hooks: - id: check-added-large-files - id: check-docstring-first diff --git a/AUTHORS b/AUTHORS index 89353ae0f..79e18d46d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -24,6 +24,7 @@ Babel is written and maintained by the Babel team and various contributors: - Hugo van Kemenade - Jun Omae - Heungsub Lee +- Boris Verkhovskiy - Jakob Schnitzer - Sachin Paliwal - Alex Willmer @@ -49,6 +50,9 @@ Babel is written and maintained by the Babel team and various contributors: - Arturas Moskvinas - Leonardo Pistone - Hyunjun Kim +- Bart Broere +- Guillaume Gauvrit +- clach04 - wandrew004 - James McKinney - Tomáš Hrnčiar diff --git a/CHANGES.rst b/CHANGES.rst index dcd7aa28e..b09e184e8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,61 @@ Babel Changelog =============== +Version 2.18.0 +-------------- + +Happy 2026! This release is, coincidentally, also being made from FOSDEM. + +We will aspire for a slightly less glacial release cadence in this year; +there are interesting features in the pipeline. 
+ +Features +~~~~~~~~ + +* Core: Add `babel.core.get_cldr_version()` by @akx in :gh:`1242` +* Core: Use CLDR 47 by @tomasr8 in :gh:`1210` +* Core: Use canonical IANA zone names in zone_territories by @akx in :gh:`1220` +* Messages: Improve extract performance via ignoring directories early during os.walk by @akx in :gh:`968` +* Messages: Merge in per-format keywords and auto_comments by @akx in :gh:`1243` +* Messages: Update keywords for extraction of dpgettext and dnpgettext by @mardiros in :gh:`1235` +* Messages: Validate all plurals in Python format checker by @tomasr8 in :gh:`1188` +* Time: Use standard library `timezone` instead of `FixedOffsetTimezone` by @akx in :gh:`1203` + +Bugfixes +~~~~~~~~ + +* Core: Fix formatting for "Empty locale identifier" exception added in #1164 by @akx in :gh:`1184` +* Core: Improve handling of no-inheritance-marker in timezone data by @akx in :gh:`1194` +* Core: Make the number pattern regular expression more efficient by @akx in :gh:`1213` +* Messages: Keep translator comments next to the translation function call by @akx in :gh:`1196` +* Numbers: Fix KeyError that occurred when formatting compact currencies of exactly one thousand in several locales by @bartbroere in :gh:`1246` + +Other improvements +~~~~~~~~~~~~~~~~~~ + +* Core: Avoid unnecessary uses of `map()` by @akx in :gh:`1180` +* Messages: Have init-catalog create directories too by @akx in :gh:`1244` +* Messages: Optimizations for read_po by @akx in :gh:`1200` +* Messages: Use pathlib.Path() in catalog frontend; improve test coverage by @akx in :gh:`1204` + + +Infrastructure and documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* CI: Renovate CI & lint tools by @akx in :gh:`1228` +* CI: Tighten up CI with Zizmor by @akx in :gh:`1230` +* CI: make job permissions explicit by @akx in :gh:`1227` +* Docs: Add SECURITY.md by @akx in :gh:`1229` +* Docs: Remove u string prefix from docs by @verhovsky in :gh:`1174` +* Docs: Update dates.rst with current unicode.org tr35 link 
by @clach04 in :gh:`1189` +* General: Add some PyPI classifiers by @tomasr8 in :gh:`1186` +* General: Apply reformatting by hand and with Ruff by @akx in :gh:`1202` +* General: Test on and declare support for Python 3.14 by @akx in :gh:`1233` +* Tests: Convert Unittest testcases with setup/teardown to fixtures by @akx in :gh:`1240` +* Tests: Mark PyPy CI flake as xfail by @akx in :gh:`1197` +* Tests: Move pytest config to `pyproject.toml` by @tomasr8 in :gh:`1187` +* Tests: Unwrap most `unittest` test cases to bare functions by @akx in :gh:`1241` + Version 2.17.0 -------------- diff --git a/LICENSE b/LICENSE index 6ddae98eb..96f467d27 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2013-2025 by the Babel Team, see AUTHORS for more information. +Copyright (c) 2013-2026 by the Babel Team, see AUTHORS for more information. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..7c9adcfcb --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,21 @@ +# Security Policy + +## Supported Versions + +Security patches will mainly target the latest release version, +as listed on [PyPI](https://pypi.org/project/babel/) or [GitHub Releases](https://github.com/python-babel/babel/releases). + +Patches for particularly high-impact security issues may be backported to older versions as needed, +but Babel has generally been extremely backward compatible (within major version series), +so for many users, simply upgrading to the latest release should be rather frictionless. + +If you're using a version of Babel packaged by a downstream distribution, +such as Debian, Ubuntu, etc., they may backport patches from newer versions with a different policy. 
+ +## Reporting a Vulnerability + +Please feel free to report vulnerabilities by any method below you feel comfortable with: + +* You can use GitHub's form [over here](https://github.com/python-babel/babel/security/advisories/new). +* Contact a maintainer, presently [@akx](https://github.com/akx), over email (akx@iki.fi) or direct messages on listed socials. + * If you need an encrypted channel of communications, please email/DM first and we'll set something up. diff --git a/babel/__init__.py b/babel/__init__.py index 7b2774558..2fd88befa 100644 --- a/babel/__init__.py +++ b/babel/__init__.py @@ -1,19 +1,19 @@ """ - babel - ~~~~~ +babel +~~~~~ - Integrated collection of utilities that assist in internationalizing and - localizing applications. +Integrated collection of utilities that assist in internationalizing and +localizing applications. - This package is basically composed of two major parts: +This package is basically composed of two major parts: - * tools to build and work with ``gettext`` message catalogs - * a Python interface to the CLDR (Common Locale Data Repository), providing - access to various locale display names, localized number and date - formatting, etc. + * tools to build and work with ``gettext`` message catalogs + * a Python interface to the CLDR (Common Locale Data Repository), providing + access to various locale display names, localized number and date + formatting, etc. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ from babel.core import ( @@ -25,7 +25,7 @@ parse_locale, ) -__version__ = '2.17.0' +__version__ = '2.18.0' __all__ = [ 'Locale', diff --git a/babel/core.py b/babel/core.py index 5762bbe36..4210b46bb 100644 --- a/babel/core.py +++ b/babel/core.py @@ -1,11 +1,11 @@ """ - babel.core - ~~~~~~~~~~ +babel.core +~~~~~~~~~~ - Core locale representation and locale data access. 
+Core locale representation and locale data access. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ from __future__ import annotations @@ -22,6 +22,7 @@ 'Locale', 'UnknownLocaleError', 'default_locale', + 'get_cldr_version', 'get_global', 'get_locale_identifier', 'negotiate_locale', @@ -33,6 +34,7 @@ _GLOBAL_KEY: TypeAlias = Literal[ "all_currencies", + "cldr", "currency_fractions", "language_aliases", "likely_subtags", @@ -56,12 +58,14 @@ def _raise_no_data_error(): - raise RuntimeError('The babel data files are not available. ' - 'This usually happens because you are using ' - 'a source checkout from Babel and you did ' - 'not build the data files. Just make sure ' - 'to run "python setup.py import_cldr" before ' - 'installing the library.') + raise RuntimeError( + 'The babel data files are not available. ' + 'This usually happens because you are using ' + 'a source checkout from Babel and you did ' + 'not build the data files. Just make sure ' + 'to run "python setup.py import_cldr" before ' + 'installing the library.', + ) def get_global(key: _GLOBAL_KEY) -> Mapping[str, Any]: @@ -71,13 +75,14 @@ def get_global(key: _GLOBAL_KEY) -> Mapping[str, Any]: information independent of individual locales. 
>>> get_global('zone_aliases')['UTC'] - u'Etc/UTC' + 'Etc/UTC' >>> get_global('zone_territories')['Europe/Berlin'] - u'DE' + 'DE' The keys available are: - ``all_currencies`` + - ``cldr`` (metadata) - ``currency_fractions`` - ``language_aliases`` - ``likely_subtags`` @@ -119,7 +124,7 @@ def get_global(key: _GLOBAL_KEY) -> Mapping[str, Any]: 'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL', 'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI', 'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA', -} +} # fmt: skip class UnknownLocaleError(Exception): @@ -145,7 +150,7 @@ class Locale: >>> repr(locale) "Locale('en', territory='US')" >>> locale.display_name - u'English (United States)' + 'English (United States)' A `Locale` object can also be instantiated from a raw locale string: @@ -157,7 +162,7 @@ class Locale: territory and language names, number and date format patterns, and more: >>> locale.number_symbols['latn']['decimal'] - u'.' + '.' If a locale is requested for which no locale data is available, an `UnknownLocaleError` is raised: @@ -216,7 +221,11 @@ def __init__( raise UnknownLocaleError(identifier) @classmethod - def default(cls, category: str | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES) -> Locale: + def default( + cls, + category: str | None = None, + aliases: Mapping[str, str] = LOCALE_ALIASES, + ) -> Locale: """Return the system default locale for the specified category. >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES']: @@ -268,8 +277,7 @@ def negotiate( :param aliases: a dictionary of aliases for locale identifiers :param sep: separator for parsing; e.g. Windows tends to use '-' instead of '_'. 
""" - identifier = negotiate_locale(preferred, available, sep=sep, - aliases=aliases) + identifier = negotiate_locale(preferred, available, sep=sep, aliases=aliases) if identifier: return Locale.parse(identifier, sep=sep) return None @@ -285,7 +293,7 @@ def parse( >>> l = Locale.parse('de-DE', sep='-') >>> l.display_name - u'Deutsch (Deutschland)' + 'Deutsch (Deutschland)' If the `identifier` parameter is not a string, but actually a `Locale` object, that object is returned: @@ -343,10 +351,11 @@ def parse( f"Empty locale identifier value: {identifier!r}\n\n" f"If you didn't explicitly pass an empty value to a Babel function, " f"this could be caused by there being no suitable locale environment " - f"variables for the API you tried to use.", + f"variables for the API you tried to use." ) if isinstance(identifier, str): - raise ValueError(msg) # `parse_locale` would raise a ValueError, so let's do that here + # `parse_locale` would raise a ValueError, so let's do that here + raise ValueError(msg) raise TypeError(msg) if not isinstance(identifier, str): @@ -420,7 +429,9 @@ def _try_load_reducing(parts): else: language2, _, script2, variant2 = parts2 modifier2 = None - locale = _try_load_reducing((language2, territory, script2, variant2, modifier2)) + locale = _try_load_reducing( + (language2, territory, script2, variant2, modifier2), + ) if locale is not None: return locale @@ -431,19 +442,18 @@ def __eq__(self, other: object) -> bool: if not hasattr(other, key): return False return ( - self.language == getattr(other, 'language') and # noqa: B009 - self.territory == getattr(other, 'territory') and # noqa: B009 - self.script == getattr(other, 'script') and # noqa: B009 - self.variant == getattr(other, 'variant') and # noqa: B009 - self.modifier == getattr(other, 'modifier') # noqa: B009 + self.language == getattr(other, 'language') # noqa: B009 + and self.territory == getattr(other, 'territory') # noqa: B009 + and self.script == getattr(other, 'script') # noqa: B009 
+ and self.variant == getattr(other, 'variant') # noqa: B009 + and self.modifier == getattr(other, 'modifier') # noqa: B009 ) def __ne__(self, other: object) -> bool: return not self.__eq__(other) def __hash__(self) -> int: - return hash((self.language, self.territory, self.script, - self.variant, self.modifier)) + return hash((self.language, self.territory, self.script, self.variant, self.modifier)) def __repr__(self) -> str: parameters = [''] @@ -454,9 +464,9 @@ def __repr__(self) -> str: return f"Locale({self.language!r}{', '.join(parameters)})" def __str__(self) -> str: - return get_locale_identifier((self.language, self.territory, - self.script, self.variant, - self.modifier)) + return get_locale_identifier( + (self.language, self.territory, self.script, self.variant, self.modifier), + ) @property def _data(self) -> localedata.LocaleDataDict: @@ -471,12 +481,12 @@ def get_display_name(self, locale: Locale | str | None = None) -> str | None: variant, if those are specified. >>> Locale('zh', 'CN', script='Hans').get_display_name('en') - u'Chinese (Simplified, China)' + 'Chinese (Simplified, China)' Modifiers are currently passed through verbatim: >>> Locale('it', 'IT', modifier='euro').get_display_name('en') - u'Italian (Italy, euro)' + 'Italian (Italy, euro)' :param locale: the locale to use """ @@ -499,24 +509,27 @@ def get_display_name(self, locale: Locale | str | None = None) -> str | None: retval += f" ({detail_string})" return retval - display_name = property(get_display_name, doc="""\ + display_name = property( + get_display_name, + doc="""\ The localized display name of the locale. >>> Locale('en').display_name - u'English' + 'English' >>> Locale('en', 'US').display_name - u'English (United States)' + 'English (United States)' >>> Locale('sv').display_name - u'svenska' + 'svenska' :type: `unicode` - """) + """, + ) def get_language_name(self, locale: Locale | str | None = None) -> str | None: """Return the language of this locale in the given locale. 
>>> Locale('zh', 'CN', script='Hans').get_language_name('de') - u'Chinesisch' + 'Chinesisch' .. versionadded:: 1.0 @@ -527,12 +540,15 @@ def get_language_name(self, locale: Locale | str | None = None) -> str | None: locale = Locale.parse(locale) return locale.languages.get(self.language) - language_name = property(get_language_name, doc="""\ + language_name = property( + get_language_name, + doc="""\ The localized language name of the locale. >>> Locale('en', 'US').language_name - u'English' - """) + 'English' + """, + ) def get_territory_name(self, locale: Locale | str | None = None) -> str | None: """Return the territory name in the given locale.""" @@ -541,12 +557,15 @@ def get_territory_name(self, locale: Locale | str | None = None) -> str | None: locale = Locale.parse(locale) return locale.territories.get(self.territory or '') - territory_name = property(get_territory_name, doc="""\ + territory_name = property( + get_territory_name, + doc="""\ The localized territory name of the locale if available. >>> Locale('de', 'DE').territory_name - u'Deutschland' - """) + 'Deutschland' + """, + ) def get_script_name(self, locale: Locale | str | None = None) -> str | None: """Return the script name in the given locale.""" @@ -555,21 +574,24 @@ def get_script_name(self, locale: Locale | str | None = None) -> str | None: locale = Locale.parse(locale) return locale.scripts.get(self.script or '') - script_name = property(get_script_name, doc="""\ + script_name = property( + get_script_name, + doc="""\ The localized script name of the locale if available. >>> Locale('sr', 'ME', script='Latn').script_name - u'latinica' - """) + 'latinica' + """, + ) @property def english_name(self) -> str | None: """The english display name of the locale. 
>>> Locale('de').english_name - u'German' + 'German' >>> Locale('de', 'DE').english_name - u'German (Germany)' + 'German (Germany)' :type: `unicode`""" return self.get_display_name(Locale('en')) @@ -581,7 +603,7 @@ def languages(self) -> localedata.LocaleDataDict: """Mapping of language codes to translated language names. >>> Locale('de', 'DE').languages['ja'] - u'Japanisch' + 'Japanisch' See `ISO 639 `_ for more information. @@ -593,7 +615,7 @@ def scripts(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('en', 'US').scripts['Hira'] - u'Hiragana' + 'Hiragana' See `ISO 15924 `_ for more information. @@ -605,7 +627,7 @@ def territories(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('es', 'CO').territories['DE'] - u'Alemania' + 'Alemania' See `ISO 3166 `_ for more information. @@ -617,7 +639,7 @@ def variants(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('de', 'DE').variants['1901'] - u'Alte deutsche Rechtschreibung' + 'Alte deutsche Rechtschreibung' """ return self._data['variants'] @@ -631,9 +653,9 @@ def currencies(self) -> localedata.LocaleDataDict: :func:`babel.numbers.get_currency_name` function. >>> Locale('en').currencies['COP'] - u'Colombian Peso' + 'Colombian Peso' >>> Locale('de', 'DE').currencies['COP'] - u'Kolumbianischer Peso' + 'Kolumbianischer Peso' """ return self._data['currency_names'] @@ -642,9 +664,9 @@ def currency_symbols(self) -> localedata.LocaleDataDict: """Mapping of currency codes to symbols. >>> Locale('en', 'US').currency_symbols['USD'] - u'$' + '$' >>> Locale('es', 'CO').currency_symbols['USD'] - u'US$' + 'US$' """ return self._data['currency_symbols'] @@ -656,11 +678,11 @@ def number_symbols(self) -> localedata.LocaleDataDict: Babel versions. 
>>> Locale('fr', 'FR').number_symbols["latn"]['decimal'] - u',' + ',' >>> Locale('fa', 'IR').number_symbols["arabext"]['decimal'] - u'٫' + '٫' >>> Locale('fa', 'IR').number_symbols["latn"]['decimal'] - u'.' + '.' """ return self._data['number_symbols'] @@ -671,7 +693,7 @@ def other_numbering_systems(self) -> localedata.LocaleDataDict: See: https://www.unicode.org/reports/tr35/tr35-numbers.html#otherNumberingSystems >>> Locale('el', 'GR').other_numbering_systems['traditional'] - u'grek' + 'grek' .. note:: The format of the value returned may change between Babel versions. @@ -682,7 +704,7 @@ def other_numbering_systems(self) -> localedata.LocaleDataDict: def default_numbering_system(self) -> str: """The default numbering system used by the locale. >>> Locale('el', 'GR').default_numbering_system - u'latn' + 'latn' """ return self._data['default_numbering_system'] @@ -694,7 +716,7 @@ def decimal_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').decimal_formats[None] - + """ return self._data['decimal_formats'] @@ -706,7 +728,7 @@ def compact_decimal_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').compact_decimal_formats["short"]["one"]["1000"] - + """ return self._data['compact_decimal_formats'] @@ -718,9 +740,9 @@ def currency_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').currency_formats['standard'] - + >>> Locale('en', 'US').currency_formats['accounting'] - + """ return self._data['currency_formats'] @@ -732,7 +754,7 @@ def compact_currency_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').compact_currency_formats["short"]["one"]["1000"] - + """ return self._data['compact_currency_formats'] @@ -744,7 +766,7 @@ def percent_formats(self) -> localedata.LocaleDataDict: Babel versions. 
>>> Locale('en', 'US').percent_formats[None] - + """ return self._data['percent_formats'] @@ -756,7 +778,7 @@ def scientific_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').scientific_formats[None] - + """ return self._data['scientific_formats'] @@ -767,7 +789,7 @@ def periods(self) -> localedata.LocaleDataDict: """Locale display names for day periods (AM/PM). >>> Locale('en', 'US').periods['am'] - u'AM' + 'AM' """ try: return self._data['day_periods']['stand-alone']['wide'] @@ -784,8 +806,7 @@ def day_periods(self) -> localedata.LocaleDataDict: @property def day_period_rules(self) -> localedata.LocaleDataDict: - """Day period rules for the locale. Used by `get_period_id`. - """ + """Day period rules for the locale. Used by `get_period_id`.""" return self._data.get('day_period_rules', localedata.LocaleDataDict({})) @property @@ -793,7 +814,7 @@ def days(self) -> localedata.LocaleDataDict: """Locale display names for weekdays. >>> Locale('de', 'DE').days['format']['wide'][3] - u'Donnerstag' + 'Donnerstag' """ return self._data['days'] @@ -802,7 +823,7 @@ def months(self) -> localedata.LocaleDataDict: """Locale display names for months. >>> Locale('de', 'DE').months['format']['wide'][10] - u'Oktober' + 'Oktober' """ return self._data['months'] @@ -811,7 +832,7 @@ def quarters(self) -> localedata.LocaleDataDict: """Locale display names for quarters. >>> Locale('de', 'DE').quarters['format']['wide'][1] - u'1. Quartal' + '1. Quartal' """ return self._data['quarters'] @@ -823,9 +844,9 @@ def eras(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').eras['wide'][1] - u'Anno Domini' + 'Anno Domini' >>> Locale('en', 'US').eras['abbreviated'][0] - u'BC' + 'BC' """ return self._data['eras'] @@ -837,9 +858,9 @@ def time_zones(self) -> localedata.LocaleDataDict: Babel versions. 
>>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight'] - u'British Summer Time' + 'British Summer Time' >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] - u'St. John\u2019s' + 'St. John’s' """ return self._data['time_zones'] @@ -854,7 +875,7 @@ def meta_zones(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight'] - u'Central European Summer Time' + 'Central European Summer Time' .. versionadded:: 0.9 """ @@ -868,9 +889,9 @@ def zone_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').zone_formats['fallback'] - u'%(1)s (%(0)s)' + '%(1)s (%(0)s)' >>> Locale('pt', 'BR').zone_formats['region'] - u'Hor\\xe1rio %s' + 'Horário %s' .. versionadded:: 0.9 """ @@ -923,9 +944,9 @@ def date_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').date_formats['short'] - + >>> Locale('fr', 'FR').date_formats['long'] - + """ return self._data['date_formats'] @@ -937,9 +958,9 @@ def time_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en', 'US').time_formats['short'] - + >>> Locale('fr', 'FR').time_formats['long'] - + """ return self._data['time_formats'] @@ -951,9 +972,9 @@ def datetime_formats(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en').datetime_formats['full'] - u'{1}, {0}' + '{1}, {0}' >>> Locale('th').datetime_formats['medium'] - u'{1} {0}' + '{1} {0}' """ return self._data['datetime_formats'] @@ -962,11 +983,11 @@ def datetime_skeletons(self) -> localedata.LocaleDataDict: """Locale patterns for formatting parts of a datetime. 
>>> Locale('en').datetime_skeletons['MEd'] - + >>> Locale('fr').datetime_skeletons['MEd'] - + >>> Locale('fr').datetime_skeletons['H'] - + """ return self._data['datetime_skeletons'] @@ -981,7 +1002,7 @@ def interval_formats(self) -> localedata.LocaleDataDict: smallest changing component: >>> Locale('fi_FI').interval_formats['MEd']['d'] - [u'E d.\u2009\u2013\u2009', u'E d.M.'] + ['E d.\\u2009–\\u2009', 'E d.M.'] .. seealso:: @@ -1015,11 +1036,11 @@ def list_patterns(self) -> localedata.LocaleDataDict: Babel versions. >>> Locale('en').list_patterns['standard']['start'] - u'{0}, {1}' + '{0}, {1}' >>> Locale('en').list_patterns['standard']['end'] - u'{0}, and {1}' + '{0}, and {1}' >>> Locale('en_GB').list_patterns['standard']['end'] - u'{0} and {1}' + '{0} and {1}' """ return self._data['list_patterns'] @@ -1045,9 +1066,9 @@ def measurement_systems(self) -> localedata.LocaleDataDict: """Localized names for various measurement systems. >>> Locale('fr', 'FR').measurement_systems['US'] - u'am\\xe9ricain' + 'américain' >>> Locale('en', 'US').measurement_systems['US'] - u'US' + 'US' """ return self._data['measurement_systems'] @@ -1149,7 +1170,12 @@ def default_locale( return None -def negotiate_locale(preferred: Iterable[str], available: Iterable[str], sep: str = '_', aliases: Mapping[str, str] = LOCALE_ALIASES) -> str | None: +def negotiate_locale( + preferred: Iterable[str], + available: Iterable[str], + sep: str = '_', + aliases: Mapping[str, str] = LOCALE_ALIASES, +) -> str | None: """Find the best match between available and requested locale strings. 
>>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT']) @@ -1215,7 +1241,10 @@ def negotiate_locale(preferred: Iterable[str], available: Iterable[str], sep: st def parse_locale( identifier: str, sep: str = '_', -) -> tuple[str, str | None, str | None, str | None] | tuple[str, str | None, str | None, str | None, str | None]: +) -> ( + tuple[str, str | None, str | None, str | None] + | tuple[str, str | None, str | None, str | None, str | None] +): """Parse a locale identifier into a tuple of the form ``(language, territory, script, variant, modifier)``. @@ -1293,8 +1322,10 @@ def parse_locale( territory = parts.pop(0) if parts and ( - len(parts[0]) == 4 and parts[0][0].isdigit() or - len(parts[0]) >= 5 and parts[0][0].isalpha() + len(parts[0]) == 4 + and parts[0][0].isdigit() + or len(parts[0]) >= 5 + and parts[0][0].isalpha() ): variant = parts.pop().upper() @@ -1335,3 +1366,19 @@ def get_locale_identifier( lang, territory, script, variant, modifier = tup + (None,) * (5 - len(tup)) ret = sep.join(filter(None, (lang, script, territory, variant))) return f'{ret}@{modifier}' if modifier else ret + + +def get_cldr_version() -> str: + """Return the Unicode CLDR version used by this Babel installation. + + Generally, you should be able to assume that the return value of this + function is a string representing a version number, e.g. '47'. + + >>> get_cldr_version() + '47' + + .. versionadded:: 2.18 + + :rtype: str + """ + return str(get_global("cldr")["version"]) diff --git a/babel/dates.py b/babel/dates.py index 355a9236e..69610a7f0 100644 --- a/babel/dates.py +++ b/babel/dates.py @@ -1,18 +1,18 @@ """ - babel.dates - ~~~~~~~~~~~ +babel.dates +~~~~~~~~~~~ - Locale dependent formatting and parsing of dates and times. +Locale dependent formatting and parsing of dates and times. 
- The default locale for the functions in this module is determined by the - following environment variables, in that order: +The default locale for the functions in this module is determined by the +following environment variables, in that order: - * ``LC_TIME``, - * ``LC_ALL``, and - * ``LANG`` + * ``LC_TIME``, + * ``LC_ALL``, and + * ``LANG`` - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ from __future__ import annotations @@ -38,10 +38,11 @@ if TYPE_CHECKING: from typing_extensions import TypeAlias + _Instant: TypeAlias = datetime.date | datetime.time | float | None _PredefinedTimeFormat: TypeAlias = Literal['full', 'long', 'medium', 'short'] _Context: TypeAlias = Literal['format', 'stand-alone'] - _DtOrTzinfo: TypeAlias = datetime.datetime | datetime.tzinfo | str | int | datetime.time | None + _DtOrTzinfo: TypeAlias = datetime.datetime | datetime.tzinfo | str | int | datetime.time | None # fmt: skip # "If a given short metazone form is known NOT to be understood in a given # locale and the parent locale has this value such that it would normally @@ -75,7 +76,9 @@ def _localize(tz: datetime.tzinfo, dt: datetime.datetime) -> datetime.datetime: return dt.astimezone(tz) -def _get_dt_and_tzinfo(dt_or_tzinfo: _DtOrTzinfo) -> tuple[datetime.datetime | None, datetime.tzinfo]: +def _get_dt_and_tzinfo( + dt_or_tzinfo: _DtOrTzinfo, +) -> tuple[datetime.datetime | None, datetime.tzinfo]: """ Parse a `dt_or_tzinfo` value into a datetime and a tzinfo. 
@@ -153,13 +156,16 @@ def _get_datetime(instant: _Instant) -> datetime.datetime: return datetime.datetime.fromtimestamp(instant, UTC).replace(tzinfo=None) elif isinstance(instant, datetime.time): return datetime.datetime.combine(datetime.date.today(), instant) - elif isinstance(instant, datetime.date) and not isinstance(instant, datetime.datetime): + elif isinstance(instant, datetime.date) and not isinstance(instant, datetime.datetime): # fmt: skip return datetime.datetime.combine(instant, datetime.time()) # TODO (3.x): Add an assertion/type check for this fallthrough branch: return instant -def _ensure_datetime_tzinfo(dt: datetime.datetime, tzinfo: datetime.tzinfo | None = None) -> datetime.datetime: +def _ensure_datetime_tzinfo( + dt: datetime.datetime, + tzinfo: datetime.tzinfo | None = None, +) -> datetime.datetime: """ Ensure the datetime passed has an attached tzinfo. @@ -260,7 +266,7 @@ def get_period_names( """Return the names for day periods (AM/PM) used by the locale. >>> get_period_names(locale='en_US')['am'] - u'AM' + 'AM' :param width: the width to use, one of "abbreviated", "narrow", or "wide" :param context: the context, either "format" or "stand-alone" @@ -277,13 +283,13 @@ def get_day_names( """Return the day names used by the locale for the specified format. >>> get_day_names('wide', locale='en_US')[1] - u'Tuesday' + 'Tuesday' >>> get_day_names('short', locale='en_US')[1] - u'Tu' + 'Tu' >>> get_day_names('abbreviated', locale='es')[1] - u'mar' + 'mar' >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1] - u'D' + 'D' :param width: the width to use, one of "wide", "abbreviated", "short" or "narrow" :param context: the context, either "format" or "stand-alone" @@ -300,11 +306,11 @@ def get_month_names( """Return the month names used by the locale for the specified format. 
>>> get_month_names('wide', locale='en_US')[1] - u'January' + 'January' >>> get_month_names('abbreviated', locale='es')[1] - u'ene' + 'ene' >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1] - u'J' + 'J' :param width: the width to use, one of "wide", "abbreviated", or "narrow" :param context: the context, either "format" or "stand-alone" @@ -321,11 +327,11 @@ def get_quarter_names( """Return the quarter names used by the locale for the specified format. >>> get_quarter_names('wide', locale='en_US')[1] - u'1st quarter' + '1st quarter' >>> get_quarter_names('abbreviated', locale='de_DE')[1] - u'Q1' + 'Q1' >>> get_quarter_names('narrow', locale='de_DE')[1] - u'1' + '1' :param width: the width to use, one of "wide", "abbreviated", or "narrow" :param context: the context, either "format" or "stand-alone" @@ -341,9 +347,9 @@ def get_era_names( """Return the era names used by the locale for the specified format. >>> get_era_names('wide', locale='en_US')[1] - u'Anno Domini' + 'Anno Domini' >>> get_era_names('abbreviated', locale='de_DE')[1] - u'n. Chr.' + 'n. Chr.' :param width: the width to use, either "wide", "abbreviated", or "narrow" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. @@ -359,9 +365,9 @@ def get_date_format( format. >>> get_date_format(locale='en_US') - + >>> get_date_format('full', locale='de_DE') - + :param format: the format to use, one of "full", "long", "medium", or "short" @@ -378,7 +384,7 @@ def get_datetime_format( specified format. >>> get_datetime_format(locale='en_US') - u'{1}, {0}' + '{1}, {0}' :param format: the format to use, one of "full", "long", "medium", or "short" @@ -398,9 +404,9 @@ def get_time_format( format. 
>>> get_time_format(locale='en_US') - + >>> get_time_format('full', locale='de_DE') - + :param format: the format to use, one of "full", "long", "medium", or "short" @@ -421,25 +427,25 @@ def get_timezone_gmt( >>> from datetime import datetime >>> dt = datetime(2007, 4, 1, 15, 30) >>> get_timezone_gmt(dt, locale='en') - u'GMT+00:00' + 'GMT+00:00' >>> get_timezone_gmt(dt, locale='en', return_z=True) 'Z' >>> get_timezone_gmt(dt, locale='en', width='iso8601_short') - u'+00' + '+00' >>> tz = get_timezone('America/Los_Angeles') >>> dt = _localize(tz, datetime(2007, 4, 1, 15, 30)) >>> get_timezone_gmt(dt, locale='en') - u'GMT-07:00' + 'GMT-07:00' >>> get_timezone_gmt(dt, 'short', locale='en') - u'-0700' + '-0700' >>> get_timezone_gmt(dt, locale='en', width='iso8601_short') - u'-07' + '-07' The long format depends on the locale, for example in France the acronym UTC string is used instead of GMT: >>> get_timezone_gmt(dt, 'long', locale='fr_FR') - u'UTC-07:00' + 'UTC-07:00' .. versionadded:: 0.9 @@ -488,14 +494,14 @@ def get_timezone_location( St. John’s >>> tz = get_timezone('America/Mexico_City') >>> get_timezone_location(tz, locale='de_DE') - u'Mexiko (Mexiko-Stadt) (Ortszeit)' + 'Mexiko (Mexiko-Stadt) (Ortszeit)' If the timezone is associated with a country that uses only a single timezone, just the localized country name is returned: >>> tz = get_timezone('Europe/Berlin') >>> get_timezone_name(tz, locale='de_DE') - u'Mitteleurop\\xe4ische Zeit' + 'Mitteleuropäische Zeit' .. 
versionadded:: 0.9 @@ -524,7 +530,11 @@ def get_timezone_location( if territory not in locale.territories: territory = 'ZZ' # invalid/unknown territory_name = locale.territories[territory] - if not return_city and territory and len(get_global('territory_zones').get(territory, [])) == 1: + if ( + not return_city + and territory + and len(get_global('territory_zones').get(territory, [])) == 1 + ): return region_format % territory_name # Otherwise, include the city in the output @@ -543,10 +553,13 @@ def get_timezone_location( if return_city: return city_name - return region_format % (fallback_format % { - '0': city_name, - '1': territory_name, - }) + return region_format % ( + fallback_format + % { + '0': city_name, + '1': territory_name, + } + ) def get_timezone_name( @@ -563,11 +576,11 @@ def get_timezone_name( >>> from datetime import time >>> dt = time(15, 30, tzinfo=get_timezone('America/Los_Angeles')) >>> get_timezone_name(dt, locale='en_US') # doctest: +SKIP - u'Pacific Standard Time' + 'Pacific Standard Time' >>> get_timezone_name(dt, locale='en_US', return_zone=True) 'America/Los_Angeles' >>> get_timezone_name(dt, width='short', locale='en_US') # doctest: +SKIP - u'PST' + 'PST' If this function gets passed only a `tzinfo` object and no concrete `datetime`, the returned display name is independent of daylight savings @@ -576,9 +589,9 @@ def get_timezone_name( >>> tz = get_timezone('America/Los_Angeles') >>> get_timezone_name(tz, locale='en_US') - u'Pacific Time' + 'Pacific Time' >>> get_timezone_name(tz, 'short', locale='en_US') - u'PT' + 'PT' If no localized display name for the timezone is available, and the timezone is associated with a country that uses only a single timezone, the name of @@ -586,16 +599,16 @@ def get_timezone_name( >>> tz = get_timezone('Europe/Berlin') >>> get_timezone_name(tz, locale='de_DE') - u'Mitteleurop\xe4ische Zeit' + 'Mitteleuropäische Zeit' >>> get_timezone_name(tz, locale='pt_BR') - u'Hor\xe1rio da Europa Central' + 'Horário 
da Europa Central' On the other hand, if the country uses multiple timezones, the city is also included in the representation: >>> tz = get_timezone('America/St_Johns') >>> get_timezone_name(tz, locale='de_DE') - u'Neufundland-Zeit' + 'Neufundland-Zeit' Note that short format is currently not supported for all timezones and all locales. This is partially because not every timezone has a short @@ -649,7 +662,9 @@ def get_timezone_name( info = locale.time_zones.get(zone, {}) # Try explicitly translated zone names first if width in info and zone_variant in info[width]: - return info[width][zone_variant] + value = info[width][zone_variant] + if value != NO_INHERITANCE_MARKER: + return value metazone = get_global('meta_zones').get(zone) if metazone: @@ -660,7 +675,7 @@ def get_timezone_name( # If the short form is marked no-inheritance, # try to fall back to the long name instead. name = metazone_info.get('long', {}).get(zone_variant) - if name: + if name and name != NO_INHERITANCE_MARKER: return name # If we have a concrete datetime, we assume that the result can't be @@ -681,15 +696,15 @@ def format_date( >>> from datetime import date >>> d = date(2007, 4, 1) >>> format_date(d, locale='en_US') - u'Apr 1, 2007' + 'Apr 1, 2007' >>> format_date(d, format='full', locale='de_DE') - u'Sonntag, 1. April 2007' + 'Sonntag, 1. 
April 2007' If you don't want to use the locale default formats, you can specify a custom date pattern: >>> format_date(d, "EEE, MMM d, ''yy", locale='en') - u"Sun, Apr 1, '07" + "Sun, Apr 1, '07" :param date: the ``date`` or ``datetime`` object; if `None`, the current date is used @@ -720,7 +735,7 @@ def format_datetime( >>> from datetime import datetime >>> dt = datetime(2007, 4, 1, 15, 30) >>> format_datetime(dt, locale='en_US') - u'Apr 1, 2007, 3:30:00\u202fPM' + 'Apr 1, 2007, 3:30:00\u202fPM' For any pattern requiring the display of the timezone: @@ -729,7 +744,7 @@ def format_datetime( 'dimanche 1 avril 2007, 17:30:00 heure d’été d’Europe centrale' >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz", ... tzinfo=get_timezone('US/Eastern'), locale='en') - u'2007.04.01 AD at 11:30:00 EDT' + '2007.04.01 AD at 11:30:00 EDT' :param datetime: the `datetime` object; if `None`, the current date and time is used @@ -742,11 +757,12 @@ def format_datetime( locale = Locale.parse(locale or LC_TIME) if format in ('full', 'long', 'medium', 'short'): - return get_datetime_format(format, locale=locale) \ - .replace("'", "") \ - .replace('{0}', format_time(datetime, format, tzinfo=None, - locale=locale)) \ + return ( + get_datetime_format(format, locale=locale) + .replace("'", "") + .replace('{0}', format_time(datetime, format, tzinfo=None, locale=locale)) .replace('{1}', format_date(datetime, format, locale=locale)) + ) else: return parse_pattern(format).apply(datetime, locale) @@ -762,15 +778,15 @@ def format_time( >>> from datetime import datetime, time >>> t = time(15, 30) >>> format_time(t, locale='en_US') - u'3:30:00\u202fPM' + '3:30:00\u202fPM' >>> format_time(t, format='short', locale='de_DE') - u'15:30' + '15:30' If you don't want to use the locale default formats, you can specify a custom time pattern: >>> format_time(t, "hh 'o''clock' a", locale='en') - u"03 o'clock PM" + "03 o'clock PM" For any pattern requiring the display of the time-zone a timezone has to be 
specified explicitly: @@ -782,7 +798,7 @@ def format_time( '15:30:00 heure d’été d’Europe centrale' >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=get_timezone('US/Eastern'), ... locale='en') - u"09 o'clock AM, Eastern Daylight Time" + "09 o'clock AM, Eastern Daylight Time" As that example shows, when this function gets passed a ``datetime.datetime`` value, the actual time in the formatted string is @@ -800,10 +816,10 @@ def format_time( >>> t = time(15, 30) >>> format_time(t, format='full', tzinfo=get_timezone('Europe/Paris'), ... locale='fr_FR') # doctest: +SKIP - u'15:30:00 heure normale d\u2019Europe centrale' + '15:30:00 heure normale d\u2019Europe centrale' >>> format_time(t, format='full', tzinfo=get_timezone('US/Eastern'), ... locale='en_US') # doctest: +SKIP - u'3:30:00\u202fPM Eastern Standard Time' + '3:30:00\u202fPM Eastern Standard Time' :param time: the ``time`` or ``datetime`` object; if `None`, the current time in UTC is used @@ -842,11 +858,11 @@ def format_skeleton( >>> from datetime import datetime >>> t = datetime(2007, 4, 1, 15, 30) >>> format_skeleton('MMMEd', t, locale='fr') - u'dim. 1 avr.' + 'dim. 1 avr.' >>> format_skeleton('MMMEd', t, locale='en') - u'Sun, Apr 1' + 'Sun, Apr 1' >>> format_skeleton('yMMd', t, locale='fi') # yMMd is not in the Finnish locale; yMd gets used - u'1.4.2007' + '1.4.2007' >>> format_skeleton('yMMd', t, fuzzy=False, locale='fi') # yMMd is not in the Finnish locale, an error is thrown Traceback (most recent call last): ... 
@@ -888,8 +904,16 @@ def format_skeleton( def format_timedelta( delta: datetime.timedelta | int, - granularity: Literal['year', 'month', 'week', 'day', 'hour', 'minute', 'second'] = 'second', - threshold: float = .85, + granularity: Literal[ + 'year', + 'month', + 'week', + 'day', + 'hour', + 'minute', + 'second', + ] = 'second', + threshold: float = 0.85, add_direction: bool = False, format: Literal['narrow', 'short', 'medium', 'long'] = 'long', locale: Locale | str | None = None, @@ -898,39 +922,39 @@ def format_timedelta( >>> from datetime import timedelta >>> format_timedelta(timedelta(weeks=12), locale='en_US') - u'3 months' + '3 months' >>> format_timedelta(timedelta(seconds=1), locale='es') - u'1 segundo' + '1 segundo' The granularity parameter can be provided to alter the lowest unit presented, which defaults to a second. >>> format_timedelta(timedelta(hours=3), granularity='day', locale='en_US') - u'1 day' + '1 day' The threshold parameter can be used to determine at which value the presentation switches to the next higher unit. A higher threshold factor means the presentation will switch later. 
For example: >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US') - u'1 day' + '1 day' >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US') - u'23 hours' + '23 hours' In addition directional information can be provided that informs the user if the date is in the past or in the future: >>> format_timedelta(timedelta(hours=1), add_direction=True, locale='en') - u'in 1 hour' + 'in 1 hour' >>> format_timedelta(timedelta(hours=-1), add_direction=True, locale='en') - u'1 hour ago' + '1 hour ago' The format parameter controls how compact or wide the presentation is: >>> format_timedelta(timedelta(hours=3), format='short', locale='en') - u'3 hr' + '3 hr' >>> format_timedelta(timedelta(hours=3), format='narrow', locale='en') - u'3h' + '3h' :param delta: a ``timedelta`` object representing the time difference to format, or the delta in seconds as an `int` value @@ -953,8 +977,7 @@ def format_timedelta( raise TypeError('Format must be one of "narrow", "short" or "long"') if format == 'medium': warnings.warn( - '"medium" value for format param of format_timedelta' - ' is deprecated. Use "long" instead', + '"medium" value for format param of format_timedelta is deprecated. Use "long" instead', category=DeprecationWarning, stacklevel=2, ) @@ -971,7 +994,7 @@ def _iter_patterns(a_unit): if add_direction: # Try to find the length variant version first ("year-narrow") # before falling back to the default. 
- unit_rel_patterns = (date_fields.get(f"{a_unit}-{format}") or date_fields[a_unit]) + unit_rel_patterns = date_fields.get(f"{a_unit}-{format}") or date_fields[a_unit] if seconds >= 0: yield unit_rel_patterns['future'] else: @@ -1016,9 +1039,17 @@ def _format_fallback_interval( ) -> str: if skeleton in locale.datetime_skeletons: # Use the given skeleton format = lambda dt: format_skeleton(skeleton, dt, tzinfo, locale=locale) - elif all((isinstance(d, datetime.date) and not isinstance(d, datetime.datetime)) for d in (start, end)): # Both are just dates + elif all( + # Both are just dates + (isinstance(d, datetime.date) and not isinstance(d, datetime.datetime)) + for d in (start, end) + ): format = lambda dt: format_date(dt, locale=locale) - elif all((isinstance(d, datetime.time) and not isinstance(d, datetime.date)) for d in (start, end)): # Both are times + elif all( + # Both are times + (isinstance(d, datetime.time) and not isinstance(d, datetime.date)) + for d in (start, end) + ): format = lambda dt: format_time(dt, tzinfo=tzinfo, locale=locale) else: format = lambda dt: format_datetime(dt, tzinfo=tzinfo, locale=locale) @@ -1030,9 +1061,9 @@ def _format_fallback_interval( return format(start) return ( - locale.interval_formats.get(None, "{0}-{1}"). - replace("{0}", formatted_start). 
- replace("{1}", formatted_end) + locale.interval_formats.get(None, "{0}-{1}") + .replace("{0}", formatted_start) + .replace("{1}", formatted_end) ) @@ -1049,16 +1080,16 @@ def format_interval( >>> from datetime import date, time >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "yMd", locale="fi") - u'15.\u201317.1.2016' + '15.–17.1.2016' >>> format_interval(time(12, 12), time(16, 16), "Hm", locale="en_GB") - '12:12\u201316:16' + '12:12–16:16' >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US") - '5:12\u202fAM\u2009–\u20094:16\u202fPM' + '5:12\\u202fAM\\u2009–\\u20094:16\\u202fPM' >>> format_interval(time(16, 18), time(16, 24), "Hm", locale="it") - '16:18\u201316:24' + '16:18–16:24' If the start instant equals the end instant, the interval is formatted like the instant. @@ -1068,13 +1099,13 @@ def format_interval( Unknown skeletons fall back to "default" formatting. >>> format_interval(date(2015, 1, 1), date(2017, 1, 1), "wzq", locale="ja") - '2015/01/01\uff5e2017/01/01' + '2015/01/01～2017/01/01' >>> format_interval(time(16, 18), time(16, 24), "xxx", locale="ja") - '16:18:00\uff5e16:24:00' + '16:18:00～16:24:00' >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "xxx", locale="de") - '15.01.2016\u2009–\u200917.01.2016' + '15.01.2016\\u2009–\\u200917.01.2016' :param start: First instant (datetime/date/time) :param end: Second instant (datetime/date/time) @@ -1132,8 +1163,7 @@ def format_interval( # > format the start and end datetime, as above. return "".join( parse_pattern(pattern).apply(instant, locale) - for pattern, instant - in zip(skel_formats[field], (start, end)) + for pattern, instant in zip(skel_formats[field], (start, end)) ) # > Otherwise, format the start and end datetime using the fallback pattern. 
@@ -1154,13 +1184,13 @@ def get_period_id( >>> from datetime import time >>> get_period_names(locale="de")[get_period_id(time(7, 42), locale="de")] - u'Morgen' + 'Morgen' >>> get_period_id(time(0), locale="en_US") - u'midnight' + 'midnight' >>> get_period_id(time(0), type="selection", locale="en_US") - u'night1' + 'night1' :param time: The time to inspect. :param tzinfo: The timezone for the time. See ``format_time``. @@ -1191,8 +1221,10 @@ def get_period_id( return rule_id else: # e.g. from="21:00" before="06:00" - if rule["from"] <= seconds_past_midnight < 86400 or \ - 0 <= seconds_past_midnight < rule["before"]: + if ( + rule["from"] <= seconds_past_midnight < 86400 + or 0 <= seconds_past_midnight < rule["before"] + ): return rule_id start_ok = end_ok = False @@ -1264,8 +1296,11 @@ def parse_date( use_predefined_format = format in ('full', 'long', 'medium', 'short') # we try ISO-8601 format first, meaning similar to formats # extended YYYY-MM-DD or basic YYYYMMDD - iso_alike = re.match(r'^(\d{4})-?([01]\d)-?([0-3]\d)$', - string, flags=re.ASCII) # allow only ASCII digits + iso_alike = re.match( + r'^(\d{4})-?([01]\d)-?([0-3]\d)$', + string, + flags=re.ASCII, # allow only ASCII digits + ) if iso_alike and use_predefined_format: try: return datetime.date(*map(int, iso_alike.groups())) @@ -1364,7 +1399,6 @@ def parse_time( class DateTimePattern: - def __init__(self, pattern: str, format: DateTimeFormat): self.pattern = pattern self.format = format @@ -1391,7 +1425,6 @@ def apply( class DateTimeFormat: - def __init__( self, value: datetime.date | datetime.time, @@ -1472,7 +1505,9 @@ def extract(self, char: str) -> int: elif char == 'a': return int(self.value.hour >= 12) # 0 for am, 1 for pm else: - raise NotImplementedError(f"Not implemented: extracting {char!r} from {self.value!r}") + raise NotImplementedError( + f"Not implemented: extracting {char!r} from {self.value!r}", + ) def format_era(self, char: str, num: int) -> str: width = {3: 'abbreviated', 4: 'wide', 
5: 'narrow'}[max(3, num)] @@ -1522,12 +1557,12 @@ def format_weekday(self, char: str = 'E', num: int = 4) -> str: >>> from datetime import date >>> format = DateTimeFormat(date(2016, 2, 28), Locale.parse('en_US')) >>> format.format_weekday() - u'Sunday' + 'Sunday' 'E': Day of week - Use one through three letters for the abbreviated day name, four for the full (wide) name, five for the narrow name, or six for the short name. >>> format.format_weekday('E',2) - u'Sun' + 'Sun' 'e': Local day of week. Same as E except adds a numeric value that will depend on the local starting day of the week, using one or two letters. For this example, Monday is the first day of the week. @@ -1566,28 +1601,32 @@ def format_period(self, char: str, num: int) -> str: >>> from datetime import datetime, time >>> format = DateTimeFormat(time(13, 42), 'fi_FI') >>> format.format_period('a', 1) - u'ip.' + 'ip.' >>> format.format_period('b', 1) - u'iltap.' + 'iltap.' >>> format.format_period('b', 4) - u'iltapäivä' + 'iltapäivä' >>> format.format_period('B', 4) - u'iltapäivällä' + 'iltapäivällä' >>> format.format_period('B', 5) - u'ip.' + 'ip.' 
>>> format = DateTimeFormat(datetime(2022, 4, 28, 6, 27), 'zh_Hant') >>> format.format_period('a', 1) - u'上午' + '上午' >>> format.format_period('B', 1) - u'清晨' + '清晨' :param char: pattern format character ('a', 'b', 'B') :param num: count of format character """ - widths = [{3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)], - 'wide', 'narrow', 'abbreviated'] + widths = [ + {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)], + 'wide', + 'narrow', + 'abbreviated', + ] if char == 'a': period = 'pm' if self.value.hour >= 12 else 'am' context = 'format' @@ -1610,8 +1649,12 @@ def format_frac_seconds(self, num: int) -> str: return self.format(round(value, num) * 10**num, num) def format_milliseconds_in_day(self, num): - msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \ - self.value.minute * 60000 + self.value.hour * 3600000 + msecs = ( + self.value.microsecond // 1000 + + self.value.second * 1000 + + self.value.minute * 60000 + + self.value.hour * 3600000 + ) return self.format(msecs, num) def format_timezone(self, char: str, num: int) -> str: @@ -1635,35 +1678,24 @@ def format_timezone(self, char: str, num: int) -> str: return get_timezone_gmt(value, width, locale=self.locale) # TODO: To add support for O:1 elif char == 'v': - return get_timezone_name(value.tzinfo, width, - locale=self.locale) + return get_timezone_name(value.tzinfo, width, locale=self.locale) elif char == 'V': if num == 1: - return get_timezone_name(value.tzinfo, width, - uncommon=True, locale=self.locale) + return get_timezone_name(value.tzinfo, width, locale=self.locale) elif num == 2: return get_timezone_name(value.tzinfo, locale=self.locale, return_zone=True) elif num == 3: - return get_timezone_location(value.tzinfo, locale=self.locale, return_city=True) + return get_timezone_location(value.tzinfo, locale=self.locale, return_city=True) # fmt: skip return get_timezone_location(value.tzinfo, locale=self.locale) - # Included additional elif condition to add support for 
'Xx' in timezone format - elif char == 'X': - if num == 1: - return get_timezone_gmt(value, width='iso8601_short', locale=self.locale, - return_z=True) - elif num in (2, 4): - return get_timezone_gmt(value, width='short', locale=self.locale, - return_z=True) - elif num in (3, 5): - return get_timezone_gmt(value, width='iso8601', locale=self.locale, - return_z=True) - elif char == 'x': + elif char in 'Xx': + return_z = char == 'X' if num == 1: - return get_timezone_gmt(value, width='iso8601_short', locale=self.locale) + width = 'iso8601_short' elif num in (2, 4): - return get_timezone_gmt(value, width='short', locale=self.locale) + width = 'short' elif num in (3, 5): - return get_timezone_gmt(value, width='iso8601', locale=self.locale) + width = 'iso8601' + return get_timezone_gmt(value, width=width, locale=self.locale, return_z=return_z) # fmt: skip def format(self, value: SupportsInt, length: int) -> str: return '%0*d' % (length, value) @@ -1679,12 +1711,13 @@ def get_week_of_year(self) -> int: week = self.get_week_number(day_of_year) if week == 0: date = datetime.date(self.value.year - 1, 12, 31) - week = self.get_week_number(self.get_day_of_year(date), - date.weekday()) + week = self.get_week_number(self.get_day_of_year(date), date.weekday()) elif week > 52: weekday = datetime.date(self.value.year + 1, 1, 1).weekday() - if self.get_week_number(1, weekday) == 1 and \ - 32 - (weekday - self.locale.first_week_day) % 7 <= self.value.day: + if ( + self.get_week_number(1, weekday) == 1 + and 32 - (weekday - self.locale.first_week_day) % 7 <= self.value.day + ): week = 1 return week @@ -1713,8 +1746,7 @@ def get_week_number(self, day_of_period: int, day_of_week: int | None = None) -> """ if day_of_week is None: day_of_week = self.value.weekday() - first_day = (day_of_week - self.locale.first_week_day - - day_of_period + 1) % 7 + first_day = (day_of_week - self.locale.first_week_day - day_of_period + 1) % 7 if first_day < 0: first_day += 7 week_number = (day_of_period + 
first_day - 1) // 7 @@ -1737,7 +1769,7 @@ def get_week_number(self, day_of_period: int, day_of_week: int | None = None) -> 's': [1, 2], 'S': None, 'A': None, # second 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4, 5], 'O': [1, 4], 'v': [1, 4], # zone 'V': [1, 2, 3, 4], 'x': [1, 2, 3, 4, 5], 'X': [1, 2, 3, 4, 5], # zone -} +} # fmt: skip #: The pattern characters declared in the Date Field Symbol Table #: (https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table) @@ -1749,20 +1781,20 @@ def parse_pattern(pattern: str | DateTimePattern) -> DateTimePattern: """Parse date, time, and datetime format patterns. >>> parse_pattern("MMMMd").format - u'%(MMMM)s%(d)s' + '%(MMMM)s%(d)s' >>> parse_pattern("MMM d, yyyy").format - u'%(MMM)s %(d)s, %(yyyy)s' + '%(MMM)s %(d)s, %(yyyy)s' Pattern can contain literal strings in single quotes: >>> parse_pattern("H:mm' Uhr 'z").format - u'%(H)s:%(mm)s Uhr %(z)s' + '%(H)s:%(mm)s Uhr %(z)s' An actual single quote can be used by using two adjacent single quote characters: >>> parse_pattern("hh' o''clock'").format - u"%(hh)s o'clock" + "%(hh)s o'clock" :param pattern: the formatting pattern to parse """ @@ -1886,18 +1918,18 @@ def split_interval_pattern(pattern: str) -> list[str]: > The pattern is then designed to be broken up into two pieces by determining the first repeating field. - https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats - >>> split_interval_pattern(u'E d.M. \u2013 E d.M.') - [u'E d.M. \u2013 ', 'E d.M.'] + >>> split_interval_pattern('E d.M. – E d.M.') + ['E d.M. 
– ', 'E d.M.'] >>> split_interval_pattern("Y 'text' Y 'more text'") ["Y 'text '", "Y 'more text'"] - >>> split_interval_pattern(u"E, MMM d \u2013 E") - [u'E, MMM d \u2013 ', u'E'] + >>> split_interval_pattern('E, MMM d – E') + ['E, MMM d – ', 'E'] >>> split_interval_pattern("MMM d") ['MMM d'] >>> split_interval_pattern("y G") ['y G'] - >>> split_interval_pattern(u"MMM d \u2013 d") - [u'MMM d \u2013 ', u'd'] + >>> split_interval_pattern('MMM d – d') + ['MMM d – ', 'd'] :param pattern: Interval pattern string :return: list of "subpatterns" @@ -1917,7 +1949,11 @@ def split_interval_pattern(pattern: str) -> list[str]: return [untokenize_pattern(tokens) for tokens in parts] -def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields: bool = False) -> str | None: +def match_skeleton( + skeleton: str, + options: Iterable[str], + allow_different_fields: bool = False, +) -> str | None: """ Find the closest match for the given datetime skeleton among the options given. @@ -1965,11 +2001,11 @@ def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields if 'b' in skeleton and not any('b' in option for option in options): skeleton = skeleton.replace('b', '') - get_input_field_width = dict(t[1] for t in tokenize_pattern(skeleton) if t[0] == "field").get + get_input_field_width = dict(t[1] for t in tokenize_pattern(skeleton) if t[0] == "field").get # fmt: skip best_skeleton = None best_distance = None for option in options: - get_opt_field_width = dict(t[1] for t in tokenize_pattern(option) if t[0] == "field").get + get_opt_field_width = dict(t[1] for t in tokenize_pattern(option) if t[0] == "field").get # fmt: skip distance = 0 for field in PATTERN_CHARS: input_width = get_input_field_width(field, 0) @@ -1980,13 +2016,18 @@ def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields if not allow_different_fields: # This one is not okay option = None break - distance += 0x1000 # Magic weight constant for "entirely 
different fields" - elif field == 'M' and ((input_width > 2 and opt_width <= 2) or (input_width <= 2 and opt_width > 2)): - distance += 0x100 # Magic weight for "text turns into a number" + # Magic weight constant for "entirely different fields" + distance += 0x1000 + elif field == 'M' and ( + (input_width > 2 and opt_width <= 2) or (input_width <= 2 and opt_width > 2) + ): + # Magic weight constant for "text turns into a number" + distance += 0x100 else: distance += abs(input_width - opt_width) - if not option: # We lost the option along the way (probably due to "allow_different_fields") + if not option: + # We lost the option along the way (probably due to "allow_different_fields") continue if not best_skeleton or distance < best_distance: diff --git a/babel/languages.py b/babel/languages.py index 564f555d2..5b2396c84 100644 --- a/babel/languages.py +++ b/babel/languages.py @@ -3,7 +3,11 @@ from babel.core import get_global -def get_official_languages(territory: str, regional: bool = False, de_facto: bool = False) -> tuple[str, ...]: +def get_official_languages( + territory: str, + regional: bool = False, + de_facto: bool = False, +) -> tuple[str, ...]: """ Get the official language(s) for the given territory. @@ -43,7 +47,9 @@ def get_official_languages(territory: str, regional: bool = False, de_facto: boo return tuple(lang for _, lang in pairs) -def get_territory_language_info(territory: str) -> dict[str, dict[str, float | str | None]]: +def get_territory_language_info( + territory: str, +) -> dict[str, dict[str, float | str | None]]: """ Get a dictionary of language information for a territory. diff --git a/babel/lists.py b/babel/lists.py index 353171c71..b6c859800 100644 --- a/babel/lists.py +++ b/babel/lists.py @@ -1,18 +1,19 @@ """ - babel.lists - ~~~~~~~~~~~ +babel.lists +~~~~~~~~~~~ - Locale dependent formatting of lists. +Locale dependent formatting of lists. 
- The default locale for the functions in this module is determined by the - following environment variables, in that order: +The default locale for the functions in this module is determined by the +following environment variables, in that order: - * ``LC_ALL``, and - * ``LANG`` + * ``LC_ALL``, and + * ``LANG`` - :copyright: (c) 2015-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2015-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ + from __future__ import annotations import warnings @@ -37,18 +38,26 @@ def __getattr__(name): def format_list( lst: Sequence[str], - style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard', + style: Literal[ + 'standard', + 'standard-short', + 'or', + 'or-short', + 'unit', + 'unit-short', + 'unit-narrow', + ] = 'standard', locale: Locale | str | None = None, ) -> str: """ Format the items in `lst` as a list. >>> format_list(['apples', 'oranges', 'pears'], locale='en') - u'apples, oranges, and pears' + 'apples, oranges, and pears' >>> format_list(['apples', 'oranges', 'pears'], locale='zh') - u'apples\u3001oranges\u548cpears' + 'apples、oranges和pears' >>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi') - u'omena, peruna tai aplari' + 'omena, peruna tai aplari' Not all styles are necessarily available in all locales. The function will attempt to fall back to replacement styles according to the rules diff --git a/babel/localedata.py b/babel/localedata.py index 59f1db09e..2b225a142 100644 --- a/babel/localedata.py +++ b/babel/localedata.py @@ -1,14 +1,14 @@ """ - babel.localedata - ~~~~~~~~~~~~~~~~ +babel.localedata +~~~~~~~~~~~~~~~~ - Low-level locale data access. +Low-level locale data access. - :note: The `Locale` class, which uses this module under the hood, provides a - more convenient interface for accessing the locale data. 
+:note: The `Locale` class, which uses this module under the hood, provides a + more convenient interface for accessing the locale data. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ from __future__ import annotations @@ -89,8 +89,9 @@ def locale_identifiers() -> list[str]: """ return [ stem - for stem, extension in - (os.path.splitext(filename) for filename in os.listdir(_dirname)) + for stem, extension in ( + os.path.splitext(filename) for filename in os.listdir(_dirname) + ) if extension == '.dat' and stem != 'root' ] @@ -125,7 +126,7 @@ def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str >>> d = load('en_US') >>> d['languages']['sv'] - u'Swedish' + 'Swedish' Note that the results are cached, and subsequent requests for the same locale return the same dictionary: @@ -151,6 +152,7 @@ def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str data = {} else: from babel.core import get_global + parent = get_global('parent_exceptions').get(name) if not parent: if _is_non_likely_script(name): @@ -242,7 +244,11 @@ class LocaleDataDict(abc.MutableMapping): values. """ - def __init__(self, data: MutableMapping[str | int | None, Any], base: Mapping[str | int | None, Any] | None = None): + def __init__( + self, + data: MutableMapping[str | int | None, Any], + base: Mapping[str | int | None, Any] | None = None, + ): self._data = data if base is None: base = data diff --git a/babel/localtime/__init__.py b/babel/localtime/__init__.py index 854c07496..9eb95ab2e 100644 --- a/babel/localtime/__init__.py +++ b/babel/localtime/__init__.py @@ -1,12 +1,12 @@ """ - babel.localtime - ~~~~~~~~~~~~~~~ +babel.localtime +~~~~~~~~~~~~~~~ - Babel specific fork of tzlocal to determine the local timezone - of the system. +Babel specific fork of tzlocal to determine the local timezone +of the system. 
- :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ import datetime diff --git a/babel/localtime/_fallback.py b/babel/localtime/_fallback.py index fab6867c3..218813905 100644 --- a/babel/localtime/_fallback.py +++ b/babel/localtime/_fallback.py @@ -1,11 +1,11 @@ """ - babel.localtime._fallback - ~~~~~~~~~~~~~~~~~~~~~~~~~ +babel.localtime._fallback +~~~~~~~~~~~~~~~~~~~~~~~~~ - Emulated fallback local timezone when all else fails. +Emulated fallback local timezone when all else fails. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ import datetime @@ -19,7 +19,6 @@ class _FallbackLocalTimezone(datetime.tzinfo): - def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta: if self._isdst(dt): return DSTOFFSET @@ -38,7 +37,7 @@ def tzname(self, dt: datetime.datetime) -> str: def _isdst(self, dt: datetime.datetime) -> bool: tt = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, - dt.weekday(), 0, -1) + dt.weekday(), 0, -1) # fmt: skip stamp = time.mktime(tt) tt = time.localtime(stamp) return tt.tm_isdst > 0 diff --git a/babel/localtime/_unix.py b/babel/localtime/_unix.py index 782a7d246..70dd2322c 100644 --- a/babel/localtime/_unix.py +++ b/babel/localtime/_unix.py @@ -51,7 +51,7 @@ def _get_localzone(_root: str = '/') -> datetime.tzinfo: # `None` (as a fix for #1092). # Instead, let's just "fix" the double slash symlink by stripping # leading slashes before passing the assumed zone name forward. 
- zone_name = link_dst[pos + 10:].lstrip("/") + zone_name = link_dst[pos + 10 :].lstrip("/") tzinfo = _get_tzinfo(zone_name) if tzinfo is not None: return tzinfo diff --git a/babel/localtime/_win32.py b/babel/localtime/_win32.py index 1a52567bc..0fb625ba9 100644 --- a/babel/localtime/_win32.py +++ b/babel/localtime/_win32.py @@ -92,7 +92,6 @@ def get_localzone_name() -> str: def _get_localzone() -> datetime.tzinfo: if winreg is None: - raise LookupError( - 'Runtime support not available') + raise LookupError('Runtime support not available') return _get_tzinfo_or_raise(get_localzone_name()) diff --git a/babel/messages/__init__.py b/babel/messages/__init__.py index ca83faa97..8dde3f299 100644 --- a/babel/messages/__init__.py +++ b/babel/messages/__init__.py @@ -1,11 +1,11 @@ """ - babel.messages - ~~~~~~~~~~~~~~ +babel.messages +~~~~~~~~~~~~~~ - Support for ``gettext`` message catalogs. +Support for ``gettext`` message catalogs. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ from babel.messages.catalog import ( diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index f84a5bd1b..9a9739a72 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -1,12 +1,13 @@ """ - babel.messages.catalog - ~~~~~~~~~~~~~~~~~~~~~~ +babel.messages.catalog +~~~~~~~~~~~~~~~~~~~~~~ - Data structures for message catalogs. +Data structures for message catalogs. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. 
""" + from __future__ import annotations import datetime @@ -23,7 +24,7 @@ from babel.core import Locale, UnknownLocaleError from babel.dates import format_datetime from babel.messages.plurals import get_plural -from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct +from babel.util import LOCALTZ, _cmp if TYPE_CHECKING: from typing_extensions import TypeAlias @@ -54,9 +55,11 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6): s.set_seq2(word) for x in possibilities: s.set_seq1(x) - if s.real_quick_ratio() >= cutoff and \ - s.quick_ratio() >= cutoff and \ - s.ratio() >= cutoff: + if ( + s.real_quick_ratio() >= cutoff + and s.quick_ratio() >= cutoff + and s.ratio() >= cutoff + ): result.append((s.ratio(), x)) # Move the best scorers to head of list @@ -65,7 +68,8 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6): return [x for score, x in result] -PYTHON_FORMAT = re.compile(r''' +PYTHON_FORMAT = re.compile( + r''' \% (?:\(([\w]*)\))? ( @@ -74,7 +78,9 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6): [hlL]? 
) ([diouxXeEfFgGcrs%]) -''', re.VERBOSE) +''', + re.VERBOSE, +) def _has_python_brace_format(string: str) -> bool: @@ -118,7 +124,10 @@ def _parse_datetime_header(value: str) -> datetime.datetime: net_mins_offset *= plus_minus # Create an offset object - tzoffset = FixedOffsetTimezone(net_mins_offset) + tzoffset = datetime.timezone( + offset=datetime.timedelta(minutes=net_mins_offset), + name=f'Etc/GMT{net_mins_offset:+d}', + ) # Store the offset in a datetime object dt = dt.replace(tzinfo=tzoffset) @@ -161,7 +170,7 @@ def __init__( if not string and self.pluralizable: string = ('', '') self.string = string - self.locations = list(distinct(locations)) + self.locations = list(dict.fromkeys(locations)) if locations else [] self.flags = set(flags) if id and self.python_format: self.flags.add('python-format') @@ -171,12 +180,15 @@ def __init__( self.flags.add('python-brace-format') else: self.flags.discard('python-brace-format') - self.auto_comments = list(distinct(auto_comments)) - self.user_comments = list(distinct(user_comments)) - if isinstance(previous_id, str): - self.previous_id = [previous_id] + self.auto_comments = list(dict.fromkeys(auto_comments)) if auto_comments else [] + self.user_comments = list(dict.fromkeys(user_comments)) if user_comments else [] + if previous_id: + if isinstance(previous_id, str): + self.previous_id = [previous_id] + else: + self.previous_id = list(previous_id) else: - self.previous_id = list(previous_id) + self.previous_id = [] self.lineno = lineno self.context = context @@ -185,10 +197,12 @@ def __repr__(self) -> str: def __cmp__(self, other: object) -> int: """Compare Messages, taking into account plural ids""" + def values_to_compare(obj): if isinstance(obj, Message) and obj.pluralizable: return obj.id[0], obj.context or '' return obj.id, obj.context or '' + return _cmp(values_to_compare(self), values_to_compare(other)) def __gt__(self, other: object) -> bool: @@ -217,10 +231,17 @@ def is_identical(self, other: Message) -> bool: 
return self.__dict__ == other.__dict__ def clone(self) -> Message: - return Message(*map(copy, (self.id, self.string, self.locations, - self.flags, self.auto_comments, - self.user_comments, self.previous_id, - self.lineno, self.context))) + return Message( + id=copy(self.id), + string=copy(self.string), + locations=copy(self.locations), + flags=copy(self.flags), + auto_comments=copy(self.auto_comments), + user_comments=copy(self.user_comments), + previous_id=copy(self.previous_id), + lineno=self.lineno, # immutable (str/None) + context=self.context, # immutable (str/None) + ) def check(self, catalog: Catalog | None = None) -> list[TranslationError]: """Run various validation checks on the message. Some validations @@ -233,6 +254,7 @@ def check(self, catalog: Catalog | None = None) -> list[TranslationError]: in a catalog. """ from babel.messages.checkers import checkers + errors: list[TranslationError] = [] for checker in checkers: try: @@ -279,9 +301,12 @@ def python_format(self) -> bool: :type: `bool`""" ids = self.id - if not isinstance(ids, (list, tuple)): - ids = [ids] - return any(PYTHON_FORMAT.search(id) for id in ids) + if isinstance(ids, (list, tuple)): + for id in ids: # Explicit loop for performance reasons. + if PYTHON_FORMAT.search(id): + return True + return False + return bool(PYTHON_FORMAT.search(ids)) @property def python_brace_format(self) -> bool: @@ -294,9 +319,12 @@ def python_brace_format(self) -> bool: :type: `bool`""" ids = self.id - if not isinstance(ids, (list, tuple)): - ids = [ids] - return any(_has_python_brace_format(id) for id in ids) + if isinstance(ids, (list, tuple)): + for id in ids: # Explicit loop for performance reasons. 
+ if _has_python_brace_format(id): + return True + return False + return _has_python_brace_format(ids) class TranslationError(Exception): @@ -315,6 +343,7 @@ class TranslationError(Exception): def parse_separated_header(value: str) -> dict[str, str]: # Adapted from https://peps.python.org/pep-0594/#cgi from email.message import Message + m = Message() m['content-type'] = value return dict(m.get_params()) @@ -420,7 +449,9 @@ def _set_locale(self, locale: Locale | str | None) -> None: self._locale = None return - raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}") + raise TypeError( + f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}", + ) def _get_locale(self) -> Locale | None: return self._locale @@ -436,11 +467,13 @@ def _get_header_comment(self) -> str: year = datetime.datetime.now(LOCALTZ).strftime('%Y') if hasattr(self.revision_date, 'strftime'): year = self.revision_date.strftime('%Y') - comment = comment.replace('PROJECT', self.project) \ - .replace('VERSION', self.version) \ - .replace('YEAR', year) \ - .replace('ORGANIZATION', self.copyright_holder) - locale_name = (self.locale.english_name if self.locale else self.locale_identifier) + comment = ( + comment.replace('PROJECT', self.project) + .replace('VERSION', self.version) + .replace('YEAR', year) + .replace('ORGANIZATION', self.copyright_holder) + ) + locale_name = self.locale.english_name if self.locale else self.locale_identifier if locale_name: comment = comment.replace("Translations template", f"{locale_name} translations") return comment @@ -448,7 +481,10 @@ def _get_header_comment(self) -> str: def _set_header_comment(self, string: str | None) -> None: self._header_comment = string - header_comment = property(_get_header_comment, _set_header_comment, doc="""\ + header_comment = property( + _get_header_comment, + _set_header_comment, + doc="""\ The header comment for the catalog. 
>>> catalog = Catalog(project='Foobar', version='1.0', @@ -479,11 +515,16 @@ def _set_header_comment(self, string: str | None) -> None: # :type: `unicode` - """) + """, + ) def _get_mime_headers(self) -> list[tuple[str, str]]: if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)): - revision_date = format_datetime(self.revision_date, 'yyyy-MM-dd HH:mmZ', locale='en') + revision_date = format_datetime( + self.revision_date, + 'yyyy-MM-dd HH:mmZ', + locale='en', + ) else: revision_date = self.revision_date @@ -497,7 +538,7 @@ def _get_mime_headers(self) -> list[tuple[str, str]]: ('POT-Creation-Date', format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', locale='en')), ('PO-Revision-Date', revision_date), ('Last-Translator', self.last_translator), - ] + ] # fmt: skip if self.locale_identifier: headers.append(('Language', str(self.locale_identifier))) headers.append(('Language-Team', language_team)) @@ -547,7 +588,10 @@ def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None: if 'YEAR' not in value: self.revision_date = _parse_datetime_header(value) - mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ + mime_headers = property( + _get_mime_headers, + _set_mime_headers, + doc="""\ The MIME headers of the catalog, used for the special ``msgid ""`` entry. The behavior of this property changes slightly depending on whether a locale @@ -597,7 +641,8 @@ def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None: Generated-By: Babel ... :type: `list` - """) + """, + ) @property def num_plurals(self) -> int: @@ -693,19 +738,19 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: """Add or update the message with the specified ID. 
>>> catalog = Catalog() - >>> catalog[u'foo'] = Message(u'foo') - >>> catalog[u'foo'] - + >>> catalog['foo'] = Message('foo') + >>> catalog['foo'] + If a message with that ID is already in the catalog, it is updated to include the locations and flags of the new message. >>> catalog = Catalog() - >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) - >>> catalog[u'foo'].locations + >>> catalog['foo'] = Message('foo', locations=[('main.py', 1)]) + >>> catalog['foo'].locations [('main.py', 1)] - >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) - >>> catalog[u'foo'].locations + >>> catalog['foo'] = Message('foo', locations=[('utils.py', 5)]) + >>> catalog['foo'].locations [('main.py', 1), ('utils.py', 5)] :param id: the message ID @@ -719,22 +764,20 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: # The new message adds pluralization current.id = message.id current.string = message.string - current.locations = list(distinct(current.locations + - message.locations)) - current.auto_comments = list(distinct(current.auto_comments + - message.auto_comments)) - current.user_comments = list(distinct(current.user_comments + - message.user_comments)) + current.locations = list(dict.fromkeys([*current.locations, *message.locations])) + current.auto_comments = list(dict.fromkeys([*current.auto_comments, *message.auto_comments])) # fmt:skip + current.user_comments = list(dict.fromkeys([*current.user_comments, *message.user_comments])) # fmt:skip current.flags |= message.flags elif id == '': # special treatment for the header message self.mime_headers = message_from_string(message.string).items() - self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments]) + self.header_comment = "\n".join(f"# {c}".rstrip() for c in message.user_comments) self.fuzzy = message.fuzzy else: if isinstance(id, (list, tuple)): - assert isinstance(message.string, (list, tuple)), \ + assert isinstance(message.string, (list, tuple)), 
( f"Expected sequence but got {type(message.string)}" + ) self._messages[key] = message def add( @@ -752,10 +795,10 @@ def add( """Add or update the message with the specified ID. >>> catalog = Catalog() - >>> catalog.add(u'foo') + >>> catalog.add('foo') - >>> catalog[u'foo'] - + >>> catalog['foo'] + This method simply constructs a `Message` object with the given arguments and invokes `__setitem__` with that object. @@ -774,9 +817,17 @@ def add( PO file, if any :param context: the message context """ - message = Message(id, string, list(locations), flags, auto_comments, - user_comments, previous_id, lineno=lineno, - context=context) + message = Message( + id, + string, + list(locations), + flags, + auto_comments, + user_comments, + previous_id, + lineno=lineno, + context=context, + ) self[id] = message return message @@ -831,11 +882,11 @@ def update( >>> template.add(('salad', 'salads'), locations=[('util.py', 42)]) >>> catalog = Catalog(locale='de_DE') - >>> catalog.add('blue', u'blau', locations=[('main.py', 98)]) + >>> catalog.add('blue', 'blau', locations=[('main.py', 98)]) - >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)]) + >>> catalog.add('head', 'Kopf', locations=[('util.py', 33)]) - >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'), + >>> catalog.add(('salad', 'salads'), ('Salat', 'Salate'), ... 
locations=[('util.py', 38)]) @@ -850,13 +901,13 @@ def update( >>> msg2 = catalog['blue'] >>> msg2.string - u'blau' + 'blau' >>> msg2.locations [('main.py', 100)] >>> msg3 = catalog['salad'] >>> msg3.string - (u'Salat', u'Salate') + ('Salat', 'Salate') >>> msg3.locations [('util.py', 42)] @@ -889,7 +940,11 @@ def update( fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt) fuzzy_matches = set() - def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None: + def _merge( + message: Message, + oldkey: tuple[str, str] | str, + newkey: tuple[str, str] | str, + ) -> None: message = message.clone() fuzzy = False if oldkey != newkey: @@ -906,8 +961,8 @@ def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, s assert oldmsg is not None message.string = oldmsg.string - if keep_user_comments: - message.user_comments = list(distinct(oldmsg.user_comments)) + if keep_user_comments and oldmsg.user_comments: + message.user_comments = list(dict.fromkeys(oldmsg.user_comments)) if isinstance(message.id, (list, tuple)): if not isinstance(message.string, (list, tuple)): @@ -917,7 +972,7 @@ def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, s ) elif len(message.string) != self.num_plurals: fuzzy = True - message.string = tuple(message.string[:len(oldmsg.string)]) + message.string = tuple(message.string[: len(oldmsg.string)]) elif isinstance(message.string, (list, tuple)): fuzzy = True message.string = message.string[0] @@ -971,7 +1026,11 @@ def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str: matchkey = key return matchkey.lower().strip() - def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str: + def _key_for( + self, + id: _MessageID, + context: str | None = None, + ) -> tuple[str, str] | str: """The key for a message is just the singular ID even for pluralizable messages, but is a ``(msgid, msgctxt)`` tuple for context-specific messages. 
@@ -991,10 +1050,6 @@ def is_identical(self, other: Catalog) -> bool: for key in self._messages.keys() | other._messages.keys(): message_1 = self.get(key) message_2 = other.get(key) - if ( - message_1 is None - or message_2 is None - or not message_1.is_identical(message_2) - ): + if message_1 is None or message_2 is None or not message_1.is_identical(message_2): return False return dict(self.mime_headers) == dict(other.mime_headers) diff --git a/babel/messages/checkers.py b/babel/messages/checkers.py index df7c3ca73..4026ab1b3 100644 --- a/babel/messages/checkers.py +++ b/babel/messages/checkers.py @@ -1,14 +1,15 @@ """ - babel.messages.checkers - ~~~~~~~~~~~~~~~~~~~~~~~ +babel.messages.checkers +~~~~~~~~~~~~~~~~~~~~~~~ - Various routines that help with validation of translations. +Various routines that help with validation of translations. - :since: version 0.9 +:since: version 0.9 - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ + from __future__ import annotations from collections.abc import Callable @@ -27,8 +28,7 @@ def num_plurals(catalog: Catalog | None, message: Message) -> None: """Verify the number of plurals in the translation.""" if not message.pluralizable: if not isinstance(message.string, str): - raise TranslationError("Found plural forms for non-pluralizable " - "message") + raise TranslationError("Found plural forms for non-pluralizable message") return # skip further tests if no catalog is provided. 
@@ -39,8 +39,9 @@ def num_plurals(catalog: Catalog | None, message: Message) -> None: if not isinstance(msgstrs, (list, tuple)): msgstrs = (msgstrs,) if len(msgstrs) != catalog.num_plurals: - raise TranslationError("Wrong number of plural forms (expected %d)" % - catalog.num_plurals) + raise TranslationError( + f"Wrong number of plural forms (expected {catalog.num_plurals})", + ) def python_format(catalog: Catalog | None, message: Message) -> None: @@ -54,9 +55,12 @@ def python_format(catalog: Catalog | None, message: Message) -> None: if not isinstance(msgstrs, (list, tuple)): msgstrs = (msgstrs,) - for msgid, msgstr in zip(msgids, msgstrs): - if msgstr: - _validate_format(msgid, msgstr) + if msgstrs[0]: + _validate_format(msgids[0], msgstrs[0]) + if message.pluralizable: + for msgstr in msgstrs[1:]: + if msgstr: + _validate_format(msgids[1], msgstr) def _validate_format(format: str, alternative: str) -> None: @@ -112,17 +116,20 @@ def _check_positional(results: list[tuple[str, str]]) -> bool: positional = name is None else: if (name is None) != positional: - raise TranslationError('format string mixes positional ' - 'and named placeholders') + raise TranslationError( + 'format string mixes positional and named placeholders', + ) return bool(positional) - a, b = map(_parse, (format, alternative)) + a = _parse(format) + b = _parse(alternative) if not a: return # now check if both strings are positional or named - a_positional, b_positional = map(_check_positional, (a, b)) + a_positional = _check_positional(a) + b_positional = _check_positional(b) if a_positional and not b_positional and not b: raise TranslationError('placeholders are incompatible') elif a_positional != b_positional: @@ -132,13 +139,13 @@ def _check_positional(results: list[tuple[str, str]]) -> bool: # same number of format chars and those must be compatible if a_positional: if len(a) != len(b): - raise TranslationError('positional format placeholders are ' - 'unbalanced') + raise 
TranslationError('positional format placeholders are unbalanced') for idx, ((_, first), (_, second)) in enumerate(zip(a, b)): if not _compatible(first, second): - raise TranslationError('incompatible format for placeholder ' - '%d: %r and %r are not compatible' % - (idx + 1, first, second)) + raise TranslationError( + f'incompatible format for placeholder {idx + 1:d}: ' + f'{first!r} and {second!r} are not compatible', + ) # otherwise the second string must not have names the first one # doesn't have and the types of those included must be compatible @@ -156,6 +163,7 @@ def _check_positional(results: list[tuple[str, str]]) -> bool: def _find_checkers() -> list[Callable[[Catalog | None, Message], object]]: from babel.messages._compat import find_entrypoints + checkers: list[Callable[[Catalog | None, Message], object]] = [] checkers.extend(load() for (name, load) in find_entrypoints('babel.checkers')) if len(checkers) == 0: diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 7f4230f61..6fad84304 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -1,20 +1,21 @@ """ - babel.messages.extract - ~~~~~~~~~~~~~~~~~~~~~~ +babel.messages.extract +~~~~~~~~~~~~~~~~~~~~~~ - Basic infrastructure for extracting localizable messages from source files. +Basic infrastructure for extracting localizable messages from source files. - This module defines an extensible system for collecting localizable message - strings from a variety of sources. A native extractor for Python source - files is builtin, extractors for other sources can be added using very - simple plugins. +This module defines an extensible system for collecting localizable message +strings from a variety of sources. A native extractor for Python source +files is builtin, extractors for other sources can be added using very +simple plugins. - The main entry points into the extraction functionality are the functions - `extract_from_dir` and `extract_from_file`. 
+The main entry points into the extraction functionality are the functions +`extract_from_dir` and `extract_from_file`. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ + from __future__ import annotations import ast @@ -22,6 +23,7 @@ import os import sys import tokenize +import warnings from collections.abc import ( Callable, Collection, @@ -62,7 +64,7 @@ def tell(self) -> int: ... _Keyword: TypeAlias = dict[int | None, _SimpleKeyword] | _SimpleKeyword # 5-tuple of (filename, lineno, messages, comments, context) - _FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None] + _FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None] # fmt: skip # 4-tuple of (lineno, message, comments, context) _ExtractionResult: TypeAlias = tuple[int, str | tuple[str, ...], list[str], str | None] @@ -72,7 +74,7 @@ def tell(self) -> int: ... _CallableExtractionMethod: TypeAlias = Callable[ [_FileObj | IO[bytes], Mapping[str, _Keyword], Collection[str], Mapping[str, Any]], Iterable[_ExtractionResult], - ] + ] # fmt: skip _ExtractionMethod: TypeAlias = _CallableExtractionMethod | str @@ -86,9 +88,11 @@ def tell(self) -> int: ... 'ungettext': (1, 2), 'dgettext': (2,), 'dngettext': (2, 3), + 'dpgettext': ((2, 'c'), 3), 'N_': None, 'pgettext': ((1, 'c'), 2), 'npgettext': ((1, 'c'), 2, 3), + 'dnpgettext': ((2, 'c'), 3, 4), } DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')] @@ -103,15 +107,45 @@ def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]): """Helper function for `extract` that strips comment tags from strings in a list of comment lines. This functions operates in-place. 
""" + def _strip(line: str): for tag in tags: if line.startswith(tag): - return line[len(tag):].strip() + return line[len(tag) :].strip() return line - comments[:] = map(_strip, comments) + comments[:] = [_strip(c) for c in comments] + + +def _make_default_directory_filter( + method_map: Iterable[tuple[str, str]], + root_dir: str | os.PathLike[str], +): + method_map = tuple(method_map) + + def directory_filter(dirpath: str | os.PathLike[str]) -> bool: + subdir = os.path.basename(dirpath) + # Legacy default behavior: ignore dot and underscore directories + if subdir.startswith('.') or subdir.startswith('_'): + return False + + dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/') + + for pattern, method in method_map: + if method == "ignore" and pathmatch(pattern, dir_rel): + return False -def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool: + return True + + return directory_filter + + +def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool: # pragma: no cover + warnings.warn( + "`default_directory_filter` is deprecated and will be removed in a future version of Babel.", + DeprecationWarning, + stacklevel=2, + ) subdir = os.path.basename(dirpath) # Legacy default behavior: ignore dot and underscore directories return not (subdir.startswith('.') or subdir.startswith('_')) @@ -198,16 +232,21 @@ def extract_from_dir( """ if dirname is None: dirname = os.getcwd() + if options_map is None: options_map = {} + + dirname = os.path.abspath(dirname) + if directory_filter is None: - directory_filter = default_directory_filter + directory_filter = _make_default_directory_filter( + method_map=method_map, + root_dir=dirname, + ) - absname = os.path.abspath(dirname) - for root, dirnames, filenames in os.walk(absname): + for root, dirnames, filenames in os.walk(dirname): dirnames[:] = [ - subdir for subdir in dirnames - if directory_filter(os.path.join(root, subdir)) + subdir for subdir in dirnames if directory_filter(os.path.join(root, 
subdir)) ] dirnames.sort() filenames.sort() @@ -222,7 +261,7 @@ def extract_from_dir( keywords, comment_tags, strip_comment_tags, - dirpath=absname, + dirpath=dirname, ) @@ -277,12 +316,31 @@ def check_and_call_extract_file( if pathmatch(opattern, filename): options = odict break + + # Merge keywords and comment_tags from per-format options if present. + file_keywords = keywords + file_comment_tags = comment_tags + if keywords_opt := options.get("keywords"): + if not isinstance(keywords_opt, dict): # pragma: no cover + raise TypeError( + f"The `keywords` option must be a dict of parsed keywords, not {keywords_opt!r}", + ) + file_keywords = {**keywords, **keywords_opt} + + if comments_opt := options.get("add_comments"): + if not isinstance(comments_opt, (list, tuple, set)): # pragma: no cover + raise TypeError( + f"The `add_comments` option must be a collection of comment tags, not {comments_opt!r}.", + ) + file_comment_tags = tuple(set(comment_tags) | set(comments_opt)) + if callback: callback(filename, method, options) for message_tuple in extract_from_file( - method, filepath, - keywords=keywords, - comment_tags=comment_tags, + method, + filepath, + keywords=file_keywords, + comment_tags=file_comment_tags, options=options, strip_comment_tags=strip_comment_tags, ): @@ -321,8 +379,9 @@ def extract_from_file( return [] with open(filename, 'rb') as fileobj: - return list(extract(method, fileobj, keywords, comment_tags, - options, strip_comment_tags)) + return list( + extract(method, fileobj, keywords, comment_tags, options, strip_comment_tags), + ) def _match_messages_against_spec( @@ -357,7 +416,7 @@ def _match_messages_against_spec( first_msg_index = spec[0] - 1 # An empty string msgid isn't valid, emit a warning if not messages[first_msg_index]: - filename = (getattr(fileobj, "name", None) or "(unknown)") + filename = getattr(fileobj, "name", None) or "(unknown)" sys.stderr.write( f"{filename}:{lineno}: warning: Empty msgid. 
It is reserved by GNU gettext: gettext(\"\") " f"returns the header entry with meta information, not the empty string.\n", @@ -403,7 +462,7 @@ def extract( >>> from io import BytesIO >>> for message in extract('python', BytesIO(source)): ... print(message) - (3, u'Hello, world!', [], None) + (3, 'Hello, world!', [], None) :param method: an extraction method (a callable), or a string specifying the extraction method (.e.g. "python"); @@ -431,7 +490,7 @@ def extract( elif ':' in method or '.' in method: if ':' not in method: lastdot = method.rfind('.') - module, attrname = method[:lastdot], method[lastdot + 1:] + module, attrname = method[:lastdot], method[lastdot + 1 :] else: module, attrname = method.split(':', 1) func = getattr(__import__(module, {}, {}, [attrname]), attrname) @@ -445,8 +504,7 @@ def extract( if func is None: raise ValueError(f"Unknown extraction method {method!r}") - results = func(fileobj, keywords.keys(), comment_tags, - options=options or {}) + results = func(fileobj, keywords.keys(), comment_tags, options=options or {}) for lineno, funcname, messages, comments in results: if not isinstance(messages, (list, tuple)): @@ -508,7 +566,7 @@ def extract_python( :rtype: ``iterator`` """ funcname = lineno = message_lineno = None - call_stack = -1 + call_stack = [] # line numbers of calls buf = [] messages = [] translator_comments = [] @@ -526,7 +584,7 @@ def extract_python( current_fstring_start = None for tok, value, (lineno, _), _, _ in tokens: - if call_stack == -1 and tok == NAME and value in ('def', 'class'): + if not call_stack and tok == NAME and value in ('def', 'class'): in_def = True elif tok == OP and value == '(': if in_def: @@ -535,16 +593,15 @@ def extract_python( in_def = False continue if funcname: - call_stack += 1 + call_stack.append(lineno) elif in_def and tok == OP and value == ':': # End of a class definition without parens in_def = False continue - elif call_stack == -1 and tok == COMMENT: + elif not call_stack and tok == 
COMMENT: # Strip the comment token from the line value = value[1:].strip() - if in_translator_comments and \ - translator_comments[-1][0] == lineno - 1: + if in_translator_comments and translator_comments[-1][0] == lineno - 1: # We're already inside a translator comment, continue appending translator_comments.append((lineno, value)) continue @@ -555,8 +612,8 @@ def extract_python( in_translator_comments = True translator_comments.append((lineno, value)) break - elif funcname and call_stack == 0: - nested = (tok == NAME and value in keywords) + elif funcname and len(call_stack) == 1: + nested = tok == NAME and value in keywords if (tok == OP and value == ')') or nested: if buf: messages.append(''.join(buf)) @@ -565,17 +622,24 @@ def extract_python( messages.append(None) messages = tuple(messages) if len(messages) > 1 else messages[0] - # Comments don't apply unless they immediately - # precede the message - if translator_comments and \ - translator_comments[-1][0] < message_lineno - 1: - translator_comments = [] - yield (message_lineno, funcname, messages, - [comment[1] for comment in translator_comments]) + if translator_comments: + last_comment_lineno = translator_comments[-1][0] + if last_comment_lineno < min(message_lineno, call_stack[-1]) - 1: + # Comments don't apply unless they immediately + # precede the message, or the line where the parenthesis token + # to start this message's translation call is. 
+ translator_comments.clear() + + yield ( + message_lineno, + funcname, + messages, + [comment[1] for comment in translator_comments], + ) funcname = lineno = message_lineno = None - call_stack = -1 + call_stack.clear() messages = [] translator_comments = [] in_translator_comments = False @@ -619,9 +683,9 @@ def extract_python( elif tok != NL and not message_lineno: message_lineno = lineno - elif call_stack > 0 and tok == OP and value == ')': - call_stack -= 1 - elif funcname and call_stack == -1: + elif len(call_stack) > 1 and tok == OP and value == ')': + call_stack.pop() + elif funcname and not call_stack: funcname = None elif tok == NAME and value in keywords: funcname = value @@ -679,6 +743,7 @@ def extract_javascript( :param lineno: line number offset (for parsing embedded fragments) """ from babel.messages.jslexer import Token, tokenize, unquote_string + funcname = message_lineno = None messages = [] last_argument = None @@ -696,17 +761,30 @@ def extract_javascript( lineno=lineno, ): if ( # Turn keyword`foo` expressions into keyword("foo") calls: - funcname and # have a keyword... - (last_token and last_token.type == 'name') and # we've seen nothing after the keyword... - token.type == 'template_string' # this is a template string + # have a keyword... + funcname + # and we've seen nothing after the keyword... 
+ and (last_token and last_token.type == 'name') + # and this is a template string + and token.type == 'template_string' ): message_lineno = token.lineno messages = [unquote_string(token.value)] call_stack = 0 token = Token('operator', ')', token.lineno) - if options.get('parse_template_string') and not funcname and token.type == 'template_string': - yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno) + if ( + options.get('parse_template_string') + and not funcname + and token.type == 'template_string' + ): + yield from parse_template_string( + token.value, + keywords, + comment_tags, + options, + token.lineno, + ) elif token.type == 'operator' and token.value == '(': if funcname: @@ -715,8 +793,7 @@ def extract_javascript( elif call_stack == -1 and token.type == 'linecomment': value = token.value[2:].strip() - if translator_comments and \ - translator_comments[-1][0] == token.lineno - 1: + if translator_comments and translator_comments[-1][0] == token.lineno - 1: translator_comments.append((token.lineno, value)) continue @@ -736,8 +813,7 @@ def extract_javascript( lines[0] = lines[0].strip() lines[1:] = dedent('\n'.join(lines[1:])).splitlines() for offset, line in enumerate(lines): - translator_comments.append((token.lineno + offset, - line)) + translator_comments.append((token.lineno + offset, line)) break elif funcname and call_stack == 0: @@ -753,13 +829,16 @@ def extract_javascript( # Comments don't apply unless they immediately precede the # message - if translator_comments and \ - translator_comments[-1][0] < message_lineno - 1: + if translator_comments and translator_comments[-1][0] < message_lineno - 1: translator_comments = [] if messages is not None: - yield (message_lineno, funcname, messages, - [comment[1] for comment in translator_comments]) + yield ( + message_lineno, + funcname, + messages, + [comment[1] for comment in translator_comments], + ) funcname = message_lineno = last_argument = None concatenate_next 
= False @@ -786,17 +865,22 @@ def extract_javascript( elif token.value == '+': concatenate_next = True - elif call_stack > 0 and token.type == 'operator' \ - and token.value == ')': + elif call_stack > 0 and token.type == 'operator' and token.value == ')': call_stack -= 1 elif funcname and call_stack == -1: funcname = None - elif call_stack == -1 and token.type == 'name' and \ - token.value in keywords and \ - (last_token is None or last_token.type != 'name' or - last_token.value != 'function'): + elif ( + call_stack == -1 + and token.type == 'name' + and token.value in keywords + and ( + last_token is None + or last_token.type != 'name' + or last_token.value != 'function' + ) + ): funcname = token.value last_token = token @@ -820,6 +904,7 @@ def parse_template_string( :param lineno: starting line number (optional) """ from babel.messages.jslexer import line_re + prev_character = None level = 0 inside_str = False @@ -839,7 +924,13 @@ def parse_template_string( if level == 0 and expression_contents: expression_contents = expression_contents[0:-1] fake_file_obj = io.BytesIO(expression_contents.encode()) - yield from extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno) + yield from extract_javascript( + fake_file_obj, + keywords, + comment_tags, + options, + lineno, + ) lineno += len(line_re.findall(expression_contents)) expression_contents = '' prev_character = character diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 29e5a2aa2..f63dd9ded 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1,11 +1,11 @@ """ - babel.messages.frontend - ~~~~~~~~~~~~~~~~~~~~~~~ +babel.messages.frontend +~~~~~~~~~~~~~~~~~~~~~~~ - Frontends for the message extraction functionality. +Frontends for the message extraction functionality. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. 
""" from __future__ import annotations @@ -15,6 +15,7 @@ import logging import optparse import os +import pathlib import re import shutil import sys @@ -22,7 +23,7 @@ import warnings from configparser import RawConfigParser from io import StringIO -from typing import BinaryIO, Iterable, Literal +from typing import Any, BinaryIO, Iterable, Literal from babel import Locale, localedata from babel import __version__ as VERSION @@ -173,7 +174,7 @@ class CompileCatalog(CommandMixin): 'also include fuzzy translations'), ('statistics', None, 'print statistics about translations'), - ] + ] # fmt: skip boolean_options = ['use-fuzzy', 'statistics'] def initialize_options(self): @@ -199,46 +200,38 @@ def run(self): n_errors += len(errors) if n_errors: self.log.error('%d errors encountered.', n_errors) - return (1 if n_errors else 0) - - def _run_domain(self, domain): - po_files = [] - mo_files = [] + return 1 if n_errors else 0 + def _get_po_mo_triples(self, domain: str): if not self.input_file: + dir_path = pathlib.Path(self.directory) if self.locale: - po_files.append((self.locale, - os.path.join(self.directory, self.locale, - 'LC_MESSAGES', - f"{domain}.po"))) - mo_files.append(os.path.join(self.directory, self.locale, - 'LC_MESSAGES', - f"{domain}.mo")) + lc_messages_path = dir_path / self.locale / "LC_MESSAGES" + po_file = lc_messages_path / f"{domain}.po" + yield self.locale, po_file, po_file.with_suffix(".mo") else: - for locale in os.listdir(self.directory): - po_file = os.path.join(self.directory, locale, - 'LC_MESSAGES', f"{domain}.po") - if os.path.exists(po_file): - po_files.append((locale, po_file)) - mo_files.append(os.path.join(self.directory, locale, - 'LC_MESSAGES', - f"{domain}.mo")) + for locale_path in dir_path.iterdir(): + po_file = locale_path / "LC_MESSAGES" / f"{domain}.po" + if po_file.exists(): + yield locale_path.name, po_file, po_file.with_suffix(".mo") else: - po_files.append((self.locale, self.input_file)) + po_file = pathlib.Path(self.input_file) 
if self.output_file: - mo_files.append(self.output_file) + mo_file = pathlib.Path(self.output_file) else: - mo_files.append(os.path.join(self.directory, self.locale, - 'LC_MESSAGES', - f"{domain}.mo")) + mo_file = ( + pathlib.Path(self.directory) / self.locale / "LC_MESSAGES" / f"{domain}.mo" + ) + yield self.locale, po_file, mo_file - if not po_files: - raise OptionError('no message catalogs found') + def _run_domain(self, domain): + locale_po_mo_triples = list(self._get_po_mo_triples(domain)) + if not locale_po_mo_triples: + raise OptionError(f'no message catalogs found for domain {domain!r}') catalogs_and_errors = {} - for idx, (locale, po_file) in enumerate(po_files): - mo_file = mo_files[idx] + for locale, po_file, mo_file in locale_po_mo_triples: with open(po_file, 'rb') as infile: catalog = read_po(infile, locale) @@ -252,7 +245,10 @@ def _run_domain(self, domain): percentage = translated * 100 // len(catalog) self.log.info( '%d of %d messages (%d%%) translated in %s', - translated, len(catalog), percentage, po_file, + translated, + len(catalog), + percentage, + po_file, ) if catalog.fuzzy and not self.use_fuzzy: @@ -262,9 +258,7 @@ def _run_domain(self, domain): catalogs_and_errors[catalog] = catalog_errors = list(catalog.check()) for message, errors in catalog_errors: for error in errors: - self.log.error( - 'error: %s:%d: %s', po_file, message.lineno, error, - ) + self.log.error('error: %s:%d: %s', po_file, message.lineno, error) self.log.info('compiling catalog %s to %s', po_file, mo_file) @@ -282,9 +276,7 @@ def _make_directory_filter(ignore_patterns): def cli_directory_filter(dirname): basename = os.path.basename(dirname) return not any( - fnmatch.fnmatch(basename, ignore_pattern) - for ignore_pattern - in ignore_patterns + fnmatch.fnmatch(basename, ignore_pattern) for ignore_pattern in ignore_patterns ) return cli_directory_filter @@ -347,10 +339,15 @@ class ExtractMessages(CommandMixin): 'header comment for the catalog'), ('last-translator=', None, 
'set the name and email of the last translator in output'), - ] + ] # fmt: skip boolean_options = [ - 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap', - 'sort-output', 'sort-by-file', 'strip-comments', + 'no-default-keywords', + 'no-location', + 'omit-header', + 'no-wrap', + 'sort-output', + 'sort-by-file', + 'strip-comments', ] as_args = 'input-paths' multiple_value_options = ( @@ -435,10 +432,9 @@ def finalize_options(self): if isinstance(self.input_paths, str): self.input_paths = re.split(r',\s*', self.input_paths) elif self.distribution is not None: - self.input_paths = dict.fromkeys([ - k.split('.', 1)[0] - for k in (self.distribution.packages or ()) - ]).keys() + self.input_paths = list( + {k.split('.', 1)[0] for k in (self.distribution.packages or ())}, + ) else: self.input_paths = [] @@ -493,31 +489,40 @@ def callback(filename: str, method: str, options: dict): def run(self): mappings = self._get_mappings() with open(self.output_file, 'wb') as outfile: - catalog = Catalog(project=self.project, - version=self.version, - msgid_bugs_address=self.msgid_bugs_address, - copyright_holder=self.copyright_holder, - charset=self.charset, - header_comment=(self.header_comment or DEFAULT_HEADER), - last_translator=self.last_translator) + catalog = Catalog( + project=self.project, + version=self.version, + msgid_bugs_address=self.msgid_bugs_address, + copyright_holder=self.copyright_holder, + charset=self.charset, + header_comment=(self.header_comment or DEFAULT_HEADER), + last_translator=self.last_translator, + ) for path, method_map, options_map in mappings: callback = self._build_callback(path) if os.path.isfile(path): current_dir = os.getcwd() extracted = check_and_call_extract_file( - path, method_map, options_map, - callback, self.keywords, self.add_comments, - self.strip_comments, current_dir, + path, + method_map, + options_map, + callback=callback, + comment_tags=self.add_comments, + dirpath=current_dir, + keywords=self.keywords, + 
strip_comment_tags=self.strip_comments, ) else: extracted = extract_from_dir( - path, method_map, options_map, - keywords=self.keywords, - comment_tags=self.add_comments, + path, + method_map, + options_map, callback=callback, - strip_comment_tags=self.strip_comments, + comment_tags=self.add_comments, directory_filter=self.directory_filter, + keywords=self.keywords, + strip_comment_tags=self.strip_comments, ) for filename, lineno, message, comments, context in extracted: if os.path.isfile(path): @@ -525,16 +530,25 @@ def run(self): else: filepath = os.path.normpath(os.path.join(path, filename)) - catalog.add(message, None, [(filepath, lineno)], - auto_comments=comments, context=context) + catalog.add( + message, + None, + [(filepath, lineno)], + auto_comments=comments, + context=context, + ) self.log.info('writing PO template file to %s', self.output_file) - write_po(outfile, catalog, width=self.width, - no_location=self.no_location, - omit_header=self.omit_header, - sort_output=self.sort_output, - sort_by_file=self.sort_by_file, - include_lineno=self.include_lineno) + write_po( + outfile, + catalog, + include_lineno=self.include_lineno, + no_location=self.no_location, + omit_header=self.omit_header, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, + width=self.width, + ) def _get_mappings(self): mappings = [] @@ -554,7 +568,10 @@ def _get_mappings(self): ) else: with open(self.mapping_file) as fileobj: - method_map, options_map = parse_mapping_cfg(fileobj, filename=self.mapping_file) + method_map, options_map = parse_mapping_cfg( + fileobj, + filename=self.mapping_file, + ) for path in self.input_paths: mappings.append((path, method_map, options_map)) @@ -567,7 +584,7 @@ def _get_mappings(self): method_map, options_map = [], {} for pattern, method, options in mapping: method_map.append((pattern, method)) - options_map[pattern] = options or {} + options_map[pattern] = _parse_string_options(options or {}) mappings.append((path, method_map, 
options_map)) else: @@ -577,6 +594,23 @@ def _get_mappings(self): return mappings +def _init_catalog(*, input_file, output_file, locale: Locale, width: int) -> None: + with open(input_file, 'rb') as infile: + # Although reading from the catalog template, read_po must be fed + # the locale in order to correctly calculate plurals + catalog = read_po(infile, locale=locale) + + catalog.locale = locale + catalog.revision_date = datetime.datetime.now(LOCALTZ) + catalog.fuzzy = False + + if dirname := os.path.dirname(output_file): + os.makedirs(dirname, exist_ok=True) + + with open(output_file, 'wb') as outfile: + write_po(outfile, catalog, width=width) + + class InitCatalog(CommandMixin): description = 'create a new catalog based on a POT file' user_options = [ @@ -596,7 +630,7 @@ class InitCatalog(CommandMixin): ('no-wrap', None, 'do not break long message lines, longer than the output line width, ' 'into several lines'), - ] + ] # fmt: skip boolean_options = ['no-wrap'] def initialize_options(self): @@ -622,11 +656,9 @@ def finalize_options(self): if not self.output_file and not self.output_dir: raise OptionError('you must specify the output directory') if not self.output_file: - self.output_file = os.path.join(self.output_dir, self.locale, - 'LC_MESSAGES', f"{self.domain}.po") + lc_messages_path = pathlib.Path(self.output_dir) / self.locale / "LC_MESSAGES" + self.output_file = str(lc_messages_path / f"{self.domain}.po") - if not os.path.exists(os.path.dirname(self.output_file)): - os.makedirs(os.path.dirname(self.output_file)) if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") if not self.no_wrap and not self.width: @@ -636,20 +668,16 @@ def finalize_options(self): def run(self): self.log.info( - 'creating catalog %s based on %s', self.output_file, self.input_file, + 'creating catalog %s based on %s', + self.output_file, + self.input_file, + ) + _init_catalog( + input_file=self.input_file, + 
output_file=self.output_file, + locale=self._locale, + width=self.width, ) - - with open(self.input_file, 'rb') as infile: - # Although reading from the catalog template, read_po must be fed - # the locale in order to correctly calculate plurals - catalog = read_po(infile, locale=self.locale) - - catalog.locale = self._locale - catalog.revision_date = datetime.datetime.now(LOCALTZ) - catalog.fuzzy = False - - with open(self.output_file, 'wb') as outfile: - write_po(outfile, catalog, width=self.width) class UpdateCatalog(CommandMixin): @@ -689,11 +717,17 @@ class UpdateCatalog(CommandMixin): 'would be updated'), ('ignore-pot-creation-date=', None, 'ignore changes to POT-Creation-Date when updating or checking'), - ] + ] # fmt: skip boolean_options = [ - 'omit-header', 'no-wrap', 'ignore-obsolete', 'init-missing', - 'no-fuzzy-matching', 'previous', 'update-header-comment', - 'check', 'ignore-pot-creation-date', + 'omit-header', + 'no-wrap', + 'ignore-obsolete', + 'init-missing', + 'no-fuzzy-matching', + 'previous', + 'update-header-comment', + 'check', + 'ignore-pot-creation-date', ] def initialize_options(self): @@ -724,8 +758,7 @@ def finalize_options(self): if self.init_missing: if not self.locale: raise OptionError( - 'you must specify the locale for ' - 'the init-missing option to work', + 'you must specify the locale for the init-missing option to work', ) try: @@ -744,75 +777,77 @@ def finalize_options(self): if self.no_fuzzy_matching and self.previous: self.previous = False - def run(self): - check_status = {} - po_files = [] + def _get_locale_po_file_tuples(self): if not self.output_file: + output_path = pathlib.Path(self.output_dir) if self.locale: - po_files.append((self.locale, - os.path.join(self.output_dir, self.locale, - 'LC_MESSAGES', - f"{self.domain}.po"))) + lc_messages_path = output_path / self.locale / "LC_MESSAGES" + yield self.locale, str(lc_messages_path / f"{self.domain}.po") else: - for locale in os.listdir(self.output_dir): - po_file = 
os.path.join(self.output_dir, locale, - 'LC_MESSAGES', - f"{self.domain}.po") - if os.path.exists(po_file): - po_files.append((locale, po_file)) + for locale_path in output_path.iterdir(): + po_file = locale_path / "LC_MESSAGES" / f"{self.domain}.po" + if po_file.exists(): + yield locale_path.stem, po_file else: - po_files.append((self.locale, self.output_file)) - - if not po_files: - raise OptionError('no message catalogs found') + yield self.locale, self.output_file + def run(self): domain = self.domain if not domain: domain = os.path.splitext(os.path.basename(self.input_file))[0] + check_status = {} + locale_po_file_tuples = list(self._get_locale_po_file_tuples()) + + if not locale_po_file_tuples: + raise OptionError(f'no message catalogs found for domain {domain!r}') + with open(self.input_file, 'rb') as infile: template = read_po(infile) - for locale, filename in po_files: + for locale, filename in locale_po_file_tuples: if self.init_missing and not os.path.exists(filename): if self.check: check_status[filename] = False continue self.log.info( - 'creating catalog %s based on %s', filename, self.input_file, + 'creating catalog %s based on %s', + filename, + self.input_file, ) - with open(self.input_file, 'rb') as infile: - # Although reading from the catalog template, read_po must - # be fed the locale in order to correctly calculate plurals - catalog = read_po(infile, locale=self.locale) - - catalog.locale = self._locale - catalog.revision_date = datetime.datetime.now(LOCALTZ) - catalog.fuzzy = False - - with open(filename, 'wb') as outfile: - write_po(outfile, catalog) + _init_catalog( + input_file=self.input_file, + output_file=filename, + locale=self._locale, + width=self.width, + ) self.log.info('updating catalog %s based on %s', filename, self.input_file) with open(filename, 'rb') as infile: catalog = read_po(infile, locale=locale, domain=domain) catalog.update( - template, self.no_fuzzy_matching, + template, + no_fuzzy_matching=self.no_fuzzy_matching, 
update_header_comment=self.update_header_comment, update_creation_date=not self.ignore_pot_creation_date, ) - tmpname = os.path.join(os.path.dirname(filename), - tempfile.gettempprefix() + - os.path.basename(filename)) + tmpname = os.path.join( + os.path.dirname(filename), + tempfile.gettempprefix() + os.path.basename(filename), + ) try: with open(tmpname, 'wb') as tmpfile: - write_po(tmpfile, catalog, - omit_header=self.omit_header, - ignore_obsolete=self.ignore_obsolete, - include_previous=self.previous, width=self.width) + write_po( + tmpfile, + catalog, + ignore_obsolete=self.ignore_obsolete, + include_previous=self.previous, + omit_header=self.omit_header, + width=self.width, + ) except Exception: os.remove(tmpname) raise @@ -886,19 +921,34 @@ def run(self, argv=None): if argv is None: argv = sys.argv - self.parser = optparse.OptionParser(usage=self.usage % ('command', '[args]'), - version=self.version) + self.parser = optparse.OptionParser( + usage=self.usage % ('command', '[args]'), + version=self.version, + ) self.parser.disable_interspersed_args() self.parser.print_help = self._help - self.parser.add_option('--list-locales', dest='list_locales', - action='store_true', - help="print all known locales and exit") - self.parser.add_option('-v', '--verbose', action='store_const', - dest='loglevel', const=logging.DEBUG, - help='print as much as possible') - self.parser.add_option('-q', '--quiet', action='store_const', - dest='loglevel', const=logging.ERROR, - help='print as little as possible') + self.parser.add_option( + "--list-locales", + dest="list_locales", + action="store_true", + help="print all known locales and exit", + ) + self.parser.add_option( + "-v", + "--verbose", + action="store_const", + dest="loglevel", + const=logging.DEBUG, + help="print as much as possible", + ) + self.parser.add_option( + "-q", + "--quiet", + action="store_const", + dest="loglevel", + const=logging.ERROR, + help="print as little as possible", + ) 
self.parser.set_defaults(list_locales=False, loglevel=logging.INFO) options, args = self.parser.parse_args(argv[1:]) @@ -913,8 +963,10 @@ def run(self, argv=None): return 0 if not args: - self.parser.error('no valid command or option passed. ' - 'Try the -h/--help option for more information.') + self.parser.error( + "no valid command or option passed. " + "Try the -h/--help option for more information.", + ) cmdname = args[0] if cmdname not in self.commands: @@ -1027,7 +1079,7 @@ def parse_mapping_cfg(fileobj, filename=None): else: method, pattern = (part.strip() for part in section.split(':', 1)) method_map.append((pattern, method)) - options_map[pattern] = dict(parser.items(section)) + options_map[pattern] = _parse_string_options(dict(parser.items(section))) if extractors: for idx, (pattern, method) in enumerate(method_map): @@ -1038,6 +1090,25 @@ def parse_mapping_cfg(fileobj, filename=None): return method_map, options_map +def _parse_string_options(options: dict[str, str]) -> dict[str, Any]: + """ + Parse string-formatted options from a mapping configuration. + + The `keywords` and `add_comments` options are parsed into a canonical + internal format, so they can be merged with global keywords/comment tags + during extraction. 
+ """ + options: dict[str, Any] = options.copy() + + if keywords_val := options.pop("keywords", None): + options['keywords'] = parse_keywords(listify_value(keywords_val)) + + if comments_val := options.pop("add_comments", None): + options['add_comments'] = listify_value(comments_val) + + return options + + def _parse_config_object(config: dict, *, filename="(unknown)"): extractors = {} method_map = [] @@ -1045,40 +1116,78 @@ def _parse_config_object(config: dict, *, filename="(unknown)"): extractors_read = config.get("extractors", {}) if not isinstance(extractors_read, dict): - raise ConfigurationError(f"{filename}: extractors: Expected a dictionary, got {type(extractors_read)!r}") + raise ConfigurationError( + f"{filename}: extractors: Expected a dictionary, got {type(extractors_read)!r}", + ) for method, callable_spec in extractors_read.items(): if not isinstance(method, str): # Impossible via TOML, but could happen with a custom object. - raise ConfigurationError(f"{filename}: extractors: Extraction method must be a string, got {method!r}") + raise ConfigurationError( + f"{filename}: extractors: Extraction method must be a string, got {method!r}", + ) if not isinstance(callable_spec, str): - raise ConfigurationError(f"{filename}: extractors: Callable specification must be a string, got {callable_spec!r}") + raise ConfigurationError( + f"{filename}: extractors: Callable specification must be a string, got {callable_spec!r}", + ) extractors[method] = callable_spec if "mapping" in config: - raise ConfigurationError(f"{filename}: 'mapping' is not a valid key, did you mean 'mappings'?") + raise ConfigurationError( + f"{filename}: 'mapping' is not a valid key, did you mean 'mappings'?", + ) mappings_read = config.get("mappings", []) if not isinstance(mappings_read, list): - raise ConfigurationError(f"{filename}: mappings: Expected a list, got {type(mappings_read)!r}") + raise ConfigurationError( + f"{filename}: mappings: Expected a list, got {type(mappings_read)!r}", 
+ ) for idx, entry in enumerate(mappings_read): if not isinstance(entry, dict): - raise ConfigurationError(f"{filename}: mappings[{idx}]: Expected a dictionary, got {type(entry)!r}") + raise ConfigurationError( + f"{filename}: mappings[{idx}]: Expected a dictionary, got {type(entry)!r}", + ) entry = entry.copy() method = entry.pop("method", None) if not isinstance(method, str): - raise ConfigurationError(f"{filename}: mappings[{idx}]: 'method' must be a string, got {method!r}") + raise ConfigurationError( + f"{filename}: mappings[{idx}]: 'method' must be a string, got {method!r}", + ) method = extractors.get(method, method) # Map the extractor name to the callable now pattern = entry.pop("pattern", None) if not isinstance(pattern, (list, str)): - raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' must be a list or a string, got {pattern!r}") + raise ConfigurationError( + f"{filename}: mappings[{idx}]: 'pattern' must be a list or a string, got {pattern!r}", + ) if not isinstance(pattern, list): pattern = [pattern] + if keywords_val := entry.pop("keywords", None): + if isinstance(keywords_val, str): + entry["keywords"] = parse_keywords(listify_value(keywords_val)) + elif isinstance(keywords_val, list): + entry["keywords"] = parse_keywords(keywords_val) + else: + raise ConfigurationError( + f"{filename}: mappings[{idx}]: 'keywords' must be a string or list, got {keywords_val!r}", + ) + + if comments_val := entry.pop("add_comments", None): + if isinstance(comments_val, str): + entry["add_comments"] = [comments_val] + elif isinstance(comments_val, list): + entry["add_comments"] = comments_val + else: + raise ConfigurationError( + f"{filename}: mappings[{idx}]: 'add_comments' must be a string or list, got {comments_val!r}", + ) + for pat in pattern: if not isinstance(pat, str): - raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' elements must be strings, got {pat!r}") + raise ConfigurationError( + f"{filename}: mappings[{idx}]: 'pattern' 
elements must be strings, got {pat!r}", + ) method_map.append((pat, method)) options_map[pat] = entry @@ -1115,11 +1224,15 @@ def _parse_mapping_toml( try: babel_data = parsed_data["tool"]["babel"] except (TypeError, KeyError) as e: - raise ConfigurationError(f"{filename}: No 'tool.babel' section found in file") from e + raise ConfigurationError( + f"{filename}: No 'tool.babel' section found in file", + ) from e elif style == "standalone": babel_data = parsed_data if "babel" in babel_data: - raise ConfigurationError(f"{filename}: 'babel' should not be present in a stand-alone configuration file") + raise ConfigurationError( + f"{filename}: 'babel' should not be present in a stand-alone configuration file", + ) else: # pragma: no cover raise ValueError(f"Unknown TOML style {style!r}") @@ -1190,7 +1303,13 @@ def parse_keywords(strings: Iterable[str] = ()): def __getattr__(name: str): # Re-exports for backwards compatibility; # `setuptools_frontend` is the canonical import location. - if name in {'check_message_extractors', 'compile_catalog', 'extract_messages', 'init_catalog', 'update_catalog'}: + if name in { + 'check_message_extractors', + 'compile_catalog', + 'extract_messages', + 'init_catalog', + 'update_catalog', + }: from babel.messages import setuptools_frontend return getattr(setuptools_frontend, name) diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py index 5fc4956fd..d751b58f7 100644 --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -1,13 +1,14 @@ """ - babel.messages.jslexer - ~~~~~~~~~~~~~~~~~~~~~~ +babel.messages.jslexer +~~~~~~~~~~~~~~~~~~~~~~ - A simple JavaScript 1.5 lexer which is used for the JavaScript - extractor. +A simple JavaScript 1.5 lexer which is used for the JavaScript +extractor. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. 
""" + from __future__ import annotations import re @@ -19,7 +20,7 @@ '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':', -], key=len, reverse=True) +], key=len, reverse=True) # fmt: skip escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} @@ -53,16 +54,20 @@ class Token(NamedTuple): (0x[a-fA-F0-9]+) )''', re.VERBOSE)), ('jsx_tag', re.compile(r'(?:\s]+|/>)', re.I)), # May be mangled in `get_rules` - ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))), + ('operator', re.compile(r'(%s)' % '|'.join(re.escape(op) for op in operators))), ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)), ('string', re.compile(r'''( '(?:[^'\\]*(?:\\.[^'\\]*)*)' | "(?:[^"\\]*(?:\\.[^"\\]*)*)" )''', re.VERBOSE | re.DOTALL)), -] +] # fmt: skip -def get_rules(jsx: bool, dotted: bool, template_string: bool) -> list[tuple[str | None, re.Pattern[str]]]: +def get_rules( + jsx: bool, + dotted: bool, + template_string: bool, +) -> list[tuple[str | None, re.Pattern[str]]]: """ Get a tokenization rule list given the passed syntax options. @@ -95,8 +100,9 @@ def unquote_string(string: str) -> str: """Unquote a string with JavaScript rules. The string has to start with string delimiters (``'``, ``"`` or the back-tick/grave accent (for template strings).) 
""" - assert string and string[0] == string[-1] and string[0] in '"\'`', \ + assert string and string[0] == string[-1] and string[0] in '"\'`', ( 'string provided is not properly delimited' + ) string = line_join_re.sub('\\1', string[1:-1]) result: list[str] = [] add = result.append @@ -158,7 +164,13 @@ def unquote_string(string: str) -> str: return ''.join(result) -def tokenize(source: str, jsx: bool = True, dotted: bool = True, template_string: bool = True, lineno: int = 1) -> Generator[Token, None, None]: +def tokenize( + source: str, + jsx: bool = True, + dotted: bool = True, + template_string: bool = True, + lineno: int = 1, +) -> Generator[Token, None, None]: """ Tokenize JavaScript/JSX source. Returns a generator of tokens. diff --git a/babel/messages/mofile.py b/babel/messages/mofile.py index 3c9fefc4a..1a6fedfcb 100644 --- a/babel/messages/mofile.py +++ b/babel/messages/mofile.py @@ -1,12 +1,13 @@ """ - babel.messages.mofile - ~~~~~~~~~~~~~~~~~~~~~ +babel.messages.mofile +~~~~~~~~~~~~~~~~~~~~~ - Writing of files in the ``gettext`` MO (machine object) format. +Writing of files in the ``gettext`` MO (machine object) format. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. 
""" + from __future__ import annotations import array @@ -18,8 +19,8 @@ if TYPE_CHECKING: from _typeshed import SupportsRead, SupportsWrite -LE_MAGIC: int = 0x950412de -BE_MAGIC: int = 0xde120495 +LE_MAGIC: int = 0x950412DE +BE_MAGIC: int = 0xDE120495 def read_mo(fileobj: SupportsRead[bytes]) -> Catalog: @@ -56,9 +57,9 @@ def read_mo(fileobj: SupportsRead[bytes]) -> Catalog: # Now put all messages from the .mo file buffer into the catalog # dictionary for _i in range(msgcount): - mlen, moff = unpack(ii, buf[origidx:origidx + 8]) + mlen, moff = unpack(ii, buf[origidx : origidx + 8]) mend = moff + mlen - tlen, toff = unpack(ii, buf[transidx:transidx + 8]) + tlen, toff = unpack(ii, buf[transidx : transidx + 8]) tend = toff + tlen if mend < buflen and tend < buflen: msg = buf[moff:mend] @@ -116,7 +117,7 @@ def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = >>> catalog = Catalog(locale='en_US') >>> catalog.add('foo', 'Voh') - >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz')) + >>> catalog.add(('bar', 'baz'), ('Bahr', 'Batz')) >>> catalog.add('fuz', 'Futz', flags=['fuzzy']) @@ -133,19 +134,19 @@ def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = ... translations.ugettext = translations.gettext ... 
translations.ungettext = translations.ngettext >>> translations.ugettext('foo') - u'Voh' + 'Voh' >>> translations.ungettext('bar', 'baz', 1) - u'Bahr' + 'Bahr' >>> translations.ungettext('bar', 'baz', 2) - u'Batz' + 'Batz' >>> translations.ugettext('fuz') - u'fuz' + 'fuz' >>> translations.ugettext('Fizz') - u'Fizz' + 'Fizz' >>> translations.ugettext('Fuzz') - u'Fuzz' + 'Fuzz' >>> translations.ugettext('Fuzzes') - u'Fuzzes' + 'Fuzzes' :param fileobj: the file-like object to write to :param catalog: the `Catalog` instance @@ -153,8 +154,7 @@ def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = in the output """ messages = list(catalog) - messages[1:] = [m for m in messages[1:] - if m.string and (use_fuzzy or not m.fuzzy)] + messages[1:] = [m for m in messages[1:] if m.string and (use_fuzzy or not m.fuzzy)] messages.sort() ids = strs = b'' @@ -164,24 +164,19 @@ def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = # For each string, we need size and file offset. Each string is NUL # terminated; the NUL does not count into the size. 
if message.pluralizable: - msgid = b'\x00'.join([ - msgid.encode(catalog.charset) for msgid in message.id - ]) + msgid = b'\x00'.join(msgid.encode(catalog.charset) for msgid in message.id) msgstrs = [] for idx, string in enumerate(message.string): if not string: msgstrs.append(message.id[min(int(idx), 1)]) else: msgstrs.append(string) - msgstr = b'\x00'.join([ - msgstr.encode(catalog.charset) for msgstr in msgstrs - ]) + msgstr = b'\x00'.join(msgstr.encode(catalog.charset) for msgstr in msgstrs) else: msgid = message.id.encode(catalog.charset) msgstr = message.string.encode(catalog.charset) if message.context: - msgid = b'\x04'.join([message.context.encode(catalog.charset), - msgid]) + msgid = b'\x04'.join([message.context.encode(catalog.charset), msgid]) offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) ids += msgid + b'\x00' strs += msgstr + b'\x00' @@ -200,11 +195,15 @@ def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = voffsets += [l2, o2 + valuestart] offsets = koffsets + voffsets - fileobj.write(struct.pack('Iiiiiii', - LE_MAGIC, # magic - 0, # version - len(messages), # number of entries - 7 * 4, # start of key index - 7 * 4 + len(messages) * 8, # start of value index - 0, 0, # size and offset of hash table - ) + array.array.tobytes(array.array("i", offsets)) + ids + strs) + header = struct.pack( + 'Iiiiiii', + LE_MAGIC, # magic + 0, # version + len(messages), # number of entries + 7 * 4, # start of key index + 7 * 4 + len(messages) * 8, # start of value index + 0, + 0, # size and offset of hash table + ) + + fileobj.write(header + array.array.tobytes(array.array("i", offsets)) + ids + strs) diff --git a/babel/messages/plurals.py b/babel/messages/plurals.py index da336a7ba..a66fdfe41 100644 --- a/babel/messages/plurals.py +++ b/babel/messages/plurals.py @@ -1,12 +1,13 @@ """ - babel.messages.plurals - ~~~~~~~~~~~~~~~~~~~~~~ +babel.messages.plurals +~~~~~~~~~~~~~~~~~~~~~~ - Plural form definitions. 
+Plural form definitions. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ + from __future__ import annotations from babel.core import Locale, default_locale @@ -197,7 +198,7 @@ 'xh': (2, '(n != 1)'), # Chinese - From Pootle's PO's (modified) 'zh': (1, '0'), -} +} # fmt: skip DEFAULT_PLURAL: tuple[int, str] = (2, '(n != 1)') diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index 2bb0c7741..b9678a924 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -1,13 +1,14 @@ """ - babel.messages.pofile - ~~~~~~~~~~~~~~~~~~~~~ +babel.messages.pofile +~~~~~~~~~~~~~~~~~~~~~ - Reading and writing of files in the ``gettext`` PO (portable object) - format. +Reading and writing of files in the ``gettext`` PO (portable object) +format. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ + from __future__ import annotations import os @@ -17,7 +18,7 @@ from babel.core import Locale from babel.messages.catalog import Catalog, Message -from babel.util import TextWrapper, _cmp +from babel.util import TextWrapper if TYPE_CHECKING: from typing import IO, AnyStr @@ -25,6 +26,9 @@ from _typeshed import SupportsWrite +_unescape_re = re.compile(r'\\([\\trn"])') + + def unescape(string: str) -> str: r"""Reverse `escape` the given string. 
@@ -35,6 +39,7 @@ def unescape(string: str) -> str: :param string: the string to unescape """ + def replace_escapes(match): m = match.group(1) if m == 'n': @@ -45,7 +50,10 @@ def replace_escapes(match): return '\r' # m is \ or " return m - return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1]) + + if "\\" not in string: # Fast path: there's nothing to unescape + return string[1:-1] + return _unescape_re.sub(replace_escapes, string[1:-1]) def denormalize(string: str) -> str: @@ -73,8 +81,7 @@ def denormalize(string: str) -> str: escaped_lines = string.splitlines() if string.startswith('""'): escaped_lines = escaped_lines[1:] - lines = map(unescape, escaped_lines) - return ''.join(lines) + return ''.join(map(unescape, escaped_lines)) else: return unescape(string) @@ -95,14 +102,18 @@ def _extract_locations(line: str) -> list[str]: for c in line: if c == "\u2068": if in_filename: - raise ValueError("location comment contains more First Strong Isolate " - "characters, than Pop Directional Isolate characters") + raise ValueError( + "location comment contains more First Strong Isolate " + "characters, than Pop Directional Isolate characters", + ) in_filename = True continue elif c == "\u2069": if not in_filename: - raise ValueError("location comment contains more Pop Directional Isolate " - "characters, than First Strong Isolate characters") + raise ValueError( + "location comment contains more Pop Directional Isolate " + "characters, than First Strong Isolate characters", + ) in_filename = False continue elif c == " ": @@ -116,8 +127,10 @@ def _extract_locations(line: str) -> list[str]: else: if location: if in_filename: - raise ValueError("location comment contains more First Strong Isolate " - "characters, than Pop Directional Isolate characters") + raise ValueError( + "location comment contains more First Strong Isolate " + "characters, than Pop Directional Isolate characters", + ) locations.append(location) return locations @@ -133,48 +146,14 @@ def 
__init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> No self.lineno = lineno -class _NormalizedString: - +class _NormalizedString(list): def __init__(self, *args: str) -> None: - self._strs: list[str] = [] - for arg in args: - self.append(arg) - - def append(self, s: str) -> None: - self._strs.append(s.strip()) + super().__init__(map(str.strip, args)) def denormalize(self) -> str: - return ''.join(map(unescape, self._strs)) - - def __bool__(self) -> bool: - return bool(self._strs) - - def __repr__(self) -> str: - return os.linesep.join(self._strs) - - def __cmp__(self, other: object) -> int: - if not other: - return 1 - - return _cmp(str(self), str(other)) - - def __gt__(self, other: object) -> bool: - return self.__cmp__(other) > 0 - - def __lt__(self, other: object) -> bool: - return self.__cmp__(other) < 0 - - def __ge__(self, other: object) -> bool: - return self.__cmp__(other) >= 0 - - def __le__(self, other: object) -> bool: - return self.__cmp__(other) <= 0 - - def __eq__(self, other: object) -> bool: - return self.__cmp__(other) == 0 - - def __ne__(self, other: object) -> bool: - return self.__cmp__(other) != 0 + if not self: + return "" + return ''.join(map(unescape, self)) class PoFileParser: @@ -184,14 +163,12 @@ class PoFileParser: See `read_po` for simple cases. """ - _keywords = [ - 'msgid', - 'msgstr', - 'msgctxt', - 'msgid_plural', - ] - - def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None: + def __init__( + self, + catalog: Catalog, + ignore_obsolete: bool = False, + abort_invalid: bool = False, + ) -> None: self.catalog = catalog self.ignore_obsolete = ignore_obsolete self.counter = 0 @@ -217,25 +194,33 @@ def _add_message(self) -> None: Add a message to the catalog based on the current parser state and clear the state ready to process the next message. 
""" - self.translations.sort() if len(self.messages) > 1: msgid = tuple(m.denormalize() for m in self.messages) - else: - msgid = self.messages[0].denormalize() - if isinstance(msgid, (list, tuple)): string = ['' for _ in range(self.catalog.num_plurals)] - for idx, translation in self.translations: + for idx, translation in sorted(self.translations): if idx >= self.catalog.num_plurals: - self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog") + self._invalid_pofile( + "", + self.offset, + "msg has more translations than num_plurals of catalog", + ) continue string[idx] = translation.denormalize() string = tuple(string) else: + msgid = self.messages[0].denormalize() string = self.translations[0][1].denormalize() msgctxt = self.context.denormalize() if self.context else None - message = Message(msgid, string, list(self.locations), set(self.flags), - self.auto_comments, self.user_comments, lineno=self.offset + 1, - context=msgctxt) + message = Message( + msgid, + string, + self.locations, + self.flags, + self.auto_comments, + self.user_comments, + lineno=self.offset + 1, + context=msgctxt, + ) if self.obsolete: if not self.ignore_obsolete: self.catalog.obsolete[self.catalog._key_for(msgid, msgctxt)] = message @@ -247,28 +232,24 @@ def _add_message(self) -> None: def _finish_current_message(self) -> None: if self.messages: if not self.translations: - self._invalid_pofile("", self.offset, f"missing msgstr for msgid '{self.messages[0].denormalize()}'") - self.translations.append([0, _NormalizedString("")]) + self._invalid_pofile( + "", + self.offset, + f"missing msgstr for msgid '{self.messages[0].denormalize()}'", + ) + self.translations.append([0, _NormalizedString()]) self._add_message() def _process_message_line(self, lineno, line, obsolete=False) -> None: - if line.startswith('"'): + if not line: + return + if line[0] == '"': self._process_string_continuation_line(line, lineno) else: self._process_keyword_line(lineno, line, 
obsolete) def _process_keyword_line(self, lineno, line, obsolete=False) -> None: - - for keyword in self._keywords: - try: - if line.startswith(keyword) and line[len(keyword)] in [' ', '[']: - arg = line[len(keyword):] - break - except IndexError: - self._invalid_pofile(line, lineno, "Keyword must be followed by a string") - else: - self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.") - return + keyword, _, arg = line.partition(' ') if keyword in ['msgid', 'msgctxt']: self._finish_current_message() @@ -284,19 +265,23 @@ def _process_keyword_line(self, lineno, line, obsolete=False) -> None: self.in_msgctxt = False self.in_msgid = True self.messages.append(_NormalizedString(arg)) + return + + if keyword == 'msgctxt': + self.in_msgctxt = True + self.context = _NormalizedString(arg) + return - elif keyword == 'msgstr': + if keyword == 'msgstr' or keyword.startswith('msgstr['): self.in_msgid = False self.in_msgstr = True - if arg.startswith('['): - idx, msg = arg[1:].split(']', 1) - self.translations.append([int(idx), _NormalizedString(msg)]) - else: - self.translations.append([0, _NormalizedString(arg)]) + kwarg, has_bracket, idxarg = keyword.partition('[') + idx = int(idxarg[:-1]) if has_bracket else 0 + s = _NormalizedString(arg) if arg != '""' else _NormalizedString() + self.translations.append([idx, s]) + return - elif keyword == 'msgctxt': - self.in_msgctxt = True - self.context = _NormalizedString(arg) + self._invalid_pofile(line, lineno, "Unknown or misformatted keyword") def _process_string_continuation_line(self, line, lineno) -> None: if self.in_msgid: @@ -306,51 +291,68 @@ def _process_string_continuation_line(self, line, lineno) -> None: elif self.in_msgctxt: s = self.context else: - self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt") + self._invalid_pofile( + line, + lineno, + "Got line starting with \" but not in msgid, msgstr or msgctxt", + ) return - s.append(line) + # For 
performance reasons, `NormalizedString` doesn't strip internally + s.append(line.strip()) def _process_comment(self, line) -> None: - self._finish_current_message() - if line[1:].startswith(':'): + prefix = line[:2] + if prefix == '#:': for location in _extract_locations(line[2:]): - pos = location.rfind(':') - if pos >= 0: + a, colon, b = location.rpartition(':') + if colon: try: - lineno = int(location[pos + 1:]) + self.locations.append((a, int(b))) except ValueError: continue - self.locations.append((location[:pos], lineno)) - else: + else: # No line number specified self.locations.append((location, None)) - elif line[1:].startswith(','): - for flag in line[2:].lstrip().split(','): - self.flags.append(flag.strip()) - elif line[1:].startswith('.'): + return + + if prefix == '#,': + self.flags.extend(flag.strip() for flag in line[2:].lstrip().split(',')) + return + + if prefix == '#.': # These are called auto-comments comment = line[2:].strip() if comment: # Just check that we're not adding empty comments self.auto_comments.append(comment) - else: - # These are called user comments - self.user_comments.append(line[1:].strip()) + return + + # These are called user comments + self.user_comments.append(line[1:].strip()) def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None: """ - Reads from the file-like object `fileobj` and adds any po file - units found in it to the `Catalog` supplied to the constructor. + Reads from the file-like object (or iterable of string-likes) `fileobj` + and adds any po file units found in it to the `Catalog` + supplied to the constructor. + + All of the items in the iterable must be the same type; either `str` + or `bytes` (decoded with the catalog charset), but not a mixture. 
""" + needs_decode = None for lineno, line in enumerate(fileobj): line = line.strip() - if not isinstance(line, str): - line = line.decode(self.catalog.charset) + if needs_decode is None: + # If we don't yet know whether we need to decode, + # let's find out now. + needs_decode = not isinstance(line, str) if not line: continue - if line.startswith('#'): - if line[1:].startswith('~'): + if needs_decode: + line = line.decode(self.catalog.charset) + if line[0] == '#': + if line[:2] == '#~': self._process_message_line(lineno, line[2:].lstrip(), obsolete=True) else: try: @@ -365,8 +367,8 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None: # No actual messages found, but there was some info in comments, from which # we'll construct an empty header message if not self.counter and (self.flags or self.user_comments or self.auto_comments): - self.messages.append(_NormalizedString('""')) - self.translations.append([0, _NormalizedString('""')]) + self.messages.append(_NormalizedString()) + self.translations.append([0, _NormalizedString()]) self._add_message() def _invalid_pofile(self, line, lineno, msg) -> None: @@ -412,12 +414,12 @@ def read_po( ... print((message.id, message.string)) ... print(' ', (message.locations, sorted(list(message.flags)))) ... print(' ', (message.user_comments, message.auto_comments)) - (u'foo %(name)s', u'quux %(name)s') - ([(u'main.py', 1)], [u'fuzzy', u'python-format']) + ('foo %(name)s', 'quux %(name)s') + ([('main.py', 1)], ['fuzzy', 'python-format']) ([], []) - ((u'bar', u'baz'), (u'bar', u'baaz')) - ([(u'main.py', 3)], []) - ([u'A user comment'], [u'An auto comment']) + (('bar', 'baz'), ('bar', 'baaz')) + ([('main.py', 3)], []) + (['A user comment'], ['An auto comment']) .. versionadded:: 1.0 Added support for explicit charset argument. 
@@ -437,11 +439,13 @@ def read_po( return catalog -WORD_SEP = re.compile('(' - r'\s+|' # any whitespace - r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words - r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash - ')') +WORD_SEP = re.compile( + '(' + r'\s+|' # any whitespace + r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words + r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash + ')', +) def escape(string: str) -> str: @@ -455,11 +459,10 @@ def escape(string: str) -> str: :param string: the string to escape """ - return '"%s"' % string.replace('\\', '\\\\') \ - .replace('\t', '\\t') \ - .replace('\r', '\\r') \ - .replace('\n', '\\n') \ - .replace('\"', '\\"') + return '"%s"' % string.replace('\\', '\\\\').replace('\t', '\\t').replace( + '\r', + '\\r', + ).replace('\n', '\\n').replace('"', '\\"') def normalize(string: str, prefix: str = '', width: int = 76) -> str: @@ -556,10 +559,10 @@ def write_po( message catalog to the provided file-like object. >>> catalog = Catalog() - >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)], + >>> catalog.add('foo %(name)s', locations=[('main.py', 1)], ... flags=('fuzzy',)) - >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)]) + >>> catalog.add(('bar', 'baz'), locations=[('main.py', 3)]) >>> from io import BytesIO >>> buf = BytesIO() @@ -687,8 +690,10 @@ def _format_message(message, prefix=''): # if no sorting possible, leave unsorted. # (see issue #606) try: - locations = sorted(message.locations, - key=lambda x: (x[0], isinstance(x[1], int) and x[1] or -1)) + locations = sorted( + message.locations, + key=lambda x: (x[0], isinstance(x[1], int) and x[1] or -1), + ) except TypeError: # e.g. 
"TypeError: unorderable types: NoneType() < int()" locations = message.locations @@ -726,7 +731,10 @@ def _format_message(message, prefix=''): yield '\n' -def _sort_messages(messages: Iterable[Message], sort_by: Literal["message", "location"] | None) -> list[Message]: +def _sort_messages( + messages: Iterable[Message], + sort_by: Literal["message", "location"] | None, +) -> list[Message]: """ Sort the given message iterable by the given criteria. diff --git a/babel/numbers.py b/babel/numbers.py index 2737a7076..2ef9031aa 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -1,20 +1,21 @@ """ - babel.numbers - ~~~~~~~~~~~~~ +babel.numbers +~~~~~~~~~~~~~ - Locale dependent formatting and parsing of numeric data. +Locale dependent formatting and parsing of numeric data. - The default locale for the functions in this module is determined by the - following environment variables, in that order: +The default locale for the functions in this module is determined by the +following environment variables, in that order: - * ``LC_MONETARY`` for currency related functions, - * ``LC_NUMERIC``, and - * ``LC_ALL``, and - * ``LANG`` + * ``LC_MONETARY`` for currency related functions, + * ``LC_NUMERIC``, and + * ``LC_ALL``, and + * ``LANG`` - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ + # TODO: # Padding and rounding increments in pattern: # - https://www.unicode.org/reports/tr35/ (Appendix G.6) @@ -34,8 +35,7 @@ class UnknownCurrencyError(Exception): - """Exception thrown when a currency is requested for which no data is available. - """ + """Exception thrown when a currency is requested for which no data is available.""" def __init__(self, identifier: str) -> None: """Create the exception. 
@@ -48,7 +48,7 @@ def __init__(self, identifier: str) -> None: def list_currencies(locale: Locale | str | None = None) -> set[str]: - """ Return a `set` of normalized currency codes. + """Return a `set` of normalized currency codes. .. versionadded:: 2.5.0 @@ -64,7 +64,7 @@ def list_currencies(locale: Locale | str | None = None) -> set[str]: def validate_currency(currency: str, locale: Locale | str | None = None) -> None: - """ Check the currency code is recognized by Babel. + """Check the currency code is recognized by Babel. Accepts a ``locale`` parameter for fined-grained validation, working as the one defined above in ``list_currencies()`` method. @@ -76,7 +76,7 @@ def validate_currency(currency: str, locale: Locale | str | None = None) -> None def is_currency(currency: str, locale: Locale | str | None = None) -> bool: - """ Returns `True` only if a currency is recognized by Babel. + """Returns `True` only if a currency is recognized by Babel. This method always return a Boolean and never raise. """ @@ -112,7 +112,7 @@ def get_currency_name( """Return the name used by the locale for the specified currency. >>> get_currency_name('USD', locale='en_US') - u'US Dollar' + 'US Dollar' .. versionadded:: 0.9.4 @@ -142,7 +142,7 @@ def get_currency_symbol(currency: str, locale: Locale | str | None = None) -> st """Return the symbol used by the locale for the specified currency. >>> get_currency_symbol('USD', locale='en_US') - u'$' + '$' :param currency: the currency code. :param locale: the `Locale` object or locale identifier. @@ -178,7 +178,7 @@ def get_currency_unit_pattern( name should be substituted. >>> get_currency_unit_pattern('USD', locale='en_US', count=10) - u'{0} {1}' + '{0} {1}' .. versionadded:: 2.7.0 @@ -208,8 +208,7 @@ def get_territory_currencies( tender: bool = ..., non_tender: bool = ..., include_details: Literal[False] = ..., -) -> list[str]: - ... # pragma: no cover +) -> list[str]: ... 
# pragma: no cover @overload @@ -220,8 +219,7 @@ def get_territory_currencies( tender: bool = ..., non_tender: bool = ..., include_details: Literal[True] = ..., -) -> list[dict[str, Any]]: - ... # pragma: no cover +) -> list[dict[str, Any]]: ... # pragma: no cover def get_territory_currencies( @@ -295,8 +293,7 @@ def get_territory_currencies( # TODO: validate that the territory exists def _is_active(start, end): - return (start is None or start <= end_date) and \ - (end is None or end >= start_date) + return (start is None or start <= end_date) and (end is None or end >= start_date) result = [] for currency_code, start, end, is_tender in curs: @@ -304,22 +301,29 @@ def _is_active(start, end): start = datetime.date(*start) if end: end = datetime.date(*end) - if ((is_tender and tender) or - (not is_tender and non_tender)) and _is_active(start, end): + if ((is_tender and tender) or (not is_tender and non_tender)) and _is_active( + start, + end, + ): if include_details: - result.append({ - 'currency': currency_code, - 'from': start, - 'to': end, - 'tender': is_tender, - }) + result.append( + { + 'currency': currency_code, + 'from': start, + 'to': end, + 'tender': is_tender, + }, + ) else: result.append(currency_code) return result -def _get_numbering_system(locale: Locale, numbering_system: Literal["default"] | str = "latn") -> str: +def _get_numbering_system( + locale: Locale, + numbering_system: Literal["default"] | str = "latn", +) -> str: if numbering_system == "default": return locale.default_numbering_system else: @@ -335,11 +339,14 @@ def _get_number_symbols( try: return locale.number_symbols[numbering_system] except KeyError as error: - raise UnsupportedNumberingSystemError(f"Unknown numbering system {numbering_system} for Locale {locale}.") from error + raise UnsupportedNumberingSystemError( + f"Unknown numbering system {numbering_system} for Locale {locale}.", + ) from error class UnsupportedNumberingSystemError(Exception): """Exception thrown when an 
unsupported numbering system is requested for the given Locale.""" + pass @@ -351,11 +358,11 @@ def get_decimal_symbol( """Return the symbol used by the locale to separate decimal fractions. >>> get_decimal_symbol('en_US') - u'.' + '.' >>> get_decimal_symbol('ar_EG', numbering_system='default') - u'٫' + '٫' >>> get_decimal_symbol('ar_EG', numbering_system='latn') - u'.' + '.' :param locale: the `Locale` object or locale identifier. Defaults to the system numeric locale. :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". @@ -374,11 +381,11 @@ def get_plus_sign_symbol( """Return the plus sign symbol used by the current locale. >>> get_plus_sign_symbol('en_US') - u'+' + '+' >>> get_plus_sign_symbol('ar_EG', numbering_system='default') - u'\u061c+' + '\\u061c+' >>> get_plus_sign_symbol('ar_EG', numbering_system='latn') - u'\u200e+' + '\\u200e+' :param locale: the `Locale` object or locale identifier. Defaults to the system numeric locale. :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". @@ -397,11 +404,11 @@ def get_minus_sign_symbol( """Return the plus sign symbol used by the current locale. >>> get_minus_sign_symbol('en_US') - u'-' + '-' >>> get_minus_sign_symbol('ar_EG', numbering_system='default') - u'\u061c-' + '\\u061c-' >>> get_minus_sign_symbol('ar_EG', numbering_system='latn') - u'\u200e-' + '\\u200e-' :param locale: the `Locale` object or locale identifier. Defaults to the system numeric locale. :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". @@ -420,11 +427,11 @@ def get_exponential_symbol( """Return the symbol used by the locale to separate mantissa and exponent. >>> get_exponential_symbol('en_US') - u'E' + 'E' >>> get_exponential_symbol('ar_EG', numbering_system='default') - u'أس' + 'أس' >>> get_exponential_symbol('ar_EG', numbering_system='latn') - u'E' + 'E' :param locale: the `Locale` object or locale identifier. 
Defaults to the system numeric locale. :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". @@ -432,7 +439,7 @@ def get_exponential_symbol( :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. """ locale = Locale.parse(locale or LC_NUMERIC) - return _get_number_symbols(locale, numbering_system=numbering_system).get('exponential', 'E') + return _get_number_symbols(locale, numbering_system=numbering_system).get('exponential', 'E') # fmt: skip def get_group_symbol( @@ -443,11 +450,11 @@ def get_group_symbol( """Return the symbol used by the locale to separate groups of thousands. >>> get_group_symbol('en_US') - u',' + ',' >>> get_group_symbol('ar_EG', numbering_system='default') - u'٬' + '٬' >>> get_group_symbol('ar_EG', numbering_system='latn') - u',' + ',' :param locale: the `Locale` object or locale identifier. Defaults to the system numeric locale. :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". @@ -466,11 +473,11 @@ def get_infinity_symbol( """Return the symbol used by the locale to represent infinity. >>> get_infinity_symbol('en_US') - u'∞' + '∞' >>> get_infinity_symbol('ar_EG', numbering_system='default') - u'∞' + '∞' >>> get_infinity_symbol('ar_EG', numbering_system='latn') - u'∞' + '∞' :param locale: the `Locale` object or locale identifier. Defaults to the system numeric locale. :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". @@ -481,13 +488,16 @@ def get_infinity_symbol( return _get_number_symbols(locale, numbering_system=numbering_system).get('infinity', '∞') -def format_number(number: float | decimal.Decimal | str, locale: Locale | str | None = None) -> str: +def format_number( + number: float | decimal.Decimal | str, + locale: Locale | str | None = None, +) -> str: """Return the given number formatted for a specific locale. 
>>> format_number(1099, locale='en_US') # doctest: +SKIP - u'1,099' + '1,099' >>> format_number(1099, locale='de_DE') # doctest: +SKIP - u'1.099' + '1.099' .. deprecated:: 2.6.0 @@ -498,7 +508,11 @@ def format_number(number: float | decimal.Decimal | str, locale: Locale | str | """ - warnings.warn('Use babel.numbers.format_decimal() instead.', DeprecationWarning, stacklevel=2) + warnings.warn( + 'Use babel.numbers.format_decimal() instead.', + DeprecationWarning, + stacklevel=2, + ) return format_decimal(number, locale=locale) @@ -534,38 +548,38 @@ def format_decimal( """Return the given decimal number formatted for a specific locale. >>> format_decimal(1.2345, locale='en_US') - u'1.234' + '1.234' >>> format_decimal(1.2346, locale='en_US') - u'1.235' + '1.235' >>> format_decimal(-1.2346, locale='en_US') - u'-1.235' + '-1.235' >>> format_decimal(1.2345, locale='sv_SE') - u'1,234' + '1,234' >>> format_decimal(1.2345, locale='de') - u'1,234' + '1,234' >>> format_decimal(1.2345, locale='ar_EG', numbering_system='default') - u'1٫234' + '1٫234' >>> format_decimal(1.2345, locale='ar_EG', numbering_system='latn') - u'1.234' + '1.234' The appropriate thousands grouping and the decimal separator are used for each locale: >>> format_decimal(12345.5, locale='en_US') - u'12,345.5' + '12,345.5' By default the locale is allowed to truncate and round a high-precision number by forcing its format pattern onto the decimal part. 
You can bypass this behavior with the `decimal_quantization` parameter: >>> format_decimal(1.2346, locale='en_US') - u'1.235' + '1.235' >>> format_decimal(1.2346, locale='en_US', decimal_quantization=False) - u'1.2346' + '1.2346' >>> format_decimal(12345.67, locale='fr_CA', group_separator=False) - u'12345,67' + '12345,67' >>> format_decimal(12345.67, locale='en_US', group_separator=True) - u'12,345.67' + '12,345.67' :param number: the number to format :param format: @@ -583,7 +597,12 @@ def format_decimal( format = locale.decimal_formats[format] pattern = parse_pattern(format) return pattern.apply( - number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system) + number, + locale, + decimal_quantization=decimal_quantization, + group_separator=group_separator, + numbering_system=numbering_system, + ) def format_compact_decimal( @@ -597,19 +616,19 @@ def format_compact_decimal( """Return the given decimal number formatted for a specific locale in compact form. 
>>> format_compact_decimal(12345, format_type="short", locale='en_US') - u'12K' + '12K' >>> format_compact_decimal(12345, format_type="long", locale='en_US') - u'12 thousand' + '12 thousand' >>> format_compact_decimal(12345, format_type="short", locale='en_US', fraction_digits=2) - u'12.34K' + '12.34K' >>> format_compact_decimal(1234567, format_type="short", locale="ja_JP") - u'123万' + '123万' >>> format_compact_decimal(2345678, format_type="long", locale="mk") - u'2 милиони' + '2 милиони' >>> format_compact_decimal(21000000, format_type="long", locale="mk") - u'21 милион' + '21 милион' >>> format_compact_decimal(12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default') - u'12٫34\xa0ألف' + '12٫34\\xa0ألف' :param number: the number to format :param format_type: Compact format to use ("short" or "long") @@ -626,7 +645,12 @@ def format_compact_decimal( if format is None: format = locale.decimal_formats[None] pattern = parse_pattern(format) - return pattern.apply(number, locale, decimal_quantization=False, numbering_system=numbering_system) + return pattern.apply( + number, + locale, + decimal_quantization=False, + numbering_system=numbering_system, + ) def _get_compact_format( @@ -654,7 +678,10 @@ def _get_compact_format( break # otherwise, we need to divide the number by the magnitude but remove zeros # equal to the number of 0's in the pattern minus 1 - number = cast(decimal.Decimal, number / (magnitude // (10 ** (pattern.count("0") - 1)))) + number = cast( + decimal.Decimal, + number / (magnitude // (10 ** (pattern.count("0") - 1))), + ) # round to the number of fraction digits requested rounded = round(number, fraction_digits) # if the remaining number is singular, use the singular format @@ -663,6 +690,8 @@ def _get_compact_format( plural_form = "other" if number == 1 and "1" in compact_format: plural_form = "1" + if str(magnitude) not in compact_format[plural_form]: + plural_form = "other" # fall back to other as the implicit 
default format = compact_format[plural_form][str(magnitude)] number = rounded break @@ -690,43 +719,43 @@ def format_currency( >>> format_currency(1099.98, 'USD', locale='en_US') '$1,099.98' >>> format_currency(1099.98, 'USD', locale='es_CO') - u'US$1.099,98' + 'US$1.099,98' >>> format_currency(1099.98, 'EUR', locale='de_DE') - u'1.099,98\\xa0\\u20ac' + '1.099,98\\xa0\\u20ac' >>> format_currency(1099.98, 'EGP', locale='ar_EG', numbering_system='default') - u'\u200f1٬099٫98\xa0ج.م.\u200f' + '\\u200f1٬099٫98\\xa0ج.م.\\u200f' The format can also be specified explicitly. The currency is placed with the '¤' sign. As the sign gets repeated the format expands (¤ being the symbol, ¤¤ is the currency abbreviation and ¤¤¤ is the full name of the currency): - >>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US') - u'EUR 1,099.98' - >>> format_currency(1099.98, 'EUR', u'#,##0.00 \xa4\xa4\xa4', locale='en_US') - u'1,099.98 euros' + >>> format_currency(1099.98, 'EUR', '\\xa4\\xa4 #,##0.00', locale='en_US') + 'EUR 1,099.98' + >>> format_currency(1099.98, 'EUR', '#,##0.00 \\xa4\\xa4\\xa4', locale='en_US') + '1,099.98 euros' Currencies usually have a specific number of decimal digits. 
This function favours that information over the given format: >>> format_currency(1099.98, 'JPY', locale='en_US') - u'\\xa51,100' - >>> format_currency(1099.98, 'COP', u'#,##0.00', locale='es_ES') - u'1.099,98' + '\\xa51,100' + >>> format_currency(1099.98, 'COP', '#,##0.00', locale='es_ES') + '1.099,98' However, the number of decimal digits can be overridden from the currency information, by setting the last parameter to ``False``: >>> format_currency(1099.98, 'JPY', locale='en_US', currency_digits=False) - u'\\xa51,099.98' - >>> format_currency(1099.98, 'COP', u'#,##0.00', locale='es_ES', currency_digits=False) - u'1.099,98' + '\\xa51,099.98' + >>> format_currency(1099.98, 'COP', '#,##0.00', locale='es_ES', currency_digits=False) + '1.099,98' If a format is not specified the type of currency format to use from the locale can be specified: >>> format_currency(1099.98, 'EUR', locale='en_US', format_type='standard') - u'\\u20ac1,099.98' + '\\u20ac1,099.98' When the given currency format type is not available, an exception is raised: @@ -737,30 +766,30 @@ def format_currency( UnknownCurrencyFormatError: "'unknown' is not a known currency format type" >>> format_currency(101299.98, 'USD', locale='en_US', group_separator=False) - u'$101299.98' + '$101299.98' >>> format_currency(101299.98, 'USD', locale='en_US', group_separator=True) - u'$101,299.98' + '$101,299.98' You can also pass format_type='name' to use long display names. 
The order of the number and currency name, along with the correct localized plural form of the currency name, is chosen according to locale: >>> format_currency(1, 'USD', locale='en_US', format_type='name') - u'1.00 US dollar' + '1.00 US dollar' >>> format_currency(1099.98, 'USD', locale='en_US', format_type='name') - u'1,099.98 US dollars' + '1,099.98 US dollars' >>> format_currency(1099.98, 'USD', locale='ee', format_type='name') - u'us ga dollar 1,099.98' + 'us ga dollar 1,099.98' By default the locale is allowed to truncate and round a high-precision number by forcing its format pattern onto the decimal part. You can bypass this behavior with the `decimal_quantization` parameter: >>> format_currency(1099.9876, 'USD', locale='en_US') - u'$1,099.99' + '$1,099.99' >>> format_currency(1099.9876, 'USD', locale='en_US', decimal_quantization=False) - u'$1,099.9876' + '$1,099.9876' :param number: the number to format :param currency: the currency code @@ -797,11 +826,19 @@ def format_currency( try: pattern = locale.currency_formats[format_type] except KeyError: - raise UnknownCurrencyFormatError(f"{format_type!r} is not a known currency format type") from None + raise UnknownCurrencyFormatError( + f"{format_type!r} is not a known currency format type", + ) from None return pattern.apply( - number, locale, currency=currency, currency_digits=currency_digits, - decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system) + number, + locale, + currency=currency, + currency_digits=currency_digits, + decimal_quantization=decimal_quantization, + group_separator=group_separator, + numbering_system=numbering_system, + ) def _format_currency_long_name( @@ -839,8 +876,14 @@ def _format_currency_long_name( pattern = parse_pattern(format) number_part = pattern.apply( - number, locale, currency=currency, currency_digits=currency_digits, - decimal_quantization=decimal_quantization, group_separator=group_separator, 
numbering_system=numbering_system) + number, + locale, + currency=currency, + currency_digits=currency_digits, + decimal_quantization=decimal_quantization, + group_separator=group_separator, + numbering_system=numbering_system, + ) return unit_pattern.format(number_part, display_name) @@ -857,11 +900,11 @@ def format_compact_currency( """Format a number as a currency value in compact form. >>> format_compact_currency(12345, 'USD', locale='en_US') - u'$12K' + '$12K' >>> format_compact_currency(123456789, 'USD', locale='en_US', fraction_digits=2) - u'$123.46M' + '$123.46M' >>> format_compact_currency(123456789, 'EUR', locale='de_DE', fraction_digits=1) - '123,5\xa0Mio.\xa0€' + '123,5\\xa0Mio.\\xa0€' :param number: the number to format :param currency: the currency code @@ -877,7 +920,9 @@ def format_compact_currency( try: compact_format = locale.compact_currency_formats[format_type] except KeyError as error: - raise UnknownCurrencyFormatError(f"{format_type!r} is not a known compact currency format type") from error + raise UnknownCurrencyFormatError( + f"{format_type!r} is not a known compact currency format type", + ) from error number, format = _get_compact_format(number, compact_format, locale, fraction_digits) # Did not find a format, fall back. if format is None or "¤" not in str(format): @@ -894,8 +939,14 @@ def format_compact_currency( if format is None: raise ValueError('No compact currency format found for the given number and locale.') pattern = parse_pattern(format) - return pattern.apply(number, locale, currency=currency, currency_digits=False, decimal_quantization=False, - numbering_system=numbering_system) + return pattern.apply( + number, + locale, + currency=currency, + currency_digits=False, + decimal_quantization=False, + numbering_system=numbering_system, + ) def format_percent( @@ -910,33 +961,33 @@ def format_percent( """Return formatted percent value for a specific locale. 
>>> format_percent(0.34, locale='en_US') - u'34%' + '34%' >>> format_percent(25.1234, locale='en_US') - u'2,512%' + '2,512%' >>> format_percent(25.1234, locale='sv_SE') - u'2\\xa0512\\xa0%' + '2\\xa0512\\xa0%' >>> format_percent(25.1234, locale='ar_EG', numbering_system='default') - u'2٬512%' + '2٬512%' The format pattern can also be specified explicitly: - >>> format_percent(25.1234, u'#,##0\u2030', locale='en_US') - u'25,123\u2030' + >>> format_percent(25.1234, '#,##0\\u2030', locale='en_US') + '25,123‰' By default the locale is allowed to truncate and round a high-precision number by forcing its format pattern onto the decimal part. You can bypass this behavior with the `decimal_quantization` parameter: >>> format_percent(23.9876, locale='en_US') - u'2,399%' + '2,399%' >>> format_percent(23.9876, locale='en_US', decimal_quantization=False) - u'2,398.76%' + '2,398.76%' >>> format_percent(229291.1234, locale='pt_BR', group_separator=False) - u'22929112%' + '22929112%' >>> format_percent(229291.1234, locale='pt_BR', group_separator=True) - u'22.929.112%' + '22.929.112%' :param number: the percent number to format :param format: @@ -954,7 +1005,10 @@ def format_percent( format = locale.percent_formats[None] pattern = parse_pattern(format) return pattern.apply( - number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator, + number, + locale, + decimal_quantization=decimal_quantization, + group_separator=group_separator, numbering_system=numbering_system, ) @@ -970,23 +1024,23 @@ def format_scientific( """Return value formatted in scientific notation for a specific locale. 
>>> format_scientific(10000, locale='en_US') - u'1E4' + '1E4' >>> format_scientific(10000, locale='ar_EG', numbering_system='default') - u'1أس4' + '1أس4' The format pattern can also be specified explicitly: - >>> format_scientific(1234567, u'##0.##E00', locale='en_US') - u'1.23E06' + >>> format_scientific(1234567, '##0.##E00', locale='en_US') + '1.23E06' By default the locale is allowed to truncate and round a high-precision number by forcing its format pattern onto the decimal part. You can bypass this behavior with the `decimal_quantization` parameter: - >>> format_scientific(1234.9876, u'#.##E0', locale='en_US') - u'1.23E3' - >>> format_scientific(1234.9876, u'#.##E0', locale='en_US', decimal_quantization=False) - u'1.2349876E3' + >>> format_scientific(1234.9876, '#.##E0', locale='en_US') + '1.23E3' + >>> format_scientific(1234.9876, '#.##E0', locale='en_US', decimal_quantization=False) + '1.2349876E3' :param number: the number to format :param format: @@ -1002,7 +1056,11 @@ def format_scientific( format = locale.scientific_formats[None] pattern = parse_pattern(format) return pattern.apply( - number, locale, decimal_quantization=decimal_quantization, numbering_system=numbering_system) + number, + locale, + decimal_quantization=decimal_quantization, + numbering_system=numbering_system, + ) class NumberFormatError(ValueError): @@ -1054,9 +1112,12 @@ def parse_number( group_symbol = get_group_symbol(locale, numbering_system=numbering_system) if ( - group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space, - group_symbol not in string and # and the string to be parsed does not contain it, - SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead, + # if the grouping symbol is a kind of space, + group_symbol in SPACE_CHARS + # and the string to be parsed does not contain it, + and group_symbol not in string + # but it does contain any other kind of space instead, + and SPACE_CHARS_RE.search(string) ): # ... 
it's reasonable to assume it is taking the place of the grouping symbol. string = SPACE_CHARS_RE.sub(group_symbol, string) @@ -1120,24 +1181,30 @@ def parse_decimal( decimal_symbol = get_decimal_symbol(locale, numbering_system=numbering_system) if not strict and ( - group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space, - group_symbol not in string and # and the string to be parsed does not contain it, - SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead, + group_symbol in SPACE_CHARS # if the grouping symbol is a kind of space, + and group_symbol not in string # and the string to be parsed does not contain it, + # but it does contain any other kind of space instead, + and SPACE_CHARS_RE.search(string) ): # ... it's reasonable to assume it is taking the place of the grouping symbol. string = SPACE_CHARS_RE.sub(group_symbol, string) try: - parsed = decimal.Decimal(string.replace(group_symbol, '') - .replace(decimal_symbol, '.')) + parsed = decimal.Decimal(string.replace(group_symbol, '').replace(decimal_symbol, '.')) except decimal.InvalidOperation as exc: raise NumberFormatError(f"{string!r} is not a valid decimal number") from exc if strict and group_symbol in string: - proper = format_decimal(parsed, locale=locale, decimal_quantization=False, numbering_system=numbering_system) - if string != proper and proper != _remove_trailing_zeros_after_decimal(string, decimal_symbol): + proper = format_decimal( + parsed, + locale=locale, + decimal_quantization=False, + numbering_system=numbering_system, + ) + if string != proper and proper != _remove_trailing_zeros_after_decimal(string, decimal_symbol): # fmt: skip try: - parsed_alt = decimal.Decimal(string.replace(decimal_symbol, '') - .replace(group_symbol, '.')) + parsed_alt = decimal.Decimal( + string.replace(decimal_symbol, '').replace(group_symbol, '.'), + ) except decimal.InvalidOperation as exc: raise NumberFormatError( f"{string!r} is not a properly formatted 
decimal number. " @@ -1201,14 +1268,11 @@ def _remove_trailing_zeros_after_decimal(string: str, decimal_symbol: str) -> st return string -PREFIX_END = r'[^0-9@#.,]' -NUMBER_TOKEN = r'[0-9@#.,E+]' - -PREFIX_PATTERN = r"(?P(?:'[^']*'|%s)*)" % PREFIX_END -NUMBER_PATTERN = r"(?P%s*)" % NUMBER_TOKEN -SUFFIX_PATTERN = r"(?P.*)" - -number_re = re.compile(f"{PREFIX_PATTERN}{NUMBER_PATTERN}{SUFFIX_PATTERN}") +_number_pattern_re = re.compile( + r"(?P(?:[^'0-9@#.,]|'[^']*')*)" + r"(?P[0-9@#.,E+]*)" + r"(?P.*)", +) def parse_grouping(p: str) -> tuple[int, int]: @@ -1226,7 +1290,7 @@ def parse_grouping(p: str) -> tuple[int, int]: if g1 == -1: return 1000, 1000 g1 = width - g1 - 1 - g2 = p[:-g1 - 1].rfind(',') + g2 = p[: -g1 - 1].rfind(',') if g2 == -1: return g1, g1 g2 = width - g1 - g2 - 2 @@ -1239,7 +1303,7 @@ def parse_pattern(pattern: NumberPattern | str) -> NumberPattern: return pattern def _match_number(pattern): - rv = number_re.search(pattern) + rv = _number_pattern_re.search(pattern) if rv is None: raise ValueError(f"Invalid number pattern {pattern!r}") return rv.groups() @@ -1292,14 +1356,20 @@ def parse_precision(p): exp_plus = None exp_prec = None grouping = parse_grouping(integer) - return NumberPattern(pattern, (pos_prefix, neg_prefix), - (pos_suffix, neg_suffix), grouping, - int_prec, frac_prec, - exp_prec, exp_plus, number) + return NumberPattern( + pattern, + (pos_prefix, neg_prefix), + (pos_suffix, neg_suffix), + grouping, + int_prec, + frac_prec, + exp_prec, + exp_plus, + number, + ) class NumberPattern: - def __init__( self, pattern: str, @@ -1348,8 +1418,7 @@ def scientific_notation_elements( *, numbering_system: Literal["default"] | str = "latn", ) -> tuple[decimal.Decimal, int, str]: - """ Returns normalized scientific notation components of a value. - """ + """Returns normalized scientific notation components of a value.""" # Normalize value to only have one lead digit. 
exp = value.adjusted() value = value * get_decimal_quantum(exp) @@ -1426,7 +1495,11 @@ def apply( # Prepare scientific notation metadata. if self.exp_prec: - value, exp, exp_sign = self.scientific_notation_elements(value, locale, numbering_system=numbering_system) + value, exp, exp_sign = self.scientific_notation_elements( + value, + locale, + numbering_system=numbering_system, + ) # Adjust the precision of the fractional part and force it to the # currency's if necessary. @@ -1439,7 +1512,7 @@ def apply( ) frac_prec = force_frac elif currency and currency_digits: - frac_prec = (get_currency_precision(currency), ) * 2 + frac_prec = (get_currency_precision(currency),) * 2 else: frac_prec = self.frac_prec @@ -1459,13 +1532,11 @@ def apply( get_exponential_symbol(locale, numbering_system=numbering_system), exp_sign, # type: ignore # exp_sign is always defined here self._format_int(str(exp), self.exp_prec[0], self.exp_prec[1], locale, numbering_system=numbering_system), # type: ignore # exp is always defined here - ]) + ]) # fmt: skip # Is it a significant digits pattern? elif '@' in self.pattern: - text = self._format_significant(value, - self.int_prec[0], - self.int_prec[1]) + text = self._format_significant(value, self.int_prec[0], self.int_prec[1]) a, sep, b = text.partition(".") number = self._format_int(a, 0, 1000, locale, numbering_system=numbering_system) if sep: @@ -1473,12 +1544,21 @@ def apply( # A normal number pattern. 
else: - number = self._quantize_value(value, locale, frac_prec, group_separator, numbering_system=numbering_system) + number = self._quantize_value( + value, + locale, + frac_prec, + group_separator, + numbering_system=numbering_system, + ) - retval = ''.join([ - self.prefix[is_negative], - number if self.number_pattern != '' else '', - self.suffix[is_negative]]) + retval = ''.join( + ( + self.prefix[is_negative], + number if self.number_pattern != '' else '', + self.suffix[is_negative], + ), + ) if '¤' in retval and currency is not None: retval = retval.replace('¤¤¤', get_currency_name(currency, value, locale)) @@ -1568,8 +1648,19 @@ def _quantize_value( a, sep, b = f"{rounded:f}".partition(".") integer_part = a if group_separator: - integer_part = self._format_int(a, self.int_prec[0], self.int_prec[1], locale, numbering_system=numbering_system) - number = integer_part + self._format_frac(b or '0', locale=locale, force_frac=frac_prec, numbering_system=numbering_system) + integer_part = self._format_int( + a, + self.int_prec[0], + self.int_prec[1], + locale, + numbering_system=numbering_system, + ) + number = integer_part + self._format_frac( + b or '0', + locale=locale, + force_frac=frac_prec, + numbering_system=numbering_system, + ) return number def _format_frac( @@ -1582,7 +1673,7 @@ def _format_frac( ) -> str: min, max = force_frac or self.frac_prec if len(value) < min: - value += ('0' * (min - len(value))) + value += '0' * (min - len(value)) if max == 0 or (min == 0 and int(value) == 0): return '' while len(value) > min and value[-1] == '0': diff --git a/babel/plural.py b/babel/plural.py index 085209e9d..90aa4952d 100644 --- a/babel/plural.py +++ b/babel/plural.py @@ -1,12 +1,13 @@ """ - babel.numbers - ~~~~~~~~~~~~~ +babel.numbers +~~~~~~~~~~~~~ - CLDR Plural support. See UTS #35. +CLDR Plural support. See UTS #35. - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. 
+:license: BSD, see LICENSE for more details. """ + from __future__ import annotations import decimal @@ -18,7 +19,9 @@ _fallback_tag = 'other' -def extract_operands(source: float | decimal.Decimal) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]: +def extract_operands( + source: float | decimal.Decimal, +) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]: """Extract operands from a decimal, a float or an int, according to `CLDR rules`_. The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows: @@ -124,11 +127,14 @@ def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None def __repr__(self) -> str: rules = self.rules - args = ", ".join([f"{tag}: {rules[tag]}" for tag in _plural_tags if tag in rules]) + args = ", ".join(f"{tag}: {rules[tag]}" for tag in _plural_tags if tag in rules) return f"<{type(self).__name__} {args!r}>" @classmethod - def parse(cls, rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> PluralRule: + def parse( + cls, + rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule, + ) -> PluralRule: """Create a `PluralRule` instance for the given rules. If the rules are a `PluralRule` object, that object is returned. @@ -193,7 +199,9 @@ def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRu return ''.join(result) -def to_python(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> Callable[[float | decimal.Decimal], str]: +def to_python( + rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule, +) -> Callable[[float | decimal.Decimal], str]: """Convert a list/dict of rules or a `PluralRule` object into a regular Python function. 
This is useful in situations where you need a real function and don't are about the actual rule object: @@ -256,7 +264,10 @@ def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) return ''.join(result) -def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool: +def in_range_list( + num: float | decimal.Decimal, + range_list: Iterable[Iterable[float | decimal.Decimal]], +) -> bool: """Integer range list test. This is the callback for the "in" operator of the UTS #35 pluralization rule language: @@ -276,7 +287,10 @@ def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[fl return num == int(num) and within_range_list(num, range_list) -def within_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool: +def within_range_list( + num: float | decimal.Decimal, + range_list: Iterable[Iterable[float | decimal.Decimal]], +) -> bool: """Float range test. 
This is the callback for the "within" operator of the UTS #35 pluralization rule language: @@ -336,7 +350,7 @@ class RuleError(Exception): _RULES: list[tuple[str | None, re.Pattern[str]]] = [ (None, re.compile(r'\s+', re.UNICODE)), - ('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')), + ('word', re.compile(rf'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')), ('value', re.compile(r'\d+')), ('symbol', re.compile(r'%|,|!=|=')), ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)), # U+2026: ELLIPSIS @@ -366,8 +380,7 @@ def test_next_token( type_: str, value: str | None = None, ) -> list[tuple[str, str]] | bool: - return tokens and tokens[-1][0] == type_ and \ - (value is None or tokens[-1][1] == value) + return tokens and tokens[-1][0] == type_ and (value is None or tokens[-1][1] == value) def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None): @@ -376,7 +389,7 @@ def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = No def value_node(value: int) -> tuple[Literal['value'], tuple[int]]: - return 'value', (value, ) + return 'value', (value,) def ident_node(name: str) -> tuple[str, tuple[()]]: @@ -463,8 +476,8 @@ def and_condition(self): def relation(self): left = self.expr() if skip_token(self.tokens, 'word', 'is'): - return skip_token(self.tokens, 'word', 'not') and 'isnot' or 'is', \ - (left, self.value()) + op = 'isnot' if skip_token(self.tokens, 'word', 'not') else 'is' + return op, (left, self.value()) negated = skip_token(self.tokens, 'word', 'not') method = 'in' if skip_token(self.tokens, 'word', 'within'): @@ -566,7 +579,9 @@ class _PythonCompiler(_Compiler): compile_mod = _binary_compiler('MOD(%s, %s)') def compile_relation(self, method, expr, range_list): - ranges = ",".join([f"({self.compile(a)}, {self.compile(b)})" for (a, b) in range_list[1]]) + ranges = ",".join( + f"({self.compile(a)}, {self.compile(b)})" for (a, b) in range_list[1] + ) return 
f"{method.upper()}({self.compile(expr)}, [{ranges}])" @@ -586,7 +601,8 @@ def compile_relation(self, method, expr, range_list): if item[0] == item[1]: rv.append(f"({expr} == {self.compile(item[0])})") else: - min, max = map(self.compile, item) + min = self.compile(item[0]) + max = self.compile(item[1]) rv.append(f"({expr} >= {min} && {expr} <= {max})") return f"({' || '.join(rv)})" @@ -603,8 +619,7 @@ class _JavaScriptCompiler(_GettextCompiler): compile_t = compile_zero def compile_relation(self, method, expr, range_list): - code = _GettextCompiler.compile_relation( - self, method, expr, range_list) + code = _GettextCompiler.compile_relation(self, method, expr, range_list) if method == 'in': expr = self.compile(expr) code = f"(parseInt({expr}, 10) == {expr} && {code})" diff --git a/babel/support.py b/babel/support.py index b600bfe27..8cc2492e8 100644 --- a/babel/support.py +++ b/babel/support.py @@ -1,15 +1,16 @@ """ - babel.support - ~~~~~~~~~~~~~ +babel.support +~~~~~~~~~~~~~ - Several classes and functions that help with integrating and using Babel - in applications. +Several classes and functions that help with integrating and using Babel +in applications. - .. note: the code in this module is not used by Babel itself +.. note: the code in this module is not used by Babel itself - :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. 
""" + from __future__ import annotations import gettext @@ -44,9 +45,9 @@ class Format: >>> from datetime import date >>> fmt = Format('en_US', UTC) >>> fmt.date(date(2007, 4, 1)) - u'Apr 1, 2007' + 'Apr 1, 2007' >>> fmt.decimal(1.2345) - u'1.234' + '1.234' """ def __init__( @@ -77,7 +78,7 @@ def date( >>> from datetime import date >>> fmt = Format('en_US') >>> fmt.date(date(2007, 4, 1)) - u'Apr 1, 2007' + 'Apr 1, 2007' """ return format_date(date, format, locale=self.locale) @@ -92,7 +93,7 @@ def datetime( >>> from babel.dates import get_timezone >>> fmt = Format('en_US', tzinfo=get_timezone('US/Eastern')) >>> fmt.datetime(datetime(2007, 4, 1, 15, 30)) - u'Apr 1, 2007, 11:30:00\u202fAM' + 'Apr 1, 2007, 11:30:00\\u202fAM' """ return format_datetime(datetime, format, tzinfo=self.tzinfo, locale=self.locale) @@ -107,14 +108,22 @@ def time( >>> from babel.dates import get_timezone >>> fmt = Format('en_US', tzinfo=get_timezone('US/Eastern')) >>> fmt.time(datetime(2007, 4, 1, 15, 30)) - u'11:30:00\u202fAM' + '11:30:00\\u202fAM' """ return format_time(time, format, tzinfo=self.tzinfo, locale=self.locale) def timedelta( self, delta: _datetime.timedelta | int, - granularity: Literal["year", "month", "week", "day", "hour", "minute", "second"] = "second", + granularity: Literal[ + "year", + "month", + "week", + "day", + "hour", + "minute", + "second", + ] = "second", threshold: float = 0.85, format: Literal["narrow", "short", "medium", "long"] = "long", add_direction: bool = False, @@ -124,30 +133,43 @@ def timedelta( >>> from datetime import timedelta >>> fmt = Format('en_US') >>> fmt.timedelta(timedelta(weeks=11)) - u'3 months' - """ - return format_timedelta(delta, granularity=granularity, - threshold=threshold, - format=format, add_direction=add_direction, - locale=self.locale) + '3 months' + """ + return format_timedelta( + delta, + granularity=granularity, + threshold=threshold, + format=format, + add_direction=add_direction, + locale=self.locale, + ) def number(self, 
number: float | Decimal | str) -> str: """Return an integer number formatted for the locale. >>> fmt = Format('en_US') >>> fmt.number(1099) - u'1,099' + '1,099' """ - return format_decimal(number, locale=self.locale, numbering_system=self.numbering_system) + return format_decimal( + number, + locale=self.locale, + numbering_system=self.numbering_system, + ) def decimal(self, number: float | Decimal | str, format: str | None = None) -> str: """Return a decimal number formatted for the locale. >>> fmt = Format('en_US') >>> fmt.decimal(1.2345) - u'1.234' + '1.234' """ - return format_decimal(number, format, locale=self.locale, numbering_system=self.numbering_system) + return format_decimal( + number, + format, + locale=self.locale, + numbering_system=self.numbering_system, + ) def compact_decimal( self, @@ -159,7 +181,7 @@ def compact_decimal( >>> fmt = Format('en_US') >>> fmt.compact_decimal(123456789) - u'123M' + '123M' >>> fmt.compact_decimal(1234567, format_type='long', fraction_digits=2) '1.23 million' """ @@ -172,9 +194,13 @@ def compact_decimal( ) def currency(self, number: float | Decimal | str, currency: str) -> str: - """Return a number in the given currency formatted for the locale. 
- """ - return format_currency(number, currency, locale=self.locale, numbering_system=self.numbering_system) + """Return a number in the given currency formatted for the locale.""" + return format_currency( + number, + currency, + locale=self.locale, + numbering_system=self.numbering_system, + ) def compact_currency( self, @@ -189,22 +215,36 @@ def compact_currency( >>> Format('en_US').compact_currency(1234567, "USD", format_type='short', fraction_digits=2) '$1.23M' """ - return format_compact_currency(number, currency, format_type=format_type, fraction_digits=fraction_digits, - locale=self.locale, numbering_system=self.numbering_system) + return format_compact_currency( + number, + currency, + format_type=format_type, + fraction_digits=fraction_digits, + locale=self.locale, + numbering_system=self.numbering_system, + ) def percent(self, number: float | Decimal | str, format: str | None = None) -> str: """Return a number formatted as percentage for the locale. >>> fmt = Format('en_US') >>> fmt.percent(0.34) - u'34%' + '34%' """ - return format_percent(number, format, locale=self.locale, numbering_system=self.numbering_system) + return format_percent( + number, + format, + locale=self.locale, + numbering_system=self.numbering_system, + ) def scientific(self, number: float | Decimal | str) -> str: - """Return a number formatted using scientific notation for the locale. - """ - return format_scientific(number, locale=self.locale, numbering_system=self.numbering_system) + """Return a number formatted using scientific notation for the locale.""" + return format_scientific( + number, + locale=self.locale, + numbering_system=self.numbering_system, + ) class LazyProxy: @@ -216,10 +256,10 @@ class LazyProxy: >>> lazy_greeting = LazyProxy(greeting, name='Joe') >>> print(lazy_greeting) Hello, Joe! - >>> u' ' + lazy_greeting - u' Hello, Joe!' - >>> u'(%s)' % lazy_greeting - u'(Hello, Joe!)' + >>> ' ' + lazy_greeting + ' Hello, Joe!' 
+ >>> '(%s)' % lazy_greeting + '(Hello, Joe!)' This can be used, for example, to implement lazy translation functions that delay the actual translation until the string is actually used. The @@ -242,7 +282,15 @@ class LazyProxy: Hello, universe! Hello, world! """ - __slots__ = ['_func', '_args', '_kwargs', '_value', '_is_cache_enabled', '_attribute_error'] + + __slots__ = [ + '_func', + '_args', + '_kwargs', + '_value', + '_is_cache_enabled', + '_attribute_error', + ] if TYPE_CHECKING: _func: Callable[..., Any] @@ -252,7 +300,13 @@ class LazyProxy: _value: Any _attribute_error: AttributeError | None - def __init__(self, func: Callable[..., Any], *args: Any, enable_cache: bool = True, **kwargs: Any) -> None: + def __init__( + self, + func: Callable[..., Any], + *args: Any, + enable_cache: bool = True, + **kwargs: Any, + ) -> None: # Avoid triggering our own __setattr__ implementation object.__setattr__(self, '_func', func) object.__setattr__(self, '_args', args) @@ -362,6 +416,7 @@ def __copy__(self) -> LazyProxy: def __deepcopy__(self, memo: Any) -> LazyProxy: from copy import deepcopy + return LazyProxy( deepcopy(self._func, memo), enable_cache=deepcopy(self._is_cache_enabled, memo), @@ -371,7 +426,6 @@ def __deepcopy__(self, memo: Any) -> LazyProxy: class NullTranslations(gettext.NullTranslations): - if TYPE_CHECKING: _info: dict[str, str] _fallback: NullTranslations | None @@ -406,6 +460,7 @@ def ldgettext(self, domain: str, message: str) -> str: domain. """ import warnings + warnings.warn( 'ldgettext() is deprecated, use dgettext() instead', DeprecationWarning, @@ -418,6 +473,7 @@ def udgettext(self, domain: str, message: str) -> str: domain. """ return self._domains.get(domain, self).ugettext(message) + # backward compatibility with 0.9 dugettext = udgettext @@ -432,6 +488,7 @@ def ldngettext(self, domain: str, singular: str, plural: str, num: int) -> str: domain. 
""" import warnings + warnings.warn( 'ldngettext() is deprecated, use dngettext() instead', DeprecationWarning, @@ -444,6 +501,7 @@ def udngettext(self, domain: str, singular: str, plural: str, num: int) -> str: domain. """ return self._domains.get(domain, self).ungettext(singular, plural, num) + # backward compatibility with 0.9 dungettext = udngettext @@ -479,6 +537,7 @@ def lpgettext(self, context: str, message: str) -> str | bytes | object: ``bind_textdomain_codeset()``. """ import warnings + warnings.warn( 'lpgettext() is deprecated, use pgettext() instead', DeprecationWarning, @@ -517,6 +576,7 @@ def lnpgettext(self, context: str, singular: str, plural: str, num: int) -> str ``bind_textdomain_codeset()``. """ import warnings + warnings.warn( 'lnpgettext() is deprecated, use npgettext() instead', DeprecationWarning, @@ -583,6 +643,7 @@ def udpgettext(self, domain: str, context: str, message: str) -> str: `domain`. """ return self._domains.get(domain, self).upgettext(context, message) + # backward compatibility with 0.9 dupgettext = udpgettext @@ -593,29 +654,34 @@ def ldpgettext(self, domain: str, context: str, message: str) -> str | bytes | o """ return self._domains.get(domain, self).lpgettext(context, message) - def dnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str: + def dnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str: # fmt: skip """Like ``npgettext``, but look the message up in the specified `domain`. """ - return self._domains.get(domain, self).npgettext(context, singular, - plural, num) + return self._domains.get(domain, self).npgettext(context, singular, plural, num) - def udnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str: + def udnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str: # fmt: skip """Like ``unpgettext``, but look the message up in the specified `domain`. 
""" - return self._domains.get(domain, self).unpgettext(context, singular, - plural, num) + return self._domains.get(domain, self).unpgettext(context, singular, plural, num) + # backward compatibility with 0.9 dunpgettext = udnpgettext - def ldnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str | bytes: + def ldnpgettext( + self, + domain: str, + context: str, + singular: str, + plural: str, + num: int, + ) -> str | bytes: """Equivalent to ``dnpgettext()``, but the translation is returned in the preferred system encoding, if no other encoding was explicitly set with ``bind_textdomain_codeset()``. """ - return self._domains.get(domain, self).lnpgettext(context, singular, - plural, num) + return self._domains.get(domain, self).lnpgettext(context, singular, plural, num) ugettext = gettext.NullTranslations.gettext ungettext = gettext.NullTranslations.ngettext @@ -626,7 +692,11 @@ class Translations(NullTranslations, gettext.GNUTranslations): DEFAULT_DOMAIN = 'messages' - def __init__(self, fp: gettext._TranslationsReader | None = None, domain: str | None = None): + def __init__( + self, + fp: gettext._TranslationsReader | None = None, + domain: str | None = None, + ): """Initialize the translations catalog. :param fp: the file-like object the translation should be read from diff --git a/babel/units.py b/babel/units.py index 86ac2abc9..88ebb909c 100644 --- a/babel/units.py +++ b/babel/units.py @@ -87,32 +87,32 @@ def format_unit( and number formats. 
>>> format_unit(12, 'length-meter', locale='ro_RO') - u'12 metri' + '12 metri' >>> format_unit(15.5, 'length-mile', locale='fi_FI') - u'15,5 mailia' + '15,5 mailia' >>> format_unit(1200, 'pressure-millimeter-ofhg', locale='nb') - u'1\\xa0200 millimeter kvikks\\xf8lv' + '1\\xa0200 millimeter kvikks\\xf8lv' >>> format_unit(270, 'ton', locale='en') - u'270 tons' + '270 tons' >>> format_unit(1234.5, 'kilogram', locale='ar_EG', numbering_system='default') - u'1٬234٫5 كيلوغرام' + '1٬234٫5 كيلوغرام' Number formats may be overridden with the ``format`` parameter. >>> import decimal >>> format_unit(decimal.Decimal("-42.774"), 'temperature-celsius', 'short', format='#.0', locale='fr') - u'-42,8\\u202f\\xb0C' + '-42,8\\u202f\\xb0C' The locale's usual pluralization rules are respected. >>> format_unit(1, 'length-meter', locale='ro_RO') - u'1 metru' + '1 metru' >>> format_unit(0, 'length-mile', locale='cy') - u'0 mi' + '0 mi' >>> format_unit(1, 'length-mile', locale='cy') - u'1 filltir' + '1 filltir' >>> format_unit(3, 'length-mile', locale='cy') - u'3 milltir' + '3 milltir' >>> format_unit(15, 'length-horse', locale='fi') Traceback (most recent call last): @@ -143,7 +143,12 @@ def format_unit( formatted_value = value plural_form = "one" else: - formatted_value = format_decimal(value, format, locale, numbering_system=numbering_system) + formatted_value = format_decimal( + value, + format, + locale, + numbering_system=numbering_system, + ) plural_form = locale.plural_form(value) if plural_form in unit_patterns: @@ -151,7 +156,11 @@ def format_unit( # Fall back to a somewhat bad representation. # nb: This is marked as no-cover, as the current CLDR seemingly has no way for this to happen. 
- fallback_name = get_unit_name(measurement_unit, length=length, locale=locale) # pragma: no cover + fallback_name = get_unit_name( # pragma: no cover + measurement_unit, + length=length, + locale=locale, + ) return f"{formatted_value} {fallback_name or measurement_unit}" # pragma: no cover @@ -204,7 +213,10 @@ def _find_compound_unit( # Now we can try and rebuild a compound unit specifier, then qualify it: - return _find_unit_pattern(f"{bare_numerator_unit}-per-{bare_denominator_unit}", locale=locale) + return _find_unit_pattern( + f"{bare_numerator_unit}-per-{bare_denominator_unit}", + locale=locale, + ) def format_compound_unit( @@ -310,7 +322,12 @@ def format_compound_unit( elif denominator_unit: # Denominator has unit if denominator_value == 1: # support perUnitPatterns when the denominator is 1 denominator_unit = _find_unit_pattern(denominator_unit, locale=locale) - per_pattern = locale._data["unit_patterns"].get(denominator_unit, {}).get(length, {}).get("per") + per_pattern = ( + locale._data["unit_patterns"] + .get(denominator_unit, {}) + .get(length, {}) + .get("per") + ) if per_pattern: return per_pattern.format(formatted_numerator) # See TR-35's per-unit pattern algorithm, point 3.2. @@ -335,6 +352,11 @@ def format_compound_unit( ) # TODO: this doesn't support "compound_variations" (or "prefix"), and will fall back to the "x/y" representation - per_pattern = locale._data["compound_unit_patterns"].get("per", {}).get(length, {}).get("compound", "{0}/{1}") + per_pattern = ( + locale._data["compound_unit_patterns"] + .get("per", {}) + .get(length, {}) + .get("compound", "{0}/{1}") + ) return per_pattern.format(formatted_numerator, formatted_denominator) diff --git a/babel/util.py b/babel/util.py index d113982ee..a2bf728cc 100644 --- a/babel/util.py +++ b/babel/util.py @@ -1,12 +1,13 @@ """ - babel.util - ~~~~~~~~~~ +babel.util +~~~~~~~~~~ - Various utility classes and functions. +Various utility classes and functions. 
- :copyright: (c) 2013-2025 by the Babel Team. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2013-2026 by the Babel Team. +:license: BSD, see LICENSE for more details. """ + from __future__ import annotations import codecs @@ -47,7 +48,9 @@ def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]: # Regexp to match python magic encoding line PYTHON_MAGIC_COMMENT_re = re.compile( - br'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE) + rb'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', + flags=re.VERBOSE, +) def parse_encoding(fp: IO[bytes]) -> str | None: @@ -67,12 +70,13 @@ def parse_encoding(fp: IO[bytes]) -> str | None: line1 = fp.readline() has_bom = line1.startswith(codecs.BOM_UTF8) if has_bom: - line1 = line1[len(codecs.BOM_UTF8):] + line1 = line1[len(codecs.BOM_UTF8) :] m = PYTHON_MAGIC_COMMENT_re.match(line1) if not m: try: import ast + ast.parse(line1.decode('latin-1')) except (ImportError, SyntaxError, UnicodeEncodeError): # Either it's a real syntax error, in which case the source is @@ -98,8 +102,7 @@ def parse_encoding(fp: IO[bytes]) -> str | None: fp.seek(pos) -PYTHON_FUTURE_IMPORT_re = re.compile( - r'from\s+__future__\s+import\s+\(*(.+)\)*') +PYTHON_FUTURE_IMPORT_re = re.compile(r'from\s+__future__\s+import\s+\(*(.+)\)*') def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int: @@ -107,6 +110,7 @@ def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int: code. """ import __future__ + pos = fp.tell() fp.seek(0) flags = 0 @@ -201,8 +205,8 @@ def pathmatch(pattern: str, filename: str) -> bool: class TextWrapper(textwrap.TextWrapper): wordsep_re = re.compile( - r'(\s+|' # any whitespace - r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))', # em-dash + r'(\s+|' # any whitespace + r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))', # em-dash ) # e.g. 
'\u2068foo bar.py\u2069:42' @@ -226,7 +230,12 @@ def _split(self, text): return [c for c in chunks if c] -def wraptext(text: str, width: int = 70, initial_indent: str = '', subsequent_indent: str = '') -> list[str]: +def wraptext( + text: str, + width: int = 70, + initial_indent: str = '', + subsequent_indent: str = '', +) -> list[str]: """Simple wrapper around the ``textwrap.wrap`` function in the standard library. This version does not wrap lines on hyphens in words. It also does not wrap PO file locations containing spaces. @@ -244,10 +253,12 @@ def wraptext(text: str, width: int = 70, initial_indent: str = '', subsequent_in DeprecationWarning, stacklevel=2, ) - wrapper = TextWrapper(width=width, initial_indent=initial_indent, - subsequent_indent=subsequent_indent, - break_long_words=False) - return wrapper.wrap(text) + return TextWrapper( + width=width, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + break_long_words=False, + ).wrap(text) # TODO (Babel 3.x): Remove this re-export @@ -255,10 +266,21 @@ def wraptext(text: str, width: int = 70, initial_indent: str = '', subsequent_in class FixedOffsetTimezone(datetime.tzinfo): - """Fixed offset in minutes east from UTC.""" + """ + Fixed offset in minutes east from UTC. - def __init__(self, offset: float, name: str | None = None) -> None: + DEPRECATED: Use the standard library `datetime.timezone` instead. + """ + # TODO (Babel 3.x): Remove this class + + def __init__(self, offset: float, name: str | None = None) -> None: + warnings.warn( + "`FixedOffsetTimezone` is deprecated and will be removed in a future version of Babel. " + "Use the standard library `datetime.timezone` class.", + DeprecationWarning, + stacklevel=2, + ) self._offset = datetime.timedelta(minutes=offset) if name is None: name = 'Etc/GMT%+d' % offset diff --git a/docs/api/core.rst b/docs/api/core.rst index 6993764b8..9f297b2a1 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -36,3 +36,5 @@ Utility Functions .. 
autofunction:: parse_locale .. autofunction:: get_locale_identifier + +.. autofunction:: get_cldr_version diff --git a/docs/dates.rst b/docs/dates.rst index 0c2c17fc0..18b4f1f59 100644 --- a/docs/dates.rst +++ b/docs/dates.rst @@ -19,9 +19,9 @@ Babel provides functions for locale-specific formatting of those objects in its >>> d = date(2007, 4, 1) >>> format_date(d, locale='en') - u'Apr 1, 2007' + 'Apr 1, 2007' >>> format_date(d, locale='de_DE') - u'01.04.2007' + '01.04.2007' As this example demonstrates, Babel will automatically choose a date format that is appropriate for the requested locale. @@ -39,11 +39,11 @@ For example: .. code-block:: pycon >>> format_date(d, format='short', locale='en') - u'4/1/07' + '4/1/07' >>> format_date(d, format='long', locale='en') - u'April 1, 2007' + 'April 1, 2007' >>> format_date(d, format='full', locale='en') - u'Sunday, April 1, 2007' + 'Sunday, April 1, 2007' Core Time Concepts ================== @@ -105,26 +105,26 @@ For example: >>> d = date(2007, 4, 1) >>> format_date(d, "EEE, MMM d, ''yy", locale='en') - u"Sun, Apr 1, '07" + "Sun, Apr 1, '07" >>> format_date(d, "EEEE, d.M.yyyy", locale='de') - u'Sonntag, 1.4.2007' + 'Sonntag, 1.4.2007' >>> t = time(15, 30) >>> format_time(t, "hh 'o''clock' a", locale='en') - u"03 o'clock PM" + "03 o'clock PM" >>> format_time(t, 'H:mm a', locale='de') - u'15:30 nachm.' + '15:30 nachm.' >>> dt = datetime(2007, 4, 1, 15, 30) >>> format_datetime(dt, "yyyyy.MMMM.dd GGG hh:mm a", locale='en') - u'02007.April.01 AD 03:30 PM' + '02007.April.01 AD 03:30 PM' The syntax for custom datetime format patterns is described in detail in the the `Locale Data Markup Language specification`_. The following table is just a relatively brief overview. .. 
_`Locale Data Markup Language specification`: - https://unicode.org/reports/tr35/#Date_Format_Patterns + https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns Date Fields ----------- @@ -245,7 +245,7 @@ difference, and displays that: >>> from babel.dates import format_timedelta >>> delta = timedelta(days=6) >>> format_timedelta(delta, locale='en_US') - u'1 week' + '1 week' The resulting strings are based from the CLDR data, and are properly pluralized depending on the plural rules of the locale and the calculated @@ -260,9 +260,9 @@ can limit the smallest unit to display: >>> delta = timedelta(days=6) >>> format_timedelta(delta, threshold=1.2, locale='en_US') - u'6 days' + '6 days' >>> format_timedelta(delta, granularity='month', locale='en_US') - u'1 month' + '1 month' .. _timezone-support: @@ -287,7 +287,7 @@ You can directly interface with either of these modules from within Babel: >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=UTC) >>> eastern = get_timezone('US/Eastern') >>> format_datetime(dt, 'H:mm Z', tzinfo=eastern, locale='en_US') - u'11:30 -0400' + '11:30 -0400' The recommended approach to deal with different time-zones in a Python application is to always use UTC internally, and only convert from/to the users @@ -300,7 +300,7 @@ information unchanged: >>> british = get_timezone('Europe/London') >>> format_datetime(dt, 'H:mm zzzz', tzinfo=british, locale='en_US') - u'16:30 British Summer Time' + '16:30 British Summer Time' Here, the given UTC time is adjusted to the "Europe/London" time-zone, and daylight savings time is taken into account. Daylight savings time is also @@ -317,7 +317,7 @@ your operating system. It's provided through the ``LOCALTZ`` constant: >>> LOCALTZ >>> get_timezone_name(LOCALTZ) - u'Central European Time' + 'Central European Time' .. 
_pytz: https://pythonhosted.org/pytz/ @@ -338,7 +338,7 @@ functions in the ``babel.dates`` module, most importantly the >>> tz = get_timezone('Europe/Berlin') >>> get_timezone_name(tz, locale=Locale.parse('pt_PT')) - u'Hora da Europa Central' + 'Hora da Europa Central' You can pass the function either a ``datetime.tzinfo`` object, or a ``datetime.date`` or ``datetime.datetime`` object. If you pass an actual date, @@ -354,6 +354,6 @@ display a list of time-zones to the user. >>> dt = _localize(tz, datetime(2007, 8, 15)) >>> get_timezone_name(dt, locale=Locale.parse('de_DE')) - u'Mitteleurop\xe4ische Sommerzeit' + 'Mitteleuropäische Sommerzeit' >>> get_timezone_name(tz, locale=Locale.parse('de_DE')) - u'Mitteleurop\xe4ische Zeit' + 'Mitteleuropäische Zeit' diff --git a/docs/locale.rst b/docs/locale.rst index 425fb776c..abb36fcf5 100644 --- a/docs/locale.rst +++ b/docs/locale.rst @@ -30,10 +30,10 @@ You normally access such locale data through the >>> from babel import Locale >>> locale = Locale('en', 'US') >>> locale.territories['US'] - u'United States' + 'United States' >>> locale = Locale('es', 'MX') >>> locale.territories['US'] - u'Estados Unidos' + 'Estados Unidos' In addition to country/territory names, the locale data also provides access to names of languages, scripts, variants, time zones, and more. Some of the data @@ -89,9 +89,9 @@ language supported by the CLDR: >>> l = Locale.parse('de_DE') >>> l.get_display_name('en_US') - u'German (Germany)' + 'German (Germany)' >>> l.get_display_name('fr_FR') - u'allemand (Allemagne)' + 'allemand (Allemagne)' Display names include all the information to uniquely identify a locale (language, territory, script and variant) which is often not what you @@ -100,13 +100,13 @@ want. You can also ask for the information in parts: .. 
code-block:: pycon >>> l.get_language_name('de_DE') - u'Deutsch' + 'Deutsch' >>> l.get_language_name('it_IT') - u'tedesco' + 'tedesco' >>> l.get_territory_name('it_IT') - u'Germania' + 'Germania' >>> l.get_territory_name('pt_PT') - u'Alemanha' + 'Alemanha' Calendar Display Names diff --git a/docs/messages.rst b/docs/messages.rst index 0f57eb117..c835d60a7 100644 --- a/docs/messages.rst +++ b/docs/messages.rst @@ -139,14 +139,6 @@ Genshi markup templates and text templates: [javascript: **.js] extract_messages = $._, jQuery._ -The configuration file syntax is based on the format commonly found in ``.INI`` -files on Windows systems, and as supported by the ``ConfigParser`` module in -the Python standard library. Section names (the strings enclosed in square -brackets) specify both the name of the extraction method, and the extended glob -pattern to specify the files that this extraction method should be used for, -separated by a colon. The options in the sections are passed to the extraction -method. Which options are available is specific to the extraction method used. - The extended glob patterns used in this configuration are similar to the glob patterns provided by most shells. A single asterisk (``*``) is a wildcard for any number of characters (except for the pathname component separator "/"), @@ -155,9 +147,132 @@ two subsequent asterisk characters (``**``) can be used to make the wildcard match any directory level, so the pattern ``**.txt`` matches any file with the extension ``.txt`` in any directory. +Babel supports two configuration file formats: INI and TOML. + +INI Configuration Format +^^^^^^^^^^^^^^^^^^^^^^^^ + +The INI configuration file syntax is based on the format commonly found in ``.INI`` +files on Windows systems, and as supported by the ``ConfigParser`` module in +the Python standard library. 
Section names (the strings enclosed in square +brackets) specify both the name of the extraction method, and the extended glob +pattern to specify the files that this extraction method should be used for, +separated by a colon. The options in the sections are passed to the extraction +method. Which options are available is specific to the extraction method used. + Lines that start with a ``#`` or ``;`` character are ignored and can be used for comments. Empty lines are ignored, too. +TOML Configuration Format +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Babel also supports TOML format for configuration files, when the ``tomllib`` +module is available (Python 3.11+), or when the ``tomli`` package is installed +(for Python versions prior to 3.11). + +TOML provides a more structured format and is particularly useful when combined +with ``pyproject.toml``. + +The same configuration examples shown above can be written in TOML format: + +.. code-block:: toml + + # Extraction from Python source files + [[mappings]] + method = "python" + pattern = "**.py" + + # Extraction from Genshi HTML and text templates + [[mappings]] + method = "genshi" + pattern = "**/templates/**.html" + ignore_tags = "script,style" + include_attrs = "alt title summary" + + [[mappings]] + method = "genshi" + pattern = "**/templates/**.txt" + template_class = "genshi.template:TextTemplate" + encoding = "ISO-8819-15" + + # Extraction from JavaScript files + [[mappings]] + method = "javascript" + pattern = "**.js" + extract_messages = "$._, jQuery._" + +In TOML format, each ``[[mappings]]`` section defines a mapping. The ``method`` +and ``pattern`` fields are required. The ``pattern`` field can be a string or +an array of strings to match multiple patterns with the same configuration. + +If you're using ``pyproject.toml``, nest the configuration under ``[tool.babel]``: + +.. 
code-block:: toml + + [tool.babel] + [[tool.babel.mappings]] + method = "python" + pattern = "**.py" + +You can reference custom extractors in both formats. In TOML: + +.. code-block:: toml + + [extractors] + custom = "mypackage.module:extract_custom" + + [[mappings]] + method = "custom" + pattern = "**.ctm" + some_option = "foo" + +Common Options +^^^^^^^^^^^^^^ + +In addition to extractor-specific options, the following options can be specified +in any mapping section and will be merged with global settings: + +``keywords`` + A list of keywords (function names) to extract messages from. + This uses the same syntax as the ``--keyword`` command-line option. + Keywords specified here are added to (not replacing) the default keywords or + those specified via command-line. + + In INI format, whitespace-separated: ``keywords = _ gettext ngettext:1,2 pgettext:1c,2`` + + In TOML format, use either a whitespace-separated string or an array: + ``keywords = "_ gettext ngettext:1,2"`` or + ``keywords = ["_", "gettext", "ngettext:1,2"]`` + +``add_comments`` + A list of comment tag prefixes to extract and include in the + output. This uses the same syntax as the ``--add-comments`` command-line option. + Comment tags specified here are added to those specified via command-line. + + In INI format, whitespace-separated: ``add_comments = TRANSLATOR: NOTE:`` + + In TOML format, use either a string or an array: + ``add_comments = "TRANSLATOR NOTE:"`` (parsed as a single string!) or + ``add_comments = ["TRANSLATOR:", "NOTE:"]`` + +**Example in INI format:** + +.. code-block:: ini + + [python: **.py] + keywords = _ _l _n:1,2 + add_comments = TRANSLATOR: + +**Example in TOML format:** + +.. code-block:: toml + + [[mappings]] + method = "python" + pattern = "**.py" + keywords = ["_", "_l", "_n:1,2"] + add_comments = ["TRANSLATOR:"] + .. 
note:: if you're performing message extraction using the command Babel provides for integration into ``setup.py`` scripts, you can also provide this configuration in a different way, namely as a keyword diff --git a/docs/numbers.rst b/docs/numbers.rst index cbe05cdef..5dfc1aaa9 100644 --- a/docs/numbers.rst +++ b/docs/numbers.rst @@ -20,14 +20,14 @@ Examples: # Numbers with decimal places >>> format_decimal(1.2345, locale='en_US') - u'1.234' + '1.234' >>> format_decimal(1.2345, locale='sv_SE') - u'1,234' + '1,234' # Integers with thousand grouping >>> format_decimal(12345, locale='de_DE') - u'12.345' + '12.345' >>> format_decimal(12345678, locale='de_DE') - u'12.345.678' + '12.345.678' Pattern Syntax ============== @@ -42,9 +42,9 @@ Examples: .. code-block:: pycon >>> format_decimal(-1.2345, format='#,##0.##;-#', locale='en') - u'-1.23' + '-1.23' >>> format_decimal(-1.2345, format='#,##0.##;(#)', locale='en') - u'(1.23)' + '(1.23)' The syntax for custom number format patterns is described in detail in the the specification. The following table is just a relatively brief overview. @@ -106,7 +106,7 @@ current context before formatting a number or currency: >>> with decimal.localcontext(decimal.Context(rounding=decimal.ROUND_DOWN)): >>> txt = format_decimal(123.99, format='#', locale='en_US') >>> txt - u'123' + '123' It is also possible to use ``decimal.setcontext`` or directly modifying the instance returned by ``decimal.getcontext``. However, using a context manager @@ -129,7 +129,7 @@ unexpected results on Python 2.7, with the `cdecimal`_ module installed: >>> with localcontext(Context(rounding=ROUND_DOWN)): >>> txt = format_decimal(123.99, format='#', locale='en_US') >>> txt - u'124' + '124' Changing other parameters such as the precision may also alter the results of the number formatting functions. 
Remember to test your code to make sure it @@ -176,7 +176,7 @@ Examples: 1099 >>> parse_number('1.099.024', locale='de') 1099024 - >>> parse_number('123' + u'\xa0' + '4567', locale='ru') + >>> parse_number('123' + '\xa0' + '4567', locale='ru') 1234567 >>> parse_number('123 4567', locale='ru') ... diff --git a/misc/icu4c-tools/.gitignore b/misc/icu4c-tools/.gitignore new file mode 100644 index 000000000..e660fd93d --- /dev/null +++ b/misc/icu4c-tools/.gitignore @@ -0,0 +1 @@ +bin/ diff --git a/misc/icu4c-tools/Makefile b/misc/icu4c-tools/Makefile new file mode 100644 index 000000000..0f1d5d133 --- /dev/null +++ b/misc/icu4c-tools/Makefile @@ -0,0 +1,3 @@ +bin/icu4c_date_format: icu4c_date_format.cpp + mkdir -p bin + $(CXX) -Wall -std=c++17 -o $@ $^ $(shell pkg-config --cflags --libs icu-uc icu-i18n) diff --git a/misc/icu4c-tools/README.md b/misc/icu4c-tools/README.md new file mode 100644 index 000000000..7cf11c046 --- /dev/null +++ b/misc/icu4c-tools/README.md @@ -0,0 +1,22 @@ +# icu4c-tools + +Some haphazard tools for cross-checking results between ICU4C and Babel. +These are not meant to be production-ready or e.g. guaranteed to not leak memory in any way. + +## icu4c_date_format + +### Compiling + +This worked on my macOS – on a Linux machine, you shouldn't need the `PKG_CONFIG_PATH` environment variable. + +``` +env PKG_CONFIG_PATH="/opt/homebrew/opt/icu4c@76/lib/pkgconfig" make bin/icu4c_date_format +``` + +### Running + +E.g. 
+ +``` +env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm zz" bin/icu4c_date_format +``` diff --git a/misc/icu4c-tools/icu4c_date_format.cpp b/misc/icu4c-tools/icu4c_date_format.cpp new file mode 100644 index 000000000..8a6ac28b8 --- /dev/null +++ b/misc/icu4c-tools/icu4c_date_format.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include + +static std::vector split(const std::string &s, char delimiter) { + std::vector tokens; + std::string token; + std::istringstream tokenStream(s); + while (std::getline(tokenStream, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + +static UDate parse_time_str(const char *time_str) { + UErrorCode status = U_ZERO_ERROR; + icu::UnicodeString fauxISO8601("yyyy-MM-dd'T'hh:mm:ss'Z'"); + auto fmt = new icu::SimpleDateFormat(fauxISO8601, status); + fmt->setTimeZone(*icu::TimeZone::getGMT()); + UDate date = fmt->parse(icu::UnicodeString(time_str), status); + if (U_FAILURE(status)) { + std::cerr << "Failed to parse time string: " << time_str << std::endl; + exit(1); + } + return date; +} + +static std::vector parse_locales(const char *locales_str) { + auto locales = std::vector{}; + for (auto token : split(locales_str, ',')) { + auto loc = icu::Locale(token.c_str()); + if (loc.isBogus()) { + std::cerr << "Invalid locale: " << token << std::endl; + exit(1); + } + locales.push_back(loc); + } + return locales; +} + +static std::vector parse_timezones(const char *timezones_str) { + auto timezones = std::vector{}; + for (auto token : split(timezones_str, ',')) { + auto tz = icu::TimeZone::createTimeZone(token.c_str()); + if (tz == nullptr) { + std::cerr << "Invalid timezone: " << token << std::endl; + exit(1); + } + timezones.push_back(tz); + } + return timezones; +} + +int main() { + UErrorCode status = U_ZERO_ERROR; + const char *timezones_str = getenv("TEST_TIMEZONES"); + const char *locales_str = getenv("TEST_LOCALES"); + const char *time_str = 
getenv("TEST_TIME"); + const char *time_format_str = getenv("TEST_TIME_FORMAT"); + + if (!timezones_str || !locales_str) { + std::cerr << "Please set TEST_TIMEZONES, TEST_LOCALES environment variables" + << std::endl; + return 1; + } + + if (time_str == nullptr) { + time_str = "2025-03-04T13:53:00Z"; + std::cerr << "Defaulting TEST_TIME to " << time_str << std::endl; + } + + if (time_format_str == nullptr) { + time_format_str = "z:zz:zzz:zzzz"; + std::cerr << "Defaulting TEST_TIME_FORMAT to " << time_format_str + << std::endl; + } + + auto date = parse_time_str(time_str); + auto timezones = parse_timezones(timezones_str); + auto locales = parse_locales(locales_str); + + for (auto tz : timezones) { + icu::UnicodeString tzid; + tz->getID(tzid); + std::string tzid_str; + tzid.toUTF8String(tzid_str); + for (auto loc : locales) { + auto fmt = new icu::SimpleDateFormat(time_format_str, loc, status); + fmt->setTimeZone(*tz); + icu::UnicodeString name; + fmt->format(date, name); + std::string result; + name.toUTF8String(result); + std::cout << tzid_str << "\t" << loc.getName() << "\t" << result + << std::endl; + delete fmt; + } + } + return 0; +} diff --git a/pyproject.toml b/pyproject.toml index e68b6d5d1..ace935525 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,7 @@ target-version = "py38" extend-exclude = [ "tests/messages/data", ] +line-length = 95 [tool.ruff.format] quote-style = "preserve" @@ -30,3 +31,26 @@ ignore = [ [tool.ruff.lint.per-file-ignores] "scripts/import_cldr.py" = ["E402"] + +[tool.pytest.ini_options] +norecursedirs = [ + "venv*", + ".*", + "_*", + "scripts", + "{args}" +] +doctest_optionflags = [ + "ELLIPSIS", + "NORMALIZE_WHITESPACE", + "IGNORE_EXCEPTION_DETAIL" +] +markers = [ + "all_locales: parameterize test with all locales" +] +filterwarnings = [ + # The doctest for format_number would raise this, but we don't really want to see it. 
+ "ignore:babel.numbers.format_decimal:DeprecationWarning", + # FixedOffsetTimezone is still being tested, but we don't want to see the deprecation warning. + "ignore:.*FixedOffsetTimezone:DeprecationWarning", +] diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py index 9fb0ab580..c197fd307 100755 --- a/scripts/download_import_cldr.py +++ b/scripts/download_import_cldr.py @@ -7,17 +7,15 @@ import subprocess import sys import zipfile -from urllib.request import urlretrieve +from urllib.request import Request, urlopen -URL = 'https://unicode.org/Public/cldr/46/cldr-common-46.0.zip' -FILENAME = 'cldr-common-46.0.zip' +URL = 'https://unicode.org/Public/cldr/47/cldr-common-47.zip' +FILENAME = 'cldr-common-47.0.zip' # Via https://unicode.org/Public/cldr/45/hashes/SHASUM512.txt -FILESUM = '316d644b79a4976d4da57d59ca57c689b339908fe61bb49110bfe1a9269c94144cb27322a0ea080398e6dc4c54a16752fd1ca837e14c054b3a6806b1ef9d3ec3' -BLKSIZE = 131072 +FILESUM = '3b1eb2a046dae23cf16f611f452833e2a95affb1aa2ae3fa599753d229d152577114c2ff44ca98a7f369fa41dc6f45b0d7a6647653ca79694aacfd3f3be59801' -def reporthook(block_count, block_size, total_size): - bytes_transmitted = block_count * block_size +def reporthook(bytes_transmitted, total_size): cols = shutil.get_terminal_size().columns buffer = 6 percent = float(bytes_transmitted) / (total_size or 1) @@ -31,6 +29,23 @@ def log(message): sys.stderr.write(f'{message}\n') +def download_file(url, dest_path, reporthook=None): + request = Request(url, headers={'User-Agent': 'babel-cldr-downloader (https://babel.pocoo.org/)'}) + with urlopen(request) as response: + total_size = int(response.headers.get('Content-Length', 0)) + log(f"Downloading {url} to {dest_path}: {total_size // 1024} KiB") + block_count = 0 + with open(dest_path, 'wb') as out_file: + while True: + block = response.read(262144) + if not block: + break + out_file.write(block) + block_count += 1 + if reporthook: + reporthook(out_file.tell(), total_size) + + def 
is_good_file(filename): if not os.path.isfile(filename): log(f"Local copy '{filename}' not found") @@ -38,7 +53,7 @@ def is_good_file(filename): h = hashlib.sha512() with open(filename, 'rb') as f: while True: - blk = f.read(BLKSIZE) + blk = f.read(262144) if not blk: break h.update(blk) @@ -59,9 +74,8 @@ def main(): show_progress = (False if os.environ.get("BABEL_CLDR_NO_DOWNLOAD_PROGRESS") else sys.stdout.isatty()) while not is_good_file(zip_path): - log(f"Downloading '{FILENAME}' from {URL}") tmp_path = f"{zip_path}.tmp" - urlretrieve(URL, tmp_path, (reporthook if show_progress else None)) + download_file(URL, tmp_path, (reporthook if show_progress else None)) os.replace(tmp_path, zip_path) changed = True print() diff --git a/scripts/generate_authors.py b/scripts/generate_authors.py index cd18f640a..c7387d744 100644 --- a/scripts/generate_authors.py +++ b/scripts/generate_authors.py @@ -9,6 +9,7 @@ re.compile("Jun Omae"): "Jun Omae", re.compile(r"^Hugo$"): "Hugo van Kemenade", re.compile(r"^Tomas R([.])?"): "Tomas R.", + re.compile(r"^Ruff$"): "", # It's a robot } @@ -22,7 +23,7 @@ def map_alias(name): def get_sorted_authors_list(): authors = check_output(['git', 'log', '--format=%aN'], cwd=root_path).decode('UTF-8') counts = Counter(map_alias(name) for name in authors.splitlines()) - return [author for (author, count) in counts.most_common()] + return [author for (author, count) in counts.most_common() if author] def get_authors_file_content(): diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index bcd5898e6..400150b19 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -206,6 +206,19 @@ def process_data(srcdir, destdir, force=False, dump_json=False): def parse_global(srcdir, sup): global_data = {} + + with open(os.path.join(srcdir, 'dtd', 'ldml.dtd')) as dtd_file: + cldr_version_match = re.search( + r'=3.8', diff --git a/tests/messages/consts.py b/tests/messages/consts.py index 34509b304..98c9cc05a 100644 --- 
a/tests/messages/consts.py +++ b/tests/messages/consts.py @@ -9,4 +9,7 @@ data_dir = os.path.join(this_dir, 'data') project_dir = os.path.join(data_dir, 'project') i18n_dir = os.path.join(project_dir, 'i18n') -pot_file = os.path.join(i18n_dir, 'temp.pot') + + +def get_po_file_path(locale): + return os.path.join(i18n_dir, locale, 'LC_MESSAGES', 'messages.po') diff --git a/tests/messages/data/mapping_with_keywords.cfg b/tests/messages/data/mapping_with_keywords.cfg new file mode 100644 index 000000000..710e68187 --- /dev/null +++ b/tests/messages/data/mapping_with_keywords.cfg @@ -0,0 +1,5 @@ +# Test mapping file with keywords option (issue #1224) + +[python: **.py] +encoding = utf-8 +keywords = _ _l _n:1,2 _nl:1,2 _p:1c,2 _pl:1c,2 _np:1c,2,3 _npl:1c,2,3 diff --git a/tests/messages/data/mapping_with_keywords_and_comments.toml b/tests/messages/data/mapping_with_keywords_and_comments.toml new file mode 100644 index 000000000..0a5135f14 --- /dev/null +++ b/tests/messages/data/mapping_with_keywords_and_comments.toml @@ -0,0 +1,8 @@ +# Test mapping file with keywords and add_comments options (issue #1224) + +[[mappings]] +method = "python" +pattern = "**.py" +encoding = "utf-8" +keywords = ["_", "_l", "_n:1,2"] +add_comments = ["SPECIAL:"] diff --git a/tests/messages/data/project/issue_1224_test.py b/tests/messages/data/project/issue_1224_test.py new file mode 100644 index 000000000..8e4f7a608 --- /dev/null +++ b/tests/messages/data/project/issue_1224_test.py @@ -0,0 +1,12 @@ +from myproject.i18n import lazy_gettext as _l, lazy_ngettext as _n + + +class Choices: + # SPECIAL: This comment should be extracted + CHOICE_X = 1, _l("Choice X") + # SPECIAL: Another special comment + CHOICE_Y = 2, _l("Choice Y") + # No comment... + OPTION_C = 3, _l("Option C") + # Test for _n too! (but no comment... shush...) 
+ OPTION_A = 4, (_n("Option A", "Options of the A kind", 1)) diff --git a/tests/messages/frontend/__init__.py b/tests/messages/frontend/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/messages/frontend/conftest.py b/tests/messages/frontend/conftest.py new file mode 100644 index 000000000..d309255c0 --- /dev/null +++ b/tests/messages/frontend/conftest.py @@ -0,0 +1,9 @@ +import pathlib +import time + +import pytest + + +@pytest.fixture +def pot_file(tmp_path) -> pathlib.Path: + return tmp_path / f'po-{time.time()}.pot' diff --git a/tests/messages/frontend/test_cli.py b/tests/messages/frontend/test_cli.py new file mode 100644 index 000000000..200632ec3 --- /dev/null +++ b/tests/messages/frontend/test_cli.py @@ -0,0 +1,668 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from __future__ import annotations + +import logging +import os +import shutil +import sys +import time +from datetime import datetime, timedelta +from io import StringIO + +import pytest +from freezegun import freeze_time + +from babel import __version__ as VERSION +from babel.dates import format_datetime +from babel.messages import Catalog, frontend +from babel.messages.frontend import BaseError +from babel.messages.pofile import read_po, write_po +from babel.util import LOCALTZ +from tests.messages.consts import data_dir, get_po_file_path, i18n_dir + + +@pytest.fixture +def cli(monkeypatch, capsys): + monkeypatch.chdir(data_dir) + monkeypatch.setattr(sys, 'argv', ['pybabel']) + _remove_log_handlers() + yield frontend.CommandLineInterface() + for dirname in ['lv_LV', 'ja_JP']: + locale_dir = os.path.join(i18n_dir, dirname) + if os.path.isdir(locale_dir): + shutil.rmtree(locale_dir) + _remove_log_handlers() + + +def _remove_log_handlers(): + # Logging handlers will be reused if possible (#227). This breaks the + # implicit assumption that our newly created StringIO for sys.stderr + # contains the console output. Removing the old handler ensures that a + # new handler with our new StringIO instance will be used. + log = logging.getLogger('babel') + for handler in list(log.handlers): # iterate over a copy: removeHandler() mutates log.handlers + log.removeHandler(handler) + + +def test_usage(cli): + with pytest.raises(SystemExit) as ei: + cli.run(["pybabel"]) + assert ei.value.code == 2 + assert sys.stderr.getvalue().lower() == """\ +usage: pybabel command [options] [args] + +pybabel: error: no valid command or option passed. try the -h/--help option for more information. +""" + + +def test_list_locales(cli): + """ + Test the command with the --list-locales arg. 
+ """ + result = cli.run(['pybabel', '--list-locales']) + assert not result + output = sys.stdout.getvalue() + assert 'fr_CH' in output + assert 'French (Switzerland)' in output + assert "\nb'" not in output # No bytes repr markers in output + + +def _run_init_catalog(cli): + i18n_dir = os.path.join(data_dir, 'project', 'i18n') + pot_path = os.path.join(data_dir, 'project', 'i18n', 'messages.pot') + cli.run(['pybabel', 'init', '--locale', 'en_US', '-d', i18n_dir, '-i', pot_path]) + + +def test_no_duplicated_output_for_multiple_runs(cli): + _run_init_catalog(cli) + first_output = sys.stderr.getvalue() + _run_init_catalog(cli) + second_output = sys.stderr.getvalue()[len(first_output):] + + # in case the log message is not duplicated we should get the same + # output as before + assert first_output == second_output + + +def test_frontend_can_log_to_predefined_handler(cli): + custom_stream = StringIO() + log = logging.getLogger('babel') + log.addHandler(logging.StreamHandler(custom_stream)) + + _run_init_catalog(cli) + assert id(sys.stderr) != id(custom_stream) + assert not sys.stderr.getvalue() + assert custom_stream.getvalue() + + +def test_help(cli): + with pytest.raises(SystemExit) as ei: + cli.run(['pybabel', '--help']) + assert not ei.value.code + content = sys.stdout.getvalue().lower() + assert 'options:' in content + assert all(command in content for command in ('init', 'update', 'compile', 'extract')) + + +@freeze_time("1994-11-11") +def test_extract_with_default_mapping(cli, pot_file): + cli.run([ + 'pybabel', + 'extract', + '--copyright-holder', 'FooBar, Inc.', + '--project', 'TestProject', '--version', '0.1', + '--msgid-bugs-address', 'bugs.address@email.tld', + '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', + '-o', pot_file, 'project', + ]) + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for TestProject. 
+# Copyright (C) {time.strftime('%Y')} FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , {time.strftime('%Y')}. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. TRANSLATOR: This will be a translator coment, +#. that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +#: project/ignored/this_wont_normally_be_here.py:11 +msgid "FooBar" +msgid_plural "FooBars" +msgstr[0] "" +msgstr[1] "" + +""" + assert expected_content == pot_file.read_text() + + +@freeze_time("1994-11-11") +def test_extract_with_mapping_file(cli, pot_file): + cli.run([ + 'pybabel', + 'extract', + '--copyright-holder', 'FooBar, Inc.', + '--project', 'TestProject', '--version', '0.1', + '--msgid-bugs-address', 'bugs.address@email.tld', + '--mapping', os.path.join(data_dir, 'mapping.cfg'), + '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', + '-o', pot_file, 'project', + ]) + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for TestProject. +# Copyright (C) {time.strftime('%Y')} FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , {time.strftime('%Y')}. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. TRANSLATOR: This will be a translator coment, +#. that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + assert expected_content == pot_file.read_text() + + +@freeze_time("1994-11-11") +def test_extract_with_exact_file(cli, pot_file): + """Tests that we can call extract with a particular file and only + strings from that file get extracted. (Note the absence of strings from file1.py) + """ + file_to_extract = os.path.join(data_dir, 'project', 'file2.py') + cli.run([ + 'pybabel', + 'extract', + '--copyright-holder', 'FooBar, Inc.', + '--project', 'TestProject', '--version', '0.1', + '--msgid-bugs-address', 'bugs.address@email.tld', + '--mapping', os.path.join(data_dir, 'mapping.cfg'), + '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', + '-o', pot_file, file_to_extract, + ]) + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for TestProject. +# Copyright (C) {time.strftime('%Y')} FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , {time.strftime('%Y')}. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + assert expected_content == pot_file.read_text() + + +@freeze_time("1994-11-11") +def test_init_with_output_dir(cli): + po_file = get_po_file_path('en_US') + cli.run([ + 'pybabel', + 'init', + '--locale', 'en_US', + '-d', os.path.join(i18n_dir), + '-i', os.path.join(i18n_dir, 'messages.pot'), + ]) + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# English (United States) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + with open(po_file) as f: + actual_content = f.read() + assert expected_content == actual_content + + +@freeze_time("1994-11-11") +def test_init_singular_plural_forms(cli): + po_file = get_po_file_path('ja_JP') + cli.run([ + 'pybabel', + 'init', + '--locale', 'ja_JP', + '-d', os.path.join(i18n_dir), + '-i', os.path.join(i18n_dir, 'messages.pot'), + ]) + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Japanese (Japan) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: ja_JP\n" +"Language-Team: ja_JP \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" + +""" + with open(po_file) as f: + actual_content = f.read() + assert expected_content == actual_content + + +@freeze_time("1994-11-11") +def test_init_more_than_2_plural_forms(cli): + po_file = get_po_file_path('lv_LV') + cli.run([ + 'pybabel', + 'init', + '--locale', 'lv_LV', + '-d', i18n_dir, + '-i', os.path.join(i18n_dir, 'messages.pot'), + ]) + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Latvian (Latvia) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: lv_LV\n" +"Language-Team: lv_LV \n" +"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 :" +" 2);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" +msgstr[2] "" + +""" + with open(po_file) as f: + actual_content = f.read() + assert expected_content == actual_content + + +def test_compile_catalog(cli): + po_file = get_po_file_path('de_DE') + mo_file = po_file.replace('.po', '.mo') + cli.run(['pybabel', 'compile', '--locale', 'de_DE', '-d', i18n_dir]) + assert not os.path.isfile(mo_file), f'Expected no file at {mo_file!r}' + assert sys.stderr.getvalue() == f'catalog {po_file} is marked as fuzzy, skipping\n' + + +def test_compile_fuzzy_catalog(cli): + po_file = get_po_file_path('de_DE') + mo_file = po_file.replace('.po', '.mo') + try: + cli.run(['pybabel', 'compile', '--locale', 'de_DE', '--use-fuzzy', '-d', i18n_dir]) + assert os.path.isfile(mo_file) + assert sys.stderr.getvalue() == f'compiling catalog {po_file} to {mo_file}\n' + finally: + if os.path.isfile(mo_file): + os.unlink(mo_file) + + +def test_compile_catalog_with_more_than_2_plural_forms(cli): + po_file = get_po_file_path('ru_RU') + mo_file = po_file.replace('.po', '.mo') + try: + cli.run(['pybabel', 'compile', '--locale', 'ru_RU', '--use-fuzzy', '-d', i18n_dir]) + assert os.path.isfile(mo_file) + assert sys.stderr.getvalue() == f'compiling catalog {po_file} to {mo_file}\n' + finally: + if os.path.isfile(mo_file): + os.unlink(mo_file) + + +def test_compile_catalog_multidomain(cli): + po_foo = os.path.join(i18n_dir, 'de_DE', 'LC_MESSAGES', 'foo.po') + po_bar = os.path.join(i18n_dir, 'de_DE', 'LC_MESSAGES', 'bar.po') + mo_foo = po_foo.replace('.po', '.mo') + mo_bar = po_bar.replace('.po', '.mo') + try: + cli.run([ + 'pybabel', 'compile', + '--locale', 'de_DE', + '--domain', 'foo bar', + '--use-fuzzy', + '-d', i18n_dir, + ]) + for mo_file in [mo_foo, mo_bar]: + assert os.path.isfile(mo_file) + assert sys.stderr.getvalue() == ( + f'compiling catalog {po_foo} to {mo_foo}\n' + 
f'compiling catalog {po_bar} to {mo_bar}\n' + ) + + finally: + for mo_file in [mo_foo, mo_bar]: + if os.path.isfile(mo_file): + os.unlink(mo_file) + + +def test_update(cli): + template = Catalog() + template.add("1") + template.add("2") + template.add("3") + tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + po_file = os.path.join(i18n_dir, 'temp1.po') + cli.run(['pybabel', 'init', '-l', 'fi', '-o', po_file, '-i', tmpl_file]) + with open(po_file) as infp: + catalog = read_po(infp) + assert len(catalog) == 3 + + # Add another entry to the template + + template.add("4") + + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + + cli.run(['pybabel', 'update', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + with open(po_file) as infp: + catalog = read_po(infp) + assert len(catalog) == 4 # Catalog was updated + + +def test_update_pot_creation_date(cli): + template = Catalog() + template.add("1") + template.add("2") + template.add("3") + tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + po_file = os.path.join(i18n_dir, 'temp1.po') + cli.run(['pybabel', 'init', '-l', 'fi', '-o', po_file, '-i', tmpl_file]) + with open(po_file) as infp: + catalog = read_po(infp) + assert len(catalog) == 3 + original_catalog_creation_date = catalog.creation_date + + # Update the template creation date + template.creation_date -= timedelta(minutes=3) + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + + cli.run(['pybabel', 'update', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + with open(po_file) as infp: + catalog = read_po(infp) + # We didn't ignore the creation date, so expect a diff + assert catalog.creation_date != original_catalog_creation_date + + # Reset the "original" + original_catalog_creation_date = catalog.creation_date + + # Update the template creation date again + # This time, pass the 
ignore flag and expect the creation date to stay unchanged + template.creation_date -= timedelta(minutes=5) + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + + cli.run(['pybabel', 'update', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file, '--ignore-pot-creation-date']) + + with open(po_file) as infp: + catalog = read_po(infp) + # We ignored creation date, so it should not have changed + assert catalog.creation_date == original_catalog_creation_date + + +def test_check(cli): + template = Catalog() + template.add("1") + template.add("2") + template.add("3") + tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + po_file = os.path.join(i18n_dir, 'temp1.po') + cli.run(['pybabel', 'init', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Update the catalog file + cli.run(['pybabel', 'update', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Run a check without introducing any changes to the template + cli.run(['pybabel', 'update', '--check', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Add a new entry and expect the check to fail + template.add("4") + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + + with pytest.raises(BaseError): + cli.run(['pybabel', 'update', '--check', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Write the latest changes to the po-file + cli.run(['pybabel', 'update', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Update an entry and expect the check to fail + template.add("4", locations=[("foo.py", 1)]) + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + + with pytest.raises(BaseError): + cli.run(['pybabel', 'update', '--check', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + +def test_check_pot_creation_date(cli): + template = Catalog() + template.add("1") + template.add("2") + template.add("3") + tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') + with open(tmpl_file, "wb") as outfp: + 
write_po(outfp, template) + po_file = os.path.join(i18n_dir, 'temp1.po') + cli.run(['pybabel', 'init', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Update the catalog file + cli.run(['pybabel', 'update', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Run a check without introducing any changes to the template + cli.run(['pybabel', 'update', '--check', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + # Run a check after changing the template creation date + template.creation_date = datetime.now() - timedelta(minutes=5) + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + + # Should fail without --ignore-pot-creation-date flag + with pytest.raises(BaseError): + cli.run(['pybabel', 'update', '--check', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + # Should pass with --ignore-pot-creation-date flag + cli.run([ + 'pybabel', 'update', + '--check', + '-l', 'fi_FI', + '-o', po_file, + '-i', tmpl_file, + '--ignore-pot-creation-date', + ]) + + +def test_update_init_missing(cli): + template = Catalog() + template.add("1") + template.add("2") + template.add("3") + tmpl_file = os.path.join(i18n_dir, 'temp2-template.pot') + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + po_file = os.path.join(i18n_dir, 'temp2.po') + + cli.run(['pybabel', 'update', '--init-missing', '-l', 'fi', '-o', po_file, '-i', tmpl_file]) + + with open(po_file) as infp: + catalog = read_po(infp) + assert len(catalog) == 3 + + # Add another entry to the template + + template.add("4") + + with open(tmpl_file, "wb") as outfp: + write_po(outfp, template) + + cli.run(['pybabel', 'update', '--init-missing', '-l', 'fi_FI', '-o', po_file, '-i', tmpl_file]) + + with open(po_file) as infp: + catalog = read_po(infp) + assert len(catalog) == 4 # Catalog was updated + + +def test_update_init_missing_creates_dest_dir(cli, tmp_path): + template = Catalog() + template.add("xyzzy") + template.add("ferg") + tmpl_file = tmp_path / 'temp.pot' + with tmpl_file.open("wb") 
as outfp: + write_po(outfp, template) + + dest_dir = tmp_path / 'newdir' / 'hierarchy' + assert not dest_dir.exists() + po_file = dest_dir / 'temp.po' + + cli.run(['pybabel', 'update', '--init-missing', '-l', 'ja', '-o', po_file, '-i', tmpl_file]) + assert dest_dir.exists() + + with po_file.open() as infp: + assert len(read_po(infp)) == 2 diff --git a/tests/messages/frontend/test_compile.py b/tests/messages/frontend/test_compile.py new file mode 100644 index 000000000..3db413dac --- /dev/null +++ b/tests/messages/frontend/test_compile.py @@ -0,0 +1,43 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from __future__ import annotations + +import pytest + +from babel.messages import frontend +from babel.messages.frontend import OptionError +from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA, data_dir +from tests.messages.utils import Distribution + + +@pytest.fixture +def compile_catalog_cmd(monkeypatch): + monkeypatch.chdir(data_dir) + dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + cmd = frontend.CompileCatalog(dist) + cmd.initialize_options() + return cmd + + +def test_no_directory_or_output_file_specified(compile_catalog_cmd): + compile_catalog_cmd.locale = 'en_US' + compile_catalog_cmd.input_file = 'dummy' + with pytest.raises(OptionError): + compile_catalog_cmd.finalize_options() + + +def test_no_directory_or_input_file_specified(compile_catalog_cmd): + compile_catalog_cmd.locale = 'en_US' + compile_catalog_cmd.output_file = 'dummy' + with pytest.raises(OptionError): + compile_catalog_cmd.finalize_options() diff --git a/tests/messages/frontend/test_extract.py b/tests/messages/frontend/test_extract.py new file mode 100644 index 000000000..712200fbb --- /dev/null +++ b/tests/messages/frontend/test_extract.py @@ -0,0 +1,334 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from __future__ import annotations + +import time +from datetime import datetime + +import pytest +from freezegun import freeze_time + +from babel import __version__ as VERSION +from babel.dates import format_datetime +from babel.messages import frontend +from babel.messages.frontend import OptionError +from babel.messages.pofile import read_po +from babel.util import LOCALTZ +from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA, data_dir, this_dir +from tests.messages.utils import Distribution + + +@pytest.fixture() +def extract_cmd(monkeypatch): + monkeypatch.chdir(data_dir) + dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + extract_cmd = frontend.ExtractMessages(dist) + extract_cmd.initialize_options() + return extract_cmd + + +def test_neither_default_nor_custom_keywords(extract_cmd): + extract_cmd.output_file = 'dummy' + extract_cmd.no_default_keywords = True + with pytest.raises(OptionError): + extract_cmd.finalize_options() + + +def test_no_output_file_specified(extract_cmd): + with pytest.raises(OptionError): + extract_cmd.finalize_options() + + +def test_both_sort_output_and_sort_by_file(extract_cmd): + extract_cmd.output_file = 'dummy' + extract_cmd.sort_output = True + extract_cmd.sort_by_file = True + with pytest.raises(OptionError): + extract_cmd.finalize_options() + + +def test_invalid_file_or_dir_input_path(extract_cmd): + extract_cmd.input_paths = 'nonexistent_path' + extract_cmd.output_file = 'dummy' + with pytest.raises(OptionError): + extract_cmd.finalize_options() + + +def test_input_paths_is_treated_as_list(extract_cmd, pot_file): + extract_cmd.input_paths = data_dir + extract_cmd.output_file = pot_file + extract_cmd.finalize_options() + extract_cmd.run() + + with pot_file.open() as f: + catalog = read_po(f) + msg = catalog.get('bar') + assert len(msg.locations) == 1 + assert 'file1.py' in msg.locations[0][0] + + +def test_input_paths_handle_spaces_after_comma(extract_cmd, pot_file): + extract_cmd.input_paths = f"{this_dir}, 
{data_dir}" + extract_cmd.output_file = pot_file + extract_cmd.finalize_options() + assert extract_cmd.input_paths == [this_dir, data_dir] + + +def test_input_dirs_is_alias_for_input_paths(extract_cmd, pot_file): + extract_cmd.input_dirs = this_dir + extract_cmd.output_file = pot_file + extract_cmd.finalize_options() + # Gets listified in `finalize_options`: + assert extract_cmd.input_paths == [extract_cmd.input_dirs] + + +def test_input_dirs_is_mutually_exclusive_with_input_paths(extract_cmd, pot_file): + extract_cmd.input_dirs = this_dir + extract_cmd.input_paths = this_dir + extract_cmd.output_file = pot_file + with pytest.raises(OptionError): + extract_cmd.finalize_options() + + +@freeze_time("1994-11-11") +def test_extraction_with_default_mapping(extract_cmd, pot_file): + extract_cmd.copyright_holder = 'FooBar, Inc.' + extract_cmd.msgid_bugs_address = 'bugs.address@email.tld' + extract_cmd.output_file = pot_file + extract_cmd.add_comments = 'TRANSLATOR:,TRANSLATORS:' + + extract_cmd.finalize_options() + extract_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for TestProject. +# Copyright (C) {time.strftime('%Y')} FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , {time.strftime('%Y')}. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. TRANSLATOR: This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +#: project/ignored/this_wont_normally_be_here.py:11 +msgid "FooBar" +msgid_plural "FooBars" +msgstr[0] "" +msgstr[1] "" + +""" + assert expected_content == pot_file.read_text() + + +@freeze_time("1994-11-11") +def test_extraction_with_mapping_file(extract_cmd, pot_file): + extract_cmd.copyright_holder = 'FooBar, Inc.' + extract_cmd.msgid_bugs_address = 'bugs.address@email.tld' + extract_cmd.mapping_file = 'mapping.cfg' + extract_cmd.output_file = pot_file + extract_cmd.add_comments = 'TRANSLATOR:,TRANSLATORS:' + + extract_cmd.finalize_options() + extract_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for TestProject. +# Copyright (C) {time.strftime('%Y')} FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , {time.strftime('%Y')}. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. TRANSLATOR: This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + assert expected_content == pot_file.read_text() + + +@freeze_time("1994-11-11") +@pytest.mark.parametrize("ignore_pattern", ['**/ignored/**.*', 'ignored']) +def test_extraction_with_mapping_dict(extract_cmd, pot_file, ignore_pattern): + extract_cmd.distribution.message_extractors = { + 'project': [ + (ignore_pattern, 'ignore', None), + ('**.py', 'python', None), + ], + } + extract_cmd.copyright_holder = 'FooBar, Inc.' + extract_cmd.msgid_bugs_address = 'bugs.address@email.tld' + extract_cmd.output_file = pot_file + extract_cmd.add_comments = 'TRANSLATOR:,TRANSLATORS:' + + extract_cmd.finalize_options() + extract_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for TestProject. +# Copyright (C) {time.strftime('%Y')} FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , {time.strftime('%Y')}. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. TRANSLATOR: This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + assert expected_content == pot_file.read_text() + + +def test_extraction_add_location_file(extract_cmd, pot_file): + extract_cmd.distribution.message_extractors = { + 'project': [ + ('**/ignored/**.*', 'ignore', None), + ('**.py', 'python', None), + ], + } + extract_cmd.output_file = pot_file + extract_cmd.add_location = 'file' + extract_cmd.omit_header = True + + extract_cmd.finalize_options() + extract_cmd.run() + + expected_content = r"""#: project/file1.py +msgid "bar" +msgstr "" + +#: project/file2.py +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + assert expected_content == pot_file.read_text() + + +def test_extraction_with_mapping_file_with_keywords(extract_cmd, pot_file): + """ + Test that keywords specified in mapping config file are properly parsed, + and merged with default keywords. + """ + extract_cmd.mapping_file = 'mapping_with_keywords.cfg' + extract_cmd.output_file = pot_file + extract_cmd.input_paths = 'project' + + extract_cmd.finalize_options() + extract_cmd.run() + + with pot_file.open() as f: + catalog = read_po(f) + + for msgid in ('bar', 'Choice X', 'Choice Y', 'Option C', 'Option A'): + msg = catalog[msgid] + assert not msg.auto_comments # This configuration didn't specify SPECIAL:... + assert msg.pluralizable == (msgid == 'Option A') + + +def test_extraction_with_mapping_file_with_comments(extract_cmd, pot_file): + """ + Test that add_comments specified in mapping config file are properly parsed. + Uses TOML format to test that code path. 
+ """ + extract_cmd.mapping_file = 'mapping_with_keywords_and_comments.toml' + extract_cmd.output_file = pot_file + extract_cmd.input_paths = 'project/issue_1224_test.py' + + extract_cmd.finalize_options() + extract_cmd.run() + + with pot_file.open() as f: + catalog = read_po(f) + + # Check that messages were extracted and have the expected auto_comments + for msgid, expected_comment in [ + ('Choice X', 'extracted'), + ('Choice Y', 'special'), + ('Option C', None), + ('Option A', None), + ]: + msg = catalog[msgid] + if expected_comment: + assert any('SPECIAL' in comment and expected_comment in comment for comment in msg.auto_comments) + else: + assert not msg.auto_comments + assert msg.pluralizable == (msgid == 'Option A') diff --git a/tests/messages/frontend/test_frontend.py b/tests/messages/frontend/test_frontend.py new file mode 100644 index 000000000..17a7dbbd8 --- /dev/null +++ b/tests/messages/frontend/test_frontend.py @@ -0,0 +1,409 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from __future__ import annotations + +import re +import shlex +from functools import partial +from io import BytesIO, StringIO + +import pytest + +from babel.messages import Catalog, extract, frontend +from babel.messages.frontend import ( + CommandLineInterface, + ExtractMessages, + UpdateCatalog, +) +from babel.messages.pofile import write_po +from tests.messages.consts import project_dir +from tests.messages.utils import CUSTOM_EXTRACTOR_COOKIE + +mapping_cfg = """ +[extractors] +custom = tests.messages.utils:custom_extractor + +# Special extractor for a given Python file +[custom: special.py] +treat = delicious + +# Python source files +[python: **.py] + +# Genshi templates +[genshi: **/templates/**.html] +include_attrs = + +[genshi: **/templates/**.txt] +template_class = genshi.template:TextTemplate +encoding = latin-1 + +# Some custom extractor +[custom: **/custom/*.*] +""" + +mapping_toml = """ +[extractors] +custom = "tests.messages.utils:custom_extractor" + +# Special extractor for a given Python file +[[mappings]] +method = "custom" +pattern = "special.py" +treat = "delightful" + +# Python source files +[[mappings]] +method = "python" +pattern = "**.py" + +# Genshi templates +[[mappings]] +method = "genshi" +pattern = "**/templates/**.html" +include_attrs = "" + +[[mappings]] +method = "genshi" +pattern = "**/templates/**.txt" +template_class = "genshi.template:TextTemplate" +encoding = "latin-1" + +# Some custom extractor +[[mappings]] +method = "custom" +pattern = "**/custom/*.*" +""" + + +@pytest.mark.parametrize( + ("data", "parser", "preprocess", "is_toml"), + [ + ( + mapping_cfg, + frontend.parse_mapping_cfg, + None, + False, + ), + ( + mapping_toml, + frontend._parse_mapping_toml, + None, + True, + ), + ( + mapping_toml, + partial(frontend._parse_mapping_toml, style="pyproject.toml"), + lambda s: re.sub(r"^(\[+)", r"\1tool.babel.", s, flags=re.MULTILINE), + True, + ), + ], + ids=("cfg", "toml", "pyproject-toml"), +) +def test_parse_mapping(data: 
str, parser, preprocess, is_toml): + if preprocess: + data = preprocess(data) + if is_toml: + buf = BytesIO(data.encode()) + else: + buf = StringIO(data) + + method_map, options_map = parser(buf) + assert len(method_map) == 5 + + assert method_map[1] == ('**.py', 'python') + assert options_map['**.py'] == {} + assert method_map[2] == ('**/templates/**.html', 'genshi') + assert options_map['**/templates/**.html']['include_attrs'] == '' + assert method_map[3] == ('**/templates/**.txt', 'genshi') + assert (options_map['**/templates/**.txt']['template_class'] + == 'genshi.template:TextTemplate') + assert options_map['**/templates/**.txt']['encoding'] == 'latin-1' + assert method_map[4] == ('**/custom/*.*', 'tests.messages.utils:custom_extractor') + assert options_map['**/custom/*.*'] == {} + + +def test_parse_keywords(): + kw = frontend.parse_keywords(['_', 'dgettext:2', 'dngettext:2,3', 'pgettext:1c,2']) + assert kw == { + '_': None, + 'dgettext': (2,), + 'dngettext': (2, 3), + 'pgettext': ((1, 'c'), 2), + } + + +def test_parse_keywords_with_t(): + kw = frontend.parse_keywords(['_:1', '_:2,2t', '_:2c,3,3t']) + + assert kw == { + '_': { + None: (1,), + 2: (2,), + 3: ((2, 'c'), 3), + }, + } + + +def test_extract_messages_with_t(): + content = rb""" +_("1 arg, arg 1") +_("2 args, arg 1", "2 args, arg 2") +_("3 args, arg 1", "3 args, arg 2", "3 args, arg 3") +_("4 args, arg 1", "4 args, arg 2", "4 args, arg 3", "4 args, arg 4") +""" + kw = frontend.parse_keywords(['_:1', '_:2,2t', '_:2c,3,3t']) + result = list(extract.extract("python", BytesIO(content), kw)) + expected = [(2, '1 arg, arg 1', [], None), + (3, '2 args, arg 1', [], None), + (3, '2 args, arg 2', [], None), + (4, '3 args, arg 1', [], None), + (4, '3 args, arg 3', [], '3 args, arg 2'), + (5, '4 args, arg 1', [], None)] + assert result == expected + + +def configure_cli_command(cmdline: str | list[str]): + """ + Helper to configure a command class, but not run it just yet. 
+ + :param cmdline: The command line (sans the executable name) + :return: Command instance + """ + args = shlex.split(cmdline) if isinstance(cmdline, str) else list(cmdline) + cli = CommandLineInterface() + cmdinst = cli._configure_command(cmdname=args[0], argv=args[1:]) + return cmdinst + + +@pytest.mark.parametrize("split", (False, True)) +@pytest.mark.parametrize("arg_name", ("-k", "--keyword", "--keywords")) +def test_extract_keyword_args_384(split, arg_name): + # This is a regression test for https://github.com/python-babel/babel/issues/384 + # and it also tests that the rest of the forgotten aliases/shorthands implied by + # https://github.com/python-babel/babel/issues/390 are re-remembered (or rather + # that the mechanism for remembering them again works). + + kwarg_specs = [ + "gettext_noop", + "gettext_lazy", + "ngettext_lazy:1,2", + "ugettext_noop", + "ugettext_lazy", + "ungettext_lazy:1,2", + "pgettext_lazy:1c,2", + "npgettext_lazy:1c,2,3", + ] + + if split: # Generate a command line with multiple -ks + kwarg_text = " ".join(f"{arg_name} {kwarg_spec}" for kwarg_spec in kwarg_specs) + else: # Generate a single space-separated -k + specs = ' '.join(kwarg_specs) + kwarg_text = f'{arg_name} "{specs}"' + + # (Both of those invocation styles should be equivalent, so there is no parametrization from here on out) + + cmdinst = configure_cli_command( + f"extract -F babel-django.cfg --add-comments Translators: -o django232.pot {kwarg_text} .", + ) + assert isinstance(cmdinst, ExtractMessages) + assert set(cmdinst.keywords.keys()) == {'_', 'dgettext', 'dngettext', + 'dnpgettext', 'dpgettext', + 'gettext', 'gettext_lazy', + 'gettext_noop', 'N_', 'ngettext', + 'ngettext_lazy', 'npgettext', + 'npgettext_lazy', 'pgettext', + 'pgettext_lazy', 'ugettext', + 'ugettext_lazy', 'ugettext_noop', + 'ungettext', 'ungettext_lazy'} + + +def test_update_catalog_boolean_args(): + cmdinst = configure_cli_command( + "update --init-missing --no-wrap -N --ignore-obsolete --previous 
-i foo -o foo -l en") + assert isinstance(cmdinst, UpdateCatalog) + assert cmdinst.init_missing is True + assert cmdinst.no_wrap is True + assert cmdinst.no_fuzzy_matching is True + assert cmdinst.ignore_obsolete is True + assert cmdinst.previous is False # Mutually exclusive with no_fuzzy_matching + + +def test_compile_catalog_dir(tmp_path): + """ + Test that `compile` can compile all locales in a directory. + """ + locales = ("fi_FI", "sv_SE") + for locale in locales: + l_dir = tmp_path / locale / "LC_MESSAGES" + l_dir.mkdir(parents=True) + po_file = l_dir / 'messages.po' + po_file.write_text('msgid "foo"\nmsgstr "bar"\n') + cmdinst = configure_cli_command([ # fmt: skip + 'compile', + '--statistics', + '--use-fuzzy', + '-d', str(tmp_path), + ]) + assert not cmdinst.run() + for locale in locales: + assert (tmp_path / locale / "LC_MESSAGES" / "messages.mo").exists() + + +def test_compile_catalog_explicit(tmp_path): + """ + Test that `compile` can explicitly compile a single catalog. + """ + po_file = tmp_path / 'temp.po' + po_file.write_text('msgid "foo"\nmsgstr "bar"\n') + mo_file = tmp_path / 'temp.mo' + cmdinst = configure_cli_command([ # fmt: skip + 'compile', + '--statistics', + '--use-fuzzy', + '-i', str(po_file), + '-o', str(mo_file), + '-l', 'fi_FI', + ]) + assert not cmdinst.run() + assert mo_file.exists() + + +@pytest.mark.parametrize("explicit_locale", (None, 'fi_FI'), ids=("implicit", "explicit")) +def test_update_dir(tmp_path, explicit_locale: bool): + """ + Test that `update` can deal with directories too. 
+ """ + template = Catalog() + template.add("1") + template.add("2") + template.add("3") + tmpl_file = tmp_path / 'temp-template.pot' + with tmpl_file.open("wb") as outfp: + write_po(outfp, template) + locales = ("fi_FI", "sv_SE") + for locale in locales: + l_dir = tmp_path / locale / "LC_MESSAGES" + l_dir.mkdir(parents=True) + po_file = l_dir / 'messages.po' + po_file.touch() + cmdinst = configure_cli_command([ # fmt: skip + 'update', + '-i', str(tmpl_file), + '-d', str(tmp_path), + *(['-l', explicit_locale] if explicit_locale else []), + ]) + assert not cmdinst.run() + for locale in locales: + if explicit_locale and locale != explicit_locale: + continue + assert (tmp_path / locale / "LC_MESSAGES" / "messages.po").stat().st_size > 0 + + +def test_extract_cli_knows_dash_s(): + # This is a regression test for https://github.com/python-babel/babel/issues/390 + cmdinst = configure_cli_command("extract -s -o foo babel") + assert isinstance(cmdinst, ExtractMessages) + assert cmdinst.strip_comments + + +def test_extract_cli_knows_dash_dash_last_dash_translator(): + cmdinst = configure_cli_command('extract --last-translator "FULL NAME EMAIL@ADDRESS" -o foo babel') + assert isinstance(cmdinst, ExtractMessages) + assert cmdinst.last_translator == "FULL NAME EMAIL@ADDRESS" + + +def test_extract_add_location(): + cmdinst = configure_cli_command("extract -o foo babel --add-location full") + assert isinstance(cmdinst, ExtractMessages) + assert cmdinst.add_location == 'full' + assert not cmdinst.no_location + assert cmdinst.include_lineno + + cmdinst = configure_cli_command("extract -o foo babel --add-location file") + assert isinstance(cmdinst, ExtractMessages) + assert cmdinst.add_location == 'file' + assert not cmdinst.no_location + assert not cmdinst.include_lineno + + cmdinst = configure_cli_command("extract -o foo babel --add-location never") + assert isinstance(cmdinst, ExtractMessages) + assert cmdinst.add_location == 'never' + assert cmdinst.no_location + + +def 
test_extract_error_code(monkeypatch, capsys): + monkeypatch.chdir(project_dir) + cmdinst = configure_cli_command("compile --domain=messages --directory i18n --locale fi_BUGGY") + assert cmdinst.run() == 1 + out, err = capsys.readouterr() + if err: + assert "unknown named placeholder 'merkki'" in err + + +@pytest.mark.parametrize("with_underscore_ignore", (False, True)) +def test_extract_ignore_dirs(monkeypatch, capsys, tmp_path, with_underscore_ignore): + pot_file = tmp_path / 'temp.pot' + monkeypatch.chdir(project_dir) + cmd = f"extract . -o '{pot_file}' --ignore-dirs '*ignored* .*' " + if with_underscore_ignore: + # This also tests that multiple arguments are supported. + cmd += "--ignore-dirs '_*'" + cmdinst = configure_cli_command(cmd) + assert isinstance(cmdinst, ExtractMessages) + assert cmdinst.directory_filter + cmdinst.run() + pot_content = pot_file.read_text() + + # The `ignored` directory is now actually ignored: + assert 'this_wont_normally_be_here' not in pot_content + + # Since we manually set a filter, the otherwise `_hidden` directory is walked into, + # unless we opt in to ignore it again + assert ('ssshhh....' in pot_content) != with_underscore_ignore + assert ('_hidden_by_default' in pot_content) != with_underscore_ignore + + +def test_extract_header_comment(monkeypatch, tmp_path): + pot_file = tmp_path / 'temp.pot' + monkeypatch.chdir(project_dir) + cmdinst = configure_cli_command(f"extract . -o '{pot_file}' --header-comment 'Boing' ") + cmdinst.run() + pot_content = pot_file.read_text() + assert 'Boing' in pot_content + + +@pytest.mark.parametrize("mapping_format", ("toml", "cfg")) +def test_pr_1121(tmp_path, monkeypatch, caplog, mapping_format): + """ + Test that extraction uses the first matching method and options, + instead of the first matching method and last matching options. + + Without the fix in PR #1121, this test would fail, + since the `custom_extractor` isn't passed a delicious treat via + the configuration. 
+ """ + if mapping_format == "cfg": + mapping_file = (tmp_path / "mapping.cfg") + mapping_file.write_text(mapping_cfg) + else: + mapping_file = (tmp_path / "mapping.toml") + mapping_file.write_text(mapping_toml) + (tmp_path / "special.py").write_text("# this file is special") + pot_path = (tmp_path / "output.pot") + monkeypatch.chdir(tmp_path) + cmdinst = configure_cli_command(f"extract . -o {shlex.quote(str(pot_path))} --mapping {shlex.quote(mapping_file.name)}") + assert isinstance(cmdinst, ExtractMessages) + cmdinst.run() + # If the custom extractor didn't run, we wouldn't see the cookie in there. + assert CUSTOM_EXTRACTOR_COOKIE in pot_path.read_text() diff --git a/tests/messages/frontend/test_init.py b/tests/messages/frontend/test_init.py new file mode 100644 index 000000000..e69e5cce3 --- /dev/null +++ b/tests/messages/frontend/test_init.py @@ -0,0 +1,371 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from __future__ import annotations + +import os +import shutil +from datetime import datetime + +import pytest +from freezegun import freeze_time + +from babel import __version__ as VERSION +from babel.dates import format_datetime +from babel.messages import frontend +from babel.util import LOCALTZ +from tests.messages.consts import ( + TEST_PROJECT_DISTRIBUTION_DATA, + data_dir, + get_po_file_path, + i18n_dir, +) +from tests.messages.utils import Distribution + + +@pytest.fixture +def init_cmd(monkeypatch): + monkeypatch.chdir(data_dir) + dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + init_cmd = frontend.InitCatalog(dist) + init_cmd.initialize_options() + yield init_cmd + for dirname in ['en_US', 'ja_JP', 'lv_LV']: + locale_dir = os.path.join(i18n_dir, dirname) + if os.path.isdir(locale_dir): + shutil.rmtree(locale_dir) + + +def test_no_input_file(init_cmd): + init_cmd.locale = 'en_US' + init_cmd.output_file = 'dummy' + with pytest.raises(frontend.OptionError): + init_cmd.finalize_options() + + +def test_no_locale(init_cmd): + init_cmd.input_file = 'dummy' + init_cmd.output_file = 'dummy' + with pytest.raises(frontend.OptionError): + init_cmd.finalize_options() + + +@freeze_time("1994-11-11") +def test_with_output_dir(init_cmd): + init_cmd.input_file = 'project/i18n/messages.pot' + init_cmd.locale = 'en_US' + init_cmd.output_dir = 'project/i18n' + + init_cmd.finalize_options() + init_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# English (United States) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. 
+# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + with open(get_po_file_path('en_US')) as f: + actual_content = f.read() + assert expected_content == actual_content + + +@freeze_time("1994-11-11") +def test_keeps_catalog_non_fuzzy(init_cmd): + init_cmd.input_file = 'project/i18n/messages_non_fuzzy.pot' + init_cmd.locale = 'en_US' + init_cmd.output_dir = 'project/i18n' + + init_cmd.finalize_options() + init_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# English (United States) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + with open(get_po_file_path('en_US')) as f: + actual_content = f.read() + assert expected_content == actual_content + + +@freeze_time("1994-11-11") +def test_correct_init_more_than_2_plurals(init_cmd): + init_cmd.input_file = 'project/i18n/messages.pot' + init_cmd.locale = 'lv_LV' + init_cmd.output_dir = 'project/i18n' + + init_cmd.finalize_options() + init_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Latvian (Latvia) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: lv_LV\n" +"Language-Team: lv_LV \n" +"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 :" +" 2);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" +msgstr[2] "" + +""" + with open(get_po_file_path('lv_LV')) as f: + actual_content = f.read() + assert expected_content == actual_content + + +@freeze_time("1994-11-11") +def test_correct_init_singular_plural_forms(init_cmd): + init_cmd.input_file = 'project/i18n/messages.pot' + init_cmd.locale = 'ja_JP' + init_cmd.output_dir = 'project/i18n' + + init_cmd.finalize_options() + init_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='ja_JP') + expected_content = fr"""# Japanese (Japan) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: ja_JP\n" +"Language-Team: ja_JP \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" + +""" + with open(get_po_file_path('ja_JP')) as f: + actual_content = f.read() + assert expected_content == actual_content + + +@freeze_time("1994-11-11") +def test_supports_no_wrap(init_cmd): + init_cmd.input_file = 'project/i18n/long_messages.pot' + init_cmd.locale = 'en_US' + init_cmd.output_dir = 'project/i18n' + + long_message = '"' + 'xxxxx ' * 15 + '"' + + with open('project/i18n/messages.pot', 'rb') as f: + pot_contents = f.read().decode('latin-1') + pot_with_very_long_line = pot_contents.replace('"bar"', long_message) + with open(init_cmd.input_file, 'wb') as f: + f.write(pot_with_very_long_line.encode('latin-1')) + init_cmd.no_wrap = True + + init_cmd.finalize_options() + init_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en_US') + expected_content = fr"""# English (United States) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid {long_message} +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + with open(get_po_file_path('en_US')) as f: + actual_content = f.read() + assert expected_content == actual_content + + +@freeze_time("1994-11-11") +def test_supports_width(init_cmd): + init_cmd.input_file = 'project/i18n/long_messages.pot' + init_cmd.locale = 'en_US' + init_cmd.output_dir = 'project/i18n' + + long_message = '"' + 'xxxxx ' * 15 + '"' + + with open('project/i18n/messages.pot', 'rb') as f: + pot_contents = f.read().decode('latin-1') + pot_with_very_long_line = pot_contents.replace('"bar"', long_message) + with open(init_cmd.input_file, 'wb') as f: + f.write(pot_with_very_long_line.encode('latin-1')) + init_cmd.width = 120 + init_cmd.finalize_options() + init_cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en_US') + expected_content = fr"""# English (United States) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR , 2007. +# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: {date}\n" +"Last-Translator: FULL NAME \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#. This will be a translator coment, +#. 
that will include several lines +#: project/file1.py:8 +msgid {long_message} +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" + +""" + with open(get_po_file_path('en_US')) as f: + actual_content = f.read() + assert expected_content == actual_content diff --git a/tests/messages/test_catalog.py b/tests/messages/test_catalog.py index 692931ea2..191a2a498 100644 --- a/tests/messages/test_catalog.py +++ b/tests/messages/test_catalog.py @@ -12,7 +12,7 @@ import copy import datetime -import unittest +import pickle from io import StringIO from babel.dates import UTC, format_datetime @@ -20,215 +20,228 @@ from babel.util import FixedOffsetTimezone -class MessageTestCase(unittest.TestCase): - - def test_python_format(self): - assert catalog.PYTHON_FORMAT.search('foo %d bar') - assert catalog.PYTHON_FORMAT.search('foo %s bar') - assert catalog.PYTHON_FORMAT.search('foo %r bar') - assert catalog.PYTHON_FORMAT.search('foo %(name).1f') - assert catalog.PYTHON_FORMAT.search('foo %(name)3.3f') - assert catalog.PYTHON_FORMAT.search('foo %(name)3f') - assert catalog.PYTHON_FORMAT.search('foo %(name)06d') - assert catalog.PYTHON_FORMAT.search('foo %(name)Li') - assert catalog.PYTHON_FORMAT.search('foo %(name)#d') - assert catalog.PYTHON_FORMAT.search('foo %(name)-4.4hs') - assert catalog.PYTHON_FORMAT.search('foo %(name)*.3f') - assert catalog.PYTHON_FORMAT.search('foo %(name).*f') - assert catalog.PYTHON_FORMAT.search('foo %(name)3.*f') - assert catalog.PYTHON_FORMAT.search('foo %(name)*.*f') - assert catalog.PYTHON_FORMAT.search('foo %()s') - - def test_python_brace_format(self): - assert not catalog._has_python_brace_format('') - assert not catalog._has_python_brace_format('foo') - assert not catalog._has_python_brace_format('{') - assert not catalog._has_python_brace_format('}') - assert not catalog._has_python_brace_format('{} {') - assert not catalog._has_python_brace_format('{{}}') - assert 
catalog._has_python_brace_format('{}') - assert catalog._has_python_brace_format('foo {name}') - assert catalog._has_python_brace_format('foo {name!s}') - assert catalog._has_python_brace_format('foo {name!r}') - assert catalog._has_python_brace_format('foo {name!a}') - assert catalog._has_python_brace_format('foo {name!r:10}') - assert catalog._has_python_brace_format('foo {name!r:10.2}') - assert catalog._has_python_brace_format('foo {name!r:10.2f}') - assert catalog._has_python_brace_format('foo {name!r:10.2f} {name!r:10.2f}') - assert catalog._has_python_brace_format('foo {name!r:10.2f=}') - - def test_translator_comments(self): - mess = catalog.Message('foo', user_comments=['Comment About `foo`']) - assert mess.user_comments == ['Comment About `foo`'] - mess = catalog.Message('foo', - auto_comments=['Comment 1 About `foo`', - 'Comment 2 About `foo`']) - assert mess.auto_comments == ['Comment 1 About `foo`', 'Comment 2 About `foo`'] - - def test_clone_message_object(self): - msg = catalog.Message('foo', locations=[('foo.py', 42)]) - clone = msg.clone() - clone.locations.append(('bar.py', 42)) - assert msg.locations == [('foo.py', 42)] - msg.flags.add('fuzzy') - assert not clone.fuzzy and msg.fuzzy - - -class CatalogTestCase(unittest.TestCase): - - def test_add_returns_message_instance(self): - cat = catalog.Catalog() - message = cat.add('foo') - assert message.id == 'foo' - - def test_two_messages_with_same_singular(self): - cat = catalog.Catalog() - cat.add('foo') - cat.add(('foo', 'foos')) - assert len(cat) == 1 - - def test_duplicate_auto_comment(self): - cat = catalog.Catalog() - cat.add('foo', auto_comments=['A comment']) - cat.add('foo', auto_comments=['A comment', 'Another comment']) - assert cat['foo'].auto_comments == ['A comment', 'Another comment'] - - def test_duplicate_user_comment(self): - cat = catalog.Catalog() - cat.add('foo', user_comments=['A comment']) - cat.add('foo', user_comments=['A comment', 'Another comment']) - assert 
cat['foo'].user_comments == ['A comment', 'Another comment'] - - def test_duplicate_location(self): - cat = catalog.Catalog() - cat.add('foo', locations=[('foo.py', 1)]) - cat.add('foo', locations=[('foo.py', 1)]) - assert cat['foo'].locations == [('foo.py', 1)] - - def test_update_message_changed_to_plural(self): - cat = catalog.Catalog() - cat.add('foo', 'Voh') - tmpl = catalog.Catalog() - tmpl.add(('foo', 'foos')) - cat.update(tmpl) - assert cat['foo'].string == ('Voh', '') - assert cat['foo'].fuzzy - - def test_update_message_changed_to_simple(self): - cat = catalog.Catalog() - cat.add('foo' 'foos', ('Voh', 'Vöhs')) - tmpl = catalog.Catalog() - tmpl.add('foo') - cat.update(tmpl) - assert cat['foo'].string == 'Voh' - assert cat['foo'].fuzzy - - def test_update_message_updates_comments(self): - cat = catalog.Catalog() - cat['foo'] = catalog.Message('foo', locations=[('main.py', 5)]) - assert cat['foo'].auto_comments == [] - assert cat['foo'].user_comments == [] - # Update cat[u'foo'] with a new location and a comment - cat['foo'] = catalog.Message('foo', locations=[('main.py', 7)], - user_comments=['Foo Bar comment 1']) - assert cat['foo'].user_comments == ['Foo Bar comment 1'] - # now add yet another location with another comment - cat['foo'] = catalog.Message('foo', locations=[('main.py', 9)], - auto_comments=['Foo Bar comment 2']) - assert cat['foo'].auto_comments == ['Foo Bar comment 2'] - - def test_update_fuzzy_matching_with_case_change(self): - cat = catalog.Catalog() - cat.add('FOO', 'Voh') - cat.add('bar', 'Bahr') - tmpl = catalog.Catalog() - tmpl.add('foo') - cat.update(tmpl) - assert len(cat.obsolete) == 1 - assert 'FOO' not in cat - - assert cat['foo'].string == 'Voh' - assert cat['foo'].fuzzy is True - - def test_update_fuzzy_matching_with_char_change(self): - cat = catalog.Catalog() - cat.add('fo', 'Voh') - cat.add('bar', 'Bahr') - tmpl = catalog.Catalog() - tmpl.add('foo') - cat.update(tmpl) - assert len(cat.obsolete) == 1 - assert 'fo' not in cat 
- - assert cat['foo'].string == 'Voh' - assert cat['foo'].fuzzy is True - - def test_update_fuzzy_matching_no_msgstr(self): - cat = catalog.Catalog() - cat.add('fo', '') - tmpl = catalog.Catalog() - tmpl.add('fo') - tmpl.add('foo') - cat.update(tmpl) - assert 'fo' in cat - assert 'foo' in cat - - assert cat['fo'].string == '' - assert cat['fo'].fuzzy is False - assert cat['foo'].string is None - assert cat['foo'].fuzzy is False - - def test_update_fuzzy_matching_with_new_context(self): - cat = catalog.Catalog() - cat.add('foo', 'Voh') - cat.add('bar', 'Bahr') - tmpl = catalog.Catalog() - tmpl.add('Foo', context='Menu') - cat.update(tmpl) - assert len(cat.obsolete) == 1 - assert 'foo' not in cat - - message = cat.get('Foo', 'Menu') - assert message.string == 'Voh' - assert message.fuzzy is True - assert message.context == 'Menu' - - def test_update_fuzzy_matching_with_changed_context(self): - cat = catalog.Catalog() - cat.add('foo', 'Voh', context='Menu|File') - cat.add('bar', 'Bahr', context='Menu|File') - tmpl = catalog.Catalog() - tmpl.add('Foo', context='Menu|Edit') - cat.update(tmpl) - assert len(cat.obsolete) == 1 - assert cat.get('Foo', 'Menu|File') is None - - message = cat.get('Foo', 'Menu|Edit') - assert message.string == 'Voh' - assert message.fuzzy is True - assert message.context == 'Menu|Edit' - - def test_update_fuzzy_matching_no_cascading(self): - cat = catalog.Catalog() - cat.add('fo', 'Voh') - cat.add('foo', 'Vohe') - tmpl = catalog.Catalog() - tmpl.add('fo') - tmpl.add('foo') - tmpl.add('fooo') - cat.update(tmpl) - assert 'fo' in cat - assert 'foo' in cat - - assert cat['fo'].string == 'Voh' - assert cat['fo'].fuzzy is False - assert cat['foo'].string == 'Vohe' - assert cat['foo'].fuzzy is False - assert cat['fooo'].string == 'Vohe' - assert cat['fooo'].fuzzy is True - - def test_update_fuzzy_matching_long_string(self): - lipsum = "\ +def test_message_python_format(): + assert catalog.PYTHON_FORMAT.search('foo %d bar') + assert 
catalog.PYTHON_FORMAT.search('foo %s bar') + assert catalog.PYTHON_FORMAT.search('foo %r bar') + assert catalog.PYTHON_FORMAT.search('foo %(name).1f') + assert catalog.PYTHON_FORMAT.search('foo %(name)3.3f') + assert catalog.PYTHON_FORMAT.search('foo %(name)3f') + assert catalog.PYTHON_FORMAT.search('foo %(name)06d') + assert catalog.PYTHON_FORMAT.search('foo %(name)Li') + assert catalog.PYTHON_FORMAT.search('foo %(name)#d') + assert catalog.PYTHON_FORMAT.search('foo %(name)-4.4hs') + assert catalog.PYTHON_FORMAT.search('foo %(name)*.3f') + assert catalog.PYTHON_FORMAT.search('foo %(name).*f') + assert catalog.PYTHON_FORMAT.search('foo %(name)3.*f') + assert catalog.PYTHON_FORMAT.search('foo %(name)*.*f') + assert catalog.PYTHON_FORMAT.search('foo %()s') + + +def test_message_python_brace_format(): + assert not catalog._has_python_brace_format('') + assert not catalog._has_python_brace_format('foo') + assert not catalog._has_python_brace_format('{') + assert not catalog._has_python_brace_format('}') + assert not catalog._has_python_brace_format('{} {') + assert not catalog._has_python_brace_format('{{}}') + assert catalog._has_python_brace_format('{}') + assert catalog._has_python_brace_format('foo {name}') + assert catalog._has_python_brace_format('foo {name!s}') + assert catalog._has_python_brace_format('foo {name!r}') + assert catalog._has_python_brace_format('foo {name!a}') + assert catalog._has_python_brace_format('foo {name!r:10}') + assert catalog._has_python_brace_format('foo {name!r:10.2}') + assert catalog._has_python_brace_format('foo {name!r:10.2f}') + assert catalog._has_python_brace_format('foo {name!r:10.2f} {name!r:10.2f}') + assert catalog._has_python_brace_format('foo {name!r:10.2f=}') + + +def test_message_translator_comments(): + mess = catalog.Message('foo', user_comments=['Comment About `foo`']) + assert mess.user_comments == ['Comment About `foo`'] + mess = catalog.Message('foo', + auto_comments=['Comment 1 About `foo`', + 'Comment 2 About 
`foo`']) + assert mess.auto_comments == ['Comment 1 About `foo`', 'Comment 2 About `foo`'] + + +def test_message_clone_message_object(): + msg = catalog.Message('foo', locations=[('foo.py', 42)]) + clone = msg.clone() + clone.locations.append(('bar.py', 42)) + assert msg.locations == [('foo.py', 42)] + msg.flags.add('fuzzy') + assert not clone.fuzzy and msg.fuzzy + + +def test_catalog_add_returns_message_instance(): + cat = catalog.Catalog() + message = cat.add('foo') + assert message.id == 'foo' + + +def test_catalog_two_messages_with_same_singular(): + cat = catalog.Catalog() + cat.add('foo') + cat.add(('foo', 'foos')) + assert len(cat) == 1 + + +def test_catalog_duplicate_auto_comment(): + cat = catalog.Catalog() + cat.add('foo', auto_comments=['A comment']) + cat.add('foo', auto_comments=['A comment', 'Another comment']) + assert cat['foo'].auto_comments == ['A comment', 'Another comment'] + + +def test_catalog_duplicate_user_comment(): + cat = catalog.Catalog() + cat.add('foo', user_comments=['A comment']) + cat.add('foo', user_comments=['A comment', 'Another comment']) + assert cat['foo'].user_comments == ['A comment', 'Another comment'] + + +def test_catalog_duplicate_location(): + cat = catalog.Catalog() + cat.add('foo', locations=[('foo.py', 1)]) + cat.add('foo', locations=[('foo.py', 1)]) + assert cat['foo'].locations == [('foo.py', 1)] + + +def test_catalog_update_message_changed_to_plural(): + cat = catalog.Catalog() + cat.add('foo', 'Voh') + tmpl = catalog.Catalog() + tmpl.add(('foo', 'foos')) + cat.update(tmpl) + assert cat['foo'].string == ('Voh', '') + assert cat['foo'].fuzzy + + +def test_catalog_update_message_changed_to_simple(): + cat = catalog.Catalog() + cat.add('foo' 'foos', ('Voh', 'Vöhs')) + tmpl = catalog.Catalog() + tmpl.add('foo') + cat.update(tmpl) + assert cat['foo'].string == 'Voh' + assert cat['foo'].fuzzy + + +def test_catalog_update_message_updates_comments(): + cat = catalog.Catalog() + cat['foo'] = catalog.Message('foo', 
locations=[('main.py', 5)]) + assert cat['foo'].auto_comments == [] + assert cat['foo'].user_comments == [] + # Update cat['foo'] with a new location and a comment + cat['foo'] = catalog.Message('foo', locations=[('main.py', 7)], + user_comments=['Foo Bar comment 1']) + assert cat['foo'].user_comments == ['Foo Bar comment 1'] + # now add yet another location with another comment + cat['foo'] = catalog.Message('foo', locations=[('main.py', 9)], + auto_comments=['Foo Bar comment 2']) + assert cat['foo'].auto_comments == ['Foo Bar comment 2'] + + +def test_catalog_update_fuzzy_matching_with_case_change(): + cat = catalog.Catalog() + cat.add('FOO', 'Voh') + cat.add('bar', 'Bahr') + tmpl = catalog.Catalog() + tmpl.add('foo') + cat.update(tmpl) + assert len(cat.obsolete) == 1 + assert 'FOO' not in cat + + assert cat['foo'].string == 'Voh' + assert cat['foo'].fuzzy is True + + +def test_catalog_update_fuzzy_matching_with_char_change(): + cat = catalog.Catalog() + cat.add('fo', 'Voh') + cat.add('bar', 'Bahr') + tmpl = catalog.Catalog() + tmpl.add('foo') + cat.update(tmpl) + assert len(cat.obsolete) == 1 + assert 'fo' not in cat + + assert cat['foo'].string == 'Voh' + assert cat['foo'].fuzzy is True + + +def test_catalog_update_fuzzy_matching_no_msgstr(): + cat = catalog.Catalog() + cat.add('fo', '') + tmpl = catalog.Catalog() + tmpl.add('fo') + tmpl.add('foo') + cat.update(tmpl) + assert 'fo' in cat + assert 'foo' in cat + + assert cat['fo'].string == '' + assert cat['fo'].fuzzy is False + assert cat['foo'].string is None + assert cat['foo'].fuzzy is False + + +def test_catalog_update_fuzzy_matching_with_new_context(): + cat = catalog.Catalog() + cat.add('foo', 'Voh') + cat.add('bar', 'Bahr') + tmpl = catalog.Catalog() + tmpl.add('Foo', context='Menu') + cat.update(tmpl) + assert len(cat.obsolete) == 1 + assert 'foo' not in cat + + message = cat.get('Foo', 'Menu') + assert message.string == 'Voh' + assert message.fuzzy is True + assert message.context == 'Menu' + + +def 
test_catalog_update_fuzzy_matching_with_changed_context(): + cat = catalog.Catalog() + cat.add('foo', 'Voh', context='Menu|File') + cat.add('bar', 'Bahr', context='Menu|File') + tmpl = catalog.Catalog() + tmpl.add('Foo', context='Menu|Edit') + cat.update(tmpl) + assert len(cat.obsolete) == 1 + assert cat.get('Foo', 'Menu|File') is None + + message = cat.get('Foo', 'Menu|Edit') + assert message.string == 'Voh' + assert message.fuzzy is True + assert message.context == 'Menu|Edit' + + +def test_catalog_update_fuzzy_matching_no_cascading(): + cat = catalog.Catalog() + cat.add('fo', 'Voh') + cat.add('foo', 'Vohe') + tmpl = catalog.Catalog() + tmpl.add('fo') + tmpl.add('foo') + tmpl.add('fooo') + cat.update(tmpl) + assert 'fo' in cat + assert 'foo' in cat + + assert cat['fo'].string == 'Voh' + assert cat['fo'].fuzzy is False + assert cat['foo'].string == 'Vohe' + assert cat['foo'].fuzzy is False + assert cat['fooo'].string == 'Vohe' + assert cat['fooo'].fuzzy is True + + +def test_catalog_update_fuzzy_matching_long_string(): + lipsum = "\ Lorem Ipsum is simply dummy text of the printing and typesetting \ industry. Lorem Ipsum has been the industry's standard dummy text ever \ since the 1500s, when an unknown printer took a galley of type and \ @@ -238,113 +251,121 @@ def test_update_fuzzy_matching_long_string(self): the release of Letraset sheets containing Lorem Ipsum passages, and \ more recently with desktop publishing software like Aldus PageMaker \ including versions of Lorem Ipsum." 
- cat = catalog.Catalog() - cat.add("ZZZZZZ " + lipsum, "foo") - tmpl = catalog.Catalog() - tmpl.add(lipsum + " ZZZZZZ") - cat.update(tmpl) - assert cat[lipsum + " ZZZZZZ"].fuzzy is True - assert len(cat.obsolete) == 0 - - def test_update_without_fuzzy_matching(self): - cat = catalog.Catalog() - cat.add('fo', 'Voh') - cat.add('bar', 'Bahr') - tmpl = catalog.Catalog() - tmpl.add('foo') - cat.update(tmpl, no_fuzzy_matching=True) - assert len(cat.obsolete) == 2 - - def test_fuzzy_matching_regarding_plurals(self): - cat = catalog.Catalog() - cat.add(('foo', 'foh'), ('foo', 'foh')) - ru = copy.copy(cat) - ru.locale = 'ru_RU' - ru.update(cat) - assert ru['foo'].fuzzy is True - ru = copy.copy(cat) - ru.locale = 'ru_RU' - ru['foo'].string = ('foh', 'fohh', 'fohhh') - ru.update(cat) - assert ru['foo'].fuzzy is False - - def test_update_no_template_mutation(self): - tmpl = catalog.Catalog() - tmpl.add('foo') - cat1 = catalog.Catalog() - cat1.add('foo', 'Voh') - cat1.update(tmpl) - cat2 = catalog.Catalog() - cat2.update(tmpl) - - assert cat2['foo'].string is None - assert cat2['foo'].fuzzy is False - - def test_update_po_updates_pot_creation_date(self): - template = catalog.Catalog() - localized_catalog = copy.deepcopy(template) - localized_catalog.locale = 'de_DE' - assert template.mime_headers != localized_catalog.mime_headers - assert template.creation_date == localized_catalog.creation_date - template.creation_date = datetime.datetime.now() - \ - datetime.timedelta(minutes=5) - localized_catalog.update(template) - assert template.creation_date == localized_catalog.creation_date - - def test_update_po_ignores_pot_creation_date(self): - template = catalog.Catalog() - localized_catalog = copy.deepcopy(template) - localized_catalog.locale = 'de_DE' - assert template.mime_headers != localized_catalog.mime_headers - assert template.creation_date == localized_catalog.creation_date - template.creation_date = datetime.datetime.now() - \ - datetime.timedelta(minutes=5) - 
localized_catalog.update(template, update_creation_date=False) - assert template.creation_date != localized_catalog.creation_date - - def test_update_po_keeps_po_revision_date(self): - template = catalog.Catalog() - localized_catalog = copy.deepcopy(template) - localized_catalog.locale = 'de_DE' - fake_rev_date = datetime.datetime.now() - datetime.timedelta(days=5) - localized_catalog.revision_date = fake_rev_date - assert template.mime_headers != localized_catalog.mime_headers - assert template.creation_date == localized_catalog.creation_date - template.creation_date = datetime.datetime.now() - \ - datetime.timedelta(minutes=5) - localized_catalog.update(template) - assert localized_catalog.revision_date == fake_rev_date - - def test_stores_datetime_correctly(self): - localized = catalog.Catalog() - localized.locale = 'de_DE' - localized[''] = catalog.Message('', - "POT-Creation-Date: 2009-03-09 15:47-0700\n" + - "PO-Revision-Date: 2009-03-09 15:47-0700\n") - for key, value in localized.mime_headers: - if key in ('POT-Creation-Date', 'PO-Revision-Date'): - assert value == '2009-03-09 15:47-0700' - - def test_mime_headers_contain_same_information_as_attributes(self): - cat = catalog.Catalog() - cat[''] = catalog.Message('', - "Last-Translator: Foo Bar \n" + - "Language-Team: de \n" + - "POT-Creation-Date: 2009-03-01 11:20+0200\n" + - "PO-Revision-Date: 2009-03-09 15:47-0700\n") - assert cat.locale is None - mime_headers = dict(cat.mime_headers) - - assert cat.last_translator == 'Foo Bar ' - assert mime_headers['Last-Translator'] == 'Foo Bar ' - - assert cat.language_team == 'de ' - assert mime_headers['Language-Team'] == 'de ' - - dt = datetime.datetime(2009, 3, 9, 15, 47, tzinfo=FixedOffsetTimezone(-7 * 60)) - assert cat.revision_date == dt - formatted_dt = format_datetime(dt, 'yyyy-MM-dd HH:mmZ', locale='en') - assert mime_headers['PO-Revision-Date'] == formatted_dt + cat = catalog.Catalog() + cat.add("ZZZZZZ " + lipsum, "foo") + tmpl = catalog.Catalog() + 
tmpl.add(lipsum + " ZZZZZZ") + cat.update(tmpl) + assert cat[lipsum + " ZZZZZZ"].fuzzy is True + assert len(cat.obsolete) == 0 + + +def test_catalog_update_without_fuzzy_matching(): + cat = catalog.Catalog() + cat.add('fo', 'Voh') + cat.add('bar', 'Bahr') + tmpl = catalog.Catalog() + tmpl.add('foo') + cat.update(tmpl, no_fuzzy_matching=True) + assert len(cat.obsolete) == 2 + + +def test_catalog_fuzzy_matching_regarding_plurals(): + cat = catalog.Catalog() + cat.add(('foo', 'foh'), ('foo', 'foh')) + ru = copy.copy(cat) + ru.locale = 'ru_RU' + ru.update(cat) + assert ru['foo'].fuzzy is True + ru = copy.copy(cat) + ru.locale = 'ru_RU' + ru['foo'].string = ('foh', 'fohh', 'fohhh') + ru.update(cat) + assert ru['foo'].fuzzy is False + + +def test_catalog_update_no_template_mutation(): + tmpl = catalog.Catalog() + tmpl.add('foo') + cat1 = catalog.Catalog() + cat1.add('foo', 'Voh') + cat1.update(tmpl) + cat2 = catalog.Catalog() + cat2.update(tmpl) + + assert cat2['foo'].string is None + assert cat2['foo'].fuzzy is False + + +def test_catalog_update_po_updates_pot_creation_date(): + template = catalog.Catalog() + localized_catalog = copy.deepcopy(template) + localized_catalog.locale = 'de_DE' + assert template.mime_headers != localized_catalog.mime_headers + assert template.creation_date == localized_catalog.creation_date + template.creation_date = datetime.datetime.now() - \ + datetime.timedelta(minutes=5) + localized_catalog.update(template) + assert template.creation_date == localized_catalog.creation_date + + +def test_catalog_update_po_ignores_pot_creation_date(): + template = catalog.Catalog() + localized_catalog = copy.deepcopy(template) + localized_catalog.locale = 'de_DE' + assert template.mime_headers != localized_catalog.mime_headers + assert template.creation_date == localized_catalog.creation_date + template.creation_date = datetime.datetime.now() - \ + datetime.timedelta(minutes=5) + localized_catalog.update(template, update_creation_date=False) + assert 
template.creation_date != localized_catalog.creation_date + + +def test_catalog_update_po_keeps_po_revision_date(): + template = catalog.Catalog() + localized_catalog = copy.deepcopy(template) + localized_catalog.locale = 'de_DE' + fake_rev_date = datetime.datetime.now() - datetime.timedelta(days=5) + localized_catalog.revision_date = fake_rev_date + assert template.mime_headers != localized_catalog.mime_headers + assert template.creation_date == localized_catalog.creation_date + template.creation_date = datetime.datetime.now() - \ + datetime.timedelta(minutes=5) + localized_catalog.update(template) + assert localized_catalog.revision_date == fake_rev_date + + +def test_catalog_stores_datetime_correctly(): + localized = catalog.Catalog() + localized.locale = 'de_DE' + localized[''] = catalog.Message('', + "POT-Creation-Date: 2009-03-09 15:47-0700\n" + + "PO-Revision-Date: 2009-03-09 15:47-0700\n") + for key, value in localized.mime_headers: + if key in ('POT-Creation-Date', 'PO-Revision-Date'): + assert value == '2009-03-09 15:47-0700' + + +def test_catalog_mime_headers_contain_same_information_as_attributes(): + cat = catalog.Catalog() + cat[''] = catalog.Message('', + "Last-Translator: Foo Bar \n" + + "Language-Team: de \n" + + "POT-Creation-Date: 2009-03-01 11:20+0200\n" + + "PO-Revision-Date: 2009-03-09 15:47-0700\n") + assert cat.locale is None + mime_headers = dict(cat.mime_headers) + + assert cat.last_translator == 'Foo Bar ' + assert mime_headers['Last-Translator'] == 'Foo Bar ' + + assert cat.language_team == 'de ' + assert mime_headers['Language-Team'] == 'de ' + + dt = datetime.datetime(2009, 3, 9, 15, 47, tzinfo=FixedOffsetTimezone(-7 * 60)) + assert cat.revision_date == dt + formatted_dt = format_datetime(dt, 'yyyy-MM-dd HH:mmZ', locale='en') + assert mime_headers['PO-Revision-Date'] == formatted_dt def test_message_fuzzy(): @@ -359,14 +380,14 @@ def test_message_pluralizable(): assert catalog.Message(('foo', 'bar')).pluralizable -def 
test_message_python_format(): +def test_message_python_format_2(): assert not catalog.Message('foo').python_format assert not catalog.Message(('foo', 'foo')).python_format assert catalog.Message('foo %(name)s bar').python_format assert catalog.Message(('foo %(name)s', 'foo %(name)s')).python_format -def test_message_python_brace_format(): +def test_message_python_brace_format_2(): assert not catalog.Message('foo').python_brace_format assert not catalog.Message(('foo', 'foo')).python_brace_format assert catalog.Message('foo {name} bar').python_brace_format @@ -524,10 +545,10 @@ def test_catalog_update(): def test_datetime_parsing(): val1 = catalog._parse_datetime_header('2006-06-28 23:24+0200') - assert val1.year == 2006 - assert val1.month == 6 - assert val1.day == 28 - assert val1.tzinfo.zone == 'Etc/GMT+120' + assert val1.timetuple()[:5] == (2006, 6, 28, 23, 24) + assert val1.utctimetuple()[:5] == (2006, 6, 28, 21, 24) + assert val1.tzinfo.tzname(None) == 'Etc/GMT+120' + assert val1 == datetime.datetime(2006, 6, 28, 21, 24, tzinfo=UTC) val2 = catalog._parse_datetime_header('2006-06-28 23:24') assert val2.year == 2006 @@ -562,3 +583,16 @@ def test_update_catalog_comments(): # Auto comments will be obliterated here assert all(message.user_comments for message in catalog if message.id) + + +def test_catalog_tz_pickleable(): + """ + Test that catalogs with timezoned times are pickleable. + This would previously fail with `FixedOffsetTimezone.__init__() missing 1 required positional argument: 'offset'` + when trying to load the pickled data. + """ + pickle.loads(pickle.dumps(pofile.read_po(StringIO(r""" +msgid "" +msgstr "" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" + """)))) diff --git a/tests/messages/test_checkers.py b/tests/messages/test_checkers.py index bba8f145a..8d4b1a77d 100644 --- a/tests/messages/test_checkers.py +++ b/tests/messages/test_checkers.py @@ -10,7 +10,6 @@ # individuals. 
For the exact contribution history, see the revision # history and logs, available at https://github.com/python-babel/babel/commits/master/. -import unittest from datetime import datetime from io import BytesIO @@ -26,20 +25,19 @@ from babel.messages.pofile import read_po from babel.util import LOCALTZ +# the last msgstr[idx] is always missing except for singular plural forms -class CheckersTestCase(unittest.TestCase): - # the last msgstr[idx] is always missing except for singular plural forms - def test_1_num_plurals_checkers(self): - for _locale in [p for p in PLURALS if PLURALS[p][0] == 1]: - try: - locale = Locale.parse(_locale) - except UnknownLocaleError: - # Just an alias? Not what we're testing here, let's continue - continue - date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) - plural = PLURALS[_locale][0] - po_file = (f"""\ +def test_1_num_plurals_checkers(): + for _locale in [p for p in PLURALS if PLURALS[p][0] == 1]: + try: + locale = Locale.parse(_locale) + except UnknownLocaleError: + # Just an alias? Not what we're testing here, let's continue + continue + date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) + plural = PLURALS[_locale][0] + po_file = (f"""\ # {locale.english_name} translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
# This file is distributed under the same license as the TestProject @@ -73,32 +71,33 @@ def test_1_num_plurals_checkers(self): """).encode('utf-8') - # This test will fail for revisions <= 406 because so far - # catalog.num_plurals was neglected - catalog = read_po(BytesIO(po_file), _locale) - message = catalog['foobar'] - checkers.num_plurals(catalog, message) - - def test_2_num_plurals_checkers(self): - # in this testcase we add an extra msgstr[idx], we should be - # disregarding it - for _locale in [p for p in PLURALS if PLURALS[p][0] == 2]: - if _locale in ['nn', 'no']: - _locale = 'nn_NO' - num_plurals = PLURALS[_locale.split('_')[0]][0] - plural_expr = PLURALS[_locale.split('_')[0]][1] - else: - num_plurals = PLURALS[_locale][0] - plural_expr = PLURALS[_locale][1] - try: - locale = Locale(_locale) - date = format_datetime(datetime.now(LOCALTZ), - 'yyyy-MM-dd HH:mmZ', - tzinfo=LOCALTZ, locale=_locale) - except UnknownLocaleError: - # Just an alias? Not what we're testing here, let's continue - continue - po_file = f"""\ + # This test will fail for revisions <= 406 because so far + # catalog.num_plurals was neglected + catalog = read_po(BytesIO(po_file), _locale) + message = catalog['foobar'] + checkers.num_plurals(catalog, message) + + +def test_2_num_plurals_checkers(): + # in this testcase we add an extra msgstr[idx], we should be + # disregarding it + for _locale in [p for p in PLURALS if PLURALS[p][0] == 2]: + if _locale in ['nn', 'no']: + _locale = 'nn_NO' + num_plurals = PLURALS[_locale.split('_')[0]][0] + plural_expr = PLURALS[_locale.split('_')[0]][1] + else: + num_plurals = PLURALS[_locale][0] + plural_expr = PLURALS[_locale][1] + try: + locale = Locale(_locale) + date = format_datetime(datetime.now(LOCALTZ), + 'yyyy-MM-dd HH:mmZ', + tzinfo=LOCALTZ, locale=_locale) + except UnknownLocaleError: + # Just an alias? Not what we're testing here, let's continue + continue + po_file = f"""\ # {locale.english_name} translations for TestProject. 
# Copyright (C) 2007 FooBar, Inc. # This file is distributed under the same license as the TestProject @@ -133,19 +132,20 @@ def test_2_num_plurals_checkers(self): msgstr[2] "" """.encode('utf-8') - # we should be adding the missing msgstr[0] - - # This test will fail for revisions <= 406 because so far - # catalog.num_plurals was neglected - catalog = read_po(BytesIO(po_file), _locale) - message = catalog['foobar'] - checkers.num_plurals(catalog, message) - - def test_3_num_plurals_checkers(self): - for _locale in [p for p in PLURALS if PLURALS[p][0] == 3]: - plural = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) - english_name = Locale.parse(_locale).english_name - po_file = fr"""\ + # we should be adding the missing msgstr[0] + + # This test will fail for revisions <= 406 because so far + # catalog.num_plurals was neglected + catalog = read_po(BytesIO(po_file), _locale) + message = catalog['foobar'] + checkers.num_plurals(catalog, message) + + +def test_3_num_plurals_checkers(): + for _locale in [p for p in PLURALS if PLURALS[p][0] == 3]: + plural = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) + english_name = Locale.parse(_locale).english_name + po_file = fr"""\ # {english_name} translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
# This file is distributed under the same license as the TestProject @@ -180,18 +180,19 @@ def test_3_num_plurals_checkers(self): """.encode('utf-8') - # This test will fail for revisions <= 406 because so far - # catalog.num_plurals was neglected - catalog = read_po(BytesIO(po_file), _locale) - message = catalog['foobar'] - checkers.num_plurals(catalog, message) - - def test_4_num_plurals_checkers(self): - for _locale in [p for p in PLURALS if PLURALS[p][0] == 4]: - date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) - english_name = Locale.parse(_locale).english_name - plural = PLURALS[_locale][0] - po_file = fr"""\ + # This test will fail for revisions <= 406 because so far + # catalog.num_plurals was neglected + catalog = read_po(BytesIO(po_file), _locale) + message = catalog['foobar'] + checkers.num_plurals(catalog, message) + + +def test_4_num_plurals_checkers(): + for _locale in [p for p in PLURALS if PLURALS[p][0] == 4]: + date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) + english_name = Locale.parse(_locale).english_name + plural = PLURALS[_locale][0] + po_file = fr"""\ # {english_name} translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
# This file is distributed under the same license as the TestProject @@ -227,18 +228,19 @@ def test_4_num_plurals_checkers(self): """.encode('utf-8') - # This test will fail for revisions <= 406 because so far - # catalog.num_plurals was neglected - catalog = read_po(BytesIO(po_file), _locale) - message = catalog['foobar'] - checkers.num_plurals(catalog, message) - - def test_5_num_plurals_checkers(self): - for _locale in [p for p in PLURALS if PLURALS[p][0] == 5]: - date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) - english_name = Locale.parse(_locale).english_name - plural = PLURALS[_locale][0] - po_file = fr"""\ + # This test will fail for revisions <= 406 because so far + # catalog.num_plurals was neglected + catalog = read_po(BytesIO(po_file), _locale) + message = catalog['foobar'] + checkers.num_plurals(catalog, message) + + +def test_5_num_plurals_checkers(): + for _locale in [p for p in PLURALS if PLURALS[p][0] == 5]: + date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) + english_name = Locale.parse(_locale).english_name + plural = PLURALS[_locale][0] + po_file = fr"""\ # {english_name} translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
# This file is distributed under the same license as the TestProject @@ -275,18 +277,19 @@ def test_5_num_plurals_checkers(self): """.encode('utf-8') - # This test will fail for revisions <= 406 because so far - # catalog.num_plurals was neglected - catalog = read_po(BytesIO(po_file), _locale) - message = catalog['foobar'] - checkers.num_plurals(catalog, message) - - def test_6_num_plurals_checkers(self): - for _locale in [p for p in PLURALS if PLURALS[p][0] == 6]: - english_name = Locale.parse(_locale).english_name - date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) - plural = PLURALS[_locale][0] - po_file = fr"""\ + # This test will fail for revisions <= 406 because so far + # catalog.num_plurals was neglected + catalog = read_po(BytesIO(po_file), _locale) + message = catalog['foobar'] + checkers.num_plurals(catalog, message) + + +def test_6_num_plurals_checkers(): + for _locale in [p for p in PLURALS if PLURALS[p][0] == 6]: + english_name = Locale.parse(_locale).english_name + date = format_datetime(datetime.now(LOCALTZ), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale=_locale) + plural = PLURALS[_locale][0] + po_file = fr"""\ # {english_name} translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
# This file is distributed under the same license as the TestProject @@ -324,64 +327,72 @@ def test_6_num_plurals_checkers(self): """.encode('utf-8') - # This test will fail for revisions <= 406 because so far - # catalog.num_plurals was neglected - catalog = read_po(BytesIO(po_file), _locale) - message = catalog['foobar'] - checkers.num_plurals(catalog, message) - - -class TestPythonFormat: - @pytest.mark.parametrize(('msgid', 'msgstr'), [ - ('foo %s', 'foo'), - (('foo %s', 'bar'), ('foo', 'bar')), - (('foo', 'bar %s'), ('foo', 'bar')), - (('foo %s', 'bar'), ('foo')), - ]) - def test_python_format_invalid(self, msgid, msgstr): - msg = Message(msgid, msgstr) - with pytest.raises(TranslationError): - python_format(None, msg) - - @pytest.mark.parametrize(('msgid', 'msgstr'), [ - ('foo', 'foo'), - ('foo', 'foo %s'), - (('foo %s', 'bar %d'), ('foo %s', 'bar %d')), - (('foo %s', 'bar %d'), ('foo %s', 'bar %d', 'baz')), - (('foo', 'bar %s'), ('foo')), - ]) - def test_python_format_valid(self, msgid, msgstr): - msg = Message(msgid, msgstr) + # This test will fail for revisions <= 406 because so far + # catalog.num_plurals was neglected + catalog = read_po(BytesIO(po_file), _locale) + message = catalog['foobar'] + checkers.num_plurals(catalog, message) + + +@pytest.mark.parametrize(('msgid', 'msgstr'), [ + ('foo %s', 'foo'), + (('foo %s', 'bar'), ('foo', 'bar')), + (('foo', 'bar %s'), ('foo', 'bar')), + (('foo %s', 'bar'), ('foo')), + (('foo %s', 'bar %d'), ('foo %s', 'bar %d', 'baz')), + (('foo %s', 'bar %d'), ('foo %s', 'bar %d', 'baz %d', 'qux')), +]) +def test_python_format_invalid(msgid, msgstr): + msg = Message(msgid, msgstr) + with pytest.raises(TranslationError): python_format(None, msg) - @pytest.mark.parametrize(('msgid', 'msgstr', 'error'), [ - ('%s %(foo)s', '%s %(foo)s', 'format string mixes positional and named placeholders'), - ('foo %s', 'foo', 'placeholders are incompatible'), - ('%s', '%(foo)s', 'the format strings are of different kinds'), - ('%s', '%s 
%d', 'positional format placeholders are unbalanced'), - ('%s', '%d', "incompatible format for placeholder 1: 's' and 'd' are not compatible"), - ('%s %s %d', '%s %s %s', "incompatible format for placeholder 3: 'd' and 's' are not compatible"), - ('%(foo)s', '%(bar)s', "unknown named placeholder 'bar'"), - ('%(foo)s', '%(bar)d', "unknown named placeholder 'bar'"), - ('%(foo)s', '%(foo)d', "incompatible format for placeholder 'foo': 'd' and 's' are not compatible"), - ]) - def test__validate_format_invalid(self, msgid, msgstr, error): - with pytest.raises(TranslationError, match=error): - _validate_format(msgid, msgstr) - - @pytest.mark.parametrize(('msgid', 'msgstr'), [ - ('foo', 'foo'), - ('foo', 'foo %s'), - ('%s foo', 'foo %s'), - ('%i', '%d'), - ('%d', '%u'), - ('%x', '%X'), - ('%f', '%F'), - ('%F', '%g'), - ('%g', '%G'), - ('%(foo)s', 'foo'), - ('%(foo)s', '%(foo)s %(foo)s'), - ('%(bar)s foo %(n)d', '%(n)d foo %(bar)s'), - ]) - def test__validate_format_valid(self, msgid, msgstr): + +@pytest.mark.parametrize(('msgid', 'msgstr'), [ + ('foo', 'foo'), + ('foo', 'foo %s'), + ('foo %s', ''), + (('foo %s', 'bar %d'), ('foo %s', 'bar %d')), + (('foo %s', 'bar %d'), ('foo %s', 'bar %d', 'baz %d')), + (('foo', 'bar %s'), ('foo')), + (('foo', 'bar %s'), ('', '')), + (('foo', 'bar %s'), ('foo', '')), + (('foo %s', 'bar %d'), ('foo %s', '')), +]) +def test_python_format_valid(msgid, msgstr): + msg = Message(msgid, msgstr) + python_format(None, msg) + + +@pytest.mark.parametrize(('msgid', 'msgstr', 'error'), [ + ('%s %(foo)s', '%s %(foo)s', 'format string mixes positional and named placeholders'), + ('foo %s', 'foo', 'placeholders are incompatible'), + ('%s', '%(foo)s', 'the format strings are of different kinds'), + ('%s', '%s %d', 'positional format placeholders are unbalanced'), + ('%s', '%d', "incompatible format for placeholder 1: 's' and 'd' are not compatible"), + ('%s %s %d', '%s %s %s', "incompatible format for placeholder 3: 'd' and 's' are not compatible"), + 
('%(foo)s', '%(bar)s', "unknown named placeholder 'bar'"), + ('%(foo)s', '%(bar)d', "unknown named placeholder 'bar'"), + ('%(foo)s', '%(foo)d', "incompatible format for placeholder 'foo': 'd' and 's' are not compatible"), +]) +def test__validate_format_invalid(msgid, msgstr, error): + with pytest.raises(TranslationError, match=error): _validate_format(msgid, msgstr) + + +@pytest.mark.parametrize(('msgid', 'msgstr'), [ + ('foo', 'foo'), + ('foo', 'foo %s'), + ('%s foo', 'foo %s'), + ('%i', '%d'), + ('%d', '%u'), + ('%x', '%X'), + ('%f', '%F'), + ('%F', '%g'), + ('%g', '%G'), + ('%(foo)s', 'foo'), + ('%(foo)s', '%(foo)s %(foo)s'), + ('%(bar)s foo %(n)d', '%(n)d foo %(bar)s'), +]) +def test__validate_format_valid(msgid, msgstr): + _validate_format(msgid, msgstr) diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py index d5ac3b2ca..41eda8903 100644 --- a/tests/messages/test_extract.py +++ b/tests/messages/test_extract.py @@ -10,9 +10,7 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at https://github.com/python-babel/babel/commits/master/. 
-import codecs import sys -import unittest from io import BytesIO, StringIO import pytest @@ -20,10 +18,8 @@ from babel.messages import extract -class ExtractPythonTestCase(unittest.TestCase): - - def test_nested_calls(self): - buf = BytesIO(b"""\ +def test_invalid_filter(): + buf = BytesIO(b"""\ msg1 = _(i18n_arg.replace(r'\"', '"')) msg2 = ungettext(i18n_arg.replace(r'\"', '"'), multi_arg.replace(r'\"', '"'), 2) msg3 = ungettext("Babel", multi_arg.replace(r'\"', '"'), 2) @@ -33,304 +29,26 @@ def test_nested_calls(self): msg7 = _(hello.there) msg8 = gettext('Rabbit') msg9 = dgettext('wiki', model.addPage()) -msg10 = dngettext(getDomain(), 'Page', 'Pages', 3) -msg11 = ngettext( - "bunny", - "bunnies", - len(bunnies) -) -""") - messages = list(extract.extract_python(buf, - extract.DEFAULT_KEYWORDS.keys(), - [], {})) - assert messages == [ - (1, '_', None, []), - (2, 'ungettext', (None, None, None), []), - (3, 'ungettext', ('Babel', None, None), []), - (4, 'ungettext', (None, 'Babels', None), []), - (5, 'ungettext', ('bunny', 'bunnies', None), []), - (6, 'ungettext', (None, 'bunnies', None), []), - (7, '_', None, []), - (8, 'gettext', 'Rabbit', []), - (9, 'dgettext', ('wiki', None), []), - (10, 'dngettext', (None, 'Page', 'Pages', None), []), - (12, 'ngettext', ('bunny', 'bunnies', None), []), - ] - - def test_extract_default_encoding_ascii(self): - buf = BytesIO(b'_("a")') - messages = list(extract.extract_python( - buf, list(extract.DEFAULT_KEYWORDS), [], {}, - )) - # Should work great in both py2 and py3 - assert messages == [(1, '_', 'a', [])] - - def test_extract_default_encoding_utf8(self): - buf = BytesIO('_("☃")'.encode('UTF-8')) - messages = list(extract.extract_python( - buf, list(extract.DEFAULT_KEYWORDS), [], {}, - )) - assert messages == [(1, '_', '☃', [])] - - def test_nested_comments(self): - buf = BytesIO(b"""\ -msg = ngettext('pylon', # TRANSLATORS: shouldn't be - 'pylons', # TRANSLATORS: seeing this - count) -""") - messages = 
list(extract.extract_python(buf, ('ngettext',), - ['TRANSLATORS:'], {})) - assert messages == [(1, 'ngettext', ('pylon', 'pylons', None), [])] - - def test_comments_with_calls_that_spawn_multiple_lines(self): - buf = BytesIO(b"""\ -# NOTE: This Comment SHOULD Be Extracted -add_notice(req, ngettext("Catalog deleted.", - "Catalogs deleted.", len(selected))) - -# NOTE: This Comment SHOULD Be Extracted -add_notice(req, _("Locale deleted.")) - - -# NOTE: This Comment SHOULD Be Extracted -add_notice(req, ngettext("Foo deleted.", "Foos deleted.", len(selected))) - -# NOTE: This Comment SHOULD Be Extracted -# NOTE: And This One Too -add_notice(req, ngettext("Bar deleted.", - "Bars deleted.", len(selected))) -""") - messages = list(extract.extract_python(buf, ('ngettext', '_'), ['NOTE:'], - - {'strip_comment_tags': False})) - assert messages[0] == (2, 'ngettext', ('Catalog deleted.', 'Catalogs deleted.', None), ['NOTE: This Comment SHOULD Be Extracted']) - assert messages[1] == (6, '_', 'Locale deleted.', ['NOTE: This Comment SHOULD Be Extracted']) - assert messages[2] == (10, 'ngettext', ('Foo deleted.', 'Foos deleted.', None), ['NOTE: This Comment SHOULD Be Extracted']) - assert messages[3] == (14, 'ngettext', ('Bar deleted.', 'Bars deleted.', None), ['NOTE: This Comment SHOULD Be Extracted', 'NOTE: And This One Too']) - - def test_declarations(self): - buf = BytesIO(b"""\ -class gettext(object): - pass -def render_body(context,x,y=_('Page arg 1'),z=_('Page arg 2'),**pageargs): - pass -def ngettext(y='arg 1',z='arg 2',**pageargs): - pass -class Meta: - verbose_name = _('log entry') -""") - messages = list(extract.extract_python(buf, - extract.DEFAULT_KEYWORDS.keys(), - [], {})) - assert messages == [ - (3, '_', 'Page arg 1', []), - (3, '_', 'Page arg 2', []), - (8, '_', 'log entry', []), - ] - - def test_multiline(self): - buf = BytesIO(b"""\ -msg1 = ngettext('pylon', - 'pylons', count) -msg2 = ngettext('elvis', - 'elvises', - count) -""") - messages = 
list(extract.extract_python(buf, ('ngettext',), [], {})) - assert messages == [ - (1, 'ngettext', ('pylon', 'pylons', None), []), - (3, 'ngettext', ('elvis', 'elvises', None), []), - ] - - def test_npgettext(self): - buf = BytesIO(b"""\ -msg1 = npgettext('Strings','pylon', - 'pylons', count) -msg2 = npgettext('Strings','elvis', - 'elvises', - count) -""") - messages = list(extract.extract_python(buf, ('npgettext',), [], {})) - assert messages == [ - (1, 'npgettext', ('Strings', 'pylon', 'pylons', None), []), - (3, 'npgettext', ('Strings', 'elvis', 'elvises', None), []), - ] - buf = BytesIO(b"""\ -msg = npgettext('Strings', 'pylon', # TRANSLATORS: shouldn't be - 'pylons', # TRANSLATORS: seeing this - count) -""") - messages = list(extract.extract_python(buf, ('npgettext',), - ['TRANSLATORS:'], {})) - assert messages == [ - (1, 'npgettext', ('Strings', 'pylon', 'pylons', None), []), - ] - - def test_triple_quoted_strings(self): - buf = BytesIO(b"""\ -msg1 = _('''pylons''') -msg2 = ngettext(r'''elvis''', \"\"\"elvises\"\"\", count) -msg2 = ngettext(\"\"\"elvis\"\"\", 'elvises', count) -""") - messages = list(extract.extract_python(buf, - extract.DEFAULT_KEYWORDS.keys(), - [], {})) - assert messages == [ - (1, '_', 'pylons', []), - (2, 'ngettext', ('elvis', 'elvises', None), []), - (3, 'ngettext', ('elvis', 'elvises', None), []), - ] - - def test_multiline_strings(self): - buf = BytesIO(b"""\ -_('''This module provides internationalization and localization -support for your Python programs by providing an interface to the GNU -gettext message catalog library.''') -""") - messages = list(extract.extract_python(buf, - extract.DEFAULT_KEYWORDS.keys(), - [], {})) - assert messages == [ - (1, '_', - 'This module provides internationalization and localization\n' - 'support for your Python programs by providing an interface to ' - 'the GNU\ngettext message catalog library.', []), - ] - - def test_concatenated_strings(self): - buf = BytesIO(b"""\ -foobar = _('foo' 'bar') -""") 
- messages = list(extract.extract_python(buf, - extract.DEFAULT_KEYWORDS.keys(), - [], {})) - assert messages[0][2] == 'foobar' - - def test_unicode_string_arg(self): - buf = BytesIO(b"msg = _(u'Foo Bar')") - messages = list(extract.extract_python(buf, ('_',), [], {})) - assert messages[0][2] == 'Foo Bar' - - def test_comment_tag(self): - buf = BytesIO(b""" -# NOTE: A translation comment -msg = _(u'Foo Bar') -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Foo Bar' - assert messages[0][3] == ['NOTE: A translation comment'] - - def test_comment_tag_multiline(self): - buf = BytesIO(b""" -# NOTE: A translation comment -# with a second line -msg = _(u'Foo Bar') -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Foo Bar' - assert messages[0][3] == ['NOTE: A translation comment', 'with a second line'] - - def test_translator_comments_with_previous_non_translator_comments(self): - buf = BytesIO(b""" -# This shouldn't be in the output -# because it didn't start with a comment tag -# NOTE: A translation comment -# with a second line -msg = _(u'Foo Bar') -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Foo Bar' - assert messages[0][3] == ['NOTE: A translation comment', 'with a second line'] - - def test_comment_tags_not_on_start_of_comment(self): - buf = BytesIO(b""" -# This shouldn't be in the output -# because it didn't start with a comment tag -# do NOTE: this will not be a translation comment -# NOTE: This one will be -msg = _(u'Foo Bar') -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Foo Bar' - assert messages[0][3] == ['NOTE: This one will be'] - - def test_multiple_comment_tags(self): - buf = BytesIO(b""" -# NOTE1: A translation comment for tag1 -# with a second line -msg = _(u'Foo Bar1') - -# NOTE2: A translation comment for tag2 -msg = _(u'Foo Bar2') 
-""") - messages = list(extract.extract_python(buf, ('_',), - ['NOTE1:', 'NOTE2:'], {})) - assert messages[0][2] == 'Foo Bar1' - assert messages[0][3] == ['NOTE1: A translation comment for tag1', 'with a second line'] - assert messages[1][2] == 'Foo Bar2' - assert messages[1][3] == ['NOTE2: A translation comment for tag2'] - - def test_two_succeeding_comments(self): - buf = BytesIO(b""" -# NOTE: one -# NOTE: two -msg = _(u'Foo Bar') -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Foo Bar' - assert messages[0][3] == ['NOTE: one', 'NOTE: two'] - - def test_invalid_translator_comments(self): - buf = BytesIO(b""" -# NOTE: this shouldn't apply to any messages -hello = 'there' - -msg = _(u'Foo Bar') +msg10 = dngettext(domain, 'Page', 'Pages', 3) """) - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Foo Bar' - assert messages[0][3] == [] + messages = \ + list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], + {})) + assert messages == [ + (5, ('bunny', 'bunnies'), [], None), + (8, 'Rabbit', [], None), + (10, ('Page', 'Pages'), [], None), + ] - def test_invalid_translator_comments2(self): - buf = BytesIO(b""" -# NOTE: Hi! -hithere = _('Hi there!') -# NOTE: you should not be seeing this in the .po -rows = [[v for v in range(0,10)] for row in range(0,10)] +def test_invalid_extract_method(): + buf = BytesIO(b'') + with pytest.raises(ValueError): + list(extract.extract('spam', buf)) -# this (NOTE:) should not show up either -hello = _('Hello') -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Hi there!' - assert messages[0][3] == ['NOTE: Hi!'] - assert messages[1][2] == 'Hello' - assert messages[1][3] == [] - - def test_invalid_translator_comments3(self): - buf = BytesIO(b""" -# NOTE: Hi, - -# there! 
-hithere = _('Hi there!') -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Hi there!' - assert messages[0][3] == [] - - def test_comment_tag_with_leading_space(self): - buf = BytesIO(b""" - #: A translation comment - #: with leading spaces -msg = _(u'Foo Bar') -""") - messages = list(extract.extract_python(buf, ('_',), [':'], {})) - assert messages[0][2] == 'Foo Bar' - assert messages[0][3] == [': A translation comment', ': with leading spaces'] - def test_different_signatures(self): - buf = BytesIO(b""" +def test_different_signatures(): + buf = BytesIO(b""" foo = _('foo', 'bar') n = ngettext('hello', 'there', n=3) n = ngettext(n=3, 'hello', 'there') @@ -338,226 +56,127 @@ def test_different_signatures(self): n = ngettext() n = ngettext('foo') """) - messages = list(extract.extract_python(buf, ('_', 'ngettext'), [], {})) - assert messages[0][2] == ('foo', 'bar') - assert messages[1][2] == ('hello', 'there', None) - assert messages[2][2] == (None, 'hello', 'there') - assert messages[3][2] == (None, None) - assert messages[4][2] is None - assert messages[5][2] == 'foo' - - def test_utf8_message(self): - buf = BytesIO(""" -# NOTE: hello -msg = _('Bonjour à tous') -""".encode('utf-8')) - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], - {'encoding': 'utf-8'})) - assert messages[0][2] == 'Bonjour à tous' - assert messages[0][3] == ['NOTE: hello'] - - def test_utf8_message_with_magic_comment(self): - buf = BytesIO("""# -*- coding: utf-8 -*- -# NOTE: hello -msg = _('Bonjour à tous') -""".encode('utf-8')) - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Bonjour à tous' - assert messages[0][3] == ['NOTE: hello'] - - def test_utf8_message_with_utf8_bom(self): - buf = BytesIO(codecs.BOM_UTF8 + """ -# NOTE: hello -msg = _('Bonjour à tous') -""".encode('utf-8')) - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 
'Bonjour à tous' - assert messages[0][3] == ['NOTE: hello'] - - def test_utf8_message_with_utf8_bom_and_magic_comment(self): - buf = BytesIO(codecs.BOM_UTF8 + """# -*- coding: utf-8 -*- -# NOTE: hello -msg = _('Bonjour à tous') -""".encode('utf-8')) - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Bonjour à tous' - assert messages[0][3] == ['NOTE: hello'] - - def test_utf8_bom_with_latin_magic_comment_fails(self): - buf = BytesIO(codecs.BOM_UTF8 + """# -*- coding: latin-1 -*- -# NOTE: hello -msg = _('Bonjour à tous') -""".encode('utf-8')) - with pytest.raises(SyntaxError): - list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - - def test_utf8_raw_strings_match_unicode_strings(self): - buf = BytesIO(codecs.BOM_UTF8 + """ -msg = _('Bonjour à tous') -msgu = _(u'Bonjour à tous') -""".encode('utf-8')) - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == 'Bonjour à tous' - assert messages[0][2] == messages[1][2] - - def test_extract_strip_comment_tags(self): - buf = BytesIO(b"""\ -#: This is a comment with a very simple -#: prefix specified -_('Servus') - -# NOTE: This is a multiline comment with -# a prefix too -_('Babatschi')""") - messages = list(extract.extract('python', buf, comment_tags=['NOTE:', ':'], - strip_comment_tags=True)) - assert messages[0][1] == 'Servus' - assert messages[0][2] == ['This is a comment with a very simple', 'prefix specified'] - assert messages[1][1] == 'Babatschi' - assert messages[1][2] == ['This is a multiline comment with', 'a prefix too'] - - def test_nested_messages(self): - buf = BytesIO(b""" -# NOTE: First -_(u'Hello, {name}!', name=_(u'Foo Bar')) - -# NOTE: Second -_(u'Hello, {name1} and {name2}!', name1=_(u'Heungsub'), - name2=_(u'Armin')) - -# NOTE: Third -_(u'Hello, {0} and {1}!', _(u'Heungsub'), - _(u'Armin')) -""") - messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) - assert messages[0][2] == ('Hello, {name}!', 
None) - assert messages[0][3] == ['NOTE: First'] - assert messages[1][2] == 'Foo Bar' - assert messages[1][3] == [] - assert messages[2][2] == ('Hello, {name1} and {name2}!', None) - assert messages[2][3] == ['NOTE: Second'] - assert messages[3][2] == 'Heungsub' - assert messages[3][3] == [] - assert messages[4][2] == 'Armin' - assert messages[4][3] == [] - assert messages[5][2] == ('Hello, {0} and {1}!', None) - assert messages[5][3] == ['NOTE: Third'] - assert messages[6][2] == 'Heungsub' - assert messages[6][3] == [] - assert messages[7][2] == 'Armin' - assert messages[7][3] == [] - - -class ExtractTestCase(unittest.TestCase): - - def test_invalid_filter(self): - buf = BytesIO(b"""\ -msg1 = _(i18n_arg.replace(r'\"', '"')) -msg2 = ungettext(i18n_arg.replace(r'\"', '"'), multi_arg.replace(r'\"', '"'), 2) -msg3 = ungettext("Babel", multi_arg.replace(r'\"', '"'), 2) -msg4 = ungettext(i18n_arg.replace(r'\"', '"'), "Babels", 2) -msg5 = ungettext('bunny', 'bunnies', random.randint(1, 2)) -msg6 = ungettext(arg0, 'bunnies', random.randint(1, 2)) -msg7 = _(hello.there) -msg8 = gettext('Rabbit') -msg9 = dgettext('wiki', model.addPage()) -msg10 = dngettext(domain, 'Page', 'Pages', 3) -""") - messages = \ - list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], - {})) - assert messages == [ - (5, ('bunny', 'bunnies'), [], None), - (8, 'Rabbit', [], None), - (10, ('Page', 'Pages'), [], None), - ] - - def test_invalid_extract_method(self): - buf = BytesIO(b'') - with pytest.raises(ValueError): - list(extract.extract('spam', buf)) - - def test_different_signatures(self): - buf = BytesIO(b""" -foo = _('foo', 'bar') -n = ngettext('hello', 'there', n=3) -n = ngettext(n=3, 'hello', 'there') -n = ngettext(n=3, *messages) -n = ngettext() -n = ngettext('foo') -""") - messages = \ - list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], - {})) - assert len(messages) == 2 - assert messages[0][1] == 'foo' - assert messages[1][1] == ('hello', 'there') - - def 
test_empty_string_msgid(self): - buf = BytesIO(b"""\ + messages = \ + list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], + {})) + assert len(messages) == 2 + assert messages[0][1] == 'foo' + assert messages[1][1] == ('hello', 'there') + + +def test_empty_string_msgid(): + buf = BytesIO(b"""\ msg = _('') """) - stderr = sys.stderr - sys.stderr = StringIO() - try: - messages = \ - list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, - [], {})) - assert messages == [] - assert 'warning: Empty msgid.' in sys.stderr.getvalue() - finally: - sys.stderr = stderr - - def test_warn_if_empty_string_msgid_found_in_context_aware_extraction_method(self): - buf = BytesIO(b"\nmsg = pgettext('ctxt', '')\n") - stderr = sys.stderr - sys.stderr = StringIO() - try: - messages = extract.extract('python', buf) - assert list(messages) == [] - assert 'warning: Empty msgid.' in sys.stderr.getvalue() - finally: - sys.stderr = stderr - - def test_extract_allows_callable(self): - def arbitrary_extractor(fileobj, keywords, comment_tags, options): - return [(1, None, (), ())] - for x in extract.extract(arbitrary_extractor, BytesIO(b"")): - assert x[0] == 1 - - def test_future(self): - buf = BytesIO(br""" + stderr = sys.stderr + sys.stderr = StringIO() + try: + messages = \ + list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, + [], {})) + assert messages == [] + assert 'warning: Empty msgid.' in sys.stderr.getvalue() + finally: + sys.stderr = stderr + + +def test_warn_if_empty_string_msgid_found_in_context_aware_extraction_method(): + buf = BytesIO(b"\nmsg = pgettext('ctxt', '')\n") + stderr = sys.stderr + sys.stderr = StringIO() + try: + messages = extract.extract('python', buf) + assert list(messages) == [] + assert 'warning: Empty msgid.' 
in sys.stderr.getvalue() + finally: + sys.stderr = stderr + + +def test_extract_allows_callable(): + def arbitrary_extractor(fileobj, keywords, comment_tags, options): + return [(1, None, (), ())] + for x in extract.extract(arbitrary_extractor, BytesIO(b"")): + assert x[0] == 1 + + +def test_future(): + buf = BytesIO(br""" # -*- coding: utf-8 -*- from __future__ import unicode_literals nbsp = _('\xa0') """) - messages = list(extract.extract('python', buf, - extract.DEFAULT_KEYWORDS, [], {})) - assert messages[0][1] == '\xa0' + messages = list(extract.extract('python', buf, + extract.DEFAULT_KEYWORDS, [], {})) + assert messages[0][1] == '\xa0' + - def test_f_strings(self): - buf = BytesIO(br""" +def test_f_strings(): + buf = BytesIO(br""" t1 = _('foobar') t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f) t3 = _(f'''whoa! a ''' # should be extracted (continues on following lines) f'flying shark' - '... hello' +'... hello' ) t4 = _(f'spameggs {t1}') # should not be extracted """) - messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) - assert len(messages) == 4 - assert messages[0][1] == 'foobar' - assert messages[1][1] == 'spameggsfeast' - assert messages[2][1] == 'spameggskerroshampurilainen' - assert messages[3][1] == 'whoa! a flying shark... hello' - - def test_f_strings_non_utf8(self): - buf = BytesIO(b""" + messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) + assert len(messages) == 4 + assert messages[0][1] == 'foobar' + assert messages[1][1] == 'spameggsfeast' + assert messages[2][1] == 'spameggskerroshampurilainen' + assert messages[3][1] == 'whoa! a flying shark... 
hello' + + +def test_f_strings_non_utf8(): + buf = BytesIO(b""" # -- coding: latin-1 -- t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6') """) - messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) - assert len(messages) == 1 - assert messages[0][1] == 'åäöÅÄÖ' + messages = list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) + assert len(messages) == 1 + assert messages[0][1] == 'åäöÅÄÖ' + + +def test_issue_1195(): + buf = BytesIO(b""" +foof = { + 'test_string': StringWithMeta( + # NOTE: Text describing a test string + string=_( + 'Text string that is on a new line' + ), + ), +} +""") + messages = list(extract.extract('python', buf, {'_': None}, ["NOTE"], {})) + message = messages[0] + assert message[0] in (5, 6) # Depends on whether #1126 is in + assert message[1] == 'Text string that is on a new line' + assert message[2] == ['NOTE: Text describing a test string'] + + +def test_issue_1195_2(): + buf = BytesIO(b""" +# NOTE: This should still be considered, even if +# the text is far away +foof = _( + + + + + + + + + + 'Hey! Down here!') +""") + messages = list(extract.extract('python', buf, {'_': None}, ["NOTE"], {})) + message = messages[0] + assert message[1] == 'Hey! Down here!' + assert message[2] == [ + 'NOTE: This should still be considered, even if', + 'the text is far away', + ] diff --git a/tests/messages/test_extract_python.py b/tests/messages/test_extract_python.py new file mode 100644 index 000000000..86f15b1c6 --- /dev/null +++ b/tests/messages/test_extract_python.py @@ -0,0 +1,519 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. 
For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. + +import codecs +from io import BytesIO + +import pytest + +from babel.messages import extract + + +def test_nested_calls(): + buf = BytesIO(b"""\ +msg1 = _(i18n_arg.replace(r'\"', '"')) +msg2 = ungettext(i18n_arg.replace(r'\"', '"'), multi_arg.replace(r'\"', '"'), 2) +msg3 = ungettext("Babel", multi_arg.replace(r'\"', '"'), 2) +msg4 = ungettext(i18n_arg.replace(r'\"', '"'), "Babels", 2) +msg5 = ungettext('bunny', 'bunnies', random.randint(1, 2)) +msg6 = ungettext(arg0, 'bunnies', random.randint(1, 2)) +msg7 = _(hello.there) +msg8 = gettext('Rabbit') +msg9 = dgettext('wiki', model.addPage()) +msg10 = dngettext(getDomain(), 'Page', 'Pages', 3) +msg11 = ngettext( +"bunny", +"bunnies", +len(bunnies) +) +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + assert messages == [ + (1, '_', None, []), + (2, 'ungettext', (None, None, None), []), + (3, 'ungettext', ('Babel', None, None), []), + (4, 'ungettext', (None, 'Babels', None), []), + (5, 'ungettext', ('bunny', 'bunnies', None), []), + (6, 'ungettext', (None, 'bunnies', None), []), + (7, '_', None, []), + (8, 'gettext', 'Rabbit', []), + (9, 'dgettext', ('wiki', None), []), + (10, 'dngettext', (None, 'Page', 'Pages', None), []), + (12, 'ngettext', ('bunny', 'bunnies', None), []), + ] + + +def test_extract_default_encoding_ascii(): + buf = BytesIO(b'_("a")') + messages = list(extract.extract_python( + buf, list(extract.DEFAULT_KEYWORDS), [], {}, + )) + # Should work great in both py2 and py3 + assert messages == [(1, '_', 'a', [])] + + +def test_extract_default_encoding_utf8(): + buf = BytesIO('_("☃")'.encode('UTF-8')) + messages = list(extract.extract_python( + buf, list(extract.DEFAULT_KEYWORDS), [], {}, + )) + assert messages == [(1, '_', '☃', [])] + + +def test_nested_comments(): + buf = BytesIO(b"""\ +msg = ngettext('pylon', # 
TRANSLATORS: shouldn't be + 'pylons', # TRANSLATORS: seeing this + count) +""") + messages = list(extract.extract_python(buf, ('ngettext',), + ['TRANSLATORS:'], {})) + assert messages == [(1, 'ngettext', ('pylon', 'pylons', None), [])] + + +def test_comments_with_calls_that_spawn_multiple_lines(): + buf = BytesIO(b"""\ +# NOTE: This Comment SHOULD Be Extracted +add_notice(req, ngettext("Catalog deleted.", + "Catalogs deleted.", len(selected))) + +# NOTE: This Comment SHOULD Be Extracted +add_notice(req, _("Locale deleted.")) + + +# NOTE: This Comment SHOULD Be Extracted +add_notice(req, ngettext("Foo deleted.", "Foos deleted.", len(selected))) + +# NOTE: This Comment SHOULD Be Extracted +# NOTE: And This One Too +add_notice(req, ngettext("Bar deleted.", + "Bars deleted.", len(selected))) +""") + messages = list(extract.extract_python(buf, ('ngettext', '_'), ['NOTE:'], + + {'strip_comment_tags': False})) + assert messages[0] == (2, 'ngettext', ('Catalog deleted.', 'Catalogs deleted.', None), ['NOTE: This Comment SHOULD Be Extracted']) + assert messages[1] == (6, '_', 'Locale deleted.', ['NOTE: This Comment SHOULD Be Extracted']) + assert messages[2] == (10, 'ngettext', ('Foo deleted.', 'Foos deleted.', None), ['NOTE: This Comment SHOULD Be Extracted']) + assert messages[3] == (14, 'ngettext', ('Bar deleted.', 'Bars deleted.', None), ['NOTE: This Comment SHOULD Be Extracted', 'NOTE: And This One Too']) + + +def test_declarations(): + buf = BytesIO(b"""\ +class gettext(object): +pass +def render_body(context,x,y=_('Page arg 1'),z=_('Page arg 2'),**pageargs): +pass +def ngettext(y='arg 1',z='arg 2',**pageargs): +pass +class Meta: +verbose_name = _('log entry') +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + assert messages == [ + (3, '_', 'Page arg 1', []), + (3, '_', 'Page arg 2', []), + (8, '_', 'log entry', []), + ] + + +def test_multiline(): + buf = BytesIO(b"""\ +msg1 = ngettext('pylon', + 'pylons', count) +msg2 
= ngettext('elvis', + 'elvises', + count) +""") + messages = list(extract.extract_python(buf, ('ngettext',), [], {})) + assert messages == [ + (1, 'ngettext', ('pylon', 'pylons', None), []), + (3, 'ngettext', ('elvis', 'elvises', None), []), + ] + + +def test_dpgettext(): + buf = BytesIO(b"""\ +msg1 = dpgettext('dev', 'Strings', + 'pylon') +msg2 = dpgettext('dev', 'Strings', 'elvis') +""") + messages = list(extract.extract_python(buf, ('dpgettext',), [], {})) + assert messages == [ + (1, 'dpgettext', ('dev', 'Strings', 'pylon'), []), + (3, 'dpgettext', ('dev', 'Strings', 'elvis'), []), + ] + buf = BytesIO(b"""\ +msg = dpgettext('dev', 'Strings', 'pylon', # TRANSLATORS: shouldn't be + ) # TRANSLATORS: seeing this +""") + messages = list(extract.extract_python(buf, ('dpgettext',),['TRANSLATORS:'], {})) + assert messages == [ + (1, 'dpgettext', ('dev', 'Strings', 'pylon', None), []), + ] + + +def test_npgettext(): + buf = BytesIO(b"""\ +msg1 = npgettext('Strings','pylon', + 'pylons', count) +msg2 = npgettext('Strings','elvis', + 'elvises', + count) +""") + messages = list(extract.extract_python(buf, ('npgettext',), [], {})) + assert messages == [ + (1, 'npgettext', ('Strings', 'pylon', 'pylons', None), []), + (3, 'npgettext', ('Strings', 'elvis', 'elvises', None), []), + ] + buf = BytesIO(b"""\ +msg = npgettext('Strings', 'pylon', # TRANSLATORS: shouldn't be + 'pylons', # TRANSLATORS: seeing this + count) +""") + messages = list(extract.extract_python(buf, ('npgettext',), + ['TRANSLATORS:'], {})) + assert messages == [ + (1, 'npgettext', ('Strings', 'pylon', 'pylons', None), []), + ] + + +def test_dnpgettext(): + buf = BytesIO(b"""\ +msg1 = dnpgettext('dev', 'Strings','pylon', + 'pylons', count) +msg2 = dnpgettext('dev', 'Strings','elvis', + 'elvises', + count) +""") + messages = list(extract.extract_python(buf, ('dnpgettext',), [], {})) + assert messages == [ + (1, 'dnpgettext', ('dev', 'Strings', 'pylon', 'pylons', None), []), + (3, 'dnpgettext', ('dev', 'Strings', 
'elvis', 'elvises', None), []), + ] + buf = BytesIO(b"""\ +msg = dnpgettext('dev', 'Strings', 'pylon', # TRANSLATORS: shouldn't be + 'pylons', # TRANSLATORS: seeing this + count) +""") + messages = list(extract.extract_python(buf, ('dnpgettext',),['TRANSLATORS:'], {})) + assert messages == [ + (1, 'dnpgettext', ('dev', 'Strings', 'pylon', 'pylons', None), []), + ] + + +def test_triple_quoted_strings(): + buf = BytesIO(b"""\ +msg1 = _('''pylons''') +msg2 = ngettext(r'''elvis''', \"\"\"elvises\"\"\", count) +msg2 = ngettext(\"\"\"elvis\"\"\", 'elvises', count) +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + assert messages == [ + (1, '_', 'pylons', []), + (2, 'ngettext', ('elvis', 'elvises', None), []), + (3, 'ngettext', ('elvis', 'elvises', None), []), + ] + + +def test_multiline_strings(): + buf = BytesIO(b"""\ +_('''This module provides internationalization and localization +support for your Python programs by providing an interface to the GNU +gettext message catalog library.''') +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + assert messages == [ + (1, '_', + 'This module provides internationalization and localization\n' + 'support for your Python programs by providing an interface to ' + 'the GNU\ngettext message catalog library.', []), + ] + + +def test_concatenated_strings(): + buf = BytesIO(b"""\ +foobar = _('foo' 'bar') +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + assert messages[0][2] == 'foobar' + + +def test_unicode_string_arg(): + buf = BytesIO(b"msg = _('Foo Bar')") + messages = list(extract.extract_python(buf, ('_',), [], {})) + assert messages[0][2] == 'Foo Bar' + + +def test_comment_tag(): + buf = BytesIO(b""" +# NOTE: A translation comment +msg = _('Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Foo Bar' + assert 
messages[0][3] == ['NOTE: A translation comment'] + + +def test_comment_tag_multiline(): + buf = BytesIO(b""" +# NOTE: A translation comment +# with a second line +msg = _('Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Foo Bar' + assert messages[0][3] == ['NOTE: A translation comment', 'with a second line'] + + +def test_translator_comments_with_previous_non_translator_comments(): + buf = BytesIO(b""" +# This shouldn't be in the output +# because it didn't start with a comment tag +# NOTE: A translation comment +# with a second line +msg = _('Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Foo Bar' + assert messages[0][3] == ['NOTE: A translation comment', 'with a second line'] + + +def test_comment_tags_not_on_start_of_comment(): + buf = BytesIO(b""" +# This shouldn't be in the output +# because it didn't start with a comment tag +# do NOTE: this will not be a translation comment +# NOTE: This one will be +msg = _('Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Foo Bar' + assert messages[0][3] == ['NOTE: This one will be'] + + +def test_multiple_comment_tags(): + buf = BytesIO(b""" +# NOTE1: A translation comment for tag1 +# with a second line +msg = _('Foo Bar1') + +# NOTE2: A translation comment for tag2 +msg = _('Foo Bar2') +""") + messages = list(extract.extract_python(buf, ('_',), + ['NOTE1:', 'NOTE2:'], {})) + assert messages[0][2] == 'Foo Bar1' + assert messages[0][3] == ['NOTE1: A translation comment for tag1', 'with a second line'] + assert messages[1][2] == 'Foo Bar2' + assert messages[1][3] == ['NOTE2: A translation comment for tag2'] + + +def test_two_succeeding_comments(): + buf = BytesIO(b""" +# NOTE: one +# NOTE: two +msg = _('Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Foo Bar' + 
assert messages[0][3] == ['NOTE: one', 'NOTE: two'] + + +def test_invalid_translator_comments(): + buf = BytesIO(b""" +# NOTE: this shouldn't apply to any messages +hello = 'there' + +msg = _('Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Foo Bar' + assert messages[0][3] == [] + + +def test_invalid_translator_comments2(): + buf = BytesIO(b""" +# NOTE: Hi! +hithere = _('Hi there!') + +# NOTE: you should not be seeing this in the .po +rows = [[v for v in range(0,10)] for row in range(0,10)] + +# this (NOTE:) should not show up either +hello = _('Hello') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Hi there!' + assert messages[0][3] == ['NOTE: Hi!'] + assert messages[1][2] == 'Hello' + assert messages[1][3] == [] + + +def test_invalid_translator_comments3(): + buf = BytesIO(b""" +# NOTE: Hi, + +# there! +hithere = _('Hi there!') +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Hi there!' 
+ assert messages[0][3] == [] + + +def test_comment_tag_with_leading_space(): + buf = BytesIO(b""" +#: A translation comment +#: with leading spaces +msg = _('Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), [':'], {})) + assert messages[0][2] == 'Foo Bar' + assert messages[0][3] == [': A translation comment', ': with leading spaces'] + + +def test_different_signatures(): + buf = BytesIO(b""" +foo = _('foo', 'bar') +n = ngettext('hello', 'there', n=3) +n = ngettext(n=3, 'hello', 'there') +n = ngettext(n=3, *messages) +n = ngettext() +n = ngettext('foo') +""") + messages = list(extract.extract_python(buf, ('_', 'ngettext'), [], {})) + assert messages[0][2] == ('foo', 'bar') + assert messages[1][2] == ('hello', 'there', None) + assert messages[2][2] == (None, 'hello', 'there') + assert messages[3][2] == (None, None) + assert messages[4][2] is None + assert messages[5][2] == 'foo' + + +def test_utf8_message(): + buf = BytesIO(""" +# NOTE: hello +msg = _('Bonjour à tous') +""".encode('utf-8')) + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], + {'encoding': 'utf-8'})) + assert messages[0][2] == 'Bonjour à tous' + assert messages[0][3] == ['NOTE: hello'] + + +def test_utf8_message_with_magic_comment(): + buf = BytesIO("""# -*- coding: utf-8 -*- +# NOTE: hello +msg = _('Bonjour à tous') +""".encode('utf-8')) + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Bonjour à tous' + assert messages[0][3] == ['NOTE: hello'] + + +def test_utf8_message_with_utf8_bom(): + buf = BytesIO(codecs.BOM_UTF8 + """ +# NOTE: hello +msg = _('Bonjour à tous') +""".encode('utf-8')) + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Bonjour à tous' + assert messages[0][3] == ['NOTE: hello'] + + +def test_utf8_message_with_utf8_bom_and_magic_comment(): + buf = BytesIO(codecs.BOM_UTF8 + """# -*- coding: utf-8 -*- +# NOTE: hello +msg = _('Bonjour à tous') 
+""".encode('utf-8')) + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Bonjour à tous' + assert messages[0][3] == ['NOTE: hello'] + + +def test_utf8_bom_with_latin_magic_comment_fails(): + buf = BytesIO(codecs.BOM_UTF8 + """# -*- coding: latin-1 -*- +# NOTE: hello +msg = _('Bonjour à tous') +""".encode('utf-8')) + with pytest.raises(SyntaxError): + list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + + +def test_utf8_raw_strings_match_unicode_strings(): + buf = BytesIO(codecs.BOM_UTF8 + """ +msg = _('Bonjour à tous') +msgu = _('Bonjour à tous') +""".encode('utf-8')) + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == 'Bonjour à tous' + assert messages[0][2] == messages[1][2] + + +def test_extract_strip_comment_tags(): + buf = BytesIO(b"""\ +#: This is a comment with a very simple +#: prefix specified +_('Servus') + +# NOTE: This is a multiline comment with +# a prefix too +_('Babatschi')""") + messages = list(extract.extract('python', buf, comment_tags=['NOTE:', ':'], + strip_comment_tags=True)) + assert messages[0][1] == 'Servus' + assert messages[0][2] == ['This is a comment with a very simple', 'prefix specified'] + assert messages[1][1] == 'Babatschi' + assert messages[1][2] == ['This is a multiline comment with', 'a prefix too'] + + +def test_nested_messages(): + buf = BytesIO(b""" +# NOTE: First +_('Hello, {name}!', name=_('Foo Bar')) + +# NOTE: Second +_('Hello, {name1} and {name2}!', name1=_('Heungsub'), +name2=_('Armin')) + +# NOTE: Third +_('Hello, {0} and {1}!', _('Heungsub'), +_('Armin')) +""") + messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == ('Hello, {name}!', None) + assert messages[0][3] == ['NOTE: First'] + assert messages[1][2] == 'Foo Bar' + assert messages[1][3] == [] + assert messages[2][2] == ('Hello, {name1} and {name2}!', None) + assert messages[2][3] == ['NOTE: Second'] + assert messages[3][2] == 
'Heungsub' + assert messages[3][3] == [] + assert messages[4][2] == 'Armin' + assert messages[4][3] == [] + assert messages[5][2] == ('Hello, {0} and {1}!', None) + assert messages[5][3] == ['NOTE: Third'] + assert messages[6][2] == 'Heungsub' + assert messages[6][3] == [] + assert messages[7][2] == 'Armin' + assert messages[7][3] == [] diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py deleted file mode 100644 index c83948d28..000000000 --- a/tests/messages/test_frontend.py +++ /dev/null @@ -1,1701 +0,0 @@ -# -# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team -# All rights reserved. -# -# This software is licensed as described in the file LICENSE, which -# you should have received as part of this distribution. The terms -# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
-import logging -import os -import re -import shlex -import shutil -import sys -import time -import unittest -from datetime import datetime, timedelta -from functools import partial -from io import BytesIO, StringIO -from typing import List - -import pytest -from freezegun import freeze_time - -from babel import __version__ as VERSION -from babel.dates import format_datetime -from babel.messages import Catalog, extract, frontend -from babel.messages.frontend import ( - BaseError, - CommandLineInterface, - ExtractMessages, - OptionError, - UpdateCatalog, -) -from babel.messages.pofile import read_po, write_po -from babel.util import LOCALTZ -from tests.messages.consts import ( - TEST_PROJECT_DISTRIBUTION_DATA, - data_dir, - i18n_dir, - pot_file, - project_dir, - this_dir, -) -from tests.messages.utils import CUSTOM_EXTRACTOR_COOKIE - - -def _po_file(locale): - return os.path.join(i18n_dir, locale, 'LC_MESSAGES', 'messages.po') - - -class Distribution: # subset of distutils.dist.Distribution - def __init__(self, attrs: dict) -> None: - self.attrs = attrs - - def get_name(self) -> str: - return self.attrs['name'] - - def get_version(self) -> str: - return self.attrs['version'] - - @property - def packages(self) -> List[str]: - return self.attrs['packages'] - - -class CompileCatalogTestCase(unittest.TestCase): - - def setUp(self): - self.olddir = os.getcwd() - os.chdir(data_dir) - - self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.CompileCatalog(self.dist) - self.cmd.initialize_options() - - def tearDown(self): - os.chdir(self.olddir) - - def test_no_directory_or_output_file_specified(self): - self.cmd.locale = 'en_US' - self.cmd.input_file = 'dummy' - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_no_directory_or_input_file_specified(self): - self.cmd.locale = 'en_US' - self.cmd.output_file = 'dummy' - with pytest.raises(OptionError): - self.cmd.finalize_options() - - -class 
ExtractMessagesTestCase(unittest.TestCase): - - def setUp(self): - self.olddir = os.getcwd() - os.chdir(data_dir) - - self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.ExtractMessages(self.dist) - self.cmd.initialize_options() - - def tearDown(self): - if os.path.isfile(pot_file): - os.unlink(pot_file) - - os.chdir(self.olddir) - - def assert_pot_file_exists(self): - assert os.path.isfile(pot_file) - - def test_neither_default_nor_custom_keywords(self): - self.cmd.output_file = 'dummy' - self.cmd.no_default_keywords = True - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_no_output_file_specified(self): - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_both_sort_output_and_sort_by_file(self): - self.cmd.output_file = 'dummy' - self.cmd.sort_output = True - self.cmd.sort_by_file = True - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_invalid_file_or_dir_input_path(self): - self.cmd.input_paths = 'nonexistent_path' - self.cmd.output_file = 'dummy' - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_input_paths_is_treated_as_list(self): - self.cmd.input_paths = data_dir - self.cmd.output_file = pot_file - self.cmd.finalize_options() - self.cmd.run() - - with open(pot_file) as f: - catalog = read_po(f) - msg = catalog.get('bar') - assert len(msg.locations) == 1 - assert ('file1.py' in msg.locations[0][0]) - - def test_input_paths_handle_spaces_after_comma(self): - self.cmd.input_paths = f"{this_dir}, {data_dir}" - self.cmd.output_file = pot_file - self.cmd.finalize_options() - assert self.cmd.input_paths == [this_dir, data_dir] - - def test_input_dirs_is_alias_for_input_paths(self): - self.cmd.input_dirs = this_dir - self.cmd.output_file = pot_file - self.cmd.finalize_options() - # Gets listified in `finalize_options`: - assert self.cmd.input_paths == [self.cmd.input_dirs] - - def 
test_input_dirs_is_mutually_exclusive_with_input_paths(self): - self.cmd.input_dirs = this_dir - self.cmd.input_paths = this_dir - self.cmd.output_file = pot_file - with pytest.raises(OptionError): - self.cmd.finalize_options() - - @freeze_time("1994-11-11") - def test_extraction_with_default_mapping(self): - self.cmd.copyright_holder = 'FooBar, Inc.' - self.cmd.msgid_bugs_address = 'bugs.address@email.tld' - self.cmd.output_file = 'project/i18n/temp.pot' - self.cmd.add_comments = 'TRANSLATOR:,TRANSLATORS:' - - self.cmd.finalize_options() - self.cmd.run() - - self.assert_pot_file_exists() - - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for TestProject. -# Copyright (C) {time.strftime('%Y')} FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , {time.strftime('%Y')}. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. TRANSLATOR: This will be a translator coment, -#. that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -#: project/ignored/this_wont_normally_be_here.py:11 -msgid "FooBar" -msgid_plural "FooBars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(pot_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_extraction_with_mapping_file(self): - self.cmd.copyright_holder = 'FooBar, Inc.' 
- self.cmd.msgid_bugs_address = 'bugs.address@email.tld' - self.cmd.mapping_file = 'mapping.cfg' - self.cmd.output_file = 'project/i18n/temp.pot' - self.cmd.add_comments = 'TRANSLATOR:,TRANSLATORS:' - - self.cmd.finalize_options() - self.cmd.run() - - self.assert_pot_file_exists() - - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for TestProject. -# Copyright (C) {time.strftime('%Y')} FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , {time.strftime('%Y')}. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. TRANSLATOR: This will be a translator coment, -#. that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(pot_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_extraction_with_mapping_dict(self): - self.dist.message_extractors = { - 'project': [ - ('**/ignored/**.*', 'ignore', None), - ('**.py', 'python', None), - ], - } - self.cmd.copyright_holder = 'FooBar, Inc.' 
- self.cmd.msgid_bugs_address = 'bugs.address@email.tld' - self.cmd.output_file = 'project/i18n/temp.pot' - self.cmd.add_comments = 'TRANSLATOR:,TRANSLATORS:' - - self.cmd.finalize_options() - self.cmd.run() - - self.assert_pot_file_exists() - - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for TestProject. -# Copyright (C) {time.strftime('%Y')} FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , {time.strftime('%Y')}. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. TRANSLATOR: This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(pot_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_extraction_add_location_file(self): - self.dist.message_extractors = { - 'project': [ - ('**/ignored/**.*', 'ignore', None), - ('**.py', 'python', None), - ], - } - self.cmd.output_file = 'project/i18n/temp.pot' - self.cmd.add_location = 'file' - self.cmd.omit_header = True - - self.cmd.finalize_options() - self.cmd.run() - - self.assert_pot_file_exists() - - expected_content = r"""#: project/file1.py -msgid "bar" -msgstr "" - -#: project/file2.py -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(pot_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - -class InitCatalogTestCase(unittest.TestCase): - - def setUp(self): - self.olddir = os.getcwd() - os.chdir(data_dir) - - self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.InitCatalog(self.dist) - self.cmd.initialize_options() - - def tearDown(self): - for dirname in ['en_US', 'ja_JP', 'lv_LV']: - locale_dir = os.path.join(i18n_dir, dirname) - if os.path.isdir(locale_dir): - shutil.rmtree(locale_dir) - - os.chdir(self.olddir) - - def test_no_input_file(self): - self.cmd.locale = 'en_US' - self.cmd.output_file = 'dummy' - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_no_locale(self): - self.cmd.input_file = 'dummy' - self.cmd.output_file = 'dummy' - with pytest.raises(OptionError): - self.cmd.finalize_options() - - @freeze_time("1994-11-11") - def test_with_output_dir(self): - self.cmd.input_file = 'project/i18n/messages.pot' - self.cmd.locale = 'en_US' - self.cmd.output_dir = 'project/i18n' - - self.cmd.finalize_options() - self.cmd.run() - - po_file = _po_file('en_US') - assert os.path.isfile(po_file) 
- - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# English (United States) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: en_US\n" -"Language-Team: en_US \n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_keeps_catalog_non_fuzzy(self): - self.cmd.input_file = 'project/i18n/messages_non_fuzzy.pot' - self.cmd.locale = 'en_US' - self.cmd.output_dir = 'project/i18n' - - self.cmd.finalize_options() - self.cmd.run() - - po_file = _po_file('en_US') - assert os.path.isfile(po_file) - - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# English (United States) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. 
-# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: en_US\n" -"Language-Team: en_US \n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_correct_init_more_than_2_plurals(self): - self.cmd.input_file = 'project/i18n/messages.pot' - self.cmd.locale = 'lv_LV' - self.cmd.output_dir = 'project/i18n' - - self.cmd.finalize_options() - self.cmd.run() - - po_file = _po_file('lv_LV') - assert os.path.isfile(po_file) - - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Latvian (Latvia) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: lv_LV\n" -"Language-Team: lv_LV \n" -"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 :" -" 2);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" -msgstr[2] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_correct_init_singular_plural_forms(self): - self.cmd.input_file = 'project/i18n/messages.pot' - self.cmd.locale = 'ja_JP' - self.cmd.output_dir = 'project/i18n' - - self.cmd.finalize_options() - self.cmd.run() - - po_file = _po_file('ja_JP') - assert os.path.isfile(po_file) - - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='ja_JP') - expected_content = fr"""# Japanese (Japan) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: ja_JP\n" -"Language-Team: ja_JP \n" -"Plural-Forms: nplurals=1; plural=0;\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_supports_no_wrap(self): - self.cmd.input_file = 'project/i18n/long_messages.pot' - self.cmd.locale = 'en_US' - self.cmd.output_dir = 'project/i18n' - - long_message = '"' + 'xxxxx ' * 15 + '"' - - with open('project/i18n/messages.pot', 'rb') as f: - pot_contents = f.read().decode('latin-1') - pot_with_very_long_line = pot_contents.replace('"bar"', long_message) - with open(self.cmd.input_file, 'wb') as f: - f.write(pot_with_very_long_line.encode('latin-1')) - self.cmd.no_wrap = True - - self.cmd.finalize_options() - self.cmd.run() - - po_file = _po_file('en_US') - assert os.path.isfile(po_file) - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en_US') - expected_content = fr"""# English (United States) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: en_US\n" -"Language-Team: en_US \n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid {long_message} -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_supports_width(self): - self.cmd.input_file = 'project/i18n/long_messages.pot' - self.cmd.locale = 'en_US' - self.cmd.output_dir = 'project/i18n' - - long_message = '"' + 'xxxxx ' * 15 + '"' - - with open('project/i18n/messages.pot', 'rb') as f: - pot_contents = f.read().decode('latin-1') - pot_with_very_long_line = pot_contents.replace('"bar"', long_message) - with open(self.cmd.input_file, 'wb') as f: - f.write(pot_with_very_long_line.encode('latin-1')) - self.cmd.width = 120 - self.cmd.finalize_options() - self.cmd.run() - - po_file = _po_file('en_US') - assert os.path.isfile(po_file) - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en_US') - expected_content = fr"""# English (United States) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: en_US\n" -"Language-Team: en_US \n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid {long_message} -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - -class CommandLineInterfaceTestCase(unittest.TestCase): - - def setUp(self): - data_dir = os.path.join(this_dir, 'data') - self.orig_working_dir = os.getcwd() - self.orig_argv = sys.argv - self.orig_stdout = sys.stdout - self.orig_stderr = sys.stderr - sys.argv = ['pybabel'] - sys.stdout = StringIO() - sys.stderr = StringIO() - os.chdir(data_dir) - - self._remove_log_handlers() - self.cli = frontend.CommandLineInterface() - - def tearDown(self): - os.chdir(self.orig_working_dir) - sys.argv = self.orig_argv - sys.stdout = self.orig_stdout - sys.stderr = self.orig_stderr - for dirname in ['lv_LV', 'ja_JP']: - locale_dir = os.path.join(i18n_dir, dirname) - if os.path.isdir(locale_dir): - shutil.rmtree(locale_dir) - self._remove_log_handlers() - - def _remove_log_handlers(self): - # Logging handlers will be reused if possible (#227). This breaks the - # implicit assumption that our newly created StringIO for sys.stderr - # contains the console output. Removing the old handler ensures that a - # new handler with our new StringIO instance will be used. - log = logging.getLogger('babel') - for handler in log.handlers: - log.removeHandler(handler) - - def test_usage(self): - try: - self.cli.run(sys.argv) - self.fail('Expected SystemExit') - except SystemExit as e: - assert e.code == 2 - assert sys.stderr.getvalue().lower() == """\ -usage: pybabel command [options] [args] - -pybabel: error: no valid command or option passed. try the -h/--help option for more information. -""" - - def test_list_locales(self): - """ - Test the command with the --list-locales arg. 
- """ - result = self.cli.run(sys.argv + ['--list-locales']) - assert not result - output = sys.stdout.getvalue() - assert 'fr_CH' in output - assert 'French (Switzerland)' in output - assert "\nb'" not in output # No bytes repr markers in output - - def _run_init_catalog(self): - i18n_dir = os.path.join(data_dir, 'project', 'i18n') - pot_path = os.path.join(data_dir, 'project', 'i18n', 'messages.pot') - init_argv = sys.argv + ['init', '--locale', 'en_US', '-d', i18n_dir, - '-i', pot_path] - self.cli.run(init_argv) - - def test_no_duplicated_output_for_multiple_runs(self): - self._run_init_catalog() - first_output = sys.stderr.getvalue() - self._run_init_catalog() - second_output = sys.stderr.getvalue()[len(first_output):] - - # in case the log message is not duplicated we should get the same - # output as before - assert first_output == second_output - - def test_frontend_can_log_to_predefined_handler(self): - custom_stream = StringIO() - log = logging.getLogger('babel') - log.addHandler(logging.StreamHandler(custom_stream)) - - self._run_init_catalog() - assert id(sys.stderr) != id(custom_stream) - assert not sys.stderr.getvalue() - assert custom_stream.getvalue() - - def test_help(self): - try: - self.cli.run(sys.argv + ['--help']) - self.fail('Expected SystemExit') - except SystemExit as e: - assert not e.code - content = sys.stdout.getvalue().lower() - assert 'options:' in content - assert all(command in content for command in ('init', 'update', 'compile', 'extract')) - - def assert_pot_file_exists(self): - assert os.path.isfile(pot_file) - - @freeze_time("1994-11-11") - def test_extract_with_default_mapping(self): - self.cli.run(sys.argv + ['extract', - '--copyright-holder', 'FooBar, Inc.', - '--project', 'TestProject', '--version', '0.1', - '--msgid-bugs-address', 'bugs.address@email.tld', - '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', - '-o', pot_file, 'project']) - self.assert_pot_file_exists() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 
'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for TestProject. -# Copyright (C) {time.strftime('%Y')} FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , {time.strftime('%Y')}. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. TRANSLATOR: This will be a translator coment, -#. that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -#: project/ignored/this_wont_normally_be_here.py:11 -msgid "FooBar" -msgid_plural "FooBars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(pot_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_extract_with_mapping_file(self): - self.cli.run(sys.argv + ['extract', - '--copyright-holder', 'FooBar, Inc.', - '--project', 'TestProject', '--version', '0.1', - '--msgid-bugs-address', 'bugs.address@email.tld', - '--mapping', os.path.join(data_dir, 'mapping.cfg'), - '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', - '-o', pot_file, 'project']) - self.assert_pot_file_exists() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for TestProject. -# Copyright (C) {time.strftime('%Y')} FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , {time.strftime('%Y')}. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. TRANSLATOR: This will be a translator coment, -#. that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(pot_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_extract_with_exact_file(self): - """Tests that we can call extract with a particular file and only - strings from that file get extracted. (Note the absence of strings from file1.py) - """ - file_to_extract = os.path.join(data_dir, 'project', 'file2.py') - self.cli.run(sys.argv + ['extract', - '--copyright-holder', 'FooBar, Inc.', - '--project', 'TestProject', '--version', '0.1', - '--msgid-bugs-address', 'bugs.address@email.tld', - '--mapping', os.path.join(data_dir, 'mapping.cfg'), - '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', - '-o', pot_file, file_to_extract]) - self.assert_pot_file_exists() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for TestProject. -# Copyright (C) {time.strftime('%Y')} FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , {time.strftime('%Y')}. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(pot_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_init_with_output_dir(self): - po_file = _po_file('en_US') - self.cli.run(sys.argv + ['init', - '--locale', 'en_US', - '-d', os.path.join(i18n_dir), - '-i', os.path.join(i18n_dir, 'messages.pot')]) - assert os.path.isfile(po_file) - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# English (United States) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: en_US\n" -"Language-Team: en_US \n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_init_singular_plural_forms(self): - po_file = _po_file('ja_JP') - self.cli.run(sys.argv + ['init', - '--locale', 'ja_JP', - '-d', os.path.join(i18n_dir), - '-i', os.path.join(i18n_dir, 'messages.pot')]) - assert os.path.isfile(po_file) - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Japanese (Japan) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: ja_JP\n" -"Language-Team: ja_JP \n" -"Plural-Forms: nplurals=1; plural=0;\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - @freeze_time("1994-11-11") - def test_init_more_than_2_plural_forms(self): - po_file = _po_file('lv_LV') - self.cli.run(sys.argv + ['init', - '--locale', 'lv_LV', - '-d', i18n_dir, - '-i', os.path.join(i18n_dir, 'messages.pot')]) - assert os.path.isfile(po_file) - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Latvian (Latvia) translations for TestProject. -# Copyright (C) 2007 FooBar, Inc. -# This file is distributed under the same license as the TestProject -# project. -# FIRST AUTHOR , 2007. -# -msgid "" -msgstr "" -"Project-Id-Version: TestProject 0.1\n" -"Report-Msgid-Bugs-To: bugs.address@email.tld\n" -"POT-Creation-Date: 2007-04-01 15:30+0200\n" -"PO-Revision-Date: {date}\n" -"Last-Translator: FULL NAME \n" -"Language: lv_LV\n" -"Language-Team: lv_LV \n" -"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 :" -" 2);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#. This will be a translator coment, -#. 
that will include several lines -#: project/file1.py:8 -msgid "bar" -msgstr "" - -#: project/file2.py:9 -msgid "foobar" -msgid_plural "foobars" -msgstr[0] "" -msgstr[1] "" -msgstr[2] "" - -""" - with open(po_file) as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_compile_catalog(self): - po_file = _po_file('de_DE') - mo_file = po_file.replace('.po', '.mo') - self.cli.run(sys.argv + ['compile', - '--locale', 'de_DE', - '-d', i18n_dir]) - assert not os.path.isfile(mo_file), f'Expected no file at {mo_file!r}' - assert sys.stderr.getvalue() == f'catalog {po_file} is marked as fuzzy, skipping\n' - - def test_compile_fuzzy_catalog(self): - po_file = _po_file('de_DE') - mo_file = po_file.replace('.po', '.mo') - try: - self.cli.run(sys.argv + ['compile', - '--locale', 'de_DE', '--use-fuzzy', - '-d', i18n_dir]) - assert os.path.isfile(mo_file) - assert sys.stderr.getvalue() == f'compiling catalog {po_file} to {mo_file}\n' - finally: - if os.path.isfile(mo_file): - os.unlink(mo_file) - - def test_compile_catalog_with_more_than_2_plural_forms(self): - po_file = _po_file('ru_RU') - mo_file = po_file.replace('.po', '.mo') - try: - self.cli.run(sys.argv + ['compile', - '--locale', 'ru_RU', '--use-fuzzy', - '-d', i18n_dir]) - assert os.path.isfile(mo_file) - assert sys.stderr.getvalue() == f'compiling catalog {po_file} to {mo_file}\n' - finally: - if os.path.isfile(mo_file): - os.unlink(mo_file) - - def test_compile_catalog_multidomain(self): - po_foo = os.path.join(i18n_dir, 'de_DE', 'LC_MESSAGES', 'foo.po') - po_bar = os.path.join(i18n_dir, 'de_DE', 'LC_MESSAGES', 'bar.po') - mo_foo = po_foo.replace('.po', '.mo') - mo_bar = po_bar.replace('.po', '.mo') - try: - self.cli.run(sys.argv + ['compile', - '--locale', 'de_DE', '--domain', 'foo bar', '--use-fuzzy', - '-d', i18n_dir]) - for mo_file in [mo_foo, mo_bar]: - assert os.path.isfile(mo_file) - assert sys.stderr.getvalue() == ( - f'compiling catalog {po_foo} to {mo_foo}\n' - f'compiling 
catalog {po_bar} to {mo_bar}\n' - ) - - finally: - for mo_file in [mo_foo, mo_bar]: - if os.path.isfile(mo_file): - os.unlink(mo_file) - - def test_update(self): - template = Catalog() - template.add("1") - template.add("2") - template.add("3") - tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - po_file = os.path.join(i18n_dir, 'temp1.po') - self.cli.run(sys.argv + ['init', - '-l', 'fi', - '-o', po_file, - '-i', tmpl_file, - ]) - with open(po_file) as infp: - catalog = read_po(infp) - assert len(catalog) == 3 - - # Add another entry to the template - - template.add("4") - - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - - self.cli.run(sys.argv + ['update', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - with open(po_file) as infp: - catalog = read_po(infp) - assert len(catalog) == 4 # Catalog was updated - - def test_update_pot_creation_date(self): - template = Catalog() - template.add("1") - template.add("2") - template.add("3") - tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - po_file = os.path.join(i18n_dir, 'temp1.po') - self.cli.run(sys.argv + ['init', - '-l', 'fi', - '-o', po_file, - '-i', tmpl_file, - ]) - with open(po_file) as infp: - catalog = read_po(infp) - assert len(catalog) == 3 - original_catalog_creation_date = catalog.creation_date - - # Update the template creation date - template.creation_date -= timedelta(minutes=3) - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - - self.cli.run(sys.argv + ['update', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - with open(po_file) as infp: - catalog = read_po(infp) - # We didn't ignore the creation date, so expect a diff - assert catalog.creation_date != original_catalog_creation_date - - # Reset the "original" - original_catalog_creation_date = catalog.creation_date - - # Update the template creation 
date again - # This time, pass the ignore flag and expect the times are different - template.creation_date -= timedelta(minutes=5) - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - - self.cli.run(sys.argv + ['update', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file, - '--ignore-pot-creation-date']) - - with open(po_file) as infp: - catalog = read_po(infp) - # We ignored creation date, so it should not have changed - assert catalog.creation_date == original_catalog_creation_date - - def test_check(self): - template = Catalog() - template.add("1") - template.add("2") - template.add("3") - tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - po_file = os.path.join(i18n_dir, 'temp1.po') - self.cli.run(sys.argv + ['init', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file, - ]) - - # Update the catalog file - self.cli.run(sys.argv + ['update', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - # Run a check without introducing any changes to the template - self.cli.run(sys.argv + ['update', - '--check', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - # Add a new entry and expect the check to fail - template.add("4") - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - - with pytest.raises(BaseError): - self.cli.run(sys.argv + ['update', - '--check', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - # Write the latest changes to the po-file - self.cli.run(sys.argv + ['update', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - # Update an entry and expect the check to fail - template.add("4", locations=[("foo.py", 1)]) - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - - with pytest.raises(BaseError): - self.cli.run(sys.argv + ['update', - '--check', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - def test_check_pot_creation_date(self): - template = Catalog() - template.add("1") - template.add("2") - 
template.add("3") - tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - po_file = os.path.join(i18n_dir, 'temp1.po') - self.cli.run(sys.argv + ['init', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file, - ]) - - # Update the catalog file - self.cli.run(sys.argv + ['update', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - # Run a check without introducing any changes to the template - self.cli.run(sys.argv + ['update', - '--check', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - # Run a check after changing the template creation date - template.creation_date = datetime.now() - timedelta(minutes=5) - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - - # Should fail without --ignore-pot-creation-date flag - with pytest.raises(BaseError): - self.cli.run(sys.argv + ['update', - '--check', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - # Should pass with --ignore-pot-creation-date flag - self.cli.run(sys.argv + ['update', - '--check', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file, - '--ignore-pot-creation-date']) - - def test_update_init_missing(self): - template = Catalog() - template.add("1") - template.add("2") - template.add("3") - tmpl_file = os.path.join(i18n_dir, 'temp2-template.pot') - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - po_file = os.path.join(i18n_dir, 'temp2.po') - - self.cli.run(sys.argv + ['update', - '--init-missing', - '-l', 'fi', - '-o', po_file, - '-i', tmpl_file]) - - with open(po_file) as infp: - catalog = read_po(infp) - assert len(catalog) == 3 - - # Add another entry to the template - - template.add("4") - - with open(tmpl_file, "wb") as outfp: - write_po(outfp, template) - - self.cli.run(sys.argv + ['update', - '--init-missing', - '-l', 'fi_FI', - '-o', po_file, - '-i', tmpl_file]) - - with open(po_file) as infp: - catalog = read_po(infp) - assert len(catalog) == 4 # Catalog was updated - - 
-mapping_cfg = """ -[extractors] -custom = tests.messages.utils:custom_extractor - -# Special extractor for a given Python file -[custom: special.py] -treat = delicious - -# Python source files -[python: **.py] - -# Genshi templates -[genshi: **/templates/**.html] -include_attrs = - -[genshi: **/templates/**.txt] -template_class = genshi.template:TextTemplate -encoding = latin-1 - -# Some custom extractor -[custom: **/custom/*.*] -""" - -mapping_toml = """ -[extractors] -custom = "tests.messages.utils:custom_extractor" - -# Special extractor for a given Python file -[[mappings]] -method = "custom" -pattern = "special.py" -treat = "delightful" - -# Python source files -[[mappings]] -method = "python" -pattern = "**.py" - -# Genshi templates -[[mappings]] -method = "genshi" -pattern = "**/templates/**.html" -include_attrs = "" - -[[mappings]] -method = "genshi" -pattern = "**/templates/**.txt" -template_class = "genshi.template:TextTemplate" -encoding = "latin-1" - -# Some custom extractor -[[mappings]] -method = "custom" -pattern = "**/custom/*.*" -""" - - -@pytest.mark.parametrize( - ("data", "parser", "preprocess", "is_toml"), - [ - ( - mapping_cfg, - frontend.parse_mapping_cfg, - None, - False, - ), - ( - mapping_toml, - frontend._parse_mapping_toml, - None, - True, - ), - ( - mapping_toml, - partial(frontend._parse_mapping_toml, style="pyproject.toml"), - lambda s: re.sub(r"^(\[+)", r"\1tool.babel.", s, flags=re.MULTILINE), - True, - ), - ], - ids=("cfg", "toml", "pyproject-toml"), -) -def test_parse_mapping(data: str, parser, preprocess, is_toml): - if preprocess: - data = preprocess(data) - if is_toml: - buf = BytesIO(data.encode()) - else: - buf = StringIO(data) - - method_map, options_map = parser(buf) - assert len(method_map) == 5 - - assert method_map[1] == ('**.py', 'python') - assert options_map['**.py'] == {} - assert method_map[2] == ('**/templates/**.html', 'genshi') - assert options_map['**/templates/**.html']['include_attrs'] == '' - assert 
method_map[3] == ('**/templates/**.txt', 'genshi') - assert (options_map['**/templates/**.txt']['template_class'] - == 'genshi.template:TextTemplate') - assert options_map['**/templates/**.txt']['encoding'] == 'latin-1' - assert method_map[4] == ('**/custom/*.*', 'tests.messages.utils:custom_extractor') - assert options_map['**/custom/*.*'] == {} - - -def test_parse_keywords(): - kw = frontend.parse_keywords(['_', 'dgettext:2', - 'dngettext:2,3', 'pgettext:1c,2']) - assert kw == { - '_': None, - 'dgettext': (2,), - 'dngettext': (2, 3), - 'pgettext': ((1, 'c'), 2), - } - - -def test_parse_keywords_with_t(): - kw = frontend.parse_keywords(['_:1', '_:2,2t', '_:2c,3,3t']) - - assert kw == { - '_': { - None: (1,), - 2: (2,), - 3: ((2, 'c'), 3), - }, - } - - -def test_extract_messages_with_t(): - content = rb""" -_("1 arg, arg 1") -_("2 args, arg 1", "2 args, arg 2") -_("3 args, arg 1", "3 args, arg 2", "3 args, arg 3") -_("4 args, arg 1", "4 args, arg 2", "4 args, arg 3", "4 args, arg 4") -""" - kw = frontend.parse_keywords(['_:1', '_:2,2t', '_:2c,3,3t']) - result = list(extract.extract("python", BytesIO(content), kw)) - expected = [(2, '1 arg, arg 1', [], None), - (3, '2 args, arg 1', [], None), - (3, '2 args, arg 2', [], None), - (4, '3 args, arg 1', [], None), - (4, '3 args, arg 3', [], '3 args, arg 2'), - (5, '4 args, arg 1', [], None)] - assert result == expected - - -def configure_cli_command(cmdline): - """ - Helper to configure a command class, but not run it just yet. 
- - :param cmdline: The command line (sans the executable name) - :return: Command instance - """ - args = shlex.split(cmdline) - cli = CommandLineInterface() - cmdinst = cli._configure_command(cmdname=args[0], argv=args[1:]) - return cmdinst - - -@pytest.mark.parametrize("split", (False, True)) -@pytest.mark.parametrize("arg_name", ("-k", "--keyword", "--keywords")) -def test_extract_keyword_args_384(split, arg_name): - # This is a regression test for https://github.com/python-babel/babel/issues/384 - # and it also tests that the rest of the forgotten aliases/shorthands implied by - # https://github.com/python-babel/babel/issues/390 are re-remembered (or rather - # that the mechanism for remembering them again works). - - kwarg_specs = [ - "gettext_noop", - "gettext_lazy", - "ngettext_lazy:1,2", - "ugettext_noop", - "ugettext_lazy", - "ungettext_lazy:1,2", - "pgettext_lazy:1c,2", - "npgettext_lazy:1c,2,3", - ] - - if split: # Generate a command line with multiple -ks - kwarg_text = " ".join(f"{arg_name} {kwarg_spec}" for kwarg_spec in kwarg_specs) - else: # Generate a single space-separated -k - specs = ' '.join(kwarg_specs) - kwarg_text = f'{arg_name} "{specs}"' - - # (Both of those invocation styles should be equivalent, so there is no parametrization from here on out) - - cmdinst = configure_cli_command( - f"extract -F babel-django.cfg --add-comments Translators: -o django232.pot {kwarg_text} .", - ) - assert isinstance(cmdinst, ExtractMessages) - assert set(cmdinst.keywords.keys()) == {'_', 'dgettext', 'dngettext', - 'gettext', 'gettext_lazy', - 'gettext_noop', 'N_', 'ngettext', - 'ngettext_lazy', 'npgettext', - 'npgettext_lazy', 'pgettext', - 'pgettext_lazy', 'ugettext', - 'ugettext_lazy', 'ugettext_noop', - 'ungettext', 'ungettext_lazy'} - - -def test_update_catalog_boolean_args(): - cmdinst = configure_cli_command( - "update --init-missing --no-wrap -N --ignore-obsolete --previous -i foo -o foo -l en") - assert isinstance(cmdinst, UpdateCatalog) - assert 
cmdinst.init_missing is True - assert cmdinst.no_wrap is True - assert cmdinst.no_fuzzy_matching is True - assert cmdinst.ignore_obsolete is True - assert cmdinst.previous is False # Mutually exclusive with no_fuzzy_matching - - -def test_extract_cli_knows_dash_s(): - # This is a regression test for https://github.com/python-babel/babel/issues/390 - cmdinst = configure_cli_command("extract -s -o foo babel") - assert isinstance(cmdinst, ExtractMessages) - assert cmdinst.strip_comments - - -def test_extract_cli_knows_dash_dash_last_dash_translator(): - cmdinst = configure_cli_command('extract --last-translator "FULL NAME EMAIL@ADDRESS" -o foo babel') - assert isinstance(cmdinst, ExtractMessages) - assert cmdinst.last_translator == "FULL NAME EMAIL@ADDRESS" - - -def test_extract_add_location(): - cmdinst = configure_cli_command("extract -o foo babel --add-location full") - assert isinstance(cmdinst, ExtractMessages) - assert cmdinst.add_location == 'full' - assert not cmdinst.no_location - assert cmdinst.include_lineno - - cmdinst = configure_cli_command("extract -o foo babel --add-location file") - assert isinstance(cmdinst, ExtractMessages) - assert cmdinst.add_location == 'file' - assert not cmdinst.no_location - assert not cmdinst.include_lineno - - cmdinst = configure_cli_command("extract -o foo babel --add-location never") - assert isinstance(cmdinst, ExtractMessages) - assert cmdinst.add_location == 'never' - assert cmdinst.no_location - - -def test_extract_error_code(monkeypatch, capsys): - monkeypatch.chdir(project_dir) - cmdinst = configure_cli_command("compile --domain=messages --directory i18n --locale fi_BUGGY") - assert cmdinst.run() == 1 - out, err = capsys.readouterr() - if err: - # replace hack below for py2/py3 compatibility - assert "unknown named placeholder 'merkki'" in err.replace("u'", "'") - - -@pytest.mark.parametrize("with_underscore_ignore", (False, True)) -def test_extract_ignore_dirs(monkeypatch, capsys, tmp_path, with_underscore_ignore): 
- pot_file = tmp_path / 'temp.pot' - monkeypatch.chdir(project_dir) - cmd = f"extract . -o '{pot_file}' --ignore-dirs '*ignored* .*' " - if with_underscore_ignore: - # This also tests that multiple arguments are supported. - cmd += "--ignore-dirs '_*'" - cmdinst = configure_cli_command(cmd) - assert isinstance(cmdinst, ExtractMessages) - assert cmdinst.directory_filter - cmdinst.run() - pot_content = pot_file.read_text() - - # The `ignored` directory is now actually ignored: - assert 'this_wont_normally_be_here' not in pot_content - - # Since we manually set a filter, the otherwise `_hidden` directory is walked into, - # unless we opt in to ignore it again - assert ('ssshhh....' in pot_content) != with_underscore_ignore - assert ('_hidden_by_default' in pot_content) != with_underscore_ignore - - -def test_extract_header_comment(monkeypatch, tmp_path): - pot_file = tmp_path / 'temp.pot' - monkeypatch.chdir(project_dir) - cmdinst = configure_cli_command(f"extract . -o '{pot_file}' --header-comment 'Boing' ") - cmdinst.run() - pot_content = pot_file.read_text() - assert 'Boing' in pot_content - - -@pytest.mark.parametrize("mapping_format", ("toml", "cfg")) -def test_pr_1121(tmp_path, monkeypatch, caplog, mapping_format): - """ - Test that extraction uses the first matching method and options, - instead of the first matching method and last matching options. - - Without the fix in PR #1121, this test would fail, - since the `custom_extractor` isn't passed a delicious treat via - the configuration. - """ - if mapping_format == "cfg": - mapping_file = (tmp_path / "mapping.cfg") - mapping_file.write_text(mapping_cfg) - else: - mapping_file = (tmp_path / "mapping.toml") - mapping_file.write_text(mapping_toml) - (tmp_path / "special.py").write_text("# this file is special") - pot_path = (tmp_path / "output.pot") - monkeypatch.chdir(tmp_path) - cmdinst = configure_cli_command(f"extract . 
-o {shlex.quote(str(pot_path))} --mapping {shlex.quote(mapping_file.name)}") - assert isinstance(cmdinst, ExtractMessages) - cmdinst.run() - # If the custom extractor didn't run, we wouldn't see the cookie in there. - assert CUSTOM_EXTRACTOR_COOKIE in pot_path.read_text() diff --git a/tests/messages/test_mofile.py b/tests/messages/test_mofile.py index 8d1a89eb0..85f4e9f34 100644 --- a/tests/messages/test_mofile.py +++ b/tests/messages/test_mofile.py @@ -11,79 +11,76 @@ # history and logs, available at https://github.com/python-babel/babel/commits/master/. import os -import unittest from io import BytesIO from babel.messages import Catalog, mofile from babel.support import Translations +data_dir = os.path.join(os.path.dirname(__file__), 'data') -class ReadMoTestCase(unittest.TestCase): - def setUp(self): - self.datadir = os.path.join(os.path.dirname(__file__), 'data') +def test_basics(): + mo_path = os.path.join(data_dir, 'project', 'i18n', 'de', + 'LC_MESSAGES', 'messages.mo') + with open(mo_path, 'rb') as mo_file: + catalog = mofile.read_mo(mo_file) + assert len(catalog) == 2 + assert catalog.project == 'TestProject' + assert catalog.version == '0.1' + assert catalog['bar'].string == 'Stange' + assert catalog['foobar'].string == ['Fuhstange', 'Fuhstangen'] - def test_basics(self): - mo_path = os.path.join(self.datadir, 'project', 'i18n', 'de', - 'LC_MESSAGES', 'messages.mo') - with open(mo_path, 'rb') as mo_file: - catalog = mofile.read_mo(mo_file) - assert len(catalog) == 2 - assert catalog.project == 'TestProject' - assert catalog.version == '0.1' - assert catalog['bar'].string == 'Stange' - assert catalog['foobar'].string == ['Fuhstange', 'Fuhstangen'] -class WriteMoTestCase(unittest.TestCase): - - def test_sorting(self): - # Ensure the header is sorted to the first entry so that its charset - # can be applied to all subsequent messages by GNUTranslations - # (ensuring all messages are safely converted to unicode) - catalog = Catalog(locale='en_US') - 
catalog.add('', '''\ +def test_sorting(): + # Ensure the header is sorted to the first entry so that its charset + # can be applied to all subsequent messages by GNUTranslations + # (ensuring all messages are safely converted to unicode) + catalog = Catalog(locale='en_US') + catalog.add('', '''\ "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n''') - catalog.add('foo', 'Voh') - catalog.add(('There is', 'There are'), ('Es gibt', 'Es gibt')) - catalog.add('Fizz', '') - catalog.add(('Fuzz', 'Fuzzes'), ('', '')) - buf = BytesIO() - mofile.write_mo(buf, catalog) - buf.seek(0) - translations = Translations(fp=buf) - assert translations.ugettext('foo') == 'Voh' - assert translations.ungettext('There is', 'There are', 1) == 'Es gibt' - assert translations.ugettext('Fizz') == 'Fizz' - assert translations.ugettext('Fuzz') == 'Fuzz' - assert translations.ugettext('Fuzzes') == 'Fuzzes' + catalog.add('foo', 'Voh') + catalog.add(('There is', 'There are'), ('Es gibt', 'Es gibt')) + catalog.add('Fizz', '') + catalog.add(('Fuzz', 'Fuzzes'), ('', '')) + buf = BytesIO() + mofile.write_mo(buf, catalog) + buf.seek(0) + translations = Translations(fp=buf) + assert translations.ugettext('foo') == 'Voh' + assert translations.ungettext('There is', 'There are', 1) == 'Es gibt' + assert translations.ugettext('Fizz') == 'Fizz' + assert translations.ugettext('Fuzz') == 'Fuzz' + assert translations.ugettext('Fuzzes') == 'Fuzzes' + + +def test_more_plural_forms(): + catalog2 = Catalog(locale='ru_RU') + catalog2.add(('Fuzz', 'Fuzzes'), ('', '', '')) + buf = BytesIO() + mofile.write_mo(buf, catalog2) - def test_more_plural_forms(self): - catalog2 = Catalog(locale='ru_RU') - catalog2.add(('Fuzz', 'Fuzzes'), ('', '', '')) - buf = BytesIO() - mofile.write_mo(buf, catalog2) - def test_empty_translation_with_fallback(self): - catalog1 = Catalog(locale='fr_FR') - catalog1.add('', '''\ +def test_empty_translation_with_fallback(): + catalog1 = Catalog(locale='fr_FR') + 
catalog1.add('', '''\ "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n''') - catalog1.add('Fuzz', '') - buf1 = BytesIO() - mofile.write_mo(buf1, catalog1) - buf1.seek(0) - catalog2 = Catalog(locale='fr') - catalog2.add('', '''\ + catalog1.add('Fuzz', '') + buf1 = BytesIO() + mofile.write_mo(buf1, catalog1) + buf1.seek(0) + catalog2 = Catalog(locale='fr') + catalog2.add('', '''\ "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n''') - catalog2.add('Fuzz', 'Flou') - buf2 = BytesIO() - mofile.write_mo(buf2, catalog2) - buf2.seek(0) + catalog2.add('Fuzz', 'Flou') + buf2 = BytesIO() + mofile.write_mo(buf2, catalog2) + buf2.seek(0) - translations = Translations(fp=buf1) - translations.add_fallback(Translations(fp=buf2)) + translations = Translations(fp=buf1) + translations.add_fallback(Translations(fp=buf2)) - assert translations.ugettext('Fuzz') == 'Flou' + assert translations.ugettext('Fuzz') == 'Flou' diff --git a/tests/messages/test_normalized_string.py b/tests/messages/test_normalized_string.py deleted file mode 100644 index 9c95672b4..000000000 --- a/tests/messages/test_normalized_string.py +++ /dev/null @@ -1,17 +0,0 @@ -from babel.messages.pofile import _NormalizedString - - -def test_normalized_string(): - ab1 = _NormalizedString('a', 'b ') - ab2 = _NormalizedString('a', ' b') - ac1 = _NormalizedString('a', 'c') - ac2 = _NormalizedString(' a', 'c ') - z = _NormalizedString() - assert ab1 == ab2 and ac1 == ac2 # __eq__ - assert ab1 < ac1 # __lt__ - assert ac1 > ab2 # __gt__ - assert ac1 >= ac2 # __ge__ - assert ab1 <= ab2 # __le__ - assert ab1 != ac1 # __ne__ - assert not z # __nonzero__ / __bool__ - assert sorted([ab1, ab2, ac1]) # the sort order is not stable so we can't really check it, just that we can sort diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py index 2bcc3df8d..cdbb58262 100644 --- a/tests/messages/test_pofile.py +++ b/tests/messages/test_pofile.py @@ -10,991 +10,48 
@@ # individuals. For the exact contribution history, see the revision # history and logs, available at https://github.com/python-babel/babel/commits/master/. -import unittest -from datetime import datetime from io import BytesIO, StringIO import pytest from babel.core import Locale from babel.messages import pofile -from babel.messages.catalog import Catalog, Message +from babel.messages.catalog import Catalog from babel.messages.pofile import _enclose_filename_if_necessary, _extract_locations -from babel.util import FixedOffsetTimezone -class ReadPoTestCase(unittest.TestCase): - - def test_preserve_locale(self): - buf = StringIO(r'''msgid "foo" -msgstr "Voh"''') - catalog = pofile.read_po(buf, locale='en_US') - assert Locale('en', 'US') == catalog.locale - - def test_locale_gets_overridden_by_file(self): - buf = StringIO(r''' -msgid "" -msgstr "" -"Language: en_US\n"''') - catalog = pofile.read_po(buf, locale='de') - assert Locale('en', 'US') == catalog.locale - buf = StringIO(r''' -msgid "" -msgstr "" -"Language: ko-KR\n"''') - catalog = pofile.read_po(buf, locale='de') - assert Locale('ko', 'KR') == catalog.locale - - def test_preserve_domain(self): - buf = StringIO(r'''msgid "foo" -msgstr "Voh"''') - catalog = pofile.read_po(buf, domain='mydomain') - assert catalog.domain == 'mydomain' - - def test_applies_specified_encoding_during_read(self): - buf = BytesIO(''' -msgid "" -msgstr "" -"Project-Id-Version: 3.15\\n" -"Report-Msgid-Bugs-To: Fliegender Zirkus \\n" -"POT-Creation-Date: 2007-09-27 11:19+0700\\n" -"PO-Revision-Date: 2007-09-27 21:42-0700\\n" -"Last-Translator: John \\n" -"Language-Team: German Lang \\n" -"Plural-Forms: nplurals=2; plural=(n != 1);\\n" -"MIME-Version: 1.0\\n" -"Content-Type: text/plain; charset=iso-8859-1\\n" -"Content-Transfer-Encoding: 8bit\\n" -"Generated-By: Babel 1.0dev-r313\\n" - -msgid "foo" -msgstr "bär"'''.encode('iso-8859-1')) - catalog = pofile.read_po(buf, locale='de_DE') - assert catalog.get('foo').string == 'bär' - - def 
test_encoding_header_read(self): - buf = BytesIO(b'msgid ""\nmsgstr ""\n"Content-Type: text/plain; charset=mac_roman\\n"\n') - catalog = pofile.read_po(buf, locale='xx_XX') - assert catalog.charset == 'mac_roman' - - def test_plural_forms_header_parsed(self): - buf = BytesIO(b'msgid ""\nmsgstr ""\n"Plural-Forms: nplurals=42; plural=(n % 11);\\n"\n') - catalog = pofile.read_po(buf, locale='xx_XX') - assert catalog.plural_expr == '(n % 11)' - assert catalog.num_plurals == 42 - - def test_read_multiline(self): - buf = StringIO(r'''msgid "" -"Here's some text that\n" -"includesareallylongwordthatmightbutshouldnt" -" throw us into an infinite " -"loop\n" -msgstr ""''') - catalog = pofile.read_po(buf) - assert len(catalog) == 1 - message = list(catalog)[1] - assert message.id == ( - "Here's some text that\nincludesareallylongwordthat" - "mightbutshouldnt throw us into an infinite loop\n" - ) - - def test_fuzzy_header(self): - buf = StringIO(r''' -# Translations template for AReallyReallyLongNameForAProject. -# Copyright (C) 2007 ORGANIZATION -# This file is distributed under the same license as the -# AReallyReallyLongNameForAProject project. -# FIRST AUTHOR , 2007. -# -#, fuzzy -''') - catalog = pofile.read_po(buf) - assert len(list(catalog)) == 1 - assert list(catalog)[0].fuzzy - - def test_not_fuzzy_header(self): - buf = StringIO(r''' -# Translations template for AReallyReallyLongNameForAProject. -# Copyright (C) 2007 ORGANIZATION -# This file is distributed under the same license as the -# AReallyReallyLongNameForAProject project. -# FIRST AUTHOR , 2007. -# -''') - catalog = pofile.read_po(buf) - assert len(list(catalog)) == 1 - assert not list(catalog)[0].fuzzy - - def test_header_entry(self): - buf = StringIO(r''' -# SOME DESCRIPTIVE TITLE. -# Copyright (C) 2007 THE PACKAGE'S COPYRIGHT HOLDER -# This file is distributed under the same license as the PACKAGE package. -# FIRST AUTHOR , 2007. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: 3.15\n" -"Report-Msgid-Bugs-To: Fliegender Zirkus \n" -"POT-Creation-Date: 2007-09-27 11:19+0700\n" -"PO-Revision-Date: 2007-09-27 21:42-0700\n" -"Last-Translator: John \n" -"Language: de\n" -"Language-Team: German Lang \n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=iso-8859-2\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 1.0dev-r313\n" -''') - catalog = pofile.read_po(buf) - assert len(list(catalog)) == 1 - assert catalog.version == '3.15' - assert catalog.msgid_bugs_address == 'Fliegender Zirkus ' - assert datetime(2007, 9, 27, 11, 19, tzinfo=FixedOffsetTimezone(7 * 60)) == catalog.creation_date - assert catalog.last_translator == 'John ' - assert Locale('de') == catalog.locale - assert catalog.language_team == 'German Lang ' - assert catalog.charset == 'iso-8859-2' - assert list(catalog)[0].fuzzy - - def test_obsolete_message(self): - buf = StringIO(r'''# This is an obsolete message -#~ msgid "foo" -#~ msgstr "Voh" - -# This message is not obsolete -#: main.py:1 -msgid "bar" -msgstr "Bahr" -''') - catalog = pofile.read_po(buf) - assert len(catalog) == 1 - assert len(catalog.obsolete) == 1 - message = catalog.obsolete['foo'] - assert message.id == 'foo' - assert message.string == 'Voh' - assert message.user_comments == ['This is an obsolete message'] - - def test_obsolete_message_ignored(self): - buf = StringIO(r'''# This is an obsolete message -#~ msgid "foo" -#~ msgstr "Voh" - -# This message is not obsolete -#: main.py:1 -msgid "bar" -msgstr "Bahr" -''') - catalog = pofile.read_po(buf, ignore_obsolete=True) - assert len(catalog) == 1 - assert len(catalog.obsolete) == 0 - - def test_multi_line_obsolete_message(self): - buf = StringIO(r'''# This is an obsolete message -#~ msgid "" -#~ "foo" -#~ "foo" -#~ msgstr "" -#~ "Voh" -#~ "Vooooh" - -# This message is not obsolete -#: main.py:1 -msgid "bar" -msgstr "Bahr" -''') - catalog = 
pofile.read_po(buf) - assert len(catalog.obsolete) == 1 - message = catalog.obsolete['foofoo'] - assert message.id == 'foofoo' - assert message.string == 'VohVooooh' - assert message.user_comments == ['This is an obsolete message'] - - def test_unit_following_multi_line_obsolete_message(self): - buf = StringIO(r'''# This is an obsolete message -#~ msgid "" -#~ "foo" -#~ "fooooooo" -#~ msgstr "" -#~ "Voh" -#~ "Vooooh" - -# This message is not obsolete -#: main.py:1 -msgid "bar" -msgstr "Bahr" -''') - catalog = pofile.read_po(buf) - assert len(catalog) == 1 - message = catalog['bar'] - assert message.id == 'bar' - assert message.string == 'Bahr' - assert message.user_comments == ['This message is not obsolete'] - - def test_unit_before_obsolete_is_not_obsoleted(self): - buf = StringIO(r''' -# This message is not obsolete -#: main.py:1 -msgid "bar" -msgstr "Bahr" - -# This is an obsolete message -#~ msgid "" -#~ "foo" -#~ "fooooooo" -#~ msgstr "" -#~ "Voh" -#~ "Vooooh" -''') - catalog = pofile.read_po(buf) - assert len(catalog) == 1 - message = catalog['bar'] - assert message.id == 'bar' - assert message.string == 'Bahr' - assert message.user_comments == ['This message is not obsolete'] - - def test_with_context(self): - buf = BytesIO(b'''# Some string in the menu -#: main.py:1 -msgctxt "Menu" -msgid "foo" -msgstr "Voh" - -# Another string in the menu -#: main.py:2 -msgctxt "Menu" -msgid "bar" -msgstr "Bahr" -''') - catalog = pofile.read_po(buf, ignore_obsolete=True) - assert len(catalog) == 2 - message = catalog.get('foo', context='Menu') - assert message.context == 'Menu' - message = catalog.get('bar', context='Menu') - assert message.context == 'Menu' - - # And verify it pass through write_po - out_buf = BytesIO() - pofile.write_po(out_buf, catalog, omit_header=True) - assert out_buf.getvalue().strip() == buf.getvalue().strip() - - def test_obsolete_message_with_context(self): - buf = StringIO(''' -# This message is not obsolete -msgid "baz" -msgstr "Bazczch" - -# 
This is an obsolete message -#~ msgctxt "other" -#~ msgid "foo" -#~ msgstr "Voh" - -# This message is not obsolete -#: main.py:1 -msgid "bar" -msgstr "Bahr" -''') - catalog = pofile.read_po(buf) - assert len(catalog) == 2 - assert len(catalog.obsolete) == 1 - message = catalog.obsolete[("foo", "other")] - assert message.context == 'other' - assert message.string == 'Voh' - - def test_obsolete_messages_with_context(self): - buf = StringIO(''' -# This is an obsolete message -#~ msgctxt "apple" -#~ msgid "foo" -#~ msgstr "Foo" - -# This is an obsolete message with the same id but different context -#~ msgctxt "orange" -#~ msgid "foo" -#~ msgstr "Bar" -''') - catalog = pofile.read_po(buf) - assert len(catalog) == 0 - assert len(catalog.obsolete) == 2 - assert 'foo' not in catalog.obsolete - - apple_msg = catalog.obsolete[('foo', 'apple')] - assert apple_msg.id == 'foo' - assert apple_msg.string == 'Foo' - assert apple_msg.user_comments == ['This is an obsolete message'] - - orange_msg = catalog.obsolete[('foo', 'orange')] - assert orange_msg.id == 'foo' - assert orange_msg.string == 'Bar' - assert orange_msg.user_comments == ['This is an obsolete message with the same id but different context'] - - def test_obsolete_messages_roundtrip(self): - buf = StringIO('''\ -# This message is not obsolete -#: main.py:1 -msgid "bar" -msgstr "Bahr" - -# This is an obsolete message -#~ msgid "foo" -#~ msgstr "Voh" - -# This is an obsolete message -#~ msgctxt "apple" -#~ msgid "foo" -#~ msgstr "Foo" - -# This is an obsolete message with the same id but different context -#~ msgctxt "orange" -#~ msgid "foo" -#~ msgstr "Bar" - -''') - generated_po_file = ''.join(pofile.generate_po(pofile.read_po(buf), omit_header=True)) - assert buf.getvalue() == generated_po_file - - def test_multiline_context(self): - buf = StringIO(''' -msgctxt "a really long " -"message context " -"why?" 
-msgid "mid" -msgstr "mst" - ''') - catalog = pofile.read_po(buf) - assert len(catalog) == 1 - message = catalog.get('mid', context="a really long message context why?") - assert message is not None - assert message.context == 'a really long message context why?' - - def test_with_context_two(self): - buf = BytesIO(b'''msgctxt "Menu" -msgid "foo" -msgstr "Voh" - -msgctxt "Mannu" -msgid "bar" -msgstr "Bahr" -''') - catalog = pofile.read_po(buf, ignore_obsolete=True) - assert len(catalog) == 2 - message = catalog.get('foo', context='Menu') - assert message.context == 'Menu' - message = catalog.get('bar', context='Mannu') - assert message.context == 'Mannu' - - # And verify it pass through write_po - out_buf = BytesIO() - pofile.write_po(out_buf, catalog, omit_header=True) - assert out_buf.getvalue().strip() == buf.getvalue().strip(), out_buf.getvalue() - - def test_single_plural_form(self): - buf = StringIO(r'''msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh"''') - catalog = pofile.read_po(buf, locale='ja_JP') - assert len(catalog) == 1 - assert catalog.num_plurals == 1 - message = catalog['foo'] - assert len(message.string) == 1 - - def test_singular_plural_form(self): - buf = StringIO(r'''msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh" -msgstr[1] "Vohs"''') - catalog = pofile.read_po(buf, locale='nl_NL') - assert len(catalog) == 1 - assert catalog.num_plurals == 2 - message = catalog['foo'] - assert len(message.string) == 2 - - def test_more_than_two_plural_forms(self): - buf = StringIO(r'''msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh" -msgstr[1] "Vohs" -msgstr[2] "Vohss"''') - catalog = pofile.read_po(buf, locale='lv_LV') - assert len(catalog) == 1 - assert catalog.num_plurals == 3 - message = catalog['foo'] - assert len(message.string) == 3 - assert message.string[2] == 'Vohss' - - def test_plural_with_square_brackets(self): - buf = StringIO(r'''msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh [text]" -msgstr[1] "Vohs [text]"''') - catalog = 
pofile.read_po(buf, locale='nb_NO') - assert len(catalog) == 1 - assert catalog.num_plurals == 2 - message = catalog['foo'] - assert len(message.string) == 2 - - def test_obsolete_plural_with_square_brackets(self): - buf = StringIO('''\ -#~ msgid "foo" -#~ msgid_plural "foos" -#~ msgstr[0] "Voh [text]" -#~ msgstr[1] "Vohs [text]" -''') - catalog = pofile.read_po(buf, locale='nb_NO') - assert len(catalog) == 0 - assert len(catalog.obsolete) == 1 - assert catalog.num_plurals == 2 - message = catalog.obsolete['foo'] - assert len(message.string) == 2 - assert message.string[0] == 'Voh [text]' - assert message.string[1] == 'Vohs [text]' - - def test_missing_plural(self): - buf = StringIO('''\ -msgid "" -msgstr "" -"Plural-Forms: nplurals=3; plural=(n < 2) ? n : 2;\n" - -msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh [text]" -msgstr[1] "Vohs [text]" -''') - catalog = pofile.read_po(buf, locale='nb_NO') - assert len(catalog) == 1 - assert catalog.num_plurals == 3 - message = catalog['foo'] - assert len(message.string) == 3 - assert message.string[0] == 'Voh [text]' - assert message.string[1] == 'Vohs [text]' - assert message.string[2] == '' - - def test_missing_plural_in_the_middle(self): - buf = StringIO('''\ -msgid "" -msgstr "" -"Plural-Forms: nplurals=3; plural=(n < 2) ? 
n : 2;\n" - -msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh [text]" -msgstr[2] "Vohs [text]" -''') - catalog = pofile.read_po(buf, locale='nb_NO') - assert len(catalog) == 1 - assert catalog.num_plurals == 3 - message = catalog['foo'] - assert len(message.string) == 3 - assert message.string[0] == 'Voh [text]' - assert message.string[1] == '' - assert message.string[2] == 'Vohs [text]' - - def test_with_location(self): - buf = StringIO('''\ -#: main.py:1 \u2068filename with whitespace.py\u2069:123 -msgid "foo" -msgstr "bar" -''') - catalog = pofile.read_po(buf, locale='de_DE') - assert len(catalog) == 1 - message = catalog['foo'] - assert message.string == 'bar' - assert message.locations == [("main.py", 1), ("filename with whitespace.py", 123)] - - - def test_abort_invalid_po_file(self): - invalid_po = ''' - msgctxt "" - "{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": " - "270005359}" - msgid "" - "Thank you very much for your time.\n" - "If you have any questions regarding this survey, please contact Fulano " - "at nadie@blah.com" - msgstr "Merci de prendre le temps de remplir le sondage. - Pour toute question, veuillez communiquer avec Fulano à nadie@blah.com - " - ''' - invalid_po_2 = ''' - msgctxt "" - "{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": " - "270005359}" - msgid "" - "Thank you very much for your time.\n" - "If you have any questions regarding this survey, please contact Fulano " - "at fulano@blah.com." - msgstr "Merci de prendre le temps de remplir le sondage. 
- Pour toute question, veuillez communiquer avec Fulano a fulano@blah.com - " - ''' - # Catalog not created, throws Unicode Error - buf = StringIO(invalid_po) - output = pofile.read_po(buf, locale='fr', abort_invalid=False) - assert isinstance(output, Catalog) - - # Catalog not created, throws PoFileError - buf = StringIO(invalid_po_2) - with pytest.raises(pofile.PoFileError): - pofile.read_po(buf, locale='fr', abort_invalid=True) - - # Catalog is created with warning, no abort - buf = StringIO(invalid_po_2) - output = pofile.read_po(buf, locale='fr', abort_invalid=False) - assert isinstance(output, Catalog) - - # Catalog not created, aborted with PoFileError - buf = StringIO(invalid_po_2) - with pytest.raises(pofile.PoFileError): - pofile.read_po(buf, locale='fr', abort_invalid=True) - - def test_invalid_pofile_with_abort_flag(self): - parser = pofile.PoFileParser(None, abort_invalid=True) - lineno = 10 - line = 'Algo esta mal' - msg = 'invalid file' - with pytest.raises(pofile.PoFileError): - parser._invalid_pofile(line, lineno, msg) - - -class WritePoTestCase(unittest.TestCase): - - def test_join_locations(self): - catalog = Catalog() - catalog.add('foo', locations=[('main.py', 1)]) - catalog.add('foo', locations=[('utils.py', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True) - assert buf.getvalue().strip() == b'''#: main.py:1 utils.py:3 -msgid "foo" -msgstr ""''' - - def test_write_po_file_with_specified_charset(self): - catalog = Catalog(charset='iso-8859-1') - catalog.add('foo', 'äöü', locations=[('main.py', 1)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=False) - po_file = buf.getvalue().strip() - assert b'"Content-Type: text/plain; charset=iso-8859-1\\n"' in po_file - assert 'msgstr "äöü"'.encode('iso-8859-1') in po_file - - def test_duplicate_comments(self): - catalog = Catalog() - catalog.add('foo', auto_comments=['A comment']) - catalog.add('foo', auto_comments=['A comment']) - buf = BytesIO() - 
pofile.write_po(buf, catalog, omit_header=True) - assert buf.getvalue().strip() == b'''#. A comment -msgid "foo" -msgstr ""''' - - def test_wrap_long_lines(self): - text = """Here's some text where -white space and line breaks matter, and should - -not be removed - -""" - catalog = Catalog() - catalog.add(text, locations=[('main.py', 1)]) - buf = BytesIO() - pofile.write_po(buf, catalog, no_location=True, omit_header=True, - width=42) - assert buf.getvalue().strip() == b'''msgid "" -"Here's some text where\\n" -"white space and line breaks matter, and" -" should\\n" -"\\n" -"not be removed\\n" -"\\n" -msgstr ""''' - - def test_wrap_long_lines_with_long_word(self): - text = """Here's some text that -includesareallylongwordthatmightbutshouldnt throw us into an infinite loop -""" - catalog = Catalog() - catalog.add(text, locations=[('main.py', 1)]) - buf = BytesIO() - pofile.write_po(buf, catalog, no_location=True, omit_header=True, - width=32) - assert buf.getvalue().strip() == b'''msgid "" -"Here's some text that\\n" -"includesareallylongwordthatmightbutshouldnt" -" throw us into an infinite " -"loop\\n" -msgstr ""''' - - def test_wrap_long_lines_in_header(self): - """ - Verify that long lines in the header comment are wrapped correctly. - """ - catalog = Catalog(project='AReallyReallyLongNameForAProject', - revision_date=datetime(2007, 4, 1)) - buf = BytesIO() - pofile.write_po(buf, catalog) - assert b'\n'.join(buf.getvalue().splitlines()[:7]) == b'''\ -# Translations template for AReallyReallyLongNameForAProject. -# Copyright (C) 2007 ORGANIZATION -# This file is distributed under the same license as the -# AReallyReallyLongNameForAProject project. -# FIRST AUTHOR , 2007. 
-# -#, fuzzy''' - - def test_wrap_locations_with_hyphens(self): - catalog = Catalog() - catalog.add('foo', locations=[ - ('doupy/templates/base/navmenu.inc.html.py', 60), - ]) - catalog.add('foo', locations=[ - ('doupy/templates/job-offers/helpers.html', 22), - ]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True) - assert buf.getvalue().strip() == b'''#: doupy/templates/base/navmenu.inc.html.py:60 -#: doupy/templates/job-offers/helpers.html:22 -msgid "foo" -msgstr ""''' - - def test_no_wrap_and_width_behaviour_on_comments(self): - catalog = Catalog() - catalog.add("Pretty dam long message id, which must really be big " - "to test this wrap behaviour, if not it won't work.", - locations=[("fake.py", n) for n in range(1, 30)]) - buf = BytesIO() - pofile.write_po(buf, catalog, width=None, omit_header=True) - assert buf.getvalue().lower() == b"""\ -#: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 -#: fake.py:8 fake.py:9 fake.py:10 fake.py:11 fake.py:12 fake.py:13 fake.py:14 -#: fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 fake.py:20 fake.py:21 -#: fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 -#: fake.py:29 -msgid "pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't work." -msgstr "" - -""" - buf = BytesIO() - pofile.write_po(buf, catalog, width=100, omit_header=True) - assert buf.getvalue().lower() == b"""\ -#: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 fake.py:8 fake.py:9 fake.py:10 -#: fake.py:11 fake.py:12 fake.py:13 fake.py:14 fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 -#: fake.py:20 fake.py:21 fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 -#: fake.py:29 -msgid "" -"pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't" -" work." 
-msgstr "" - -""" - - def test_pot_with_translator_comments(self): - catalog = Catalog() - catalog.add('foo', locations=[('main.py', 1)], - auto_comments=['Comment About `foo`']) - catalog.add('bar', locations=[('utils.py', 3)], - user_comments=['Comment About `bar` with', - 'multiple lines.']) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True) - assert buf.getvalue().strip() == b'''#. Comment About `foo` -#: main.py:1 -msgid "foo" -msgstr "" - -# Comment About `bar` with -# multiple lines. -#: utils.py:3 -msgid "bar" -msgstr ""''' - - def test_po_with_obsolete_message(self): - catalog = Catalog() - catalog.add('foo', 'Voh', locations=[('main.py', 1)]) - catalog.obsolete['bar'] = Message('bar', 'Bahr', - locations=[('utils.py', 3)], - user_comments=['User comment']) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True) - assert buf.getvalue().strip() == b'''#: main.py:1 -msgid "foo" -msgstr "Voh" - -# User comment -#~ msgid "bar" -#~ msgstr "Bahr"''' - - def test_po_with_multiline_obsolete_message(self): - catalog = Catalog() - catalog.add('foo', 'Voh', locations=[('main.py', 1)]) - msgid = r"""Here's a message that covers -multiple lines, and should still be handled -correctly. -""" - msgstr = r"""Here's a message that covers -multiple lines, and should still be handled -correctly. 
-""" - catalog.obsolete[msgid] = Message(msgid, msgstr, - locations=[('utils.py', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True) - assert buf.getvalue().strip() == b'''#: main.py:1 -msgid "foo" -msgstr "Voh" - -#~ msgid "" -#~ "Here's a message that covers\\n" -#~ "multiple lines, and should still be handled\\n" -#~ "correctly.\\n" -#~ msgstr "" -#~ "Here's a message that covers\\n" -#~ "multiple lines, and should still be handled\\n" -#~ "correctly.\\n"''' - - def test_po_with_obsolete_message_ignored(self): - catalog = Catalog() - catalog.add('foo', 'Voh', locations=[('main.py', 1)]) - catalog.obsolete['bar'] = Message('bar', 'Bahr', - locations=[('utils.py', 3)], - user_comments=['User comment']) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, ignore_obsolete=True) - assert buf.getvalue().strip() == b'''#: main.py:1 -msgid "foo" -msgstr "Voh"''' - - def test_po_with_previous_msgid(self): - catalog = Catalog() - catalog.add('foo', 'Voh', locations=[('main.py', 1)], - previous_id='fo') - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_previous=True) - assert buf.getvalue().strip() == b'''#: main.py:1 -#| msgid "fo" -msgid "foo" -msgstr "Voh"''' - - def test_po_with_previous_msgid_plural(self): - catalog = Catalog() - catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), - locations=[('main.py', 1)], previous_id=('fo', 'fos')) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_previous=True) - assert buf.getvalue().strip() == b'''#: main.py:1 -#| msgid "fo" -#| msgid_plural "fos" -msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh" -msgstr[1] "Voeh"''' - - def test_sorted_po(self): - catalog = Catalog() - catalog.add('bar', locations=[('utils.py', 3)], - user_comments=['Comment About `bar` with', - 'multiple lines.']) - catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), - locations=[('main.py', 1)]) - buf = BytesIO() - pofile.write_po(buf, catalog, sort_output=True) - value = 
buf.getvalue().strip() - assert b'''\ -# Comment About `bar` with -# multiple lines. -#: utils.py:3 -msgid "bar" -msgstr "" - -#: main.py:1 -msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh" -msgstr[1] "Voeh"''' in value - assert value.find(b'msgid ""') < value.find(b'msgid "bar"') < value.find(b'msgid "foo"') - - def test_sorted_po_context(self): - catalog = Catalog() - catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), - locations=[('main.py', 1)], - context='there') - catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), - locations=[('main.py', 1)]) - catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), - locations=[('main.py', 1)], - context='here') - buf = BytesIO() - pofile.write_po(buf, catalog, sort_output=True) - value = buf.getvalue().strip() - # We expect the foo without ctx, followed by "here" foo and "there" foo - assert b'''\ -#: main.py:1 -msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh" -msgstr[1] "Voeh" - -#: main.py:1 -msgctxt "here" -msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh" -msgstr[1] "Voeh" - -#: main.py:1 -msgctxt "there" -msgid "foo" -msgid_plural "foos" -msgstr[0] "Voh" -msgstr[1] "Voeh"''' in value - - def test_file_sorted_po(self): - catalog = Catalog() - catalog.add('bar', locations=[('utils.py', 3)]) - catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), locations=[('main.py', 1)]) - buf = BytesIO() - pofile.write_po(buf, catalog, sort_by_file=True) - value = buf.getvalue().strip() - assert value.find(b'main.py') < value.find(b'utils.py') - - def test_file_with_no_lineno(self): - catalog = Catalog() - catalog.add('bar', locations=[('utils.py', None)], - user_comments=['Comment About `bar` with', - 'multiple lines.']) - buf = BytesIO() - pofile.write_po(buf, catalog, sort_output=True) - value = buf.getvalue().strip() - assert b'''\ -# Comment About `bar` with -# multiple lines. 
-#: utils.py -msgid "bar" -msgstr ""''' in value - - def test_silent_location_fallback(self): - buf = BytesIO(b'''\ -#: broken_file.py -msgid "missing line number" -msgstr "" - -#: broken_file.py:broken_line_number -msgid "broken line number" -msgstr ""''') - catalog = pofile.read_po(buf) - assert catalog['missing line number'].locations == [('broken_file.py', None)] - assert catalog['broken line number'].locations == [] - - def test_include_lineno(self): - catalog = Catalog() - catalog.add('foo', locations=[('main.py', 1)]) - catalog.add('foo', locations=[('utils.py', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) - assert buf.getvalue().strip() == b'''#: main.py:1 utils.py:3 -msgid "foo" -msgstr ""''' - - def test_no_include_lineno(self): - catalog = Catalog() - catalog.add('foo', locations=[('main.py', 1)]) - catalog.add('foo', locations=[('main.py', 2)]) - catalog.add('foo', locations=[('utils.py', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=False) - assert buf.getvalue().strip() == b'''#: main.py utils.py -msgid "foo" -msgstr ""''' - - def test_white_space_in_location(self): - catalog = Catalog() - catalog.add('foo', locations=[('main.py', 1)]) - catalog.add('foo', locations=[('utils b.py', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) - assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 -msgid "foo" -msgstr ""''' - - def test_white_space_in_location_already_enclosed(self): - catalog = Catalog() - catalog.add('foo', locations=[('main.py', 1)]) - catalog.add('foo', locations=[('\u2068utils b.py\u2069', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) - assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 -msgid "foo" -msgstr ""''' - - def test_tab_in_location(self): - catalog = Catalog() - catalog.add('foo', 
locations=[('main.py', 1)]) - catalog.add('foo', locations=[('utils\tb.py', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) - assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 -msgid "foo" -msgstr ""''' - - def test_tab_in_location_already_enclosed(self): - catalog = Catalog() - catalog.add('foo', locations=[('main.py', 1)]) - catalog.add('foo', locations=[('\u2068utils\tb.py\u2069', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) - assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 -msgid "foo" -msgstr ""''' - - - def test_wrap_with_enclosed_file_locations(self): - # Ensure that file names containing white space are not wrapped regardless of the --width parameter - catalog = Catalog() - catalog.add('foo', locations=[('\u2068test utils.py\u2069', 1)]) - catalog.add('foo', locations=[('\u2068test utils.py\u2069', 3)]) - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=True, width=1) - assert buf.getvalue().strip() == b'''#: \xe2\x81\xa8test utils.py\xe2\x81\xa9:1 -#: \xe2\x81\xa8test utils.py\xe2\x81\xa9:3 -msgid "foo" -msgstr ""''' - - -class RoundtripPoTestCase(unittest.TestCase): +def test_enclosed_filenames_in_location_comment(): + catalog = Catalog() + catalog.add("foo", lineno=2, locations=[("main 1.py", 1)], string="") + catalog.add("bar", lineno=6, locations=[("other.py", 2)], string="") + catalog.add("baz", lineno=10, locations=[("main 1.py", 3), ("other.py", 4)], string="") + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) + buf.seek(0) + catalog2 = pofile.read_po(buf) + assert True is catalog.is_identical(catalog2) - def test_enclosed_filenames_in_location_comment(self): - catalog = Catalog() - catalog.add("foo", lineno=2, locations=[("main 1.py", 1)], string="") - catalog.add("bar", lineno=6, locations=[("other.py", 2)], 
string="") - catalog.add("baz", lineno=10, locations=[("main 1.py", 3), ("other.py", 4)], string="") - buf = BytesIO() - pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) - buf.seek(0) - catalog2 = pofile.read_po(buf) - assert True is catalog.is_identical(catalog2) +def test_unescape(): + escaped = '"Say:\\n \\"hello, world!\\"\\n"' + unescaped = 'Say:\n "hello, world!"\n' + assert unescaped != escaped + assert unescaped == pofile.unescape(escaped) -class PofileFunctionsTestCase(unittest.TestCase): - def test_unescape(self): - escaped = '"Say:\\n \\"hello, world!\\"\\n"' - unescaped = 'Say:\n "hello, world!"\n' - assert unescaped != escaped - assert unescaped == pofile.unescape(escaped) +def test_unescape_of_quoted_newline(): + # regression test for #198 + assert pofile.unescape(r'"\\n"') == '\\n' - def test_unescape_of_quoted_newline(self): - # regression test for #198 - assert pofile.unescape(r'"\\n"') == '\\n' - def test_denormalize_on_msgstr_without_empty_first_line(self): - # handle irregular multi-line msgstr (no "" as first line) - # gracefully (#171) - msgstr = '"multi-line\\n"\n" translation"' - expected_denormalized = 'multi-line\n translation' +def test_denormalize_on_msgstr_without_empty_first_line(): + # handle irregular multi-line msgstr (no "" as first line) + # gracefully (#171) + msgstr = '"multi-line\\n"\n" translation"' + expected_denormalized = 'multi-line\n translation' - assert expected_denormalized == pofile.denormalize(msgstr) - assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}') + assert expected_denormalized == pofile.denormalize(msgstr) + assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}') @pytest.mark.parametrize(("line", "locations"), [ @@ -1068,11 +125,18 @@ def test_iterable_of_strings(): """ Test we can parse from an iterable of strings. 
""" - catalog = pofile.read_po(['msgid "foo"', b'msgstr "Voh"'], locale="en_US") + catalog = pofile.read_po(['msgid "foo"', 'msgstr "Voh"'], locale="en_US") assert catalog.locale == Locale("en", "US") assert catalog.get("foo").string == "Voh" +@pytest.mark.parametrize("order", [1, -1]) +def test_iterable_of_mismatching_strings(order): + # Mixing and matching byteses and strs in the same read_po call is not allowed. + with pytest.raises(Exception): # noqa: B017 (will raise either TypeError or AttributeError) + pofile.read_po(['msgid "foo"', b'msgstr "Voh"'][::order]) + + def test_issue_1087(): buf = StringIO(r''' msgid "" diff --git a/tests/messages/test_pofile_read.py b/tests/messages/test_pofile_read.py new file mode 100644 index 000000000..d17f5d4af --- /dev/null +++ b/tests/messages/test_pofile_read.py @@ -0,0 +1,582 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from datetime import datetime +from io import BytesIO, StringIO + +import pytest + +from babel import Locale +from babel.messages import Catalog, pofile +from babel.util import FixedOffsetTimezone + + +def test_preserve_locale(): + buf = StringIO(r'''msgid "foo" +msgstr "Voh"''') + catalog = pofile.read_po(buf, locale='en_US') + assert Locale('en', 'US') == catalog.locale + + +def test_locale_gets_overridden_by_file(): + buf = StringIO(r''' +msgid "" +msgstr "" +"Language: en_US\n"''') + catalog = pofile.read_po(buf, locale='de') + assert Locale('en', 'US') == catalog.locale + buf = StringIO(r''' +msgid "" +msgstr "" +"Language: ko-KR\n"''') + catalog = pofile.read_po(buf, locale='de') + assert Locale('ko', 'KR') == catalog.locale + + +def test_preserve_domain(): + buf = StringIO(r'''msgid "foo" +msgstr "Voh"''') + catalog = pofile.read_po(buf, domain='mydomain') + assert catalog.domain == 'mydomain' + + +def test_applies_specified_encoding_during_read(): + buf = BytesIO(''' +msgid "" +msgstr "" +"Project-Id-Version: 3.15\\n" +"Report-Msgid-Bugs-To: Fliegender Zirkus \\n" +"POT-Creation-Date: 2007-09-27 11:19+0700\\n" +"PO-Revision-Date: 2007-09-27 21:42-0700\\n" +"Last-Translator: John \\n" +"Language-Team: German Lang \\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\\n" +"MIME-Version: 1.0\\n" +"Content-Type: text/plain; charset=iso-8859-1\\n" +"Content-Transfer-Encoding: 8bit\\n" +"Generated-By: Babel 1.0dev-r313\\n" + +msgid "foo" +msgstr "bär"'''.encode('iso-8859-1')) + catalog = pofile.read_po(buf, locale='de_DE') + assert catalog.get('foo').string == 'bär' + + +def test_encoding_header_read(): + buf = BytesIO(b'msgid ""\nmsgstr ""\n"Content-Type: text/plain; charset=mac_roman\\n"\n') + catalog = pofile.read_po(buf, locale='xx_XX') + assert catalog.charset == 'mac_roman' + + +def test_plural_forms_header_parsed(): + buf = BytesIO(b'msgid ""\nmsgstr ""\n"Plural-Forms: nplurals=42; plural=(n % 11);\\n"\n') + catalog = pofile.read_po(buf, locale='xx_XX') + 
assert catalog.plural_expr == '(n % 11)' + assert catalog.num_plurals == 42 + + +def test_read_multiline(): + buf = StringIO(r'''msgid "" +"Here's some text that\n" +"includesareallylongwordthatmightbutshouldnt" +" throw us into an infinite " +"loop\n" +msgstr ""''') + catalog = pofile.read_po(buf) + assert len(catalog) == 1 + message = list(catalog)[1] + assert message.id == ( + "Here's some text that\nincludesareallylongwordthat" + "mightbutshouldnt throw us into an infinite loop\n" + ) + + +def test_fuzzy_header(): + buf = StringIO(r''' +# Translations template for AReallyReallyLongNameForAProject. +# Copyright (C) 2007 ORGANIZATION +# This file is distributed under the same license as the +# AReallyReallyLongNameForAProject project. +# FIRST AUTHOR , 2007. +# +#, fuzzy +''') + catalog = pofile.read_po(buf) + assert len(list(catalog)) == 1 + assert list(catalog)[0].fuzzy + + +def test_not_fuzzy_header(): + buf = StringIO(r''' +# Translations template for AReallyReallyLongNameForAProject. +# Copyright (C) 2007 ORGANIZATION +# This file is distributed under the same license as the +# AReallyReallyLongNameForAProject project. +# FIRST AUTHOR , 2007. +# +''') + catalog = pofile.read_po(buf) + assert len(list(catalog)) == 1 + assert not list(catalog)[0].fuzzy + + +def test_header_entry(): + buf = StringIO(r''' +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2007 THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , 2007. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: 3.15\n" +"Report-Msgid-Bugs-To: Fliegender Zirkus \n" +"POT-Creation-Date: 2007-09-27 11:19+0700\n" +"PO-Revision-Date: 2007-09-27 21:42-0700\n" +"Last-Translator: John \n" +"Language: de\n" +"Language-Team: German Lang \n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=iso-8859-2\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 1.0dev-r313\n" +''') + catalog = pofile.read_po(buf) + assert len(list(catalog)) == 1 + assert catalog.version == '3.15' + assert catalog.msgid_bugs_address == 'Fliegender Zirkus ' + assert datetime(2007, 9, 27, 11, 19, tzinfo=FixedOffsetTimezone(7 * 60)) == catalog.creation_date + assert catalog.last_translator == 'John ' + assert Locale('de') == catalog.locale + assert catalog.language_team == 'German Lang ' + assert catalog.charset == 'iso-8859-2' + assert list(catalog)[0].fuzzy + + +def test_obsolete_message(): + buf = StringIO(r'''# This is an obsolete message +#~ msgid "foo" +#~ msgstr "Voh" + +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf) + assert len(catalog) == 1 + assert len(catalog.obsolete) == 1 + message = catalog.obsolete['foo'] + assert message.id == 'foo' + assert message.string == 'Voh' + assert message.user_comments == ['This is an obsolete message'] + + +def test_obsolete_message_ignored(): + buf = StringIO(r'''# This is an obsolete message +#~ msgid "foo" +#~ msgstr "Voh" + +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf, ignore_obsolete=True) + assert len(catalog) == 1 + assert len(catalog.obsolete) == 0 + + +def test_multi_line_obsolete_message(): + buf = StringIO(r'''# This is an obsolete message +#~ msgid "" +#~ "foo" +#~ "foo" +#~ msgstr "" +#~ "Voh" +#~ "Vooooh" + +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" +''') + catalog = 
pofile.read_po(buf) + assert len(catalog.obsolete) == 1 + message = catalog.obsolete['foofoo'] + assert message.id == 'foofoo' + assert message.string == 'VohVooooh' + assert message.user_comments == ['This is an obsolete message'] + + +def test_unit_following_multi_line_obsolete_message(): + buf = StringIO(r'''# This is an obsolete message +#~ msgid "" +#~ "foo" +#~ "fooooooo" +#~ msgstr "" +#~ "Voh" +#~ "Vooooh" + +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf) + assert len(catalog) == 1 + message = catalog['bar'] + assert message.id == 'bar' + assert message.string == 'Bahr' + assert message.user_comments == ['This message is not obsolete'] + + +def test_unit_before_obsolete_is_not_obsoleted(): + buf = StringIO(r''' +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" + +# This is an obsolete message +#~ msgid "" +#~ "foo" +#~ "fooooooo" +#~ msgstr "" +#~ "Voh" +#~ "Vooooh" +''') + catalog = pofile.read_po(buf) + assert len(catalog) == 1 + message = catalog['bar'] + assert message.id == 'bar' + assert message.string == 'Bahr' + assert message.user_comments == ['This message is not obsolete'] + + +def test_with_context(): + buf = BytesIO(b'''# Some string in the menu +#: main.py:1 +msgctxt "Menu" +msgid "foo" +msgstr "Voh" + +# Another string in the menu +#: main.py:2 +msgctxt "Menu" +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf, ignore_obsolete=True) + assert len(catalog) == 2 + message = catalog.get('foo', context='Menu') + assert message.context == 'Menu' + message = catalog.get('bar', context='Menu') + assert message.context == 'Menu' + + # And verify it pass through write_po + out_buf = BytesIO() + pofile.write_po(out_buf, catalog, omit_header=True) + assert out_buf.getvalue().strip() == buf.getvalue().strip() + + +def test_obsolete_message_with_context(): + buf = StringIO(''' +# This message is not obsolete +msgid "baz" +msgstr "Bazczch" + +# This is an 
obsolete message +#~ msgctxt "other" +#~ msgid "foo" +#~ msgstr "Voh" + +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf) + assert len(catalog) == 2 + assert len(catalog.obsolete) == 1 + message = catalog.obsolete[("foo", "other")] + assert message.context == 'other' + assert message.string == 'Voh' + + +def test_obsolete_messages_with_context(): + buf = StringIO(''' +# This is an obsolete message +#~ msgctxt "apple" +#~ msgid "foo" +#~ msgstr "Foo" + +# This is an obsolete message with the same id but different context +#~ msgctxt "orange" +#~ msgid "foo" +#~ msgstr "Bar" +''') + catalog = pofile.read_po(buf) + assert len(catalog) == 0 + assert len(catalog.obsolete) == 2 + assert 'foo' not in catalog.obsolete + + apple_msg = catalog.obsolete[('foo', 'apple')] + assert apple_msg.id == 'foo' + assert apple_msg.string == 'Foo' + assert apple_msg.user_comments == ['This is an obsolete message'] + + orange_msg = catalog.obsolete[('foo', 'orange')] + assert orange_msg.id == 'foo' + assert orange_msg.string == 'Bar' + assert orange_msg.user_comments == ['This is an obsolete message with the same id but different context'] + + +def test_obsolete_messages_roundtrip(): + buf = StringIO('''\ +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" + +# This is an obsolete message +#~ msgid "foo" +#~ msgstr "Voh" + +# This is an obsolete message +#~ msgctxt "apple" +#~ msgid "foo" +#~ msgstr "Foo" + +# This is an obsolete message with the same id but different context +#~ msgctxt "orange" +#~ msgid "foo" +#~ msgstr "Bar" + +''') + generated_po_file = ''.join(pofile.generate_po(pofile.read_po(buf), omit_header=True)) + assert buf.getvalue() == generated_po_file + + +def test_multiline_context(): + buf = StringIO(''' +msgctxt "a really long " +"message context " +"why?" 
+msgid "mid" +msgstr "mst" + ''') + catalog = pofile.read_po(buf) + assert len(catalog) == 1 + message = catalog.get('mid', context="a really long message context why?") + assert message is not None + assert message.context == 'a really long message context why?' + + +def test_with_context_two(): + buf = BytesIO(b'''msgctxt "Menu" +msgid "foo" +msgstr "Voh" + +msgctxt "Mannu" +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf, ignore_obsolete=True) + assert len(catalog) == 2 + message = catalog.get('foo', context='Menu') + assert message.context == 'Menu' + message = catalog.get('bar', context='Mannu') + assert message.context == 'Mannu' + + # And verify it pass through write_po + out_buf = BytesIO() + pofile.write_po(out_buf, catalog, omit_header=True) + assert out_buf.getvalue().strip() == buf.getvalue().strip(), out_buf.getvalue() + + +def test_single_plural_form(): + buf = StringIO(r'''msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh"''') + catalog = pofile.read_po(buf, locale='ja_JP') + assert len(catalog) == 1 + assert catalog.num_plurals == 1 + message = catalog['foo'] + assert len(message.string) == 1 + + +def test_singular_plural_form(): + buf = StringIO(r'''msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Vohs"''') + catalog = pofile.read_po(buf, locale='nl_NL') + assert len(catalog) == 1 + assert catalog.num_plurals == 2 + message = catalog['foo'] + assert len(message.string) == 2 + + +def test_more_than_two_plural_forms(): + buf = StringIO(r'''msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Vohs" +msgstr[2] "Vohss"''') + catalog = pofile.read_po(buf, locale='lv_LV') + assert len(catalog) == 1 + assert catalog.num_plurals == 3 + message = catalog['foo'] + assert len(message.string) == 3 + assert message.string[2] == 'Vohss' + + +def test_plural_with_square_brackets(): + buf = StringIO(r'''msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh [text]" +msgstr[1] "Vohs [text]"''') + catalog = pofile.read_po(buf, 
locale='nb_NO') + assert len(catalog) == 1 + assert catalog.num_plurals == 2 + message = catalog['foo'] + assert len(message.string) == 2 + + +def test_obsolete_plural_with_square_brackets(): + buf = StringIO('''\ +#~ msgid "foo" +#~ msgid_plural "foos" +#~ msgstr[0] "Voh [text]" +#~ msgstr[1] "Vohs [text]" +''') + catalog = pofile.read_po(buf, locale='nb_NO') + assert len(catalog) == 0 + assert len(catalog.obsolete) == 1 + assert catalog.num_plurals == 2 + message = catalog.obsolete['foo'] + assert len(message.string) == 2 + assert message.string[0] == 'Voh [text]' + assert message.string[1] == 'Vohs [text]' + + +def test_missing_plural(): + buf = StringIO('''\ +msgid "" +msgstr "" +"Plural-Forms: nplurals=3; plural=(n < 2) ? n : 2;\n" + +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh [text]" +msgstr[1] "Vohs [text]" +''') + catalog = pofile.read_po(buf, locale='nb_NO') + assert len(catalog) == 1 + assert catalog.num_plurals == 3 + message = catalog['foo'] + assert len(message.string) == 3 + assert message.string[0] == 'Voh [text]' + assert message.string[1] == 'Vohs [text]' + assert message.string[2] == '' + + +def test_missing_plural_in_the_middle(): + buf = StringIO('''\ +msgid "" +msgstr "" +"Plural-Forms: nplurals=3; plural=(n < 2) ? 
n : 2;\n" + +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh [text]" +msgstr[2] "Vohs [text]" +''') + catalog = pofile.read_po(buf, locale='nb_NO') + assert len(catalog) == 1 + assert catalog.num_plurals == 3 + message = catalog['foo'] + assert len(message.string) == 3 + assert message.string[0] == 'Voh [text]' + assert message.string[1] == '' + assert message.string[2] == 'Vohs [text]' + + +def test_with_location(): + buf = StringIO('''\ +#: main.py:1 \u2068filename with whitespace.py\u2069:123 +msgid "foo" +msgstr "bar" +''') + catalog = pofile.read_po(buf, locale='de_DE') + assert len(catalog) == 1 + message = catalog['foo'] + assert message.string == 'bar' + assert message.locations == [("main.py", 1), ("filename with whitespace.py", 123)] + + +def test_abort_invalid_po_file(): + invalid_po = ''' + msgctxt "" + "{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": " + "270005359}" + msgid "" + "Thank you very much for your time.\n" + "If you have any questions regarding this survey, please contact Fulano " + "at nadie@blah.com" + msgstr "Merci de prendre le temps de remplir le sondage. + Pour toute question, veuillez communiquer avec Fulano à nadie@blah.com + " + ''' + invalid_po_2 = ''' + msgctxt "" + "{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": " + "270005359}" + msgid "" + "Thank you very much for your time.\n" + "If you have any questions regarding this survey, please contact Fulano " + "at fulano@blah.com." + msgstr "Merci de prendre le temps de remplir le sondage. 
+ Pour toute question, veuillez communiquer avec Fulano a fulano@blah.com + " + ''' + # Catalog not created, throws Unicode Error + buf = StringIO(invalid_po) + output = pofile.read_po(buf, locale='fr', abort_invalid=False) + assert isinstance(output, Catalog) + + # Catalog not created, throws PoFileError + buf = StringIO(invalid_po_2) + with pytest.raises(pofile.PoFileError): + pofile.read_po(buf, locale='fr', abort_invalid=True) + + # Catalog is created with warning, no abort + buf = StringIO(invalid_po_2) + output = pofile.read_po(buf, locale='fr', abort_invalid=False) + assert isinstance(output, Catalog) + + # Catalog not created, aborted with PoFileError + buf = StringIO(invalid_po_2) + with pytest.raises(pofile.PoFileError): + pofile.read_po(buf, locale='fr', abort_invalid=True) + + +def test_invalid_pofile_with_abort_flag(): + parser = pofile.PoFileParser(None, abort_invalid=True) + lineno = 10 + line = 'Algo esta mal' + msg = 'invalid file' + with pytest.raises(pofile.PoFileError): + parser._invalid_pofile(line, lineno, msg) diff --git a/tests/messages/test_pofile_write.py b/tests/messages/test_pofile_write.py new file mode 100644 index 000000000..0145f7928 --- /dev/null +++ b/tests/messages/test_pofile_write.py @@ -0,0 +1,441 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from datetime import datetime +from io import BytesIO + +from babel.messages import Catalog, Message, pofile + + +def test_join_locations(): + catalog = Catalog() + catalog.add('foo', locations=[('main.py', 1)]) + catalog.add('foo', locations=[('utils.py', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True) + assert buf.getvalue().strip() == b'''#: main.py:1 utils.py:3 +msgid "foo" +msgstr ""''' + + +def test_write_po_file_with_specified_charset(): + catalog = Catalog(charset='iso-8859-1') + catalog.add('foo', 'äöü', locations=[('main.py', 1)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=False) + po_file = buf.getvalue().strip() + assert b'"Content-Type: text/plain; charset=iso-8859-1\\n"' in po_file + assert 'msgstr "äöü"'.encode('iso-8859-1') in po_file + + +def test_duplicate_comments(): + catalog = Catalog() + catalog.add('foo', auto_comments=['A comment']) + catalog.add('foo', auto_comments=['A comment']) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True) + assert buf.getvalue().strip() == b'''#. 
A comment +msgid "foo" +msgstr ""''' + + +def test_wrap_long_lines(): + text = """Here's some text where +white space and line breaks matter, and should + +not be removed + +""" + catalog = Catalog() + catalog.add(text, locations=[('main.py', 1)]) + buf = BytesIO() + pofile.write_po(buf, catalog, no_location=True, omit_header=True, + width=42) + assert buf.getvalue().strip() == b'''msgid "" +"Here's some text where\\n" +"white space and line breaks matter, and" +" should\\n" +"\\n" +"not be removed\\n" +"\\n" +msgstr ""''' + + +def test_wrap_long_lines_with_long_word(): + text = """Here's some text that +includesareallylongwordthatmightbutshouldnt throw us into an infinite loop +""" + catalog = Catalog() + catalog.add(text, locations=[('main.py', 1)]) + buf = BytesIO() + pofile.write_po(buf, catalog, no_location=True, omit_header=True, + width=32) + assert buf.getvalue().strip() == b'''msgid "" +"Here's some text that\\n" +"includesareallylongwordthatmightbutshouldnt" +" throw us into an infinite " +"loop\\n" +msgstr ""''' + + +def test_wrap_long_lines_in_header(): + """ + Verify that long lines in the header comment are wrapped correctly. + """ + catalog = Catalog(project='AReallyReallyLongNameForAProject', + revision_date=datetime(2007, 4, 1)) + buf = BytesIO() + pofile.write_po(buf, catalog) + assert b'\n'.join(buf.getvalue().splitlines()[:7]) == b'''\ +# Translations template for AReallyReallyLongNameForAProject. +# Copyright (C) 2007 ORGANIZATION +# This file is distributed under the same license as the +# AReallyReallyLongNameForAProject project. +# FIRST AUTHOR , 2007. 
+# +#, fuzzy''' + + +def test_wrap_locations_with_hyphens(): + catalog = Catalog() + catalog.add('foo', locations=[ + ('doupy/templates/base/navmenu.inc.html.py', 60), + ]) + catalog.add('foo', locations=[ + ('doupy/templates/job-offers/helpers.html', 22), + ]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True) + assert buf.getvalue().strip() == b'''#: doupy/templates/base/navmenu.inc.html.py:60 +#: doupy/templates/job-offers/helpers.html:22 +msgid "foo" +msgstr ""''' + + +def test_no_wrap_and_width_behaviour_on_comments(): + catalog = Catalog() + catalog.add("Pretty dam long message id, which must really be big " + "to test this wrap behaviour, if not it won't work.", + locations=[("fake.py", n) for n in range(1, 30)]) + buf = BytesIO() + pofile.write_po(buf, catalog, width=None, omit_header=True) + assert buf.getvalue().lower() == b"""\ +#: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 +#: fake.py:8 fake.py:9 fake.py:10 fake.py:11 fake.py:12 fake.py:13 fake.py:14 +#: fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 fake.py:20 fake.py:21 +#: fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 +#: fake.py:29 +msgid "pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't work." +msgstr "" + +""" + buf = BytesIO() + pofile.write_po(buf, catalog, width=100, omit_header=True) + assert buf.getvalue().lower() == b"""\ +#: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 fake.py:8 fake.py:9 fake.py:10 +#: fake.py:11 fake.py:12 fake.py:13 fake.py:14 fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 +#: fake.py:20 fake.py:21 fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 +#: fake.py:29 +msgid "" +"pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't" +" work." 
+msgstr "" + +""" + + +def test_pot_with_translator_comments(): + catalog = Catalog() + catalog.add('foo', locations=[('main.py', 1)], + auto_comments=['Comment About `foo`']) + catalog.add('bar', locations=[('utils.py', 3)], + user_comments=['Comment About `bar` with', + 'multiple lines.']) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True) + assert buf.getvalue().strip() == b'''#. Comment About `foo` +#: main.py:1 +msgid "foo" +msgstr "" + +# Comment About `bar` with +# multiple lines. +#: utils.py:3 +msgid "bar" +msgstr ""''' + + +def test_po_with_obsolete_message(): + catalog = Catalog() + catalog.add('foo', 'Voh', locations=[('main.py', 1)]) + catalog.obsolete['bar'] = Message('bar', 'Bahr', + locations=[('utils.py', 3)], + user_comments=['User comment']) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True) + assert buf.getvalue().strip() == b'''#: main.py:1 +msgid "foo" +msgstr "Voh" + +# User comment +#~ msgid "bar" +#~ msgstr "Bahr"''' + + +def test_po_with_multiline_obsolete_message(): + catalog = Catalog() + catalog.add('foo', 'Voh', locations=[('main.py', 1)]) + msgid = r"""Here's a message that covers +multiple lines, and should still be handled +correctly. +""" + msgstr = r"""Here's a message that covers +multiple lines, and should still be handled +correctly. 
+""" + catalog.obsolete[msgid] = Message(msgid, msgstr, + locations=[('utils.py', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True) + assert buf.getvalue().strip() == b'''#: main.py:1 +msgid "foo" +msgstr "Voh" + +#~ msgid "" +#~ "Here's a message that covers\\n" +#~ "multiple lines, and should still be handled\\n" +#~ "correctly.\\n" +#~ msgstr "" +#~ "Here's a message that covers\\n" +#~ "multiple lines, and should still be handled\\n" +#~ "correctly.\\n"''' + + +def test_po_with_obsolete_message_ignored(): + catalog = Catalog() + catalog.add('foo', 'Voh', locations=[('main.py', 1)]) + catalog.obsolete['bar'] = Message('bar', 'Bahr', + locations=[('utils.py', 3)], + user_comments=['User comment']) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, ignore_obsolete=True) + assert buf.getvalue().strip() == b'''#: main.py:1 +msgid "foo" +msgstr "Voh"''' + + +def test_po_with_previous_msgid(): + catalog = Catalog() + catalog.add('foo', 'Voh', locations=[('main.py', 1)], + previous_id='fo') + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_previous=True) + assert buf.getvalue().strip() == b'''#: main.py:1 +#| msgid "fo" +msgid "foo" +msgstr "Voh"''' + + +def test_po_with_previous_msgid_plural(): + catalog = Catalog() + catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), + locations=[('main.py', 1)], previous_id=('fo', 'fos')) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_previous=True) + assert buf.getvalue().strip() == b'''#: main.py:1 +#| msgid "fo" +#| msgid_plural "fos" +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Voeh"''' + + +def test_sorted_po(): + catalog = Catalog() + catalog.add('bar', locations=[('utils.py', 3)], + user_comments=['Comment About `bar` with', + 'multiple lines.']) + catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), + locations=[('main.py', 1)]) + buf = BytesIO() + pofile.write_po(buf, catalog, sort_output=True) + value = 
buf.getvalue().strip() + assert b'''\ +# Comment About `bar` with +# multiple lines. +#: utils.py:3 +msgid "bar" +msgstr "" + +#: main.py:1 +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Voeh"''' in value + assert value.find(b'msgid ""') < value.find(b'msgid "bar"') < value.find(b'msgid "foo"') + + +def test_sorted_po_context(): + catalog = Catalog() + catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), + locations=[('main.py', 1)], + context='there') + catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), + locations=[('main.py', 1)]) + catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), + locations=[('main.py', 1)], + context='here') + buf = BytesIO() + pofile.write_po(buf, catalog, sort_output=True) + value = buf.getvalue().strip() + # We expect the foo without ctx, followed by "here" foo and "there" foo + assert b'''\ +#: main.py:1 +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Voeh" + +#: main.py:1 +msgctxt "here" +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Voeh" + +#: main.py:1 +msgctxt "there" +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Voeh"''' in value + + +def test_file_sorted_po(): + catalog = Catalog() + catalog.add('bar', locations=[('utils.py', 3)]) + catalog.add(('foo', 'foos'), ('Voh', 'Voeh'), locations=[('main.py', 1)]) + buf = BytesIO() + pofile.write_po(buf, catalog, sort_by_file=True) + value = buf.getvalue().strip() + assert value.find(b'main.py') < value.find(b'utils.py') + + +def test_file_with_no_lineno(): + catalog = Catalog() + catalog.add('bar', locations=[('utils.py', None)], + user_comments=['Comment About `bar` with', + 'multiple lines.']) + buf = BytesIO() + pofile.write_po(buf, catalog, sort_output=True) + value = buf.getvalue().strip() + assert b'''\ +# Comment About `bar` with +# multiple lines. 
+#: utils.py +msgid "bar" +msgstr ""''' in value + + +def test_silent_location_fallback(): + buf = BytesIO(b'''\ +#: broken_file.py +msgid "missing line number" +msgstr "" + +#: broken_file.py:broken_line_number +msgid "broken line number" +msgstr ""''') + catalog = pofile.read_po(buf) + assert catalog['missing line number'].locations == [('broken_file.py', None)] + assert catalog['broken line number'].locations == [] + + +def test_include_lineno(): + catalog = Catalog() + catalog.add('foo', locations=[('main.py', 1)]) + catalog.add('foo', locations=[('utils.py', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) + assert buf.getvalue().strip() == b'''#: main.py:1 utils.py:3 +msgid "foo" +msgstr ""''' + + +def test_no_include_lineno(): + catalog = Catalog() + catalog.add('foo', locations=[('main.py', 1)]) + catalog.add('foo', locations=[('main.py', 2)]) + catalog.add('foo', locations=[('utils.py', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=False) + assert buf.getvalue().strip() == b'''#: main.py utils.py +msgid "foo" +msgstr ""''' + + +def test_white_space_in_location(): + catalog = Catalog() + catalog.add('foo', locations=[('main.py', 1)]) + catalog.add('foo', locations=[('utils b.py', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) + assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 +msgid "foo" +msgstr ""''' + + +def test_white_space_in_location_already_enclosed(): + catalog = Catalog() + catalog.add('foo', locations=[('main.py', 1)]) + catalog.add('foo', locations=[('\u2068utils b.py\u2069', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) + assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 +msgid "foo" +msgstr ""''' + + +def test_tab_in_location(): + catalog = Catalog() + catalog.add('foo', 
locations=[('main.py', 1)]) + catalog.add('foo', locations=[('utils\tb.py', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) + assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 +msgid "foo" +msgstr ""''' + + +def test_tab_in_location_already_enclosed(): + catalog = Catalog() + catalog.add('foo', locations=[('main.py', 1)]) + catalog.add('foo', locations=[('\u2068utils\tb.py\u2069', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=True) + assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3 +msgid "foo" +msgstr ""''' + + +def test_wrap_with_enclosed_file_locations(): + # Ensure that file names containing white space are not wrapped regardless of the --width parameter + catalog = Catalog() + catalog.add('foo', locations=[('\u2068test utils.py\u2069', 1)]) + catalog.add('foo', locations=[('\u2068test utils.py\u2069', 3)]) + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_lineno=True, width=1) + assert buf.getvalue().strip() == b'''#: \xe2\x81\xa8test utils.py\xe2\x81\xa9:1 +#: \xe2\x81\xa8test utils.py\xe2\x81\xa9:3 +msgid "foo" +msgstr ""''' diff --git a/tests/messages/test_setuptools_frontend.py b/tests/messages/test_setuptools_frontend.py index a623efd29..5c3f4433b 100644 --- a/tests/messages/test_setuptools_frontend.py +++ b/tests/messages/test_setuptools_frontend.py @@ -45,6 +45,13 @@ def test_extract_distutils_keyword_arg_388(kwarg, expected): assert set(cmdinst.add_comments) == {"Bar", "Foo"} +@pytest.mark.xfail( + # Python 3.10.16[pypy-7.3.19-final] in GHA fails with "unsupported locale setting" + # in the subprocesses this test spawns. Hard to say why because it doesn't do that + # locally. 
+ condition=(sys.implementation.name == "pypy" and "BABEL_TOX_INI_DIR" in os.environ), + reason="Test will likely fail with 'unsupported locale setting' in subprocesses; see comment", +) def test_setuptools_commands(tmp_path, monkeypatch): """ Smoke-tests all of the setuptools versions of the commands in turn. diff --git a/tests/messages/test_toml_config.py b/tests/messages/test_toml_config.py index 6a3c15700..1dd37a7ac 100644 --- a/tests/messages/test_toml_config.py +++ b/tests/messages/test_toml_config.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pathlib from io import BytesIO @@ -9,19 +11,64 @@ assert toml_test_cases_path.is_dir(), "toml-test-cases directory not found" +def parse_toml(cfg: bytes | str): + if isinstance(cfg, str): + cfg = cfg.encode("utf-8") + return frontend._parse_mapping_toml(BytesIO(cfg)) + + def test_toml_mapping_multiple_patterns(): """ Test that patterns may be specified as a list in TOML, and are expanded to multiple entries in the method map. 
""" - method_map, options_map = frontend._parse_mapping_toml(BytesIO(b""" + method_map, options_map = parse_toml(""" [[mappings]] method = "python" pattern = ["xyz/**.py", "foo/**.py"] -""")) - assert len(method_map) == 2 - assert method_map[0] == ('xyz/**.py', 'python') - assert method_map[1] == ('foo/**.py', 'python') +""") + assert method_map == [ + ('xyz/**.py', 'python'), + ('foo/**.py', 'python'), + ] + + +@pytest.mark.parametrize( + ("keywords_val", "expected"), + [ + pytest.param('"foo bar quz"', {'bar': None, 'foo': None, 'quz': None}, id='string'), + pytest.param('["foo", "bar", "quz"]', {'bar': None, 'foo': None, 'quz': None}, id='list'), + pytest.param('"foo:1,2 bar quz"', {'bar': None, 'foo': (1, 2), 'quz': None}, id='s-args'), + pytest.param('["bar", "foo:1,2", "quz"]', {'bar': None, 'foo': (1, 2), 'quz': None}, id='l-args'), + pytest.param('[]', None, id='empty'), + ], +) +def test_toml_mapping_keywords_parsing(keywords_val, expected): + method_map, options_map = parse_toml(f""" +[[mappings]] +method = "python" +pattern = ["**.py"] +keywords = {keywords_val} +""") + assert options_map['**.py'].get('keywords') == expected + + +@pytest.mark.parametrize( + ("add_comments_val", "expected"), + [ + ('"SPECIAL SAUCE"', ['SPECIAL SAUCE']), # TOML will allow this as a single string + ('["SPECIAL", "SAUCE"]', ['SPECIAL', 'SAUCE']), + ('[]', None), + ], +) +def test_toml_mapping_add_comments_parsing(add_comments_val, expected): + method_map, options_map = parse_toml(f""" +[[mappings]] +method = "python" +pattern = ["**.py"] +add_comments = {add_comments_val} +""") + assert options_map['**.py'].get('add_comments') == expected @pytest.mark.parametrize("test_case", toml_test_cases_path.glob("bad.*.toml"), ids=lambda p: p.name) diff --git a/tests/messages/utils.py b/tests/messages/utils.py index d0797a337..ecd8a2b26 100644 --- a/tests/messages/utils.py +++ b/tests/messages/utils.py @@ -1,3 +1,5 @@ +from __future__ import annotations + CUSTOM_EXTRACTOR_COOKIE = 
"custom extractor was here" @@ -5,3 +7,18 @@ def custom_extractor(fileobj, keywords, comment_tags, options): if "treat" not in options: raise RuntimeError(f"The custom extractor refuses to run without a delicious treat; got {options!r}") return [(1, next(iter(keywords)), (CUSTOM_EXTRACTOR_COOKIE,), [])] + + +class Distribution: # subset of distutils.dist.Distribution + def __init__(self, attrs: dict) -> None: + self.attrs = attrs + + def get_name(self) -> str: + return self.attrs['name'] + + def get_version(self) -> str: + return self.attrs['version'] + + @property + def packages(self) -> list[str]: + return self.attrs['packages'] diff --git a/tests/test_core.py b/tests/test_core.py index aaf95a1c2..461d70782 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -55,10 +55,18 @@ def test_ignore_invalid_locales_in_lc_ctype(monkeypatch): default_locale('LC_CTYPE') -def test_get_global(): - assert core.get_global('zone_aliases')['GMT'] == 'Etc/GMT' - assert core.get_global('zone_aliases')['UTC'] == 'Etc/UTC' - assert core.get_global('zone_territories')['Europe/Berlin'] == 'DE' +def test_zone_aliases_and_territories(): + aliases = core.get_global('zone_aliases') + territories = core.get_global('zone_territories') + assert aliases['GMT'] == 'Etc/GMT' + assert aliases['UTC'] == 'Etc/UTC' + assert territories['Europe/Berlin'] == 'DE' + # Check that the canonical (IANA) names are used in `territories`, + # but that aliases are still available. 
+ assert territories['Africa/Asmara'] == 'ER' + assert aliases['Africa/Asmera'] == 'Africa/Asmara' + assert territories['Europe/Kyiv'] == 'UA' + assert aliases['Europe/Kiev'] == 'Europe/Kyiv' def test_hash(): @@ -395,9 +403,14 @@ def test_language_alt_official_not_used(): def test_locale_parse_empty(): - with pytest.raises(ValueError, match="Empty"): + with pytest.raises(ValueError, match="Empty") as ei: Locale.parse("") + assert isinstance(ei.value.args[0], str) with pytest.raises(TypeError, match="Empty"): Locale.parse(None) with pytest.raises(TypeError, match="Empty"): Locale.parse(False) # weird...! + + +def test_get_cldr_version(): + assert core.get_cldr_version() == "47" diff --git a/tests/test_dates.py b/tests/test_dates.py index e47521e4d..12bb23433 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -799,6 +799,7 @@ def week_number(value): expected = '%04d-W%02d-%d' % value.isocalendar() assert week_number(value) == expected + def test_week_numbering_monday_mindays_4(): locale = Locale.parse('de_DE') assert locale.first_week_day == 0 @@ -1187,3 +1188,33 @@ def test_issue_1089(): def test_issue_1162(locale, format, negative, expected): delta = timedelta(seconds=10800) * (-1 if negative else +1) assert dates.format_timedelta(delta, add_direction=True, format=format, locale=locale) == expected + + +def test_issue_1192(): + # The actual returned value here is not actually strictly specified ("get_timezone_name" + # is not an operation specified as such). Issue #1192 concerned this invocation returning + # the invalid "no inheritance marker" value; _that_ should never be returned here. + # IOW, if the below "Hawaii-Aleutian Time" changes with e.g. CLDR updates, that's fine. + assert dates.get_timezone_name('Pacific/Honolulu', 'short', locale='en_GB') == "Hawaii-Aleutian Time" + + +@pytest.mark.xfail +def test_issue_1192_fmt(timezone_getter): + """ + There is an issue in how we format the fallback for z/zz in the absence of data + (esp. 
with the no inheritance marker present). + This test is marked xfail until that's fixed. + """ + # env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm z" bin/icu4c_date_format + # Defaulting TEST_TIME to 2025-03-04T13:53:00Z + # Pacific/Honolulu en_US 2025-03-04 3:53 HST + # Pacific/Honolulu en_GB 2025-03-04 3:53 GMT-10 + # env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm zz" bin/icu4c_date_format + # Pacific/Honolulu en_US 2025-03-04 3:53 HST + # Pacific/Honolulu en_GB 2025-03-04 3:53 GMT-10 + tz = timezone_getter("Pacific/Honolulu") + dt = _localize(tz, datetime(2025, 3, 4, 13, 53, tzinfo=UTC)) + assert dates.format_datetime(dt, "YYYY-MM-dd H:mm z", locale="en_US") == "2025-03-04 3:53 HST" + assert dates.format_datetime(dt, "YYYY-MM-dd H:mm z", locale="en_GB") == "2025-03-04 3:53 GMT-10" + assert dates.format_datetime(dt, "YYYY-MM-dd H:mm zz", locale="en_US") == "2025-03-04 3:53 HST" + assert dates.format_datetime(dt, "YYYY-MM-dd H:mm zz", locale="en_GB") == "2025-03-04 3:53 GMT-10" diff --git a/tests/test_day_periods.py b/tests/test_day_periods.py index 9b51e1dd9..b3494cd0c 100644 --- a/tests/test_day_periods.py +++ b/tests/test_day_periods.py @@ -11,7 +11,7 @@ ("fi", time(0), "midnight"), # (at) ("en_US", time(12), "noon"), # (at) ("en_US", time(21), "night1"), # (from, before) across 0:00 - ("en_US", time(5), "night1"), # (from, before) across 0:00 + ("en_US", time(5), "morning1"), # (from, before) across 0:00 ("en_US", time(6), "morning1"), # (from, before) ("agq", time(10), "am"), # no periods defined ("agq", time(22), "pm"), # no periods defined diff --git a/tests/test_localedata.py b/tests/test_localedata.py index 6911cbdcf..03cbed1dc 100644 --- a/tests/test_localedata.py +++ b/tests/test_localedata.py @@ -15,47 +15,46 @@ import random import sys import tempfile -import unittest -from operator import methodcaller import pytest from babel import Locale, 
UnknownLocaleError, localedata -class MergeResolveTestCase(unittest.TestCase): - - def test_merge_items(self): - d = {1: 'foo', 3: 'baz'} - localedata.merge(d, {1: 'Foo', 2: 'Bar'}) - assert d == {1: 'Foo', 2: 'Bar', 3: 'baz'} - - def test_merge_nested_dict(self): - d1 = {'x': {'a': 1, 'b': 2, 'c': 3}} - d2 = {'x': {'a': 1, 'b': 12, 'd': 14}} - localedata.merge(d1, d2) - assert d1 == {'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}} - - def test_merge_nested_dict_no_overlap(self): - d1 = {'x': {'a': 1, 'b': 2}} - d2 = {'y': {'a': 11, 'b': 12}} - localedata.merge(d1, d2) - assert d1 == {'x': {'a': 1, 'b': 2}, 'y': {'a': 11, 'b': 12}} - - def test_merge_with_alias_and_resolve(self): - alias = localedata.Alias('x') - d1 = { - 'x': {'a': 1, 'b': 2, 'c': 3}, - 'y': alias, - } - d2 = { - 'x': {'a': 1, 'b': 12, 'd': 14}, - 'y': {'b': 22, 'e': 25}, - } - localedata.merge(d1, d2) - assert d1 == {'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, 'y': (alias, {'b': 22, 'e': 25})} - d = localedata.LocaleDataDict(d1) - assert dict(d.items()) == {'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, 'y': {'a': 1, 'b': 22, 'c': 3, 'd': 14, 'e': 25}} +def test_merge_items(): + d = {1: 'foo', 3: 'baz'} + localedata.merge(d, {1: 'Foo', 2: 'Bar'}) + assert d == {1: 'Foo', 2: 'Bar', 3: 'baz'} + + +def test_merge_nested_dict(): + d1 = {'x': {'a': 1, 'b': 2, 'c': 3}} + d2 = {'x': {'a': 1, 'b': 12, 'd': 14}} + localedata.merge(d1, d2) + assert d1 == {'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}} + + +def test_merge_nested_dict_no_overlap(): + d1 = {'x': {'a': 1, 'b': 2}} + d2 = {'y': {'a': 11, 'b': 12}} + localedata.merge(d1, d2) + assert d1 == {'x': {'a': 1, 'b': 2}, 'y': {'a': 11, 'b': 12}} + + +def test_merge_with_alias_and_resolve(): + alias = localedata.Alias('x') + d1 = { + 'x': {'a': 1, 'b': 2, 'c': 3}, + 'y': alias, + } + d2 = { + 'x': {'a': 1, 'b': 12, 'd': 14}, + 'y': {'b': 22, 'e': 25}, + } + localedata.merge(d1, d2) + assert d1 == {'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, 'y': (alias, {'b': 22, 'e': 25})} + d 
= localedata.LocaleDataDict(d1) + assert dict(d.items()) == {'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, 'y': {'a': 1, 'b': 22, 'c': 3, 'd': 14, 'e': 25}} def test_load(): @@ -94,14 +93,13 @@ def test_unique_ids(): all_ids = localedata.locale_identifiers() assert len(all_ids) == len(set(all_ids)) # Check locale IDs don't collide after lower-case normalization. - lower_case_ids = list(map(methodcaller('lower'), all_ids)) + lower_case_ids = [id.lower() for id in all_ids] assert len(lower_case_ids) == len(set(lower_case_ids)) def test_mixedcased_locale(): for locale in localedata.locale_identifiers(): - locale_id = ''.join([ - methodcaller(random.choice(['lower', 'upper']))(c) for c in locale]) + locale_id = ''.join(c.lower() if random.random() < 0.5 else c.upper() for c in locale) assert localedata.exists(locale_id) diff --git a/tests/test_numbers.py b/tests/test_numbers.py index e9c216620..4f24f5b88 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -11,7 +11,6 @@ # history and logs, available at https://github.com/python-babel/babel/commits/master/. 
import decimal -import unittest from datetime import date import pytest @@ -29,214 +28,6 @@ ) -class FormatDecimalTestCase(unittest.TestCase): - - def test_patterns(self): - assert numbers.format_decimal(12345, '##0', locale='en_US') == '12345' - assert numbers.format_decimal(6.5, '0.00', locale='sv') == '6,50' - assert numbers.format_decimal((10.0 ** 20), '#.00', locale='en_US') == '100000000000000000000.00' - # regression test for #183, fraction digits were not correctly cut - # if the input was a float value and the value had more than 7 - # significant digits - assert numbers.format_decimal(12345678.051, '#,##0.00', locale='en_US') == '12,345,678.05' - - def test_subpatterns(self): - assert numbers.format_decimal((- 12345), '#,##0.##;-#', locale='en_US') == '-12,345' - assert numbers.format_decimal((- 12345), '#,##0.##;(#)', locale='en_US') == '(12,345)' - - def test_default_rounding(self): - """ - Testing Round-Half-Even (Banker's rounding) - - A '5' is rounded to the closest 'even' number - """ - assert numbers.format_decimal(5.5, '0', locale='sv') == '6' - assert numbers.format_decimal(6.5, '0', locale='sv') == '6' - assert numbers.format_decimal(1.2325, locale='sv') == '1,232' - assert numbers.format_decimal(1.2335, locale='sv') == '1,234' - - def test_significant_digits(self): - """Test significant digits patterns""" - assert numbers.format_decimal(123004, '@@', locale='en_US') == '120000' - assert numbers.format_decimal(1.12, '@', locale='sv') == '1' - assert numbers.format_decimal(1.1, '@@', locale='sv') == '1,1' - assert numbers.format_decimal(1.1, '@@@@@##', locale='sv') == '1,1000' - assert numbers.format_decimal(0.0001, '@@@', locale='sv') == '0,000100' - assert numbers.format_decimal(0.0001234, '@@@', locale='sv') == '0,000123' - assert numbers.format_decimal(0.0001234, '@@@#', locale='sv') == '0,0001234' - assert numbers.format_decimal(0.0001234, '@@@#', locale='sv') == '0,0001234' - assert numbers.format_decimal(0.12345, '@@@', locale='sv') == 
'0,123' - assert numbers.format_decimal(3.14159, '@@##', locale='sv') == '3,142' - assert numbers.format_decimal(1.23004, '@@##', locale='sv') == '1,23' - assert numbers.format_decimal(1230.04, '@@,@@', locale='en_US') == '12,30' - assert numbers.format_decimal(123.41, '@@##', locale='en_US') == '123.4' - assert numbers.format_decimal(1, '@@', locale='en_US') == '1.0' - assert numbers.format_decimal(0, '@', locale='en_US') == '0' - assert numbers.format_decimal(0.1, '@', locale='en_US') == '0.1' - assert numbers.format_decimal(0.1, '@#', locale='en_US') == '0.1' - assert numbers.format_decimal(0.1, '@@', locale='en_US') == '0.10' - - def test_decimals(self): - """Test significant digits patterns""" - assert numbers.format_decimal(decimal.Decimal('1.2345'), '#.00', locale='en_US') == '1.23' - assert numbers.format_decimal(decimal.Decimal('1.2345000'), '#.00', locale='en_US') == '1.23' - assert numbers.format_decimal(decimal.Decimal('1.2345000'), '@@', locale='en_US') == '1.2' - assert numbers.format_decimal(decimal.Decimal('12345678901234567890.12345'), '#.00', locale='en_US') == '12345678901234567890.12' - - def test_scientific_notation(self): - assert numbers.format_scientific(0.1, '#E0', locale='en_US') == '1E-1' - assert numbers.format_scientific(0.01, '#E0', locale='en_US') == '1E-2' - assert numbers.format_scientific(10, '#E0', locale='en_US') == '1E1' - assert numbers.format_scientific(1234, '0.###E0', locale='en_US') == '1.234E3' - assert numbers.format_scientific(1234, '0.#E0', locale='en_US') == '1.2E3' - # Exponent grouping - assert numbers.format_scientific(12345, '##0.####E0', locale='en_US') == '1.2345E4' - # Minimum number of int digits - assert numbers.format_scientific(12345, '00.###E0', locale='en_US') == '12.345E3' - assert numbers.format_scientific(-12345.6, '00.###E0', locale='en_US') == '-12.346E3' - assert numbers.format_scientific(-0.01234, '00.###E0', locale='en_US') == '-12.34E-3' - # Custom pattern suffix - assert 
numbers.format_scientific(123.45, '#.##E0 m/s', locale='en_US') == '1.23E2 m/s' - # Exponent patterns - assert numbers.format_scientific(123.45, '#.##E00 m/s', locale='en_US') == '1.23E02 m/s' - assert numbers.format_scientific(0.012345, '#.##E00 m/s', locale='en_US') == '1.23E-02 m/s' - assert numbers.format_scientific(decimal.Decimal('12345'), '#.##E+00 m/s', locale='en_US') == '1.23E+04 m/s' - # 0 (see ticket #99) - assert numbers.format_scientific(0, '#E0', locale='en_US') == '0E0' - - def test_formatting_of_very_small_decimals(self): - # previously formatting very small decimals could lead to a type error - # because the Decimal->string conversion was too simple (see #214) - number = decimal.Decimal("7E-7") - assert numbers.format_decimal(number, format="@@@", locale='en_US') == '0.000000700' - - def test_nan_and_infinity(self): - assert numbers.format_decimal(decimal.Decimal('Infinity'), locale='en_US') == '∞' - assert numbers.format_decimal(decimal.Decimal('-Infinity'), locale='en_US') == '-∞' - assert numbers.format_decimal(decimal.Decimal('NaN'), locale='en_US') == 'NaN' - assert numbers.format_compact_decimal(decimal.Decimal('Infinity'), locale='en_US', format_type="short") == '∞' - assert numbers.format_compact_decimal(decimal.Decimal('-Infinity'), locale='en_US', format_type="short") == '-∞' - assert numbers.format_compact_decimal(decimal.Decimal('NaN'), locale='en_US', format_type="short") == 'NaN' - assert numbers.format_currency(decimal.Decimal('Infinity'), 'USD', locale='en_US') == '$∞' - assert numbers.format_currency(decimal.Decimal('-Infinity'), 'USD', locale='en_US') == '-$∞' - - def test_group_separator(self): - assert numbers.format_decimal(29567.12, locale='en_US', group_separator=False) == '29567.12' - assert numbers.format_decimal(29567.12, locale='fr_CA', group_separator=False) == '29567,12' - assert numbers.format_decimal(29567.12, locale='pt_BR', group_separator=False) == '29567,12' - assert numbers.format_currency(1099.98, 'USD', 
locale='en_US', group_separator=False) == '$1099.98' - assert numbers.format_currency(101299.98, 'EUR', locale='fr_CA', group_separator=False) == '101299,98\xa0€' - assert numbers.format_currency(101299.98, 'EUR', locale='en_US', group_separator=False, format_type='name') == '101299.98 euros' - assert numbers.format_percent(251234.1234, locale='sv_SE', group_separator=False) == '25123412\xa0%' - - assert numbers.format_decimal(29567.12, locale='en_US', group_separator=True) == '29,567.12' - assert numbers.format_decimal(29567.12, locale='fr_CA', group_separator=True) == '29\xa0567,12' - assert numbers.format_decimal(29567.12, locale='pt_BR', group_separator=True) == '29.567,12' - assert numbers.format_currency(1099.98, 'USD', locale='en_US', group_separator=True) == '$1,099.98' - assert numbers.format_currency(101299.98, 'EUR', locale='fr_CA', group_separator=True) == '101\xa0299,98\xa0€' - assert numbers.format_currency(101299.98, 'EUR', locale='en_US', group_separator=True, format_type='name') == '101,299.98 euros' - assert numbers.format_percent(251234.1234, locale='sv_SE', group_separator=True) == '25\xa0123\xa0412\xa0%' - - def test_compact(self): - assert numbers.format_compact_decimal(1, locale='en_US', format_type="short") == '1' - assert numbers.format_compact_decimal(999, locale='en_US', format_type="short") == '999' - assert numbers.format_compact_decimal(1000, locale='en_US', format_type="short") == '1K' - assert numbers.format_compact_decimal(9000, locale='en_US', format_type="short") == '9K' - assert numbers.format_compact_decimal(9123, locale='en_US', format_type="short", fraction_digits=2) == '9.12K' - assert numbers.format_compact_decimal(10000, locale='en_US', format_type="short") == '10K' - assert numbers.format_compact_decimal(10000, locale='en_US', format_type="short", fraction_digits=2) == '10K' - assert numbers.format_compact_decimal(1000000, locale='en_US', format_type="short") == '1M' - assert numbers.format_compact_decimal(9000999, 
locale='en_US', format_type="short") == '9M' - assert numbers.format_compact_decimal(9000900099, locale='en_US', format_type="short", fraction_digits=5) == '9.0009B' - assert numbers.format_compact_decimal(1, locale='en_US', format_type="long") == '1' - assert numbers.format_compact_decimal(999, locale='en_US', format_type="long") == '999' - assert numbers.format_compact_decimal(1000, locale='en_US', format_type="long") == '1 thousand' - assert numbers.format_compact_decimal(9000, locale='en_US', format_type="long") == '9 thousand' - assert numbers.format_compact_decimal(9000, locale='en_US', format_type="long", fraction_digits=2) == '9 thousand' - assert numbers.format_compact_decimal(10000, locale='en_US', format_type="long") == '10 thousand' - assert numbers.format_compact_decimal(10000, locale='en_US', format_type="long", fraction_digits=2) == '10 thousand' - assert numbers.format_compact_decimal(1000000, locale='en_US', format_type="long") == '1 million' - assert numbers.format_compact_decimal(9999999, locale='en_US', format_type="long") == '10 million' - assert numbers.format_compact_decimal(9999999999, locale='en_US', format_type="long", fraction_digits=5) == '10 billion' - assert numbers.format_compact_decimal(1, locale='ja_JP', format_type="short") == '1' - assert numbers.format_compact_decimal(999, locale='ja_JP', format_type="short") == '999' - assert numbers.format_compact_decimal(1000, locale='ja_JP', format_type="short") == '1000' - assert numbers.format_compact_decimal(9123, locale='ja_JP', format_type="short") == '9123' - assert numbers.format_compact_decimal(10000, locale='ja_JP', format_type="short") == '1万' - assert numbers.format_compact_decimal(1234567, locale='ja_JP', format_type="short") == '123万' - assert numbers.format_compact_decimal(-1, locale='en_US', format_type="short") == '-1' - assert numbers.format_compact_decimal(-1234, locale='en_US', format_type="short", fraction_digits=2) == '-1.23K' - assert 
numbers.format_compact_decimal(-123456789, format_type='short', locale='en_US') == '-123M' - assert numbers.format_compact_decimal(-123456789, format_type='long', locale='en_US') == '-123 million' - assert numbers.format_compact_decimal(2345678, locale='mk', format_type='long') == '2 милиони' - assert numbers.format_compact_decimal(21000000, locale='mk', format_type='long') == '21 милион' - assert numbers.format_compact_decimal(21345, locale="gv", format_type="short") == '21K' - assert numbers.format_compact_decimal(1000, locale='it', format_type='long') == 'mille' - assert numbers.format_compact_decimal(1234, locale='it', format_type='long') == '1 mila' - assert numbers.format_compact_decimal(1000, locale='fr', format_type='long') == 'mille' - assert numbers.format_compact_decimal(1234, locale='fr', format_type='long') == '1 millier' - assert numbers.format_compact_decimal( - 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default', - ) == '12٫34\xa0ألف' - assert numbers.format_compact_decimal( - 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='latn', - ) == '12.34\xa0ألف' - - -class NumberParsingTestCase(unittest.TestCase): - - def test_can_parse_decimals(self): - assert decimal.Decimal('1099.98') == numbers.parse_decimal('1,099.98', locale='en_US') - assert decimal.Decimal('1099.98') == numbers.parse_decimal('1.099,98', locale='de') - assert decimal.Decimal('1099.98') == numbers.parse_decimal('1,099.98', locale='ar', numbering_system="default") - assert decimal.Decimal('1099.98') == numbers.parse_decimal('1٬099٫98', locale='ar_EG', numbering_system="default") - with pytest.raises(numbers.NumberFormatError): - numbers.parse_decimal('2,109,998', locale='de') - with pytest.raises(numbers.UnsupportedNumberingSystemError): - numbers.parse_decimal('2,109,998', locale='de', numbering_system="unknown") - - def test_parse_decimal_strict_mode(self): - # Numbers with a misplaced grouping symbol should be 
rejected - with pytest.raises(numbers.NumberFormatError) as info: - numbers.parse_decimal('11.11', locale='de', strict=True) - assert info.value.suggestions == ['1.111', '11,11'] - # Numbers with two misplaced grouping symbols should be rejected - with pytest.raises(numbers.NumberFormatError) as info: - numbers.parse_decimal('80.00.00', locale='de', strict=True) - assert info.value.suggestions == ['800.000'] - # Partially grouped numbers should be rejected - with pytest.raises(numbers.NumberFormatError) as info: - numbers.parse_decimal('2000,000', locale='en_US', strict=True) - assert info.value.suggestions == ['2,000,000', '2,000'] - # Numbers with duplicate grouping symbols should be rejected - with pytest.raises(numbers.NumberFormatError) as info: - numbers.parse_decimal('0,,000', locale='en_US', strict=True) - assert info.value.suggestions == ['0'] - # Return only suggestion for 0 on strict - with pytest.raises(numbers.NumberFormatError) as info: - numbers.parse_decimal('0.00', locale='de', strict=True) - assert info.value.suggestions == ['0'] - # Properly formatted numbers should be accepted - assert str(numbers.parse_decimal('1.001', locale='de', strict=True)) == '1001' - # Trailing zeroes should be accepted - assert str(numbers.parse_decimal('3.00', locale='en_US', strict=True)) == '3.00' - # Numbers with a grouping symbol and no trailing zeroes should be accepted - assert str(numbers.parse_decimal('3,400.6', locale='en_US', strict=True)) == '3400.6' - # Numbers with a grouping symbol and trailing zeroes (not all zeroes after decimal) should be accepted - assert str(numbers.parse_decimal('3,400.60', locale='en_US', strict=True)) == '3400.60' - # Numbers with a grouping symbol and trailing zeroes (all zeroes after decimal) should be accepted - assert str(numbers.parse_decimal('3,400.00', locale='en_US', strict=True)) == '3400.00' - assert str(numbers.parse_decimal('3,400.0000', locale='en_US', strict=True)) == '3400.0000' - # Numbers with a grouping symbol 
and no decimal part should be accepted - assert str(numbers.parse_decimal('3,800', locale='en_US', strict=True)) == '3800' - # Numbers without any grouping symbol should be accepted - assert str(numbers.parse_decimal('2000.1', locale='en_US', strict=True)) == '2000.1' - # Numbers without any grouping symbol and no decimal should be accepted - assert str(numbers.parse_decimal('2580', locale='en_US', strict=True)) == '2580' - # High precision numbers should be accepted - assert str(numbers.parse_decimal('5,000001', locale='fr', strict=True)) == '5.000001' - - def test_list_currencies(): assert isinstance(list_currencies(), set) assert list_currencies().issuperset(['BAD', 'BAM', 'KRO']) diff --git a/tests/test_numbers_format_decimal.py b/tests/test_numbers_format_decimal.py new file mode 100644 index 000000000..356181b69 --- /dev/null +++ b/tests/test_numbers_format_decimal.py @@ -0,0 +1,177 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ + +import decimal + +from babel import numbers + + +def test_patterns(): + assert numbers.format_decimal(12345, '##0', locale='en_US') == '12345' + assert numbers.format_decimal(6.5, '0.00', locale='sv') == '6,50' + assert numbers.format_decimal((10.0 ** 20), '#.00', locale='en_US') == '100000000000000000000.00' + # regression test for #183, fraction digits were not correctly cut + # if the input was a float value and the value had more than 7 + # significant digits + assert numbers.format_decimal(12345678.051, '#,##0.00', locale='en_US') == '12,345,678.05' + + +def test_subpatterns(): + assert numbers.format_decimal((- 12345), '#,##0.##;-#', locale='en_US') == '-12,345' + assert numbers.format_decimal((- 12345), '#,##0.##;(#)', locale='en_US') == '(12,345)' + + +def test_default_rounding(): + """ + Testing Round-Half-Even (Banker's rounding) + + A '5' is rounded to the closest 'even' number + """ + assert numbers.format_decimal(5.5, '0', locale='sv') == '6' + assert numbers.format_decimal(6.5, '0', locale='sv') == '6' + assert numbers.format_decimal(1.2325, locale='sv') == '1,232' + assert numbers.format_decimal(1.2335, locale='sv') == '1,234' + + +def test_significant_digits(): + """Test significant digits patterns""" + assert numbers.format_decimal(123004, '@@', locale='en_US') == '120000' + assert numbers.format_decimal(1.12, '@', locale='sv') == '1' + assert numbers.format_decimal(1.1, '@@', locale='sv') == '1,1' + assert numbers.format_decimal(1.1, '@@@@@##', locale='sv') == '1,1000' + assert numbers.format_decimal(0.0001, '@@@', locale='sv') == '0,000100' + assert numbers.format_decimal(0.0001234, '@@@', locale='sv') == '0,000123' + assert numbers.format_decimal(0.0001234, '@@@#', locale='sv') == '0,0001234' + assert numbers.format_decimal(0.0001234, '@@@#', locale='sv') == '0,0001234' + assert numbers.format_decimal(0.12345, '@@@', locale='sv') == '0,123' + assert numbers.format_decimal(3.14159, '@@##', locale='sv') == '3,142' + assert 
numbers.format_decimal(1.23004, '@@##', locale='sv') == '1,23' + assert numbers.format_decimal(1230.04, '@@,@@', locale='en_US') == '12,30' + assert numbers.format_decimal(123.41, '@@##', locale='en_US') == '123.4' + assert numbers.format_decimal(1, '@@', locale='en_US') == '1.0' + assert numbers.format_decimal(0, '@', locale='en_US') == '0' + assert numbers.format_decimal(0.1, '@', locale='en_US') == '0.1' + assert numbers.format_decimal(0.1, '@#', locale='en_US') == '0.1' + assert numbers.format_decimal(0.1, '@@', locale='en_US') == '0.10' + + +def test_decimals(): + """Test significant digits patterns""" + assert numbers.format_decimal(decimal.Decimal('1.2345'), '#.00', locale='en_US') == '1.23' + assert numbers.format_decimal(decimal.Decimal('1.2345000'), '#.00', locale='en_US') == '1.23' + assert numbers.format_decimal(decimal.Decimal('1.2345000'), '@@', locale='en_US') == '1.2' + assert numbers.format_decimal(decimal.Decimal('12345678901234567890.12345'), '#.00', locale='en_US') == '12345678901234567890.12' + + +def test_scientific_notation(): + assert numbers.format_scientific(0.1, '#E0', locale='en_US') == '1E-1' + assert numbers.format_scientific(0.01, '#E0', locale='en_US') == '1E-2' + assert numbers.format_scientific(10, '#E0', locale='en_US') == '1E1' + assert numbers.format_scientific(1234, '0.###E0', locale='en_US') == '1.234E3' + assert numbers.format_scientific(1234, '0.#E0', locale='en_US') == '1.2E3' + # Exponent grouping + assert numbers.format_scientific(12345, '##0.####E0', locale='en_US') == '1.2345E4' + # Minimum number of int digits + assert numbers.format_scientific(12345, '00.###E0', locale='en_US') == '12.345E3' + assert numbers.format_scientific(-12345.6, '00.###E0', locale='en_US') == '-12.346E3' + assert numbers.format_scientific(-0.01234, '00.###E0', locale='en_US') == '-12.34E-3' + # Custom pattern suffix + assert numbers.format_scientific(123.45, '#.##E0 m/s', locale='en_US') == '1.23E2 m/s' + # Exponent patterns + assert 
numbers.format_scientific(123.45, '#.##E00 m/s', locale='en_US') == '1.23E02 m/s' + assert numbers.format_scientific(0.012345, '#.##E00 m/s', locale='en_US') == '1.23E-02 m/s' + assert numbers.format_scientific(decimal.Decimal('12345'), '#.##E+00 m/s', locale='en_US') == '1.23E+04 m/s' + # 0 (see ticket #99) + assert numbers.format_scientific(0, '#E0', locale='en_US') == '0E0' + + +def test_formatting_of_very_small_decimals(): + # previously formatting very small decimals could lead to a type error + # because the Decimal->string conversion was too simple (see #214) + number = decimal.Decimal("7E-7") + assert numbers.format_decimal(number, format="@@@", locale='en_US') == '0.000000700' + + +def test_nan_and_infinity(): + assert numbers.format_decimal(decimal.Decimal('Infinity'), locale='en_US') == '∞' + assert numbers.format_decimal(decimal.Decimal('-Infinity'), locale='en_US') == '-∞' + assert numbers.format_decimal(decimal.Decimal('NaN'), locale='en_US') == 'NaN' + assert numbers.format_compact_decimal(decimal.Decimal('Infinity'), locale='en_US', format_type="short") == '∞' + assert numbers.format_compact_decimal(decimal.Decimal('-Infinity'), locale='en_US', format_type="short") == '-∞' + assert numbers.format_compact_decimal(decimal.Decimal('NaN'), locale='en_US', format_type="short") == 'NaN' + assert numbers.format_currency(decimal.Decimal('Infinity'), 'USD', locale='en_US') == '$∞' + assert numbers.format_currency(decimal.Decimal('-Infinity'), 'USD', locale='en_US') == '-$∞' + + +def test_group_separator(): + assert numbers.format_decimal(29567.12, locale='en_US', group_separator=False) == '29567.12' + assert numbers.format_decimal(29567.12, locale='fr_CA', group_separator=False) == '29567,12' + assert numbers.format_decimal(29567.12, locale='pt_BR', group_separator=False) == '29567,12' + assert numbers.format_currency(1099.98, 'USD', locale='en_US', group_separator=False) == '$1099.98' + assert numbers.format_currency(101299.98, 'EUR', locale='fr_CA', 
group_separator=False) == '101299,98\xa0€' + assert numbers.format_currency(101299.98, 'EUR', locale='en_US', group_separator=False, format_type='name') == '101299.98 euros' + assert numbers.format_percent(251234.1234, locale='sv_SE', group_separator=False) == '25123412\xa0%' + + assert numbers.format_decimal(29567.12, locale='en_US', group_separator=True) == '29,567.12' + assert numbers.format_decimal(29567.12, locale='fr_CA', group_separator=True) == '29\xa0567,12' + assert numbers.format_decimal(29567.12, locale='pt_BR', group_separator=True) == '29.567,12' + assert numbers.format_currency(1099.98, 'USD', locale='en_US', group_separator=True) == '$1,099.98' + assert numbers.format_currency(101299.98, 'EUR', locale='fr_CA', group_separator=True) == '101\xa0299,98\xa0€' + assert numbers.format_currency(101299.98, 'EUR', locale='en_US', group_separator=True, format_type='name') == '101,299.98 euros' + assert numbers.format_percent(251234.1234, locale='sv_SE', group_separator=True) == '25\xa0123\xa0412\xa0%' + + +def test_compact(): + assert numbers.format_compact_decimal(1, locale='en_US', format_type="short") == '1' + assert numbers.format_compact_decimal(999, locale='en_US', format_type="short") == '999' + assert numbers.format_compact_decimal(1000, locale='en_US', format_type="short") == '1K' + assert numbers.format_compact_decimal(9000, locale='en_US', format_type="short") == '9K' + assert numbers.format_compact_decimal(9123, locale='en_US', format_type="short", fraction_digits=2) == '9.12K' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="short") == '10K' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="short", fraction_digits=2) == '10K' + assert numbers.format_compact_decimal(1000000, locale='en_US', format_type="short") == '1M' + assert numbers.format_compact_decimal(9000999, locale='en_US', format_type="short") == '9M' + assert numbers.format_compact_decimal(9000900099, locale='en_US', 
format_type="short", fraction_digits=5) == '9.0009B' + assert numbers.format_compact_decimal(1, locale='en_US', format_type="long") == '1' + assert numbers.format_compact_decimal(999, locale='en_US', format_type="long") == '999' + assert numbers.format_compact_decimal(1000, locale='en_US', format_type="long") == '1 thousand' + assert numbers.format_compact_decimal(9000, locale='en_US', format_type="long") == '9 thousand' + assert numbers.format_compact_decimal(9000, locale='en_US', format_type="long", fraction_digits=2) == '9 thousand' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="long") == '10 thousand' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="long", fraction_digits=2) == '10 thousand' + assert numbers.format_compact_decimal(1000000, locale='en_US', format_type="long") == '1 million' + assert numbers.format_compact_decimal(9999999, locale='en_US', format_type="long") == '10 million' + assert numbers.format_compact_decimal(9999999999, locale='en_US', format_type="long", fraction_digits=5) == '10 billion' + assert numbers.format_compact_decimal(1, locale='ja_JP', format_type="short") == '1' + assert numbers.format_compact_decimal(999, locale='ja_JP', format_type="short") == '999' + assert numbers.format_compact_decimal(1000, locale='ja_JP', format_type="short") == '1000' + assert numbers.format_compact_decimal(9123, locale='ja_JP', format_type="short") == '9123' + assert numbers.format_compact_decimal(10000, locale='ja_JP', format_type="short") == '1万' + assert numbers.format_compact_decimal(1234567, locale='ja_JP', format_type="short") == '123万' + assert numbers.format_compact_decimal(-1, locale='en_US', format_type="short") == '-1' + assert numbers.format_compact_decimal(-1234, locale='en_US', format_type="short", fraction_digits=2) == '-1.23K' + assert numbers.format_compact_decimal(-123456789, format_type='short', locale='en_US') == '-123M' + assert numbers.format_compact_decimal(-123456789, 
format_type='long', locale='en_US') == '-123 million' + assert numbers.format_compact_decimal(2345678, locale='mk', format_type='long') == '2 милиони' + assert numbers.format_compact_decimal(21000000, locale='mk', format_type='long') == '21 милион' + assert numbers.format_compact_decimal(21345, locale="gv", format_type="short") == '21K' + assert numbers.format_compact_decimal(1000, locale='it', format_type='long') == 'mille' + assert numbers.format_compact_decimal(1234, locale='it', format_type='long') == '1 mila' + assert numbers.format_compact_decimal(1000, locale='fr', format_type='long') == 'mille' + assert numbers.format_compact_decimal(1234, locale='fr', format_type='long') == '1 millier' + assert numbers.format_compact_decimal( + 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default', + ) == '12٫34\xa0ألف' + assert numbers.format_compact_decimal( + 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='latn', + ) == '12.34\xa0ألف' diff --git a/tests/test_numbers_parsing.py b/tests/test_numbers_parsing.py new file mode 100644 index 000000000..0b1d03cad --- /dev/null +++ b/tests/test_numbers_parsing.py @@ -0,0 +1,70 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +import decimal + +import pytest + +from babel import numbers + + +def test_can_parse_decimals(): + assert decimal.Decimal('1099.98') == numbers.parse_decimal('1,099.98', locale='en_US') + assert decimal.Decimal('1099.98') == numbers.parse_decimal('1.099,98', locale='de') + assert decimal.Decimal('1099.98') == numbers.parse_decimal('1,099.98', locale='ar', numbering_system="default") + assert decimal.Decimal('1099.98') == numbers.parse_decimal('1٬099٫98', locale='ar_EG', numbering_system="default") + with pytest.raises(numbers.NumberFormatError): + numbers.parse_decimal('2,109,998', locale='de') + with pytest.raises(numbers.UnsupportedNumberingSystemError): + numbers.parse_decimal('2,109,998', locale='de', numbering_system="unknown") + + +def test_parse_decimal_strict_mode(): + # Numbers with a misplaced grouping symbol should be rejected + with pytest.raises(numbers.NumberFormatError) as info: + numbers.parse_decimal('11.11', locale='de', strict=True) + assert info.value.suggestions == ['1.111', '11,11'] + # Numbers with two misplaced grouping symbols should be rejected + with pytest.raises(numbers.NumberFormatError) as info: + numbers.parse_decimal('80.00.00', locale='de', strict=True) + assert info.value.suggestions == ['800.000'] + # Partially grouped numbers should be rejected + with pytest.raises(numbers.NumberFormatError) as info: + numbers.parse_decimal('2000,000', locale='en_US', strict=True) + assert info.value.suggestions == ['2,000,000', '2,000'] + # Numbers with duplicate grouping symbols should be rejected + with pytest.raises(numbers.NumberFormatError) as info: + numbers.parse_decimal('0,,000', locale='en_US', strict=True) + assert info.value.suggestions == ['0'] + # Return only suggestion for 0 on strict + with pytest.raises(numbers.NumberFormatError) as info: + numbers.parse_decimal('0.00', locale='de', strict=True) + assert info.value.suggestions == ['0'] + # Properly formatted numbers should be accepted + assert 
str(numbers.parse_decimal('1.001', locale='de', strict=True)) == '1001' + # Trailing zeroes should be accepted + assert str(numbers.parse_decimal('3.00', locale='en_US', strict=True)) == '3.00' + # Numbers with a grouping symbol and no trailing zeroes should be accepted + assert str(numbers.parse_decimal('3,400.6', locale='en_US', strict=True)) == '3400.6' + # Numbers with a grouping symbol and trailing zeroes (not all zeroes after decimal) should be accepted + assert str(numbers.parse_decimal('3,400.60', locale='en_US', strict=True)) == '3400.60' + # Numbers with a grouping symbol and trailing zeroes (all zeroes after decimal) should be accepted + assert str(numbers.parse_decimal('3,400.00', locale='en_US', strict=True)) == '3400.00' + assert str(numbers.parse_decimal('3,400.0000', locale='en_US', strict=True)) == '3400.0000' + # Numbers with a grouping symbol and no decimal part should be accepted + assert str(numbers.parse_decimal('3,800', locale='en_US', strict=True)) == '3800' + # Numbers without any grouping symbol should be accepted + assert str(numbers.parse_decimal('2000.1', locale='en_US', strict=True)) == '2000.1' + # Numbers without any grouping symbol and no decimal should be accepted + assert str(numbers.parse_decimal('2580', locale='en_US', strict=True)) == '2580' + # High precision numbers should be accepted + assert str(numbers.parse_decimal('5,000001', locale='fr', strict=True)) == '5.000001' diff --git a/tests/test_plural.py b/tests/test_plural.py index 83f881b23..bde356bc6 100644 --- a/tests/test_plural.py +++ b/tests/test_plural.py @@ -10,7 +10,6 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at https://github.com/python-babel/babel/commits/master/. 
import decimal -import unittest import pytest @@ -198,76 +197,24 @@ def test_tokenize_malformed(rule_text): plural.tokenize_rule(rule_text) -class TestNextTokenTestCase(unittest.TestCase): +def test_next_token_empty(): + assert not plural.test_next_token([], '') - def test_empty(self): - assert not plural.test_next_token([], '') - def test_type_ok_and_no_value(self): - assert plural.test_next_token([('word', 'and')], 'word') +def test_next_token_type_ok_and_no_value(): + assert plural.test_next_token([('word', 'and')], 'word') - def test_type_ok_and_not_value(self): - assert not plural.test_next_token([('word', 'and')], 'word', 'or') - def test_type_ok_and_value_ok(self): - assert plural.test_next_token([('word', 'and')], 'word', 'and') +def test_next_token_type_ok_and_not_value(): + assert not plural.test_next_token([('word', 'and')], 'word', 'or') - def test_type_not_ok_and_value_ok(self): - assert not plural.test_next_token([('abc', 'and')], 'word', 'and') +def test_next_token_type_ok_and_value_ok(): + assert plural.test_next_token([('word', 'and')], 'word', 'and') -def make_range_list(*values): - ranges = [] - for v in values: - if isinstance(v, int): - val_node = plural.value_node(v) - ranges.append((val_node, val_node)) - else: - assert isinstance(v, tuple) - ranges.append((plural.value_node(v[0]), - plural.value_node(v[1]))) - return plural.range_list_node(ranges) - -class PluralRuleParserTestCase(unittest.TestCase): - - def setUp(self): - self.n = plural.ident_node('n') - - def n_eq(self, v): - return 'relation', ('in', self.n, make_range_list(v)) - - def test_error_when_unexpected_end(self): - with pytest.raises(plural.RuleError): - plural._Parser('n =') - - def test_eq_relation(self): - assert plural._Parser('n = 1').ast == self.n_eq(1) - - def test_in_range_relation(self): - assert plural._Parser('n = 2..4').ast == \ - ('relation', ('in', self.n, make_range_list((2, 4)))) - - def test_negate(self): - assert plural._Parser('n != 1').ast == 
plural.negate(self.n_eq(1)) - - def test_or(self): - assert plural._Parser('n = 1 or n = 2').ast ==\ - ('or', (self.n_eq(1), self.n_eq(2))) - - def test_and(self): - assert plural._Parser('n = 1 and n = 2').ast ==\ - ('and', (self.n_eq(1), self.n_eq(2))) - - def test_or_and(self): - assert plural._Parser('n = 0 or n != 1 and n % 100 = 1..19').ast == \ - ('or', (self.n_eq(0), - ('and', (plural.negate(self.n_eq(1)), - ('relation', ('in', - ('mod', (self.n, - plural.value_node(100))), - (make_range_list((1, 19))))))), - )) +def test_next_token_type_not_ok_and_value_ok(): + assert not plural.test_next_token([('abc', 'and')], 'word', 'and') EXTRACT_OPERANDS_TESTS = ( diff --git a/tests/test_plural_rule_parser.py b/tests/test_plural_rule_parser.py new file mode 100644 index 000000000..32a690148 --- /dev/null +++ b/tests/test_plural_rule_parser.py @@ -0,0 +1,84 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +import pytest + +from babel import plural + +N_NODE = plural.ident_node('n') + + +def make_range_list(*values): + ranges = [] + for v in values: + if isinstance(v, int): + val_node = plural.value_node(v) + ranges.append((val_node, val_node)) + else: + assert isinstance(v, tuple) + ranges.append((plural.value_node(v[0]), plural.value_node(v[1]))) + return plural.range_list_node(ranges) + + +def n_eq(v): + return 'relation', ('in', N_NODE, make_range_list(v)) + + +def test_error_when_unexpected_end(): + with pytest.raises(plural.RuleError): + plural._Parser('n =') + + +def test_eq_relation(): + assert plural._Parser('n = 1').ast == n_eq(1) + + +def test_in_range_relation(): + assert plural._Parser('n = 2..4').ast == ( + 'relation', + ('in', N_NODE, make_range_list((2, 4))), + ) + + +def test_negate(): + assert plural._Parser('n != 1').ast == plural.negate(n_eq(1)) + + +def test_or(): + assert plural._Parser('n = 1 or n = 2').ast == ('or', (n_eq(1), n_eq(2))) + + +def test_and(): + assert plural._Parser('n = 1 and n = 2').ast == ('and', (n_eq(1), n_eq(2))) + + +def test_or_and(): + assert plural._Parser('n = 0 or n != 1 and n % 100 = 1..19').ast == ( + 'or', + ( + n_eq(0), + ( + 'and', + ( + plural.negate(n_eq(1)), + ( + 'relation', + ( + 'in', + ('mod', (N_NODE, plural.value_node(100))), + (make_range_list((1, 19))), + ), + ), + ), + ), + ), + ) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index c36151e7e..316979885 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -17,6 +17,7 @@ 1.2 - 1.0, # Inaccurate float 10, # Plain old integer 0, # Zero + 1000, # A thousand (previously raised KeyError in the nl locale for compact currencies) ) @@ -46,6 +47,8 @@ def test_smoke_numbers(locale): assert numbers.format_decimal(number, locale=locale, numbering_system="default") assert numbers.format_currency(number, "EUR", locale=locale) assert numbers.format_currency(number, "EUR", locale=locale, numbering_system="default") + assert 
numbers.format_compact_currency(number, "EUR", locale=locale) + assert numbers.format_compact_currency(number, "EUR", locale=locale, numbering_system="default") assert numbers.format_scientific(number, locale=locale) assert numbers.format_scientific(number, locale=locale, numbering_system="default") assert numbers.format_percent(number / 100, locale=locale) diff --git a/tests/test_util.py b/tests/test_util.py index 1b464e079..a153dd6ff 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -12,7 +12,6 @@ import __future__ -import unittest from io import BytesIO import pytest @@ -46,16 +45,16 @@ def test_pathmatch(): assert not util.pathmatch('./foo/**.py', 'blah/foo/bar/baz.py') -class FixedOffsetTimezoneTestCase(unittest.TestCase): +def test_fixed_zone_negative_offset(): + assert util.FixedOffsetTimezone(-60).zone == 'Etc/GMT-60' - def test_zone_negative_offset(self): - assert util.FixedOffsetTimezone(-60).zone == 'Etc/GMT-60' - def test_zone_zero_offset(self): - assert util.FixedOffsetTimezone(0).zone == 'Etc/GMT+0' +def test_fixed_zone_zero_offset(): + assert util.FixedOffsetTimezone(0).zone == 'Etc/GMT+0' - def test_zone_positive_offset(self): - assert util.FixedOffsetTimezone(330).zone == 'Etc/GMT+330' + +def test_fixed_zone_positive_offset(): + assert util.FixedOffsetTimezone(330).zone == 'Etc/GMT+330' def parse_encoding(s): diff --git a/tox.ini b/tox.ini index 8aaa8a3e3..a48dca512 100644 --- a/tox.ini +++ b/tox.ini @@ -32,3 +32,4 @@ python = 3.11: py311 3.12: py312 3.13: py313 + 3.14: py314