diff --git a/.coveragerc b/.coveragerc index a3d8ae65e..32128ff86 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,4 +2,5 @@ exclude_lines = NotImplemented pragma: no cover - warnings.warn \ No newline at end of file + warnings.warn + if TYPE_CHECKING: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f8764b580..a35a1b4f3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,33 +11,40 @@ on: - '*-maint' jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: pre-commit/action@v3.0.0 test: runs-on: ${{ matrix.os }} strategy: matrix: os: ["ubuntu-20.04", "windows-2022", "macos-11"] - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.7"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.7"] env: BABEL_CLDR_NO_DOWNLOAD_PROGRESS: "1" BABEL_CLDR_QUIET: "1" steps: - uses: actions/checkout@v3 - - uses: actions/cache@v2 + - uses: actions/cache@v3 with: path: cldr key: cldr-${{ hashFiles('scripts/*cldr*') }} - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + cache: "pip" + cache-dependency-path: "**/setup.py" - name: Install dependencies run: | python -m pip install --upgrade pip setuptools wheel - python -m pip install tox tox-gh-actions==2.1.0 + python -m pip install 'tox~=4.0' 'tox-gh-actions~=3.0' - name: Run test via Tox run: tox --skip-missing-interpreters env: COVERAGE_XML_PATH: ${{ runner.temp }} - - uses: codecov/codecov-action@v2 + - uses: codecov/codecov-action@v3 with: directory: ${{ runner.temp }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0a345cee9..d1935c006 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,24 @@ -- repo: https://github.com/pre-commit/pre-commit-hooks - sha: 97b88d9610bcc03982ddac33caba98bb2b751f5f +repos: + - repo: 
https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.247 hooks: - - id: autopep8-wrapper - exclude: (docs/conf.py|tests/messages/data/) - - id: check-added-large-files - - id: check-docstring-first + - id: ruff + args: + - --fix + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-added-large-files + - id: check-docstring-first exclude: (docs/conf.py) - - id: check-json - - id: check-yaml - - id: debug-statements - - id: end-of-file-fixer - - id: flake8 - exclude: (docs/conf.py|babel/messages/__init__.py|babel/__init__.py|tests/messages/data|scripts/import_cldr.py) - - id: name-tests-test - args: ['--django'] + - id: check-json + - id: check-yaml + - id: debug-statements + exclude: (tests/messages/data/) + - id: end-of-file-fixer + exclude: (tests/messages/data/) + - id: name-tests-test + args: [ '--django' ] exclude: (tests/messages/data/) - - id: requirements-txt-fixer - - id: trailing-whitespace + - id: requirements-txt-fixer + - id: trailing-whitespace diff --git a/.readthedocs.yml b/.readthedocs.yml index a4a09ac65..cc83f360b 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,10 +3,17 @@ version: 2 build: - os: ubuntu-20.04 + os: ubuntu-22.04 tools: - python: "3.9" - + python: "3.11" + jobs: + pre_build: + # Replace any Babel version something may have pulled in + # with the copy we're working on. We'll also need to build + # the data files at that point, or date formatting _within_ + # Sphinx will fail. + - pip install -e . 
+ - make import-cldr sphinx: configuration: docs/conf.py diff --git a/AUTHORS b/AUTHORS index 5b88f338e..0d2d12168 100644 --- a/AUTHORS +++ b/AUTHORS @@ -14,6 +14,7 @@ Babel is written and maintained by the Babel team and various contributors: - Isaac Jurado - Tobias Bieniek - Erick Wilder +- Jonah Lawrence - Michael Birtwell - Jonas Borgström - Kevin Deldycke @@ -33,6 +34,7 @@ Babel is written and maintained by the Babel team and various contributors: - Jennifer Wang - Lukas Balaga - sudheesh001 +- Jean Abou Samra - Niklas Hambüchen - Changaco - Xavier Fernandez @@ -47,8 +49,12 @@ Babel is written and maintained by the Babel team and various contributors: - Arturas Moskvinas - Leonardo Pistone - Hyunjun Kim +- Maciej Olko +- martin f. krafft +- DS/Charlie +- lilinjie +- Johannes Wilm - Eric L -- Jonah Lawrence - Przemyslaw Wegrzyn - Lukas Kahwe Smith - Lukas Juhrich diff --git a/CHANGES.rst b/CHANGES.rst index cb09f4f74..b5e4718eb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,42 @@ Babel Changelog =============== +Version 2.12.0 +-------------- + +Deprecations & breaking changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Python 3.6 is no longer supported (:gh:`919`) - Aarni Koskela +* The `get_next_timezone_transition` function is no more (:gh:`958`) - Aarni Koskela + +New features +~~~~~~~~~~~~ + +* CLDR: Babel now uses CLDR 42 (:gh:`951`) - Aarni Koskela +* Dates: `pytz` is now optional; Babel will prefer it but will use `zoneinfo` when available. (:gh:`940`) - @ds-cbo +* General: Babel now ships type annotations, thanks to Jonah Lawrence's work in multiple PRs. +* Locales: @modifiers are now retained when parsing locales (:gh:`947`) - martin f. krafft +* Messages: JavaScript template string expression extraction is now smarter. 
(:gh:`939`) - Johannes Wilm +* Numbers: NaN and Infinity are now better supported (:gh:`955`) - Jonah Lawrence +* Numbers: Short compact currency formats are now supported (:gh:`926`) - Jonah Lawrence +* Numbers: There's now a `Format.compact_decimal` utility function. (:gh:`921`) - Jonah Lawrence + +Bugfixes +~~~~~~~~ + +* Dates: The cache for parsed datetime patterns is now bounded (:gh:`967`) - Aarni Koskela +* Messages: Fuzzy candidate matching accuracy is improved (:gh:`970`) - Jean Abou Samra +* Numbers: Compact singular formats and patterns with no numbers work correctly (:gh:`930`, :gh:`932`) - Jonah Lawrence, Jun Omae + +Improvements & cleanup +~~~~~~~~~~~~~~~~~~~~~~ + +* Dates: `babel.dates.UTC` is now an alias for `datetime.timezone.utc` (:gh:`957`) - Aarni Koskela +* Dates: `babel.localtime` was slightly cleaned up. (:gh:`952`) - Aarni Koskela +* Documentation: Documentation was improved by Maciej Olko, Jonah Lawrence, lilinjie, and Aarni Koskela. +* Infrastructure: Babel is now being linted with pre-commit and ruff. - Aarni Koskela + Version 2.11.0 -------------- @@ -13,35 +49,35 @@ Upcoming deprecation Improvements ~~~~~~~~~~~~ -* Support for hex escapes in JavaScript string literals :gh:`#877` - Przemyslaw Wegrzyn -* Add support for formatting decimals in compact form :gh:`#909` - Jonah Lawrence -* Adapt parse_date to handle ISO dates in ASCII format :gh:`#842` - Eric L. -* Use `ast` instead of `eval` for Python string extraction :gh:`#915` - Aarni Koskela +* Support for hex escapes in JavaScript string literals :gh:`877` - Przemyslaw Wegrzyn +* Add support for formatting decimals in compact form :gh:`909` - Jonah Lawrence +* Adapt parse_date to handle ISO dates in ASCII format :gh:`842` - Eric L. +* Use `ast` instead of `eval` for Python string extraction :gh:`915` - Aarni Koskela * This also enables extraction from static f-strings. F-strings with expressions are silently ignored (but won't raise an error as they used to). 
Infrastructure ~~~~~~~~~~~~~~ -* Tests: Use regular asserts and ``pytest.raises()`` :gh:`#875` – Aarni Koskela -* Wheels are now built in GitHub Actions :gh:`#888` – Aarni Koskela -* Small improvements to the CLDR downloader script :gh:`#894` – Aarni Koskela -* Remove antiquated `__nonzero__` methods :gh:`#896` - Nikita Sobolev -* Remove superfluous `__unicode__` declarations :gh:`#905` - Lukas Juhrich -* Mark package compatible with Python 3.11 :gh:`#913` - Aarni Koskela -* Quiesce pytest warnings :gh:`#916` - Aarni Koskela +* Tests: Use regular asserts and ``pytest.raises()`` :gh:`875` – Aarni Koskela +* Wheels are now built in GitHub Actions :gh:`888` – Aarni Koskela +* Small improvements to the CLDR downloader script :gh:`894` – Aarni Koskela +* Remove antiquated `__nonzero__` methods :gh:`896` - Nikita Sobolev +* Remove superfluous `__unicode__` declarations :gh:`905` - Lukas Juhrich +* Mark package compatible with Python 3.11 :gh:`913` - Aarni Koskela +* Quiesce pytest warnings :gh:`916` - Aarni Koskela Bugfixes ~~~~~~~~ -* Use email.Message for pofile header parsing instead of the deprecated ``cgi.parse_header`` function. :gh:`#876` – Aarni Koskela -* Remove determining time zone via systemsetup on macOS :gh:`#914` - Aarni Koskela +* Use email.Message for pofile header parsing instead of the deprecated ``cgi.parse_header`` function. :gh:`876` – Aarni Koskela +* Remove determining time zone via systemsetup on macOS :gh:`914` - Aarni Koskela Documentation ~~~~~~~~~~~~~ -* Update Python versions in documentation :gh:`#898` - Raphael Nestler -* Align BSD-3 license with OSI template :gh:`#912` - Lukas Kahwe Smith +* Update Python versions in documentation :gh:`898` - Raphael Nestler +* Align BSD-3 license with OSI template :gh:`912` - Lukas Kahwe Smith Version 2.10.3 -------------- @@ -606,7 +642,7 @@ Version 1.0 - Explicitly sort instead of using sorted() and don't assume ordering (Jython compatibility). 
- Removed ValueError raising for string formatting message checkers if the - string does not contain any string formattings (:trac:`150`). + string does not contain any string formatting (:trac:`150`). - Fix Serbian plural forms (:trac:`213`). - Small speed improvement in format_date() (:trac:`216`). - Fix so frontend.CommandLineInterface.run does not accumulate logging @@ -683,7 +719,7 @@ Version 0.9.6 - Explicitly sort instead of using sorted() and don't assume ordering (Python 2.3 and Jython compatibility). - Removed ValueError raising for string formatting message checkers if the - string does not contain any string formattings (:trac:`150`). + string does not contain any string formatting (:trac:`150`). - Fix Serbian plural forms (:trac:`213`). - Small speed improvement in format_date() (:trac:`216`). - Fix number formatting for locales where CLDR specifies alt or draft diff --git a/babel/__init__.py b/babel/__init__.py index df190033c..225ec143c 100644 --- a/babel/__init__.py +++ b/babel/__init__.py @@ -16,8 +16,22 @@ :license: BSD, see LICENSE for more details. """ -from babel.core import UnknownLocaleError, Locale, default_locale, \ - negotiate_locale, parse_locale, get_locale_identifier +from babel.core import ( + Locale, + UnknownLocaleError, + default_locale, + get_locale_identifier, + negotiate_locale, + parse_locale, +) +__version__ = '2.12.0' -__version__ = '2.11.0' +__all__ = [ + 'Locale', + 'UnknownLocaleError', + 'default_locale', + 'get_locale_identifier', + 'negotiate_locale', + 'parse_locale', +] diff --git a/babel/core.py b/babel/core.py index 220cbaf0a..57a6b63b3 100644 --- a/babel/core.py +++ b/babel/core.py @@ -8,8 +8,12 @@ :license: BSD, see LICENSE for more details. 
""" -import pickle +from __future__ import annotations + import os +import pickle +from collections.abc import Iterable, Mapping +from typing import TYPE_CHECKING, Any from babel import localedata from babel.plural import PluralRule @@ -17,6 +21,27 @@ __all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale', 'parse_locale'] +if TYPE_CHECKING: + from typing_extensions import Literal, TypeAlias + + _GLOBAL_KEY: TypeAlias = Literal[ + "all_currencies", + "currency_fractions", + "language_aliases", + "likely_subtags", + "parent_exceptions", + "script_aliases", + "territory_aliases", + "territory_currencies", + "territory_languages", + "territory_zones", + "variant_aliases", + "windows_zone_mapping", + "zone_aliases", + "zone_territories", + ] + + _global_data: Mapping[_GLOBAL_KEY, Mapping[str, Any]] | None _global_data = None _default_plural_rule = PluralRule({}) @@ -31,7 +56,7 @@ def _raise_no_data_error(): 'installing the library.') -def get_global(key): +def get_global(key: _GLOBAL_KEY) -> Mapping[str, Any]: """Return the dictionary for the given key in the global data. The global data is stored in the ``babel/global.dat`` file and contains @@ -73,6 +98,7 @@ def get_global(key): _raise_no_data_error() with open(filename, 'rb') as fileobj: _global_data = pickle.load(fileobj) + assert _global_data is not None return _global_data.get(key, {}) @@ -93,12 +119,12 @@ class UnknownLocaleError(Exception): is available. """ - def __init__(self, identifier): + def __init__(self, identifier: str) -> None: """Create the exception. :param identifier: the identifier string of the unsupported locale """ - Exception.__init__(self, 'unknown locale %r' % identifier) + Exception.__init__(self, f"unknown locale {identifier!r}") #: The identifier of the locale that could not be found. self.identifier = identifier @@ -136,7 +162,14 @@ class Locale: For more information see :rfc:`3066`. 
""" - def __init__(self, language, territory=None, script=None, variant=None): + def __init__( + self, + language: str, + territory: str | None = None, + script: str | None = None, + variant: str | None = None, + modifier: str | None = None, + ) -> None: """Initialize the locale object from the given identifier components. >>> locale = Locale('en', 'US') @@ -149,6 +182,7 @@ def __init__(self, language, territory=None, script=None, variant=None): :param territory: the territory (country or region) code :param script: the script code :param variant: the variant code + :param modifier: a modifier (following the '@' symbol, sometimes called '@variant') :raise `UnknownLocaleError`: if no locale data is available for the requested locale """ @@ -160,14 +194,17 @@ def __init__(self, language, territory=None, script=None, variant=None): self.script = script #: the variant code self.variant = variant + #: the modifier + self.modifier = modifier self.__data = None identifier = str(self) - if not localedata.exists(identifier): + identifier_without_modifier = identifier.partition('@')[0] + if not localedata.exists(identifier_without_modifier): raise UnknownLocaleError(identifier) @classmethod - def default(cls, category=None, aliases=LOCALE_ALIASES): + def default(cls, category: str | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES) -> Locale: """Return the system default locale for the specified category. >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES']: @@ -192,7 +229,13 @@ def default(cls, category=None, aliases=LOCALE_ALIASES): return cls.parse(locale_string) @classmethod - def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES): + def negotiate( + cls, + preferred: Iterable[str], + available: Iterable[str], + sep: str = '_', + aliases: Mapping[str, str] = LOCALE_ALIASES, + ) -> Locale | None: """Find the best match between available and requested locale strings. 
>>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT']) @@ -202,13 +245,13 @@ def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES): >>> Locale.negotiate(['de_DE', 'de'], ['en_US']) You can specify the character used in the locale identifiers to separate - the differnet components. This separator is applied to both lists. Also, + the different components. This separator is applied to both lists. Also, case is ignored in the comparison: >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-') Locale('de', territory='DE') - :param preferred: the list of locale identifers preferred by the user + :param preferred: the list of locale identifiers preferred by the user :param available: the list of locale identifiers available :param aliases: a dictionary of aliases for locale identifiers """ @@ -218,7 +261,12 @@ def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES): return Locale.parse(identifier, sep=sep) @classmethod - def parse(cls, identifier, sep='_', resolve_likely_subtags=True): + def parse( + cls, + identifier: str | Locale | None, + sep: str = '_', + resolve_likely_subtags: bool = True, + ) -> Locale: """Create a `Locale` instance for the given locale identifier. 
>>> l = Locale.parse('de-DE', sep='-') @@ -239,6 +287,11 @@ def parse(cls, identifier, sep='_', resolve_likely_subtags=True): >>> Locale.parse('und_AT') Locale('de', territory='AT') + Modifiers are optional, and always at the end, separated by "@": + + >>> Locale.parse('de_AT@euro') + Locale('de', territory='AT', modifier='euro') + :param identifier: the locale identifier string :param sep: optional component separator :param resolve_likely_subtags: if this is specified then a locale will @@ -256,13 +309,12 @@ def parse(cls, identifier, sep='_', resolve_likely_subtags=True): identifier :raise `UnknownLocaleError`: if no locale data is available for the requested locale + :raise `TypeError`: if the identifier is not a string or a `Locale` """ - if identifier is None: - return None - elif isinstance(identifier, Locale): + if isinstance(identifier, Locale): return identifier elif not isinstance(identifier, str): - raise TypeError('Unexpected value for identifier: %r' % (identifier,)) + raise TypeError(f"Unexpected value for identifier: {identifier!r}") parts = parse_locale(identifier, sep=sep) input_id = get_locale_identifier(parts) @@ -297,18 +349,22 @@ def _try_load_reducing(parts): # implement ICU like fuzzy locale objects and provide a way to # maximize and minimize locale tags. 
- language, territory, script, variant = parts + if len(parts) == 5: + language, territory, script, variant, modifier = parts + else: + language, territory, script, variant = parts + modifier = None language = get_global('language_aliases').get(language, language) - territory = get_global('territory_aliases').get(territory, (territory,))[0] - script = get_global('script_aliases').get(script, script) - variant = get_global('variant_aliases').get(variant, variant) + territory = get_global('territory_aliases').get(territory or '', (territory,))[0] + script = get_global('script_aliases').get(script or '', script) + variant = get_global('variant_aliases').get(variant or '', variant) if territory == 'ZZ': territory = None if script == 'Zzzz': script = None - parts = language, territory, script, variant + parts = language, territory, script, variant, modifier # First match: try the whole identifier new_id = get_locale_identifier(parts) @@ -322,48 +378,57 @@ def _try_load_reducing(parts): # simplified identifier that is just the language likely_subtag = get_global('likely_subtags').get(language) if likely_subtag is not None: - language2, _, script2, variant2 = parse_locale(likely_subtag) - locale = _try_load_reducing((language2, territory, script2, variant2)) + parts2 = parse_locale(likely_subtag) + if len(parts2) == 5: + language2, _, script2, variant2, modifier2 = parts2 + else: + language2, _, script2, variant2 = parts2 + modifier2 = None + locale = _try_load_reducing((language2, territory, script2, variant2, modifier2)) if locale is not None: return locale raise UnknownLocaleError(input_id) - def __eq__(self, other): - for key in ('language', 'territory', 'script', 'variant'): + def __eq__(self, other: object) -> bool: + for key in ('language', 'territory', 'script', 'variant', 'modifier'): if not hasattr(other, key): return False - return (self.language == other.language) and \ - (self.territory == other.territory) and \ - (self.script == other.script) and \ - 
(self.variant == other.variant) - - def __ne__(self, other): + return ( + self.language == getattr(other, 'language') and # noqa: B009 + self.territory == getattr(other, 'territory') and # noqa: B009 + self.script == getattr(other, 'script') and # noqa: B009 + self.variant == getattr(other, 'variant') and # noqa: B009 + self.modifier == getattr(other, 'modifier') # noqa: B009 + ) + + def __ne__(self, other: object) -> bool: return not self.__eq__(other) - def __hash__(self): - return hash((self.language, self.territory, self.script, self.variant)) + def __hash__(self) -> int: + return hash((self.language, self.territory, self.script, + self.variant, self.modifier)) - def __repr__(self): + def __repr__(self) -> str: parameters = [''] - for key in ('territory', 'script', 'variant'): + for key in ('territory', 'script', 'variant', 'modifier'): value = getattr(self, key) if value is not None: - parameters.append('%s=%r' % (key, value)) - parameter_string = '%r' % self.language + ', '.join(parameters) - return 'Locale(%s)' % parameter_string + parameters.append(f"{key}={value!r}") + return f"Locale({self.language!r}{', '.join(parameters)})" - def __str__(self): + def __str__(self) -> str: return get_locale_identifier((self.language, self.territory, - self.script, self.variant)) + self.script, self.variant, + self.modifier)) @property - def _data(self): + def _data(self) -> localedata.LocaleDataDict: if self.__data is None: self.__data = localedata.LocaleDataDict(localedata.load(str(self))) return self.__data - def get_display_name(self, locale=None): + def get_display_name(self, locale: Locale | str | None = None) -> str | None: """Return the display name of the locale using the given locale. 
The display name will include the language, territory, script, and @@ -372,6 +437,11 @@ def get_display_name(self, locale=None): >>> Locale('zh', 'CN', script='Hans').get_display_name('en') u'Chinese (Simplified, China)' + Modifiers are currently passed through verbatim: + + >>> Locale('it', 'IT', modifier='euro').get_display_name('en') + u'Italian (Italy, euro)' + :param locale: the locale to use """ if locale is None: @@ -386,9 +456,11 @@ def get_display_name(self, locale=None): details.append(locale.territories.get(self.territory)) if self.variant: details.append(locale.variants.get(self.variant)) + if self.modifier: + details.append(self.modifier) details = filter(None, details) if details: - retval += ' (%s)' % u', '.join(details) + retval += f" ({', '.join(details)})" return retval display_name = property(get_display_name, doc="""\ @@ -404,7 +476,7 @@ def get_display_name(self, locale=None): :type: `unicode` """) - def get_language_name(self, locale=None): + def get_language_name(self, locale: Locale | str | None = None) -> str | None: """Return the language of this locale in the given locale. >>> Locale('zh', 'CN', script='Hans').get_language_name('de') @@ -426,12 +498,12 @@ def get_language_name(self, locale=None): u'English' """) - def get_territory_name(self, locale=None): + def get_territory_name(self, locale: Locale | str | None = None) -> str | None: """Return the territory name in the given locale.""" if locale is None: locale = self locale = Locale.parse(locale) - return locale.territories.get(self.territory) + return locale.territories.get(self.territory or '') territory_name = property(get_territory_name, doc="""\ The localized territory name of the locale if available. 
@@ -440,12 +512,12 @@ def get_territory_name(self, locale=None): u'Deutschland' """) - def get_script_name(self, locale=None): + def get_script_name(self, locale: Locale | str | None = None) -> str | None: """Return the script name in the given locale.""" if locale is None: locale = self locale = Locale.parse(locale) - return locale.scripts.get(self.script) + return locale.scripts.get(self.script or '') script_name = property(get_script_name, doc="""\ The localized script name of the locale if available. @@ -455,7 +527,7 @@ def get_script_name(self, locale=None): """) @property - def english_name(self): + def english_name(self) -> str | None: """The english display name of the locale. >>> Locale('de').english_name @@ -469,7 +541,7 @@ def english_name(self): # { General Locale Display Names @property - def languages(self): + def languages(self) -> localedata.LocaleDataDict: """Mapping of language codes to translated language names. >>> Locale('de', 'DE').languages['ja'] @@ -481,7 +553,7 @@ def languages(self): return self._data['languages'] @property - def scripts(self): + def scripts(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('en', 'US').scripts['Hira'] @@ -493,7 +565,7 @@ def scripts(self): return self._data['scripts'] @property - def territories(self): + def territories(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('es', 'CO').territories['DE'] @@ -505,7 +577,7 @@ def territories(self): return self._data['territories'] @property - def variants(self): + def variants(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('de', 'DE').variants['1901'] @@ -516,7 +588,7 @@ def variants(self): # { Number Formatting @property - def currencies(self): + def currencies(self) -> localedata.LocaleDataDict: """Mapping of currency codes to translated currency names. 
This only returns the generic form of the currency name, not the count specific one. If an actual number is requested use the @@ -530,7 +602,7 @@ def currencies(self): return self._data['currency_names'] @property - def currency_symbols(self): + def currency_symbols(self) -> localedata.LocaleDataDict: """Mapping of currency codes to symbols. >>> Locale('en', 'US').currency_symbols['USD'] @@ -541,7 +613,7 @@ def currency_symbols(self): return self._data['currency_symbols'] @property - def number_symbols(self): + def number_symbols(self) -> localedata.LocaleDataDict: """Symbols used in number formatting. .. note:: The format of the value returned may change between @@ -553,7 +625,7 @@ def number_symbols(self): return self._data['number_symbols'] @property - def decimal_formats(self): + def decimal_formats(self) -> localedata.LocaleDataDict: """Locale patterns for decimal number formatting. .. note:: The format of the value returned may change between @@ -565,7 +637,7 @@ def decimal_formats(self): return self._data['decimal_formats'] @property - def compact_decimal_formats(self): + def compact_decimal_formats(self) -> localedata.LocaleDataDict: """Locale patterns for compact decimal number formatting. .. note:: The format of the value returned may change between @@ -577,7 +649,7 @@ def compact_decimal_formats(self): return self._data['compact_decimal_formats'] @property - def currency_formats(self): + def currency_formats(self) -> localedata.LocaleDataDict: """Locale patterns for currency number formatting. .. note:: The format of the value returned may change between @@ -591,7 +663,19 @@ def currency_formats(self): return self._data['currency_formats'] @property - def percent_formats(self): + def compact_currency_formats(self) -> localedata.LocaleDataDict: + """Locale patterns for compact currency number formatting. + + .. note:: The format of the value returned may change between + Babel versions. 
+ + >>> Locale('en', 'US').compact_currency_formats["short"]["one"]["1000"] + <NumberPattern u'¤0K'> + """ + return self._data['compact_currency_formats'] + + @property + def percent_formats(self) -> localedata.LocaleDataDict: """Locale patterns for percent number formatting. .. note:: The format of the value returned may change between @@ -603,7 +687,7 @@ def percent_formats(self): return self._data['percent_formats'] @property - def scientific_formats(self): + def scientific_formats(self) -> localedata.LocaleDataDict: """Locale patterns for scientific number formatting. .. note:: The format of the value returned may change between @@ -617,7 +701,7 @@ def scientific_formats(self): # { Calendar Information and Date Formatting @property - def periods(self): + def periods(self) -> localedata.LocaleDataDict: """Locale display names for day periods (AM/PM). >>> Locale('en', 'US').periods['am'] @@ -626,10 +710,10 @@ def periods(self): try: return self._data['day_periods']['stand-alone']['wide'] except KeyError: - return {} + return localedata.LocaleDataDict({}) # pragma: no cover @property - def day_periods(self): + def day_periods(self) -> localedata.LocaleDataDict: """Locale display names for various day periods (not necessarily only AM/PM). These are not meant to be used without the relevant `day_period_rules`. @@ -637,13 +721,13 @@ def day_periods(self): return self._data['day_periods'] @property - def day_period_rules(self): + def day_period_rules(self) -> localedata.LocaleDataDict: """Day period rules for the locale. Used by `get_period_id`. """ - return self._data.get('day_period_rules', {}) + return self._data.get('day_period_rules', localedata.LocaleDataDict({})) @property - def days(self): + def days(self) -> localedata.LocaleDataDict: """Locale display names for weekdays. 
>>> Locale('de', 'DE').days['format']['wide'][3] @@ -652,7 +736,7 @@ def days(self): return self._data['days'] @property - def months(self): + def months(self) -> localedata.LocaleDataDict: """Locale display names for months. >>> Locale('de', 'DE').months['format']['wide'][10] @@ -661,7 +745,7 @@ def months(self): return self._data['months'] @property - def quarters(self): + def quarters(self) -> localedata.LocaleDataDict: """Locale display names for quarters. >>> Locale('de', 'DE').quarters['format']['wide'][1] @@ -670,7 +754,7 @@ def quarters(self): return self._data['quarters'] @property - def eras(self): + def eras(self) -> localedata.LocaleDataDict: """Locale display names for eras. .. note:: The format of the value returned may change between @@ -684,7 +768,7 @@ def eras(self): return self._data['eras'] @property - def time_zones(self): + def time_zones(self) -> localedata.LocaleDataDict: """Locale display names for time zones. .. note:: The format of the value returned may change between @@ -698,7 +782,7 @@ def time_zones(self): return self._data['time_zones'] @property - def meta_zones(self): + def meta_zones(self) -> localedata.LocaleDataDict: """Locale display names for meta time zones. Meta time zones are basically groups of different Olson time zones that @@ -715,7 +799,7 @@ def meta_zones(self): return self._data['meta_zones'] @property - def zone_formats(self): + def zone_formats(self) -> localedata.LocaleDataDict: """Patterns related to the formatting of time zones. .. note:: The format of the value returned may change between @@ -731,7 +815,7 @@ def zone_formats(self): return self._data['zone_formats'] @property - def first_week_day(self): + def first_week_day(self) -> int: """The first day of a week, with 0 being Monday. 
>>> Locale('de', 'DE').first_week_day @@ -742,7 +826,7 @@ def first_week_day(self): return self._data['week_data']['first_day'] @property - def weekend_start(self): + def weekend_start(self) -> int: """The day the weekend starts, with 0 being Monday. >>> Locale('de', 'DE').weekend_start @@ -751,7 +835,7 @@ def weekend_start(self): return self._data['week_data']['weekend_start'] @property - def weekend_end(self): + def weekend_end(self) -> int: """The day the weekend ends, with 0 being Monday. >>> Locale('de', 'DE').weekend_end @@ -760,7 +844,7 @@ def weekend_end(self): return self._data['week_data']['weekend_end'] @property - def min_week_days(self): + def min_week_days(self) -> int: """The minimum number of days in a week so that the week is counted as the first week of a year or month. @@ -770,7 +854,7 @@ def min_week_days(self): return self._data['week_data']['min_days'] @property - def date_formats(self): + def date_formats(self) -> localedata.LocaleDataDict: """Locale patterns for date formatting. .. note:: The format of the value returned may change between @@ -784,35 +868,35 @@ def date_formats(self): return self._data['date_formats'] @property - def time_formats(self): + def time_formats(self) -> localedata.LocaleDataDict: """Locale patterns for time formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').time_formats['short'] - <DateTimePattern u'h:mm a'> + <DateTimePattern u'h:mm\u202fa'> >>> Locale('fr', 'FR').time_formats['long'] <DateTimePattern u'HH:mm:ss z'> """ return self._data['time_formats'] @property - def datetime_formats(self): + def datetime_formats(self) -> localedata.LocaleDataDict: """Locale patterns for datetime formatting. .. note:: The format of the value returned may change between Babel versions. 
>>> Locale('en').datetime_formats['full'] - u"{1} 'at' {0}" + u'{1}, {0}' >>> Locale('th').datetime_formats['medium'] u'{1} {0}' """ return self._data['datetime_formats'] @property - def datetime_skeletons(self): + def datetime_skeletons(self) -> localedata.LocaleDataDict: """Locale patterns for formatting parts of a datetime. >>> Locale('en').datetime_skeletons['MEd'] @@ -825,7 +909,7 @@ def datetime_skeletons(self): return self._data['datetime_skeletons'] @property - def interval_formats(self): + def interval_formats(self) -> localedata.LocaleDataDict: """Locale patterns for interval formatting. .. note:: The format of the value returned may change between @@ -847,7 +931,7 @@ def interval_formats(self): return self._data['interval_formats'] @property - def plural_form(self): + def plural_form(self) -> PluralRule: """Plural rules for the locale. >>> Locale('en').plural_form(1) @@ -862,7 +946,7 @@ def plural_form(self): return self._data.get('plural_form', _default_plural_rule) @property - def list_patterns(self): + def list_patterns(self) -> localedata.LocaleDataDict: """Patterns for generating lists .. note:: The format of the value returned may change between @@ -878,7 +962,7 @@ def list_patterns(self): return self._data['list_patterns'] @property - def ordinal_form(self): + def ordinal_form(self) -> PluralRule: """Plural rules for the locale. >>> Locale('en').ordinal_form(1) @@ -895,7 +979,7 @@ def ordinal_form(self): return self._data.get('ordinal_form', _default_plural_rule) @property - def measurement_systems(self): + def measurement_systems(self) -> localedata.LocaleDataDict: """Localized names for various measurement systems. >>> Locale('fr', 'FR').measurement_systems['US'] @@ -907,7 +991,7 @@ def measurement_systems(self): return self._data['measurement_systems'] @property - def character_order(self): + def character_order(self) -> str: """The text direction for the language. 
>>> Locale('de', 'DE').character_order @@ -918,7 +1002,7 @@ def character_order(self): return self._data['character_order'] @property - def text_direction(self): + def text_direction(self) -> str: """The text direction for the language in CSS short-hand form. >>> Locale('de', 'DE').text_direction @@ -929,7 +1013,7 @@ def text_direction(self): return ''.join(word[0] for word in self.character_order.split('-')) @property - def unit_display_names(self): + def unit_display_names(self) -> localedata.LocaleDataDict: """Display names for units of measurement. .. seealso:: @@ -943,7 +1027,7 @@ def unit_display_names(self): return self._data['unit_display_names'] -def default_locale(category=None, aliases=LOCALE_ALIASES): +def default_locale(category: str | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES) -> str | None: """Returns the system default locale for a given category, based on environment variables. @@ -988,7 +1072,7 @@ def default_locale(category=None, aliases=LOCALE_ALIASES): pass -def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES): +def negotiate_locale(preferred: Iterable[str], available: Iterable[str], sep: str = '_', aliases: Mapping[str, str] = LOCALE_ALIASES) -> str | None: """Find the best match between available and requested locale strings. >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT']) @@ -1051,9 +1135,12 @@ def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES): return None -def parse_locale(identifier, sep='_'): +def parse_locale( + identifier: str, + sep: str = '_' +) -> tuple[str, str | None, str | None, str | None] | tuple[str, str | None, str | None, str | None, str | None]: """Parse a locale identifier into a tuple of the form ``(language, - territory, script, variant)``. + territory, script, variant, modifier)``. 
>>> parse_locale('zh_CN') ('zh', 'CN', None, None) @@ -1065,12 +1152,22 @@ def parse_locale(identifier, sep='_'): ('en', '150', None, None) >>> parse_locale('en_us_posix') ('en', 'US', None, 'POSIX') + >>> parse_locale('it_IT@euro') + ('it', 'IT', None, None, 'euro') + >>> parse_locale('it_IT@custom') + ('it', 'IT', None, None, 'custom') + >>> parse_locale('it_IT@') + ('it', 'IT', None, None) The default component separator is "_", but a different separator can be - specified using the `sep` parameter: + specified using the `sep` parameter. + + The optional modifier is always separated with "@" and at the end: >>> parse_locale('zh-CN', sep='-') ('zh', 'CN', None, None) + >>> parse_locale('zh-CN@custom', sep='-') + ('zh', 'CN', None, None, 'custom') If the identifier cannot be parsed into a locale, a `ValueError` exception is raised: @@ -1080,14 +1177,13 @@ def parse_locale(identifier, sep='_'): ... ValueError: 'not_a_LOCALE_String' is not a valid locale identifier - Encoding information and locale modifiers are removed from the identifier: + Encoding information is removed from the identifier, while modifiers are + kept: - >>> parse_locale('it_IT@euro') - ('it', 'IT', None, None) >>> parse_locale('en_US.UTF-8') ('en', 'US', None, None) >>> parse_locale('de_DE.iso885915@euro') - ('de', 'DE', None, None) + ('de', 'DE', None, None, 'euro') See :rfc:`4646` for more information. @@ -1097,23 +1193,19 @@ def parse_locale(identifier, sep='_'): :raise `ValueError`: if the string does not appear to be a valid locale identifier """ + identifier, _, modifier = identifier.partition('@') if '.' 
in identifier: # this is probably the charset/encoding, which we don't care about identifier = identifier.split('.', 1)[0] - if '@' in identifier: - # this is a locale modifier such as @euro, which we don't care about - # either - identifier = identifier.split('@', 1)[0] parts = identifier.split(sep) lang = parts.pop(0).lower() if not lang.isalpha(): - raise ValueError('expected only letters, got %r' % lang) + raise ValueError(f"expected only letters, got {lang!r}") script = territory = variant = None - if parts: - if len(parts[0]) == 4 and parts[0].isalpha(): - script = parts.pop(0).title() + if parts and len(parts[0]) == 4 and parts[0].isalpha(): + script = parts.pop(0).title() if parts: if len(parts[0]) == 2 and parts[0].isalpha(): @@ -1121,30 +1213,46 @@ def parse_locale(identifier, sep='_'): elif len(parts[0]) == 3 and parts[0].isdigit(): territory = parts.pop(0) - if parts: - if len(parts[0]) == 4 and parts[0][0].isdigit() or \ - len(parts[0]) >= 5 and parts[0][0].isalpha(): - variant = parts.pop().upper() + if parts and ( + len(parts[0]) == 4 and parts[0][0].isdigit() or + len(parts[0]) >= 5 and parts[0][0].isalpha() + ): + variant = parts.pop().upper() if parts: - raise ValueError('%r is not a valid locale identifier' % identifier) - - return lang, territory, script, variant - - -def get_locale_identifier(tup, sep='_'): + raise ValueError(f"{identifier!r} is not a valid locale identifier") + + # TODO(3.0): always return a 5-tuple + if modifier: + return lang, territory, script, variant, modifier + else: + return lang, territory, script, variant + + +def get_locale_identifier( + tup: tuple[str] + | tuple[str, str | None] + | tuple[str, str | None, str | None] + | tuple[str, str | None, str | None, str | None] + | tuple[str, str | None, str | None, str | None, str | None], + sep: str = "_", +) -> str: """The reverse of :func:`parse_locale`. It creates a locale identifier out - of a ``(language, territory, script, variant)`` tuple. 
Items can be set to + of a ``(language, territory, script, variant, modifier)`` tuple. Items can be set to ``None`` and trailing ``None``\\s can also be left out of the tuple. - >>> get_locale_identifier(('de', 'DE', None, '1999')) - 'de_DE_1999' + >>> get_locale_identifier(('de', 'DE', None, '1999', 'custom')) + 'de_DE_1999@custom' + >>> get_locale_identifier(('fi', None, None, None, 'custom')) + 'fi@custom' + .. versionadded:: 1.0 :param tup: the tuple as returned by :func:`parse_locale`. :param sep: the separator for the identifier. """ - tup = tuple(tup[:4]) - lang, territory, script, variant = tup + (None,) * (4 - len(tup)) - return sep.join(filter(None, (lang, script, territory, variant))) + tup = tuple(tup[:5]) # type: ignore # length should be no more than 5 + lang, territory, script, variant, modifier = tup + (None,) * (5 - len(tup)) + ret = sep.join(filter(None, (lang, script, territory, variant))) + return f'{ret}@{modifier}' if modifier else ret diff --git a/babel/dates.py b/babel/dates.py index 8228bef88..78c7facaf 100644 --- a/babel/dates.py +++ b/babel/dates.py @@ -15,16 +15,32 @@ :license: BSD, see LICENSE for more details. 
""" +from __future__ import annotations import re import warnings -import pytz as _pytz +from functools import lru_cache +from typing import TYPE_CHECKING, SupportsInt -from datetime import date, datetime, time, timedelta -from bisect import bisect_right +try: + import pytz +except ModuleNotFoundError: + pytz = None + import zoneinfo -from babel.core import default_locale, get_global, Locale -from babel.util import UTC, LOCALTZ +import datetime +from collections.abc import Iterable + +from babel import localtime +from babel.core import Locale, default_locale, get_global +from babel.localedata import LocaleDataDict + +if TYPE_CHECKING: + from typing_extensions import Literal, TypeAlias + _Instant: TypeAlias = datetime.date | datetime.time | float | None + _PredefinedTimeFormat: TypeAlias = Literal['full', 'long', 'medium', 'short'] + _Context: TypeAlias = Literal['format', 'stand-alone'] + _DtOrTzinfo: TypeAlias = datetime.datetime | datetime.tzinfo | str | int | datetime.time | None # "If a given short metazone form is known NOT to be understood in a given # locale and the parent locale has this value such that it would normally @@ -33,18 +49,32 @@ # empty set characters ( U+2205 )." 
# - https://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names -NO_INHERITANCE_MARKER = u'\u2205\u2205\u2205' +NO_INHERITANCE_MARKER = '\u2205\u2205\u2205' +UTC = datetime.timezone.utc +LOCALTZ = localtime.LOCALTZ LC_TIME = default_locale('LC_TIME') -# Aliases for use in scopes where the modules are shadowed by local variables -date_ = date -datetime_ = datetime -time_ = time + +def _localize(tz: datetime.tzinfo, dt: datetime.datetime) -> datetime.datetime: + # Support localizing with both pytz and zoneinfo tzinfos + # nothing to do + if dt.tzinfo is tz: + return dt + + if hasattr(tz, 'localize'): # pytz + return tz.localize(dt) + + if dt.tzinfo is None: + # convert naive to localized + return dt.replace(tzinfo=tz) + + # convert timezones + return dt.astimezone(tz) -def _get_dt_and_tzinfo(dt_or_tzinfo): +def _get_dt_and_tzinfo(dt_or_tzinfo: _DtOrTzinfo) -> tuple[datetime.datetime | None, datetime.tzinfo]: """ Parse a `dt_or_tzinfo` value into a datetime and a tzinfo. @@ -53,7 +83,7 @@ def _get_dt_and_tzinfo(dt_or_tzinfo): :rtype: tuple[datetime, tzinfo] """ if dt_or_tzinfo is None: - dt = datetime.now() + dt = datetime.datetime.now() tzinfo = LOCALTZ elif isinstance(dt_or_tzinfo, str): dt = None @@ -61,19 +91,16 @@ def _get_dt_and_tzinfo(dt_or_tzinfo): elif isinstance(dt_or_tzinfo, int): dt = None tzinfo = UTC - elif isinstance(dt_or_tzinfo, (datetime, time)): + elif isinstance(dt_or_tzinfo, (datetime.datetime, datetime.time)): dt = _get_datetime(dt_or_tzinfo) - if dt.tzinfo is not None: - tzinfo = dt.tzinfo - else: - tzinfo = UTC + tzinfo = dt.tzinfo if dt.tzinfo is not None else UTC else: dt = None tzinfo = dt_or_tzinfo return dt, tzinfo -def _get_tz_name(dt_or_tzinfo): +def _get_tz_name(dt_or_tzinfo: _DtOrTzinfo) -> str: """ Get the timezone name out of a time, datetime, or tzinfo object. 
@@ -85,10 +112,10 @@ def _get_tz_name(dt_or_tzinfo): elif hasattr(tzinfo, 'key') and tzinfo.key is not None: # ZoneInfo object return tzinfo.key else: - return tzinfo.tzname(dt or datetime.utcnow()) + return tzinfo.tzname(dt or datetime.datetime.utcnow()) -def _get_datetime(instant): +def _get_datetime(instant: _Instant) -> datetime.datetime: """ Get a datetime out of an "instant" (date, time, datetime, number). @@ -99,6 +126,7 @@ def _get_datetime(instant): Dates are converted to naive datetimes with midnight as the time component. + >>> from datetime import date, datetime >>> _get_datetime(date(2015, 1, 1)) datetime.datetime(2015, 1, 1, 0, 0) @@ -119,18 +147,18 @@ def _get_datetime(instant): :rtype: datetime """ if instant is None: - return datetime_.utcnow() - elif isinstance(instant, int) or isinstance(instant, float): - return datetime_.utcfromtimestamp(instant) - elif isinstance(instant, time): - return datetime_.combine(date.today(), instant) - elif isinstance(instant, date) and not isinstance(instant, datetime): - return datetime_.combine(instant, time()) + return datetime.datetime.utcnow() + elif isinstance(instant, (int, float)): + return datetime.datetime.utcfromtimestamp(instant) + elif isinstance(instant, datetime.time): + return datetime.datetime.combine(datetime.date.today(), instant) + elif isinstance(instant, datetime.date) and not isinstance(instant, datetime.datetime): + return datetime.datetime.combine(instant, datetime.time()) # TODO (3.x): Add an assertion/type check for this fallthrough branch: return instant -def _ensure_datetime_tzinfo(datetime, tzinfo=None): +def _ensure_datetime_tzinfo(dt: datetime.datetime, tzinfo: datetime.tzinfo | None = None) -> datetime.datetime: """ Ensure the datetime passed has an attached tzinfo. @@ -138,7 +166,8 @@ def _ensure_datetime_tzinfo(datetime, tzinfo=None): If a tzinfo is passed in, the datetime is normalized to that timezone. 
- >>> _ensure_datetime_tzinfo(datetime(2015, 1, 1)).tzinfo.zone + >>> from datetime import datetime + >>> _get_tz_name(_ensure_datetime_tzinfo(datetime(2015, 1, 1))) 'UTC' >>> tz = get_timezone("Europe/Stockholm") @@ -146,20 +175,23 @@ def _ensure_datetime_tzinfo(datetime, tzinfo=None): 14 :param datetime: Datetime to augment. - :param tzinfo: Optional tznfo. + :param tzinfo: optional tzinfo :return: datetime with tzinfo :rtype: datetime """ - if datetime.tzinfo is None: - datetime = datetime.replace(tzinfo=UTC) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=UTC) if tzinfo is not None: - datetime = datetime.astimezone(get_timezone(tzinfo)) + dt = dt.astimezone(get_timezone(tzinfo)) if hasattr(tzinfo, 'normalize'): # pytz - datetime = tzinfo.normalize(datetime) - return datetime + dt = tzinfo.normalize(dt) + return dt -def _get_time(time, tzinfo=None): +def _get_time( + time: datetime.time | datetime.datetime | None, + tzinfo: datetime.tzinfo | None = None, +) -> datetime.time: """ Get a timezoned time from a given instant. @@ -169,12 +201,14 @@ def _get_time(time, tzinfo=None): :rtype: time """ if time is None: - time = datetime.utcnow() + time = datetime.datetime.utcnow() elif isinstance(time, (int, float)): - time = datetime.utcfromtimestamp(time) + time = datetime.datetime.utcfromtimestamp(time) + if time.tzinfo is None: time = time.replace(tzinfo=UTC) - if isinstance(time, datetime): + + if isinstance(time, datetime.datetime): if tzinfo is not None: time = time.astimezone(tzinfo) if hasattr(tzinfo, 'normalize'): # pytz @@ -185,141 +219,40 @@ def _get_time(time, tzinfo=None): return time -def get_timezone(zone=None): +def get_timezone(zone: str | datetime.tzinfo | None = None) -> datetime.tzinfo: """Looks up a timezone by name and returns it. The timezone object - returned comes from ``pytz`` and corresponds to the `tzinfo` interface and - can be used with all of the functions of Babel that operate with dates. 
+ returned comes from ``pytz`` or ``zoneinfo``, whichever is available. + It corresponds to the `tzinfo` interface and can be used with all of + the functions of Babel that operate with dates. If a timezone is not known a :exc:`LookupError` is raised. If `zone` is ``None`` a local zone object is returned. :param zone: the name of the timezone to look up. If a timezone object - itself is passed in, mit's returned unchanged. + itself is passed in, it's returned unchanged. """ if zone is None: return LOCALTZ if not isinstance(zone, str): return zone - try: - return _pytz.timezone(zone) - except _pytz.UnknownTimeZoneError: - raise LookupError('Unknown timezone %s' % zone) - - -def get_next_timezone_transition(zone=None, dt=None): - """Given a timezone it will return a :class:`TimezoneTransition` object - that holds the information about the next timezone transition that's going - to happen. For instance this can be used to detect when the next DST - change is going to happen and how it looks like. - The transition is calculated relative to the given datetime object. The - next transition that follows the date is used. If a transition cannot - be found the return value will be `None`. - - Transition information can only be provided for timezones returned by - the :func:`get_timezone` function. - - This function is pending deprecation with no replacement planned in the - Babel library. - - :param zone: the timezone for which the transition should be looked up. - If not provided the local timezone is used. - :param dt: the date after which the next transition should be found. - If not given the current time is assumed. - """ - warnings.warn( - "get_next_timezone_transition() is deprecated and will be " - "removed in the next version of Babel. 
" - "Please see https://github.com/python-babel/babel/issues/716 " - "for discussion.", - category=DeprecationWarning, - ) - zone = get_timezone(zone) - dt = _get_datetime(dt).replace(tzinfo=None) - - if not hasattr(zone, '_utc_transition_times'): - raise TypeError('Given timezone does not have UTC transition ' - 'times. This can happen because the operating ' - 'system fallback local timezone is used or a ' - 'custom timezone object') - - try: - idx = max(0, bisect_right(zone._utc_transition_times, dt)) - old_trans = zone._transition_info[idx - 1] - new_trans = zone._transition_info[idx] - old_tz = zone._tzinfos[old_trans] - new_tz = zone._tzinfos[new_trans] - except (LookupError, ValueError): - return None - - return TimezoneTransition( - activates=zone._utc_transition_times[idx], - from_tzinfo=old_tz, - to_tzinfo=new_tz, - reference_date=dt - ) - - -class TimezoneTransition: - """A helper object that represents the return value from - :func:`get_next_timezone_transition`. - - This class is pending deprecation with no replacement planned in the - Babel library. - - :field activates: - The time of the activation of the timezone transition in UTC. - :field from_tzinfo: - The timezone from where the transition starts. - :field to_tzinfo: - The timezone for after the transition. - :field reference_date: - The reference date that was provided. This is the `dt` parameter - to the :func:`get_next_timezone_transition`. - """ - - def __init__(self, activates, from_tzinfo, to_tzinfo, reference_date=None): - warnings.warn( - "TimezoneTransition is deprecated and will be " - "removed in the next version of Babel. 
" - "Please see https://github.com/python-babel/babel/issues/716 " - "for discussion.", - category=DeprecationWarning, - ) - self.activates = activates - self.from_tzinfo = from_tzinfo - self.to_tzinfo = to_tzinfo - self.reference_date = reference_date - - @property - def from_tz(self): - """The name of the timezone before the transition.""" - return self.from_tzinfo._tzname - - @property - def to_tz(self): - """The name of the timezone after the transition.""" - return self.to_tzinfo._tzname - - @property - def from_offset(self): - """The UTC offset in seconds before the transition.""" - return int(self.from_tzinfo._utcoffset.total_seconds()) - - @property - def to_offset(self): - """The UTC offset in seconds after the transition.""" - return int(self.to_tzinfo._utcoffset.total_seconds()) + if pytz: + try: + return pytz.timezone(zone) + except pytz.UnknownTimeZoneError as e: + exc = e + else: + assert zoneinfo + try: + return zoneinfo.ZoneInfo(zone) + except zoneinfo.ZoneInfoNotFoundError as e: + exc = e - def __repr__(self): - return ' %s (%s)>' % ( - self.from_tz, - self.to_tz, - self.activates, - ) + raise LookupError(f"Unknown timezone {zone}") from exc -def get_period_names(width='wide', context='stand-alone', locale=LC_TIME): +def get_period_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', + context: _Context = 'stand-alone', locale: Locale | str | None = LC_TIME) -> LocaleDataDict: """Return the names for day periods (AM/PM) used by the locale. >>> get_period_names(locale='en_US')['am'] @@ -332,7 +265,8 @@ def get_period_names(width='wide', context='stand-alone', locale=LC_TIME): return Locale.parse(locale).day_periods[context][width] -def get_day_names(width='wide', context='format', locale=LC_TIME): +def get_day_names(width: Literal['abbreviated', 'narrow', 'short', 'wide'] = 'wide', + context: _Context = 'format', locale: Locale | str | None = LC_TIME) -> LocaleDataDict: """Return the day names used by the locale for the specified format. 
>>> get_day_names('wide', locale='en_US')[1] @@ -351,7 +285,8 @@ def get_day_names(width='wide', context='format', locale=LC_TIME): return Locale.parse(locale).days[context][width] -def get_month_names(width='wide', context='format', locale=LC_TIME): +def get_month_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', + context: _Context = 'format', locale: Locale | str | None = LC_TIME) -> LocaleDataDict: """Return the month names used by the locale for the specified format. >>> get_month_names('wide', locale='en_US')[1] @@ -368,7 +303,8 @@ def get_month_names(width='wide', context='format', locale=LC_TIME): return Locale.parse(locale).months[context][width] -def get_quarter_names(width='wide', context='format', locale=LC_TIME): +def get_quarter_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', + context: _Context = 'format', locale: Locale | str | None = LC_TIME) -> LocaleDataDict: """Return the quarter names used by the locale for the specified format. >>> get_quarter_names('wide', locale='en_US')[1] @@ -385,7 +321,8 @@ def get_quarter_names(width='wide', context='format', locale=LC_TIME): return Locale.parse(locale).quarters[context][width] -def get_era_names(width='wide', locale=LC_TIME): +def get_era_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', + locale: Locale | str | None = LC_TIME) -> LocaleDataDict: """Return the era names used by the locale for the specified format. >>> get_era_names('wide', locale='en_US')[1] @@ -399,7 +336,7 @@ def get_era_names(width='wide', locale=LC_TIME): return Locale.parse(locale).eras[width] -def get_date_format(format='medium', locale=LC_TIME): +def get_date_format(format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = LC_TIME) -> DateTimePattern: """Return the date formatting patterns used by the locale for the specified format. 
@@ -415,7 +352,7 @@ def get_date_format(format='medium', locale=LC_TIME): return Locale.parse(locale).date_formats[format] -def get_datetime_format(format='medium', locale=LC_TIME): +def get_datetime_format(format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = LC_TIME) -> DateTimePattern: """Return the datetime formatting patterns used by the locale for the specified format. @@ -432,12 +369,12 @@ def get_datetime_format(format='medium', locale=LC_TIME): return patterns[format] -def get_time_format(format='medium', locale=LC_TIME): +def get_time_format(format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = LC_TIME) -> DateTimePattern: """Return the time formatting patterns used by the locale for the specified format. >>> get_time_format(locale='en_US') - + >>> get_time_format('full', locale='de_DE') @@ -448,10 +385,16 @@ def get_time_format(format='medium', locale=LC_TIME): return Locale.parse(locale).time_formats[format] -def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME, return_z=False): +def get_timezone_gmt( + datetime: _Instant = None, + width: Literal['long', 'short', 'iso8601', 'iso8601_short'] = 'long', + locale: Locale | str | None = LC_TIME, + return_z: bool = False, +) -> str: """Return the timezone associated with the given `datetime` object formatted as string indicating the offset from GMT. 
+ >>> from datetime import datetime >>> dt = datetime(2007, 4, 1, 15, 30) >>> get_timezone_gmt(dt, locale='en') u'GMT+00:00' @@ -460,7 +403,7 @@ def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME, return_z=False >>> get_timezone_gmt(dt, locale='en', width='iso8601_short') u'+00' >>> tz = get_timezone('America/Los_Angeles') - >>> dt = tz.localize(datetime(2007, 4, 1, 15, 30)) + >>> dt = _localize(tz, datetime(2007, 4, 1, 15, 30)) >>> get_timezone_gmt(dt, locale='en') u'GMT-07:00' >>> get_timezone_gmt(dt, 'short', locale='en') @@ -492,32 +435,36 @@ def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME, return_z=False if return_z and hours == 0 and seconds == 0: return 'Z' elif seconds == 0 and width == 'iso8601_short': - return u'%+03d' % hours + return '%+03d' % hours elif width == 'short' or width == 'iso8601_short': - pattern = u'%+03d%02d' + pattern = '%+03d%02d' elif width == 'iso8601': - pattern = u'%+03d:%02d' + pattern = '%+03d:%02d' else: pattern = locale.zone_formats['gmt'] % '%+03d:%02d' return pattern % (hours, seconds // 60) -def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME, return_city=False): - u"""Return a representation of the given timezone using "location format". +def get_timezone_location( + dt_or_tzinfo: _DtOrTzinfo = None, + locale: Locale | str | None = LC_TIME, + return_city: bool = False, +) -> str: + """Return a representation of the given timezone using "location format". The result depends on both the local display name of the country and the city associated with the time zone: >>> tz = get_timezone('America/St_Johns') >>> print(get_timezone_location(tz, locale='de_DE')) - Kanada (St. John’s) Zeit + Kanada (St. John’s) (Ortszeit) >>> print(get_timezone_location(tz, locale='en')) Canada (St. John’s) Time >>> print(get_timezone_location(tz, locale='en', return_city=True)) St. 
John’s >>> tz = get_timezone('America/Mexico_City') >>> get_timezone_location(tz, locale='de_DE') - u'Mexiko (Mexiko-Stadt) Zeit' + u'Mexiko (Mexiko-Stadt) (Ortszeit)' If the timezone is associated with a country that uses only a single timezone, just the localized country name is returned: @@ -578,11 +525,18 @@ def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME, return_city=False): }) -def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, - locale=LC_TIME, zone_variant=None, return_zone=False): +def get_timezone_name( + dt_or_tzinfo: _DtOrTzinfo = None, + width: Literal['long', 'short'] = 'long', + uncommon: bool = False, + locale: Locale | str | None = LC_TIME, + zone_variant: Literal['generic', 'daylight', 'standard'] | None = None, + return_zone: bool = False, +) -> str: r"""Return the localized display name for the given timezone. The timezone may be specified using a ``datetime`` or `tzinfo` object. + >>> from datetime import time >>> dt = time(15, 30, tzinfo=get_timezone('America/Los_Angeles')) >>> get_timezone_name(dt, locale='en_US') u'Pacific Standard Time' @@ -592,7 +546,7 @@ def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, u'PST' If this function gets passed only a `tzinfo` object and no concrete - `datetime`, the returned display name is indenpendent of daylight savings + `datetime`, the returned display name is independent of daylight savings time. 
This can be used for example for selecting timezones, or to set the time of events that recur across DST changes: @@ -659,10 +613,7 @@ def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, zone_variant = 'generic' else: dst = tzinfo.dst(dt) - if dst: - zone_variant = 'daylight' - else: - zone_variant = 'standard' + zone_variant = "daylight" if dst else "standard" else: if zone_variant not in ('generic', 'standard', 'daylight'): raise ValueError('Invalid zone variation') @@ -673,9 +624,8 @@ def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, return zone info = locale.time_zones.get(zone, {}) # Try explicitly translated zone names first - if width in info: - if zone_variant in info[width]: - return info[width][zone_variant] + if width in info and zone_variant in info[width]: + return info[width][zone_variant] metazone = get_global('meta_zones').get(zone) if metazone: @@ -697,9 +647,14 @@ def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, return get_timezone_location(dt_or_tzinfo, locale=locale) -def format_date(date=None, format='medium', locale=LC_TIME): +def format_date( + date: datetime.date | None = None, + format: _PredefinedTimeFormat | str = 'medium', + locale: Locale | str | None = LC_TIME, +) -> str: """Return a date formatted according to the given pattern. 
+ >>> from datetime import date >>> d = date(2007, 4, 1) >>> format_date(d, locale='en_US') u'Apr 1, 2007' @@ -719,8 +674,8 @@ def format_date(date=None, format='medium', locale=LC_TIME): :param locale: a `Locale` object or a locale identifier """ if date is None: - date = date_.today() - elif isinstance(date, datetime): + date = datetime.date.today() + elif isinstance(date, datetime.datetime): date = date.date() locale = Locale.parse(locale) @@ -730,20 +685,24 @@ def format_date(date=None, format='medium', locale=LC_TIME): return pattern.apply(date, locale) -def format_datetime(datetime=None, format='medium', tzinfo=None, - locale=LC_TIME): +def format_datetime( + datetime: _Instant = None, + format: _PredefinedTimeFormat | str = 'medium', + tzinfo: datetime.tzinfo | None = None, + locale: Locale | str | None = LC_TIME, +) -> str: r"""Return a date formatted according to the given pattern. + >>> from datetime import datetime >>> dt = datetime(2007, 4, 1, 15, 30) >>> format_datetime(dt, locale='en_US') - u'Apr 1, 2007, 3:30:00 PM' + u'Apr 1, 2007, 3:30:00\u202fPM' - For any pattern requiring the display of the time-zone, the third-party - ``pytz`` package is needed to explicitly specify the time-zone: + For any pattern requiring the display of the timezone: >>> format_datetime(dt, 'full', tzinfo=get_timezone('Europe/Paris'), ... locale='fr_FR') - u'dimanche 1 avril 2007 \xe0 17:30:00 heure d\u2019\xe9t\xe9 d\u2019Europe centrale' + 'dimanche 1 avril 2007, 17:30:00 heure d’été d’Europe centrale' >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz", ... 
tzinfo=get_timezone('US/Eastern'), locale='en') u'2007.04.01 AD at 11:30:00 EDT' @@ -768,12 +727,17 @@ def format_datetime(datetime=None, format='medium', tzinfo=None, return parse_pattern(format).apply(datetime, locale) -def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME): +def format_time( + time: datetime.time | datetime.datetime | float | None = None, + format: _PredefinedTimeFormat | str = 'medium', + tzinfo: datetime.tzinfo | None = None, locale: Locale | str | None = LC_TIME, +) -> str: r"""Return a time formatted according to the given pattern. + >>> from datetime import datetime, time >>> t = time(15, 30) >>> format_time(t, locale='en_US') - u'3:30:00 PM' + u'3:30:00\u202fPM' >>> format_time(t, format='short', locale='de_DE') u'15:30' @@ -788,9 +752,9 @@ def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME): >>> t = datetime(2007, 4, 1, 15, 30) >>> tzinfo = get_timezone('Europe/Paris') - >>> t = tzinfo.localize(t) + >>> t = _localize(tzinfo, t) >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR') - u'15:30:00 heure d\u2019\xe9t\xe9 d\u2019Europe centrale' + '15:30:00 heure d’été d’Europe centrale' >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=get_timezone('US/Eastern'), ... locale='en') u"09 o'clock AM, Eastern Daylight Time" @@ -814,7 +778,7 @@ def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME): u'15:30:00 heure normale d\u2019Europe centrale' >>> format_time(t, format='full', tzinfo=get_timezone('US/Eastern'), ... 
locale='en_US') - u'3:30:00 PM Eastern Standard Time' + u'3:30:00\u202fPM Eastern Standard Time' :param time: the ``time`` or ``datetime`` object; if `None`, the current time in UTC is used @@ -823,15 +787,26 @@ def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME): :param tzinfo: the time-zone to apply to the time for display :param locale: a `Locale` object or a locale identifier """ + + # get reference date for if we need to find the right timezone variant + # in the pattern + ref_date = time.date() if isinstance(time, datetime.datetime) else None + time = _get_time(time, tzinfo) locale = Locale.parse(locale) if format in ('full', 'long', 'medium', 'short'): format = get_time_format(format, locale=locale) - return parse_pattern(format).apply(time, locale) + return parse_pattern(format).apply(time, locale, reference_date=ref_date) -def format_skeleton(skeleton, datetime=None, tzinfo=None, fuzzy=True, locale=LC_TIME): +def format_skeleton( + skeleton: str, + datetime: _Instant = None, + tzinfo: datetime.tzinfo | None = None, + fuzzy: bool = True, + locale: Locale | str | None = LC_TIME, +) -> str: r"""Return a time and/or date formatted according to the given pattern. The skeletons are defined in the CLDR data and provide more flexibility @@ -839,6 +814,7 @@ def format_skeleton(skeleton, datetime=None, tzinfo=None, fuzzy=True, locale=LC_ The are defined using the date/time symbols without order or punctuation and map to a suitable format for the given locale. + >>> from datetime import datetime >>> t = datetime(2007, 4, 1, 15, 30) >>> format_skeleton('MMMEd', t, locale='fr') u'dim. 1 avr.' @@ -869,7 +845,7 @@ def format_skeleton(skeleton, datetime=None, tzinfo=None, fuzzy=True, locale=LC_ return format_datetime(datetime, format, tzinfo, locale) -TIMEDELTA_UNITS = ( +TIMEDELTA_UNITS: tuple[tuple[str, int], ...] 
= ( ('year', 3600 * 24 * 365), ('month', 3600 * 24 * 30), ('week', 3600 * 24 * 7), @@ -880,11 +856,17 @@ def format_skeleton(skeleton, datetime=None, tzinfo=None, fuzzy=True, locale=LC_ ) -def format_timedelta(delta, granularity='second', threshold=.85, - add_direction=False, format='long', - locale=LC_TIME): +def format_timedelta( + delta: datetime.timedelta | int, + granularity: Literal['year', 'month', 'week', 'day', 'hour', 'minute', 'second'] = 'second', + threshold: float = .85, + add_direction: bool = False, + format: Literal['narrow', 'short', 'medium', 'long'] = 'long', + locale: Locale | str | None = LC_TIME, +) -> str: """Return a time delta according to the rules of the given locale. + >>> from datetime import timedelta >>> format_timedelta(timedelta(weeks=12), locale='en_US') u'3 months' >>> format_timedelta(timedelta(seconds=1), locale='es') @@ -893,8 +875,7 @@ def format_timedelta(delta, granularity='second', threshold=.85, The granularity parameter can be provided to alter the lowest unit presented, which defaults to a second. - >>> format_timedelta(timedelta(hours=3), granularity='day', - ... locale='en_US') + >>> format_timedelta(timedelta(hours=3), granularity='day', locale='en_US') u'1 day' The threshold parameter can be used to determine at which value the @@ -945,7 +926,7 @@ def format_timedelta(delta, granularity='second', threshold=.85, ' is deprecated. 
Use "long" instead', category=DeprecationWarning) format = 'long' - if isinstance(delta, timedelta): + if isinstance(delta, datetime.timedelta): seconds = int((delta.days * 86400) + delta.seconds) else: seconds = delta @@ -958,7 +939,7 @@ def _iter_patterns(a_unit): yield unit_rel_patterns['future'] else: yield unit_rel_patterns['past'] - a_unit = 'duration-' + a_unit + a_unit = f"duration-{a_unit}" yield locale._data['unit_patterns'].get(a_unit, {}).get(format) for unit, secs_per_unit in TIMEDELTA_UNITS: @@ -975,18 +956,24 @@ def _iter_patterns(a_unit): break # This really should not happen if pattern is None: - return u'' + return '' return pattern.replace('{0}', str(value)) - return u'' + return '' -def _format_fallback_interval(start, end, skeleton, tzinfo, locale): +def _format_fallback_interval( + start: _Instant, + end: _Instant, + skeleton: str | None, + tzinfo: datetime.tzinfo | None, + locale: Locale | str | None = LC_TIME, +) -> str: if skeleton in locale.datetime_skeletons: # Use the given skeleton format = lambda dt: format_skeleton(skeleton, dt, tzinfo, locale=locale) - elif all((isinstance(d, date) and not isinstance(d, datetime)) for d in (start, end)): # Both are just dates + elif all((isinstance(d, datetime.date) and not isinstance(d, datetime.datetime)) for d in (start, end)): # Both are just dates format = lambda dt: format_date(dt, locale=locale) - elif all((isinstance(d, time) and not isinstance(d, date)) for d in (start, end)): # Both are times + elif all((isinstance(d, datetime.time) and not isinstance(d, datetime.date)) for d in (start, end)): # Both are times format = lambda dt: format_time(dt, tzinfo=tzinfo, locale=locale) else: format = lambda dt: format_datetime(dt, tzinfo=tzinfo, locale=locale) @@ -1004,10 +991,18 @@ def _format_fallback_interval(start, end, skeleton, tzinfo, locale): ) -def format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=LC_TIME): +def format_interval( + start: _Instant, + end: _Instant, + 
skeleton: str | None = None, + tzinfo: datetime.tzinfo | None = None, + fuzzy: bool = True, + locale: Locale | str | None = LC_TIME, +) -> str: """ Format an interval between two instants according to the locale's rules. + >>> from datetime import date, time >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "yMd", locale="fi") u'15.\u201317.1.2016' @@ -1015,7 +1010,7 @@ def format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=L '12:12\u201316:16' >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US") - '5:12 AM \u2013 4:16 PM' + '5:12\u202fAM\u2009–\u20094:16\u202fPM' >>> format_interval(time(16, 18), time(16, 24), "Hm", locale="it") '16:18\u201316:24' @@ -1034,7 +1029,7 @@ def format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=L '16:18:00\uff5e16:24:00' >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "xxx", locale="de") - '15.01.2016 \u2013 17.01.2016' + '15.01.2016\u2009–\u200917.01.2016' :param start: First instant (datetime/date/time) :param end: Second instant (datetime/date/time) @@ -1087,27 +1082,32 @@ def format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=L # > single date using availableFormats, and return. for field in PATTERN_CHAR_ORDER: # These are in largest-to-smallest order - if field in skel_formats: - if start_fmt.extract(field) != end_fmt.extract(field): - # > If there is a match, use the pieces of the corresponding pattern to - # > format the start and end datetime, as above. - return "".join( - parse_pattern(pattern).apply(instant, locale) - for pattern, instant - in zip(skel_formats[field], (start, end)) - ) + if field in skel_formats and start_fmt.extract(field) != end_fmt.extract(field): + # > If there is a match, use the pieces of the corresponding pattern to + # > format the start and end datetime, as above. 
+ return "".join( + parse_pattern(pattern).apply(instant, locale) + for pattern, instant + in zip(skel_formats[field], (start, end)) + ) # > Otherwise, format the start and end datetime using the fallback pattern. return _format_fallback_interval(start, end, skeleton, tzinfo, locale) -def get_period_id(time, tzinfo=None, type=None, locale=LC_TIME): +def get_period_id( + time: _Instant, + tzinfo: datetime.tzinfo | None = None, + type: Literal['selection'] | None = None, + locale: Locale | str | None = LC_TIME, +) -> str: """ Get the day period ID for a given time. This ID can be used as a key for the period name dictionary. + >>> from datetime import time >>> get_period_names(locale="de")[get_period_id(time(7, 42), locale="de")] u'Morgen' @@ -1176,7 +1176,11 @@ class ParseError(ValueError): pass -def parse_date(string, locale=LC_TIME, format='medium'): +def parse_date( + string: str, + locale: Locale | str | None = LC_TIME, + format: _PredefinedTimeFormat = 'medium', +) -> datetime.date: """Parse a date from a string. 
This function first tries to interpret the string as ISO-8601 @@ -1206,7 +1210,7 @@ def parse_date(string, locale=LC_TIME, format='medium'): string, flags=re.ASCII) # allow only ASCII digits if iso_alike: try: - return date(*map(int, iso_alike.groups())) + return datetime.date(*map(int, iso_alike.groups())) except ValueError: pass # a locale format might fit better, so let's continue @@ -1217,26 +1221,26 @@ def parse_date(string, locale=LC_TIME, format='medium'): month_idx = format_str.index('l') day_idx = format_str.index('d') - indexes = [(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')] - indexes.sort() + indexes = sorted([(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')]) indexes = {item[1]: idx for idx, item in enumerate(indexes)} # FIXME: this currently only supports numbers, but should also support month # names, both in the requested locale, and english year = numbers[indexes['Y']] - if len(year) == 2: - year = 2000 + int(year) - else: - year = int(year) + year = 2000 + int(year) if len(year) == 2 else int(year) month = int(numbers[indexes['M']]) day = int(numbers[indexes['D']]) if month > 12: month, day = day, month - return date(year, month, day) + return datetime.date(year, month, day) -def parse_time(string, locale=LC_TIME, format='medium'): +def parse_time( + string: str, + locale: Locale | str | None = LC_TIME, + format: _PredefinedTimeFormat = 'medium', +) -> datetime.time: """Parse a time from a string. 
This function uses the time format for the locale as a hint to determine @@ -1263,8 +1267,7 @@ def parse_time(string, locale=LC_TIME, format='medium'): min_idx = format_str.index('m') sec_idx = format_str.index('s') - indexes = [(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')] - indexes.sort() + indexes = sorted([(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')]) indexes = {item[1]: idx for idx, item in enumerate(indexes)} # TODO: support time zones @@ -1272,9 +1275,8 @@ def parse_time(string, locale=LC_TIME, format='medium'): # Check if the format specifies a period to be used; # if it does, look for 'pm' to figure out an offset. hour_offset = 0 - if 'a' in format_str: - if 'pm' in string.lower(): - hour_offset = 12 + if 'a' in format_str and 'pm' in string.lower(): + hour_offset = 12 # Parse up to three numbers from the string. minute = second = 0 @@ -1283,41 +1285,52 @@ def parse_time(string, locale=LC_TIME, format='medium'): minute = int(numbers[indexes['M']]) if len(numbers) > 2: second = int(numbers[indexes['S']]) - return time(hour, minute, second) + return datetime.time(hour, minute, second) class DateTimePattern: - def __init__(self, pattern, format): + def __init__(self, pattern: str, format: DateTimeFormat): self.pattern = pattern self.format = format - def __repr__(self): - return '<%s %r>' % (type(self).__name__, self.pattern) + def __repr__(self) -> str: + return f"<{type(self).__name__} {self.pattern!r}>" - def __str__(self): + def __str__(self) -> str: pat = self.pattern return pat - def __mod__(self, other): - if type(other) is not DateTimeFormat: + def __mod__(self, other: DateTimeFormat) -> str: + if not isinstance(other, DateTimeFormat): return NotImplemented return self.format % other - def apply(self, datetime, locale): - return self % DateTimeFormat(datetime, locale) + def apply( + self, + datetime: datetime.date | datetime.time, + locale: Locale | str | None, + reference_date: datetime.date | None = None, + ) -> str: + return self % 
DateTimeFormat(datetime, locale, reference_date) class DateTimeFormat: - def __init__(self, value, locale): - assert isinstance(value, (date, datetime, time)) - if isinstance(value, (datetime, time)) and value.tzinfo is None: + def __init__( + self, + value: datetime.date | datetime.time, + locale: Locale | str, + reference_date: datetime.date | None = None + ) -> None: + assert isinstance(value, (datetime.date, datetime.datetime, datetime.time)) + if isinstance(value, (datetime.datetime, datetime.time)) and value.tzinfo is None: value = value.replace(tzinfo=UTC) self.value = value self.locale = Locale.parse(locale) + self.reference_date = reference_date - def __getitem__(self, name): + def __getitem__(self, name: str) -> str: char = name[0] num = len(name) if char == 'G': @@ -1365,9 +1378,9 @@ def __getitem__(self, name): elif char in ('z', 'Z', 'v', 'V', 'x', 'X', 'O'): return self.format_timezone(char, num) else: - raise KeyError('Unsupported date/time field %r' % char) + raise KeyError(f"Unsupported date/time field {char!r}") - def extract(self, char): + def extract(self, char: str) -> int: char = str(char)[0] if char == 'y': return self.value.year @@ -1384,14 +1397,14 @@ def extract(self, char): elif char == 'a': return int(self.value.hour >= 12) # 0 for am, 1 for pm else: - raise NotImplementedError("Not implemented: extracting %r from %r" % (char, self.value)) + raise NotImplementedError(f"Not implemented: extracting {char!r} from {self.value!r}") - def format_era(self, char, num): + def format_era(self, char: str, num: int) -> str: width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)] era = int(self.value.year >= 0) return get_era_names(width, self.locale)[era] - def format_year(self, char, num): + def format_year(self, char: str, num: int) -> str: value = self.value.year if char.isupper(): value = self.value.isocalendar()[0] @@ -1400,7 +1413,7 @@ def format_year(self, char, num): year = year[-2:] return year - def format_quarter(self, char, num): 
+ def format_quarter(self, char: str, num: int) -> str: quarter = (self.value.month - 1) // 3 + 1 if num <= 2: return '%0*d' % (num, quarter) @@ -1408,33 +1421,34 @@ def format_quarter(self, char, num): context = {'Q': 'format', 'q': 'stand-alone'}[char] return get_quarter_names(width, context, self.locale)[quarter] - def format_month(self, char, num): + def format_month(self, char: str, num: int) -> str: if num <= 2: return '%0*d' % (num, self.value.month) width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] context = {'M': 'format', 'L': 'stand-alone'}[char] return get_month_names(width, context, self.locale)[self.value.month] - def format_week(self, char, num): + def format_week(self, char: str, num: int) -> str: if char.islower(): # week of year day_of_year = self.get_day_of_year() week = self.get_week_number(day_of_year) if week == 0: - date = self.value - timedelta(days=day_of_year) + date = self.value - datetime.timedelta(days=day_of_year) week = self.get_week_number(self.get_day_of_year(date), date.weekday()) return self.format(week, num) else: # week of month week = self.get_week_number(self.value.day) if week == 0: - date = self.value - timedelta(days=self.value.day) + date = self.value - datetime.timedelta(days=self.value.day) week = self.get_week_number(date.day, date.weekday()) - return '%d' % week + return str(week) - def format_weekday(self, char='E', num=4): + def format_weekday(self, char: str = 'E', num: int = 4) -> str: """ Return weekday from parsed datetime according to format pattern. 
+ >>> from datetime import date >>> format = DateTimeFormat(date(2016, 2, 28), Locale.parse('en_US')) >>> format.format_weekday() u'Sunday' @@ -1465,22 +1479,20 @@ def format_weekday(self, char='E', num=4): num = 3 weekday = self.value.weekday() width = {3: 'abbreviated', 4: 'wide', 5: 'narrow', 6: 'short'}[num] - if char == 'c': - context = 'stand-alone' - else: - context = 'format' + context = "stand-alone" if char == "c" else "format" return get_day_names(width, context, self.locale)[weekday] - def format_day_of_year(self, num): + def format_day_of_year(self, num: int) -> str: return self.format(self.get_day_of_year(), num) - def format_day_of_week_in_month(self): - return '%d' % ((self.value.day - 1) // 7 + 1) + def format_day_of_week_in_month(self) -> str: + return str((self.value.day - 1) // 7 + 1) - def format_period(self, char, num): + def format_period(self, char: str, num: int) -> str: """ Return period from parsed datetime according to format pattern. + >>> from datetime import datetime, time >>> format = DateTimeFormat(time(13, 42), 'fi_FI') >>> format.format_period('a', 1) u'ip.' @@ -1517,9 +1529,9 @@ def format_period(self, char, num): period_names = get_period_names(context=context, width=width, locale=self.locale) if period in period_names: return period_names[period] - raise ValueError('Could not format period %s in %s' % (period, self.locale)) + raise ValueError(f"Could not format period {period} in {self.locale}") - def format_frac_seconds(self, num): + def format_frac_seconds(self, num: int) -> str: """ Return fractional seconds. 
Rounds the time's microseconds to the precision given by the number \ @@ -1533,58 +1545,66 @@ def format_milliseconds_in_day(self, num): self.value.minute * 60000 + self.value.hour * 3600000 return self.format(msecs, num) - def format_timezone(self, char, num): + def format_timezone(self, char: str, num: int) -> str: width = {3: 'short', 4: 'long', 5: 'iso8601'}[max(3, num)] + + # It could be that we only receive a time to format, but also have a + # reference date which is important to distinguish between timezone + # variants (summer/standard time) + value = self.value + if self.reference_date: + value = datetime.datetime.combine(self.reference_date, self.value) + if char == 'z': - return get_timezone_name(self.value, width, locale=self.locale) + return get_timezone_name(value, width, locale=self.locale) elif char == 'Z': if num == 5: - return get_timezone_gmt(self.value, width, locale=self.locale, return_z=True) - return get_timezone_gmt(self.value, width, locale=self.locale) + return get_timezone_gmt(value, width, locale=self.locale, return_z=True) + return get_timezone_gmt(value, width, locale=self.locale) elif char == 'O': if num == 4: - return get_timezone_gmt(self.value, width, locale=self.locale) + return get_timezone_gmt(value, width, locale=self.locale) # TODO: To add support for O:1 elif char == 'v': - return get_timezone_name(self.value.tzinfo, width, + return get_timezone_name(value.tzinfo, width, locale=self.locale) elif char == 'V': if num == 1: - return get_timezone_name(self.value.tzinfo, width, + return get_timezone_name(value.tzinfo, width, uncommon=True, locale=self.locale) elif num == 2: - return get_timezone_name(self.value.tzinfo, locale=self.locale, return_zone=True) + return get_timezone_name(value.tzinfo, locale=self.locale, return_zone=True) elif num == 3: - return get_timezone_location(self.value.tzinfo, locale=self.locale, return_city=True) - return get_timezone_location(self.value.tzinfo, locale=self.locale) + return 
get_timezone_location(value.tzinfo, locale=self.locale, return_city=True) + return get_timezone_location(value.tzinfo, locale=self.locale) # Included additional elif condition to add support for 'Xx' in timezone format elif char == 'X': if num == 1: - return get_timezone_gmt(self.value, width='iso8601_short', locale=self.locale, + return get_timezone_gmt(value, width='iso8601_short', locale=self.locale, return_z=True) elif num in (2, 4): - return get_timezone_gmt(self.value, width='short', locale=self.locale, + return get_timezone_gmt(value, width='short', locale=self.locale, return_z=True) elif num in (3, 5): - return get_timezone_gmt(self.value, width='iso8601', locale=self.locale, + return get_timezone_gmt(value, width='iso8601', locale=self.locale, return_z=True) elif char == 'x': if num == 1: - return get_timezone_gmt(self.value, width='iso8601_short', locale=self.locale) + return get_timezone_gmt(value, width='iso8601_short', locale=self.locale) elif num in (2, 4): - return get_timezone_gmt(self.value, width='short', locale=self.locale) + return get_timezone_gmt(value, width='short', locale=self.locale) elif num in (3, 5): - return get_timezone_gmt(self.value, width='iso8601', locale=self.locale) + return get_timezone_gmt(value, width='iso8601', locale=self.locale) - def format(self, value, length): + def format(self, value: SupportsInt, length: int) -> str: return '%0*d' % (length, value) - def get_day_of_year(self, date=None): + def get_day_of_year(self, date: datetime.date | None = None) -> int: if date is None: date = self.value return (date - date.replace(month=1, day=1)).days + 1 - def get_week_number(self, day_of_period, day_of_week=None): + def get_week_number(self, day_of_period: int, day_of_week: int | None = None) -> int: """Return the number of the week of a day within a period. This may be the week number in a year or the week number in a month. 
@@ -1592,17 +1612,15 @@ def get_week_number(self, day_of_period, day_of_week=None): first week of the period is so short that it actually counts as the last week of the previous period, this function will return 0. - >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('de_DE')) - >>> format.get_week_number(6) + >>> date = datetime.date(2006, 1, 8) + >>> DateTimeFormat(date, 'de_DE').get_week_number(6) 1 - - >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('en_US')) - >>> format.get_week_number(6) + >>> DateTimeFormat(date, 'en_US').get_week_number(6) 2 :param day_of_period: the number of the day in the period (usually either the day of month or the day of year) - :param day_of_week: the week day; if ommitted, the week day of the + :param day_of_week: the week day; if omitted, the week day of the current date is assumed """ if day_of_week is None: @@ -1622,14 +1640,14 @@ def get_week_number(self, day_of_period, day_of_week=None): # we must count from zero.For example the above calculation gives week 53 # for 2018-12-31. By iso-calender definition 2018 has a max of 52 # weeks, thus the weeknumber must be 53-52=1. - max_weeks = date(year=self.value.year, day=28, month=12).isocalendar()[1] + max_weeks = datetime.date(year=self.value.year, day=28, month=12).isocalendar()[1] if week_number > max_weeks: week_number -= max_weeks return week_number -PATTERN_CHARS = { +PATTERN_CHARS: dict[str, list[int] | None] = { 'G': [1, 2, 3, 4, 5], # era 'y': None, 'Y': None, 'u': None, # year 'Q': [1, 2, 3, 4, 5], 'q': [1, 2, 3, 4, 5], # quarter @@ -1650,10 +1668,8 @@ def get_week_number(self, day_of_period, day_of_week=None): #: in order of decreasing magnitude. PATTERN_CHAR_ORDER = "GyYuUQqMLlwWdDFgEecabBChHKkjJmsSAzZOvVXx" -_pattern_cache = {} - -def parse_pattern(pattern): +def parse_pattern(pattern: str | DateTimePattern) -> DateTimePattern: """Parse date, time, and datetime format patterns. 
>>> parse_pattern("MMMMd").format @@ -1674,12 +1690,13 @@ def parse_pattern(pattern): :param pattern: the formatting pattern to parse """ - if type(pattern) is DateTimePattern: + if isinstance(pattern, DateTimePattern): return pattern + return _cached_parse_pattern(pattern) - if pattern in _pattern_cache: - return _pattern_cache[pattern] +@lru_cache(maxsize=1024) +def _cached_parse_pattern(pattern: str) -> DateTimePattern: result = [] for tok_type, tok_value in tokenize_pattern(pattern): @@ -1689,17 +1706,14 @@ def parse_pattern(pattern): fieldchar, fieldnum = tok_value limit = PATTERN_CHARS[fieldchar] if limit and fieldnum not in limit: - raise ValueError('Invalid length for field: %r' - % (fieldchar * fieldnum)) + raise ValueError(f"Invalid length for field: {fieldchar * fieldnum!r}") result.append('%%(%s)s' % (fieldchar * fieldnum)) else: - raise NotImplementedError("Unknown token type: %s" % tok_type) - - _pattern_cache[pattern] = pat = DateTimePattern(pattern, u''.join(result)) - return pat + raise NotImplementedError(f"Unknown token type: {tok_type}") + return DateTimePattern(pattern, ''.join(result)) -def tokenize_pattern(pattern): +def tokenize_pattern(pattern: str) -> list[tuple[str, str | tuple[str, int]]]: """ Tokenize date format patterns. @@ -1730,7 +1744,7 @@ def append_field(): fieldchar[0] = '' fieldnum[0] = 0 - for idx, char in enumerate(pattern.replace("''", '\0')): + for char in pattern.replace("''", '\0'): if quotebuf is None: if char == "'": # quote started if fieldchar[0]: @@ -1768,7 +1782,7 @@ def append_field(): return result -def untokenize_pattern(tokens): +def untokenize_pattern(tokens: Iterable[tuple[str, str | tuple[str, int]]]) -> str: """ Turn a date format pattern token stream back into a string. 
@@ -1789,7 +1803,7 @@ def untokenize_pattern(tokens): return "".join(output) -def split_interval_pattern(pattern): +def split_interval_pattern(pattern: str) -> list[str]: """ Split an interval-describing datetime pattern into multiple pieces. @@ -1827,7 +1841,7 @@ def split_interval_pattern(pattern): return [untokenize_pattern(tokens) for tokens in parts] -def match_skeleton(skeleton, options, allow_different_fields=False): +def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields: bool = False) -> str | None: """ Find the closest match for the given datetime skeleton among the options given. diff --git a/babel/languages.py b/babel/languages.py index cac59c162..564f555d2 100644 --- a/babel/languages.py +++ b/babel/languages.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from babel.core import get_global -def get_official_languages(territory, regional=False, de_facto=False): +def get_official_languages(territory: str, regional: bool = False, de_facto: bool = False) -> tuple[str, ...]: """ Get the official language(s) for the given territory. @@ -41,7 +43,7 @@ def get_official_languages(territory, regional=False, de_facto=False): return tuple(lang for _, lang in pairs) -def get_territory_language_info(territory): +def get_territory_language_info(territory: str) -> dict[str, dict[str, float | str | None]]: """ Get a dictionary of language information for a territory. diff --git a/babel/lists.py b/babel/lists.py index 11cc7d725..97fc49a71 100644 --- a/babel/lists.py +++ b/babel/lists.py @@ -13,13 +13,22 @@ :copyright: (c) 2015-2022 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING from babel.core import Locale, default_locale +if TYPE_CHECKING: + from typing_extensions import Literal + DEFAULT_LOCALE = default_locale() -def format_list(lst, style='standard', locale=DEFAULT_LOCALE): +def format_list(lst: Sequence[str], + style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard', + locale: Locale | str | None = DEFAULT_LOCALE) -> str: """ Format the items in `lst` as a list. @@ -68,11 +77,10 @@ def format_list(lst, style='standard', locale=DEFAULT_LOCALE): return lst[0] if style not in locale.list_patterns: - raise ValueError('Locale %s does not support list formatting style %r (supported are %s)' % ( - locale, - style, - list(sorted(locale.list_patterns)), - )) + raise ValueError( + f'Locale {locale} does not support list formatting style {style!r} ' + f'(supported are {sorted(locale.list_patterns)})' + ) patterns = locale.list_patterns[style] if len(lst) == 2: diff --git a/babel/localedata.py b/babel/localedata.py index 14e6bcdf4..a9c7c75ec 100644 --- a/babel/localedata.py +++ b/babel/localedata.py @@ -11,22 +11,26 @@ :license: BSD, see LICENSE for more details. """ -import pickle +from __future__ import annotations + import os +import pickle import re import sys import threading from collections import abc +from collections.abc import Iterator, Mapping, MutableMapping +from functools import lru_cache from itertools import chain +from typing import Any - -_cache = {} +_cache: dict[str, Any] = {} _cache_lock = threading.RLock() _dirname = os.path.join(os.path.dirname(__file__), 'locale-data') _windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I) -def normalize_locale(name): +def normalize_locale(name: str) -> str | None: """Normalize a locale ID by stripping spaces and apply proper casing. 
Returns the normalized locale ID string or `None` if the ID is not @@ -40,7 +44,7 @@ def normalize_locale(name): return locale_id -def resolve_locale_filename(name): +def resolve_locale_filename(name: os.PathLike[str] | str) -> str: """ Resolve a locale identifier to a `.dat` path on disk. """ @@ -50,13 +54,13 @@ def resolve_locale_filename(name): # Ensure we're not left with one of the Windows reserved names. if sys.platform == "win32" and _windows_reserved_name_re.match(os.path.splitext(name)[0]): - raise ValueError("Name %s is invalid on Windows" % name) + raise ValueError(f"Name {name} is invalid on Windows") # Build the path. - return os.path.join(_dirname, '%s.dat' % name) + return os.path.join(_dirname, f"{name}.dat") -def exists(name): +def exists(name: str) -> bool: """Check whether locale data is available for the given locale. Returns `True` if it exists, `False` otherwise. @@ -71,31 +75,27 @@ def exists(name): return True if file_found else bool(normalize_locale(name)) -def locale_identifiers(): +@lru_cache(maxsize=None) +def locale_identifiers() -> list[str]: """Return a list of all locale identifiers for which locale data is available. - This data is cached after the first invocation in `locale_identifiers.cache`. - - Removing the `locale_identifiers.cache` attribute or setting it to `None` - will cause this function to re-read the list from disk. + This data is cached after the first invocation. + You can clear the cache by calling `locale_identifiers.cache_clear()`. .. 
versionadded:: 0.8.1 :return: a list of locale identifiers (strings) """ - data = getattr(locale_identifiers, 'cache', None) - if data is None: - locale_identifiers.cache = data = [ - stem - for stem, extension in - (os.path.splitext(filename) for filename in os.listdir(_dirname)) - if extension == '.dat' and stem != 'root' - ] - return data - - -def load(name, merge_inherited=True): + return [ + stem + for stem, extension in + (os.path.splitext(filename) for filename in os.listdir(_dirname)) + if extension == '.dat' and stem != 'root' + ] + + +def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]: """Load the locale data for the given locale. The locale data is a dictionary that contains much of the data defined by @@ -118,7 +118,7 @@ def load(name, merge_inherited=True): :param merge_inherited: whether the inherited data should be merged into the data of the requested locale :raise `IOError`: if no locale data file is found for the given locale - identifer, or one of the locales it inherits from + identifier, or one of the locales it inherits from """ name = os.path.basename(name) _cache_lock.acquire() @@ -133,10 +133,7 @@ def load(name, merge_inherited=True): parent = get_global('parent_exceptions').get(name) if not parent: parts = name.split('_') - if len(parts) == 1: - parent = 'root' - else: - parent = '_'.join(parts[:-1]) + parent = "root" if len(parts) == 1 else "_".join(parts[:-1]) data = load(parent).copy() filename = resolve_locale_filename(name) with open(filename, 'rb') as fileobj: @@ -150,7 +147,7 @@ def load(name, merge_inherited=True): _cache_lock.release() -def merge(dict1, dict2): +def merge(dict1: MutableMapping[Any, Any], dict2: Mapping[Any, Any]) -> None: """Merge the data from `dict2` into the `dict1` dictionary, making copies of nested dictionaries. @@ -190,13 +187,13 @@ class Alias: as specified by the `keys`. 
""" - def __init__(self, keys): + def __init__(self, keys: tuple[str, ...]) -> None: self.keys = tuple(keys) - def __repr__(self): - return '<%s %r>' % (type(self).__name__, self.keys) + def __repr__(self) -> str: + return f"<{type(self).__name__} {self.keys!r}>" - def resolve(self, data): + def resolve(self, data: Mapping[str | int | None, Any]) -> Mapping[str | int | None, Any]: """Resolve the alias based on the given data. This is done recursively, so if one alias resolves to a second alias, @@ -221,19 +218,19 @@ class LocaleDataDict(abc.MutableMapping): values. """ - def __init__(self, data, base=None): + def __init__(self, data: MutableMapping[str | int | None, Any], base: Mapping[str | int | None, Any] | None = None): self._data = data if base is None: base = data self.base = base - def __len__(self): + def __len__(self) -> int: return len(self._data) - def __iter__(self): + def __iter__(self) -> Iterator[str | int | None]: return iter(self._data) - def __getitem__(self, key): + def __getitem__(self, key: str | int | None) -> Any: orig = val = self._data[key] if isinstance(val, Alias): # resolve an alias val = val.resolve(self.base) @@ -241,17 +238,17 @@ def __getitem__(self, key): alias, others = val val = alias.resolve(self.base).copy() merge(val, others) - if type(val) is dict: # Return a nested alias-resolving dict + if isinstance(val, dict): # Return a nested alias-resolving dict val = LocaleDataDict(val, base=self.base) if val is not orig: self._data[key] = val return val - def __setitem__(self, key, value): + def __setitem__(self, key: str | int | None, value: Any) -> None: self._data[key] = value - def __delitem__(self, key): + def __delitem__(self, key: str | int | None) -> None: del self._data[key] - def copy(self): + def copy(self) -> LocaleDataDict: return LocaleDataDict(self._data.copy(), base=self.base) diff --git a/babel/localtime/__init__.py b/babel/localtime/__init__.py index 7e626a0f1..29577fa37 100644 --- a/babel/localtime/__init__.py +++ 
b/babel/localtime/__init__.py @@ -9,12 +9,8 @@ :license: BSD, see LICENSE for more details. """ +import datetime import sys -import pytz -import time -from datetime import timedelta -from datetime import tzinfo -from threading import RLock if sys.platform == 'win32': from babel.localtime._win32 import _get_localzone @@ -22,46 +18,18 @@ from babel.localtime._unix import _get_localzone -_cached_tz = None -_cache_lock = RLock() - -STDOFFSET = timedelta(seconds=-time.timezone) -if time.daylight: - DSTOFFSET = timedelta(seconds=-time.altzone) -else: - DSTOFFSET = STDOFFSET - -DSTDIFF = DSTOFFSET - STDOFFSET -ZERO = timedelta(0) - - -class _FallbackLocalTimezone(tzinfo): - - def utcoffset(self, dt): - if self._isdst(dt): - return DSTOFFSET - else: - return STDOFFSET - - def dst(self, dt): - if self._isdst(dt): - return DSTDIFF - else: - return ZERO - - def tzname(self, dt): - return time.tzname[self._isdst(dt)] - - def _isdst(self, dt): - tt = (dt.year, dt.month, dt.day, - dt.hour, dt.minute, dt.second, - dt.weekday(), 0, -1) - stamp = time.mktime(tt) - tt = time.localtime(stamp) - return tt.tm_isdst > 0 +# TODO(3.0): the offset constants are not part of the public API +# and should be removed +from babel.localtime._fallback import ( + DSTDIFF, # noqa: F401 + DSTOFFSET, # noqa: F401 + STDOFFSET, # noqa: F401 + ZERO, # noqa: F401 + _FallbackLocalTimezone, +) -def get_localzone(): +def get_localzone() -> datetime.tzinfo: """Returns the current underlying local timezone object. Generally this function does not need to be used, it's a better idea to use the :data:`LOCALTZ` singleton instead. 
@@ -71,5 +39,5 @@ def get_localzone(): try: LOCALTZ = get_localzone() -except pytz.UnknownTimeZoneError: +except LookupError: LOCALTZ = _FallbackLocalTimezone() diff --git a/babel/localtime/_fallback.py b/babel/localtime/_fallback.py new file mode 100644 index 000000000..14979a53b --- /dev/null +++ b/babel/localtime/_fallback.py @@ -0,0 +1,44 @@ +""" + babel.localtime._fallback + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Emulated fallback local timezone when all else fails. + + :copyright: (c) 2013-2023 by the Babel Team. + :license: BSD, see LICENSE for more details. +""" + +import datetime +import time + +STDOFFSET = datetime.timedelta(seconds=-time.timezone) +DSTOFFSET = datetime.timedelta(seconds=-time.altzone) if time.daylight else STDOFFSET + +DSTDIFF = DSTOFFSET - STDOFFSET +ZERO = datetime.timedelta(0) + + +class _FallbackLocalTimezone(datetime.tzinfo): + + def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta: + if self._isdst(dt): + return DSTOFFSET + else: + return STDOFFSET + + def dst(self, dt: datetime.datetime) -> datetime.timedelta: + if self._isdst(dt): + return DSTDIFF + else: + return ZERO + + def tzname(self, dt: datetime.datetime) -> str: + return time.tzname[self._isdst(dt)] + + def _isdst(self, dt: datetime.datetime) -> bool: + tt = (dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.weekday(), 0, -1) + stamp = time.mktime(tt) + tt = time.localtime(stamp) + return tt.tm_isdst > 0 diff --git a/babel/localtime/_helpers.py b/babel/localtime/_helpers.py new file mode 100644 index 000000000..159f9a569 --- /dev/null +++ b/babel/localtime/_helpers.py @@ -0,0 +1,43 @@ +try: + import pytz +except ModuleNotFoundError: + pytz = None + import zoneinfo + + +def _get_tzinfo(tzenv: str): + """Get the tzinfo from `zoneinfo` or `pytz` + + :param tzenv: timezone in the form of Continent/City + :return: tzinfo object or None if not found + """ + if pytz: + try: + return pytz.timezone(tzenv) + except pytz.UnknownTimeZoneError: + pass + else: + try: 
+ return zoneinfo.ZoneInfo(tzenv) + except zoneinfo.ZoneInfoNotFoundError: + pass + + return None + + +def _get_tzinfo_or_raise(tzenv: str): + tzinfo = _get_tzinfo(tzenv) + if tzinfo is None: + raise LookupError( + f"Can not find timezone {tzenv}. \n" + "Timezone names are generally in the form `Continent/City`." + ) + return tzinfo + + +def _get_tzinfo_from_file(tzfilename: str): + with open(tzfilename, 'rb') as tzfile: + if pytz: + return pytz.tzfile.build_tzinfo('local', tzfile) + else: + return zoneinfo.ZoneInfo.from_file(tzfile) diff --git a/babel/localtime/_unix.py b/babel/localtime/_unix.py index 3d1480ed8..eb81beb61 100644 --- a/babel/localtime/_unix.py +++ b/babel/localtime/_unix.py @@ -1,33 +1,31 @@ +import datetime import os import re -import pytz +from babel.localtime._helpers import ( + _get_tzinfo, + _get_tzinfo_from_file, + _get_tzinfo_or_raise, +) -def _tz_from_env(tzenv): + +def _tz_from_env(tzenv: str) -> datetime.tzinfo: if tzenv[0] == ':': tzenv = tzenv[1:] # TZ specifies a file if os.path.exists(tzenv): - with open(tzenv, 'rb') as tzfile: - return pytz.tzfile.build_tzinfo('local', tzfile) + return _get_tzinfo_from_file(tzenv) # TZ specifies a zoneinfo zone. - try: - tz = pytz.timezone(tzenv) - # That worked, so we return this: - return tz - except pytz.UnknownTimeZoneError: - raise pytz.UnknownTimeZoneError( - "tzlocal() does not support non-zoneinfo timezones like %s. \n" - "Please use a timezone in the form of Continent/City") + return _get_tzinfo_or_raise(tzenv) -def _get_localzone(_root='/'): +def _get_localzone(_root: str = '/') -> datetime.tzinfo: """Tries to find the local timezone configuration. - This method prefers finding the timezone name and passing that to pytz, - over passing in the localtime file, as in the later case the zoneinfo - name is unknown. + This method prefers finding the timezone name and passing that to + zoneinfo or pytz, over passing in the localtime file, as in the later + case the zoneinfo name is unknown. 
The parameter _root makes the function look for files like /etc/localtime beneath the _root directory. This is primarily used by the tests. In normal usage you call the function without parameters. @@ -48,10 +46,9 @@ def _get_localzone(_root='/'): pos = link_dst.find('/zoneinfo/') if pos >= 0: zone_name = link_dst[pos + 10:] - try: - return pytz.timezone(zone_name) - except pytz.UnknownTimeZoneError: - pass + tzinfo = _get_tzinfo(zone_name) + if tzinfo is not None: + return tzinfo # Now look for distribution specific configuration files # that contain the timezone name. @@ -69,7 +66,8 @@ def _get_localzone(_root='/'): etctz, dummy = etctz.split(' ', 1) if '#' in etctz: etctz, dummy = etctz.split('#', 1) - return pytz.timezone(etctz.replace(' ', '_')) + + return _get_tzinfo_or_raise(etctz.replace(' ', '_')) # CentOS has a ZONE setting in /etc/sysconfig/clock, # OpenSUSE has a TIMEZONE setting in /etc/sysconfig/clock and @@ -87,7 +85,7 @@ def _get_localzone(_root='/'): if match is not None: # We found a timezone etctz = match.group("etctz") - return pytz.timezone(etctz.replace(' ', '_')) + return _get_tzinfo_or_raise(etctz.replace(' ', '_')) # No explicit setting existed. 
Use localtime for filename in ('etc/localtime', 'usr/local/etc/localtime'): @@ -95,8 +93,6 @@ def _get_localzone(_root='/'): if not os.path.exists(tzpath): continue + return _get_tzinfo_from_file(tzpath) - with open(tzpath, 'rb') as tzfile: - return pytz.tzfile.build_tzinfo('local', tzfile) - - raise pytz.UnknownTimeZoneError('Can not find any timezone configuration') + raise LookupError('Can not find any timezone configuration') diff --git a/babel/localtime/_win32.py b/babel/localtime/_win32.py index 09b87b14e..1a52567bc 100644 --- a/babel/localtime/_win32.py +++ b/babel/localtime/_win32.py @@ -1,23 +1,27 @@ +from __future__ import annotations + try: import winreg except ImportError: winreg = None -from babel.core import get_global -import pytz +import datetime +from typing import Any, Dict, cast +from babel.core import get_global +from babel.localtime._helpers import _get_tzinfo_or_raise # When building the cldr data on windows this module gets imported. # Because at that point there is no global.dat yet this call will # fail. We want to catch it down in that case then and just assume # the mapping was empty. try: - tz_names = get_global('windows_zone_mapping') + tz_names: dict[str, str] = cast(Dict[str, str], get_global('windows_zone_mapping')) except RuntimeError: tz_names = {} -def valuestodict(key): +def valuestodict(key) -> dict[str, Any]: """Convert a registry key's values to a dictionary.""" dict = {} size = winreg.QueryInfoKey(key)[1] @@ -27,7 +31,7 @@ def valuestodict(key): return dict -def get_localzone_name(): +def get_localzone_name() -> str: # Windows is special. It has unique time zone names (in several # meanings of the word) available, but unfortunately, they can be # translated to the language of the operating system, so we need to @@ -77,17 +81,18 @@ def get_localzone_name(): if timezone is None: # Nope, that didn't work. 
Try adding 'Standard Time', # it seems to work a lot of times: - timezone = tz_names.get(tzkeyname + ' Standard Time') + timezone = tz_names.get(f"{tzkeyname} Standard Time") # Return what we have. if timezone is None: - raise pytz.UnknownTimeZoneError('Can not find timezone ' + tzkeyname) + raise LookupError(f"Can not find timezone {tzkeyname}") return timezone -def _get_localzone(): +def _get_localzone() -> datetime.tzinfo: if winreg is None: - raise pytz.UnknownTimeZoneError( + raise LookupError( 'Runtime support not available') - return pytz.timezone(get_localzone_name()) + + return _get_tzinfo_or_raise(get_localzone_name()) diff --git a/babel/messages/__init__.py b/babel/messages/__init__.py index ad4fd346d..60aeba0ed 100644 --- a/babel/messages/__init__.py +++ b/babel/messages/__init__.py @@ -8,4 +8,14 @@ :license: BSD, see LICENSE for more details. """ -from babel.messages.catalog import * +from babel.messages.catalog import ( + Catalog, + Message, + TranslationError, +) + +__all__ = [ + "Catalog", + "Message", + "TranslationError", +] diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index e43a28c02..47b9e62ea 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -7,23 +7,56 @@ :copyright: (c) 2013-2022 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" +from __future__ import annotations +import datetime import re - from collections import OrderedDict -from datetime import datetime, time as time_ -from difflib import get_close_matches -from email import message_from_string +from collections.abc import Iterable, Iterator from copy import copy +from difflib import SequenceMatcher +from email import message_from_string +from heapq import nlargest +from typing import TYPE_CHECKING from babel import __version__ as VERSION from babel.core import Locale, UnknownLocaleError from babel.dates import format_datetime from babel.messages.plurals import get_plural -from babel.util import distinct, LOCALTZ, FixedOffsetTimezone, _cmp +from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + _MessageID: TypeAlias = str | tuple[str, ...] | list[str] __all__ = ['Message', 'Catalog', 'TranslationError'] +def get_close_matches(word, possibilities, n=3, cutoff=0.6): + """A modified version of ``difflib.get_close_matches``. + + It just passes ``autojunk=False`` to the ``SequenceMatcher``, to work + around https://github.com/python/cpython/issues/90825. 
+ """ + if not n > 0: # pragma: no cover + raise ValueError(f"n must be > 0: {n!r}") + if not 0.0 <= cutoff <= 1.0: # pragma: no cover + raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}") + result = [] + s = SequenceMatcher(autojunk=False) # only line changed from difflib.py + s.set_seq2(word) + for x in possibilities: + s.set_seq1(x) + if s.real_quick_ratio() >= cutoff and \ + s.quick_ratio() >= cutoff and \ + s.ratio() >= cutoff: + result.append((s.ratio(), x)) + + # Move the best scorers to head of list + result = nlargest(n, result) + # Strip scores for the best n matches + return [x for score, x in result] + PYTHON_FORMAT = re.compile(r''' \% @@ -37,10 +70,10 @@ ''', re.VERBOSE) -def _parse_datetime_header(value): +def _parse_datetime_header(value: str) -> datetime.datetime: match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value) - dt = datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M') + dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M') # Separate the offset into a sign component, hours, and # minutes tzoffset = match.group('tzoffset') @@ -49,7 +82,7 @@ def _parse_datetime_header(value): hours_offset_s, mins_offset_s = rest[:2], rest[2:] # Make them all integers - plus_minus = int(plus_minus_s + '1') + plus_minus = int(f"{plus_minus_s}1") hours_offset = int(hours_offset_s) mins_offset = int(mins_offset_s) @@ -70,8 +103,18 @@ def _parse_datetime_header(value): class Message: """Representation of a single message in a catalog.""" - def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=(), lineno=None, context=None): + def __init__( + self, + id: _MessageID, + string: _MessageID | None = '', + locations: Iterable[tuple[str, int]] = (), + flags: Iterable[str] = (), + auto_comments: Iterable[str] = (), + user_comments: Iterable[str] = (), + previous_id: _MessageID = (), + lineno: int | None = None, + context: str | None = None, + ) -> None: """Create the message 
object. :param id: the message ID, or a ``(singular, plural)`` tuple for @@ -90,7 +133,7 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), """ self.id = id if not string and self.pluralizable: - string = (u'', u'') + string = ('', '') self.string = string self.locations = list(distinct(locations)) self.flags = set(flags) @@ -107,11 +150,10 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), self.lineno = lineno self.context = context - def __repr__(self): - return '<%s %r (flags: %r)>' % (type(self).__name__, self.id, - list(self.flags)) + def __repr__(self) -> str: + return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>" - def __cmp__(self, other): + def __cmp__(self, other: object) -> int: """Compare Messages, taking into account plural ids""" def values_to_compare(obj): if isinstance(obj, Message) and obj.pluralizable: @@ -119,38 +161,38 @@ def values_to_compare(obj): return obj.id, obj.context or '' return _cmp(values_to_compare(self), values_to_compare(other)) - def __gt__(self, other): + def __gt__(self, other: object) -> bool: return self.__cmp__(other) > 0 - def __lt__(self, other): + def __lt__(self, other: object) -> bool: return self.__cmp__(other) < 0 - def __ge__(self, other): + def __ge__(self, other: object) -> bool: return self.__cmp__(other) >= 0 - def __le__(self, other): + def __le__(self, other: object) -> bool: return self.__cmp__(other) <= 0 - def __eq__(self, other): + def __eq__(self, other: object) -> bool: return self.__cmp__(other) == 0 - def __ne__(self, other): + def __ne__(self, other: object) -> bool: return self.__cmp__(other) != 0 - def is_identical(self, other): + def is_identical(self, other: Message) -> bool: """Checks whether messages are identical, taking into account all properties. 
""" assert isinstance(other, Message) return self.__dict__ == other.__dict__ - def clone(self): + def clone(self) -> Message: return Message(*map(copy, (self.id, self.string, self.locations, self.flags, self.auto_comments, self.user_comments, self.previous_id, self.lineno, self.context))) - def check(self, catalog=None): + def check(self, catalog: Catalog | None = None) -> list[TranslationError]: """Run various validation checks on the message. Some validations are only performed if the catalog is provided. This method returns a sequence of `TranslationError` objects. @@ -161,7 +203,7 @@ def check(self, catalog=None): in a catalog. """ from babel.messages.checkers import checkers - errors = [] + errors: list[TranslationError] = [] for checker in checkers: try: checker(catalog, self) @@ -170,7 +212,7 @@ def check(self, catalog=None): return errors @property - def fuzzy(self): + def fuzzy(self) -> bool: """Whether the translation is fuzzy. >>> Message('foo').fuzzy @@ -185,7 +227,7 @@ def fuzzy(self): return 'fuzzy' in self.flags @property - def pluralizable(self): + def pluralizable(self) -> bool: """Whether the message is plurizable. >>> Message('foo').pluralizable @@ -197,7 +239,7 @@ def pluralizable(self): return isinstance(self.id, (list, tuple)) @property - def python_format(self): + def python_format(self) -> bool: """Whether the message contains Python-style parameters. >>> Message('foo %(name)s bar').python_format @@ -217,14 +259,15 @@ class TranslationError(Exception): translations are encountered.""" -DEFAULT_HEADER = u"""\ +DEFAULT_HEADER = """\ # Translations template for PROJECT. # Copyright (C) YEAR ORGANIZATION # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , YEAR. 
#""" -def parse_separated_header(value: str): + +def parse_separated_header(value: str) -> dict[str, str]: # Adapted from https://peps.python.org/pep-0594/#cgi from email.message import Message m = Message() @@ -235,11 +278,22 @@ def parse_separated_header(value: str): class Catalog: """Representation of a message catalog.""" - def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER, - project=None, version=None, copyright_holder=None, - msgid_bugs_address=None, creation_date=None, - revision_date=None, last_translator=None, language_team=None, - charset=None, fuzzy=True): + def __init__( + self, + locale: str | Locale | None = None, + domain: str | None = None, + header_comment: str | None = DEFAULT_HEADER, + project: str | None = None, + version: str | None = None, + copyright_holder: str | None = None, + msgid_bugs_address: str | None = None, + creation_date: datetime.datetime | str | None = None, + revision_date: datetime.datetime | datetime.time | float | str | None = None, + last_translator: str | None = None, + language_team: str | None = None, + charset: str | None = None, + fuzzy: bool = True, + ) -> None: """Initialize the catalog object. 
:param locale: the locale identifier or `Locale` object, or `None` @@ -263,7 +317,7 @@ def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER, self.domain = domain self.locale = locale self._header_comment = header_comment - self._messages = OrderedDict() + self._messages: OrderedDict[str | tuple[str, str], Message] = OrderedDict() self.project = project or 'PROJECT' self.version = version or 'VERSION' @@ -278,22 +332,23 @@ def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER, self.charset = charset or 'utf-8' if creation_date is None: - creation_date = datetime.now(LOCALTZ) - elif isinstance(creation_date, datetime) and not creation_date.tzinfo: + creation_date = datetime.datetime.now(LOCALTZ) + elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo: creation_date = creation_date.replace(tzinfo=LOCALTZ) self.creation_date = creation_date if revision_date is None: revision_date = 'YEAR-MO-DA HO:MI+ZONE' - elif isinstance(revision_date, datetime) and not revision_date.tzinfo: + elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo: revision_date = revision_date.replace(tzinfo=LOCALTZ) self.revision_date = revision_date self.fuzzy = fuzzy - self.obsolete = OrderedDict() # Dictionary of obsolete messages + # Dictionary of obsolete messages + self.obsolete: OrderedDict[str | tuple[str, str], Message] = OrderedDict() self._num_plurals = None self._plural_expr = None - def _set_locale(self, locale): + def _set_locale(self, locale: Locale | str | None) -> None: if locale is None: self._locale_identifier = None self._locale = None @@ -312,20 +367,20 @@ def _set_locale(self, locale): self._locale = None return - raise TypeError('`locale` must be a Locale, a locale identifier string, or None; got %r' % locale) + raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}") - def _get_locale(self): + def _get_locale(self) -> Locale | None: return 
self._locale - def _get_locale_identifier(self): + def _get_locale_identifier(self) -> str | None: return self._locale_identifier locale = property(_get_locale, _set_locale) locale_identifier = property(_get_locale_identifier) - def _get_header_comment(self): + def _get_header_comment(self) -> str: comment = self._header_comment - year = datetime.now(LOCALTZ).strftime('%Y') + year = datetime.datetime.now(LOCALTZ).strftime('%Y') if hasattr(self.revision_date, 'strftime'): year = self.revision_date.strftime('%Y') comment = comment.replace('PROJECT', self.project) \ @@ -334,10 +389,10 @@ def _get_header_comment(self): .replace('ORGANIZATION', self.copyright_holder) locale_name = (self.locale.english_name if self.locale else self.locale_identifier) if locale_name: - comment = comment.replace('Translations template', '%s translations' % locale_name) + comment = comment.replace("Translations template", f"{locale_name} translations") return comment - def _set_header_comment(self, string): + def _set_header_comment(self, string: str | None) -> None: self._header_comment = string header_comment = property(_get_header_comment, _set_header_comment, doc="""\ @@ -373,15 +428,14 @@ def _set_header_comment(self, string): :type: `unicode` """) - def _get_mime_headers(self): - headers = [] - headers.append(('Project-Id-Version', - '%s %s' % (self.project, self.version))) + def _get_mime_headers(self) -> list[tuple[str, str]]: + headers: list[tuple[str, str]] = [] + headers.append(("Project-Id-Version", f"{self.project} {self.version}")) headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address)) headers.append(('POT-Creation-Date', format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', locale='en'))) - if isinstance(self.revision_date, (datetime, time_, int, float)): + if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)): headers.append(('PO-Revision-Date', format_datetime(self.revision_date, 'yyyy-MM-dd HH:mmZ', locale='en'))) @@ -399,26 
+453,25 @@ def _get_mime_headers(self): if self.locale is not None: headers.append(('Plural-Forms', self.plural_forms)) headers.append(('MIME-Version', '1.0')) - headers.append(('Content-Type', - 'text/plain; charset=%s' % self.charset)) + headers.append(("Content-Type", f"text/plain; charset={self.charset}")) headers.append(('Content-Transfer-Encoding', '8bit')) - headers.append(('Generated-By', 'Babel %s\n' % VERSION)) + headers.append(("Generated-By", f"Babel {VERSION}\n")) return headers - def _force_text(self, s, encoding='utf-8', errors='strict'): + def _force_text(self, s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str: if isinstance(s, str): return s if isinstance(s, bytes): return s.decode(encoding, errors) return str(s) - def _set_mime_headers(self, headers): + def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None: for name, value in headers: name = self._force_text(name.lower(), encoding=self.charset) value = self._force_text(value, encoding=self.charset) if name == 'project-id-version': parts = value.split(' ') - self.project = u' '.join(parts[:-1]) + self.project = ' '.join(parts[:-1]) self.version = parts[-1] elif name == 'report-msgid-bugs-to': self.msgid_bugs_address = value @@ -434,7 +487,7 @@ def _set_mime_headers(self, headers): if 'charset' in params: self.charset = params['charset'].lower() elif name == 'plural-forms': - params = parse_separated_header(' ;' + value) + params = parse_separated_header(f" ;{value}") self._num_plurals = int(params.get('nplurals', 2)) self._plural_expr = params.get('plural', '(n != 1)') elif name == 'pot-creation-date': @@ -454,6 +507,7 @@ def _set_mime_headers(self, headers): Here's an example of the output for such a catalog template: >>> from babel.dates import UTC + >>> from datetime import datetime >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) >>> catalog = Catalog(project='Foobar', version='1.0', ... 
creation_date=created) @@ -496,7 +550,7 @@ def _set_mime_headers(self, headers): """) @property - def num_plurals(self): + def num_plurals(self) -> int: """The number of plurals used by the catalog or locale. >>> Catalog(locale='en').num_plurals @@ -513,7 +567,7 @@ def num_plurals(self): return self._num_plurals @property - def plural_expr(self): + def plural_expr(self) -> str: """The plural expression used by the catalog or locale. >>> Catalog(locale='en').plural_expr @@ -532,7 +586,7 @@ def plural_expr(self): return self._plural_expr @property - def plural_forms(self): + def plural_forms(self) -> str: """Return the plural forms declaration for the locale. >>> Catalog(locale='en').plural_forms @@ -541,51 +595,51 @@ def plural_forms(self): 'nplurals=2; plural=(n > 1);' :type: `str`""" - return 'nplurals=%s; plural=%s;' % (self.num_plurals, self.plural_expr) + return f"nplurals={self.num_plurals}; plural={self.plural_expr};" - def __contains__(self, id): + def __contains__(self, id: _MessageID) -> bool: """Return whether the catalog has a message with the specified ID.""" return self._key_for(id) in self._messages - def __len__(self): + def __len__(self) -> int: """The number of messages in the catalog. This does not include the special ``msgid ""`` entry.""" return len(self._messages) - def __iter__(self): + def __iter__(self) -> Iterator[Message]: """Iterates through all the entries in the catalog, in the order they were added, yielding a `Message` object for every entry. 
:rtype: ``iterator``""" buf = [] for name, value in self.mime_headers: - buf.append('%s: %s' % (name, value)) + buf.append(f"{name}: {value}") flags = set() if self.fuzzy: flags |= {'fuzzy'} - yield Message(u'', '\n'.join(buf), flags=flags) + yield Message('', '\n'.join(buf), flags=flags) for key in self._messages: yield self._messages[key] - def __repr__(self): + def __repr__(self) -> str: locale = '' if self.locale: - locale = ' %s' % self.locale - return '<%s %r%s>' % (type(self).__name__, self.domain, locale) + locale = f" {self.locale}" + return f"<{type(self).__name__} {self.domain!r}{locale}>" - def __delitem__(self, id): + def __delitem__(self, id: _MessageID) -> None: """Delete the message with the specified ID.""" self.delete(id) - def __getitem__(self, id): + def __getitem__(self, id: _MessageID) -> Message: """Return the message with the specified ID. :param id: the message ID """ return self.get(id) - def __setitem__(self, id, message): + def __setitem__(self, id: _MessageID, message: Message) -> None: """Add or update the message with the specified ID. 
>>> catalog = Catalog() @@ -626,17 +680,26 @@ def __setitem__(self, id, message): elif id == '': # special treatment for the header message self.mime_headers = message_from_string(message.string).items() - self.header_comment = '\n'.join([('# %s' % c).rstrip() for c - in message.user_comments]) + self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments]) self.fuzzy = message.fuzzy else: if isinstance(id, (list, tuple)): assert isinstance(message.string, (list, tuple)), \ - 'Expected sequence but got %s' % type(message.string) + f"Expected sequence but got {type(message.string)}" self._messages[key] = message - def add(self, id, string=None, locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=(), lineno=None, context=None): + def add( + self, + id: _MessageID, + string: _MessageID | None = None, + locations: Iterable[tuple[str, int]] = (), + flags: Iterable[str] = (), + auto_comments: Iterable[str] = (), + user_comments: Iterable[str] = (), + previous_id: _MessageID = (), + lineno: int | None = None, + context: str | None = None, + ) -> Message: """Add or update the message with the specified ID. >>> catalog = Catalog() @@ -668,21 +731,21 @@ def add(self, id, string=None, locations=(), flags=(), auto_comments=(), self[id] = message return message - def check(self): + def check(self) -> Iterable[tuple[Message, list[TranslationError]]]: """Run various validation checks on the translations in the catalog. For every message which fails validation, this method yield a ``(message, errors)`` tuple, where ``message`` is the `Message` object and ``errors`` is a sequence of `TranslationError` objects. 
- :rtype: ``iterator`` + :rtype: ``generator`` of ``(message, errors)`` """ for message in self._messages.values(): errors = message.check(catalog=self) if errors: yield message, errors - def get(self, id, context=None): + def get(self, id: _MessageID, context: str | None = None) -> Message | None: """Return the message with the specified ID and context. :param id: the message ID @@ -690,7 +753,7 @@ def get(self, id, context=None): """ return self._messages.get(self._key_for(id, context)) - def delete(self, id, context=None): + def delete(self, id: _MessageID, context: str | None = None) -> None: """Delete the message with the specified ID and context. :param id: the message ID @@ -700,7 +763,13 @@ def delete(self, id, context=None): if key in self._messages: del self._messages[key] - def update(self, template, no_fuzzy_matching=False, update_header_comment=False, keep_user_comments=True): + def update( + self, + template: Catalog, + no_fuzzy_matching: bool = False, + update_header_comment: bool = False, + keep_user_comments: bool = True, + ) -> None: """Update the catalog based on the given template catalog. 
>>> from babel.messages import Catalog @@ -760,25 +829,30 @@ def update(self, template, no_fuzzy_matching=False, update_header_comment=False, # Prepare for fuzzy matching fuzzy_candidates = [] if not no_fuzzy_matching: - fuzzy_candidates = { - self._key_for(msgid): messages[msgid].context - for msgid in messages if msgid and messages[msgid].string - } + fuzzy_candidates = {} + for msgid in messages: + if msgid and messages[msgid].string: + key = self._key_for(msgid) + ctxt = messages[msgid].context + modified_key = key.lower().strip() + fuzzy_candidates[modified_key] = (key, ctxt) fuzzy_matches = set() - def _merge(message, oldkey, newkey): + def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None: message = message.clone() fuzzy = False if oldkey != newkey: fuzzy = True fuzzy_matches.add(oldkey) oldmsg = messages.get(oldkey) + assert oldmsg is not None if isinstance(oldmsg.id, str): message.previous_id = [oldmsg.id] else: message.previous_id = list(oldmsg.id) else: oldmsg = remaining.pop(oldkey, None) + assert oldmsg is not None message.string = oldmsg.string if keep_user_comments: @@ -788,7 +862,7 @@ def _merge(message, oldkey, newkey): if not isinstance(message.string, (list, tuple)): fuzzy = True message.string = tuple( - [message.string] + ([u''] * (len(message.id) - 1)) + [message.string] + ([''] * (len(message.id) - 1)) ) elif len(message.string) != self.num_plurals: fuzzy = True @@ -798,7 +872,7 @@ def _merge(message, oldkey, newkey): message.string = message.string[0] message.flags |= oldmsg.flags if fuzzy: - message.flags |= {u'fuzzy'} + message.flags |= {'fuzzy'} self[message.id] = message for message in template: @@ -816,8 +890,8 @@ def _merge(message, oldkey, newkey): matches = get_close_matches(matchkey.lower().strip(), fuzzy_candidates.keys(), 1) if matches: - newkey = matches[0] - newctxt = fuzzy_candidates[newkey] + modified_key = matches[0] + newkey, newctxt = fuzzy_candidates[modified_key] if newctxt is 
not None: newkey = newkey, newctxt _merge(message, newkey, key) @@ -838,7 +912,7 @@ def _merge(message, oldkey, newkey): # used to update the catalog self.creation_date = template.creation_date - def _key_for(self, id, context=None): + def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str: """The key for a message is just the singular ID even for pluralizable messages, but is a ``(msgid, msgctxt)`` tuple for context-specific messages. @@ -850,7 +924,7 @@ def _key_for(self, id, context=None): key = (key, context) return key - def is_identical(self, other): + def is_identical(self, other: Catalog) -> bool: """Checks if catalogs are identical, taking into account messages and headers. """ diff --git a/babel/messages/checkers.py b/babel/messages/checkers.py index 4292c02d3..38a26e844 100644 --- a/babel/messages/checkers.py +++ b/babel/messages/checkers.py @@ -9,9 +9,11 @@ :copyright: (c) 2013-2022 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" +from __future__ import annotations -from babel.messages.catalog import TranslationError, PYTHON_FORMAT +from collections.abc import Callable +from babel.messages.catalog import PYTHON_FORMAT, Catalog, Message, TranslationError #: list of format chars that are compatible to each other _string_format_compatibilities = [ @@ -21,7 +23,7 @@ ] -def num_plurals(catalog, message): +def num_plurals(catalog: Catalog | None, message: Message) -> None: """Verify the number of plurals in the translation.""" if not message.pluralizable: if not isinstance(message.string, str): @@ -41,7 +43,7 @@ def num_plurals(catalog, message): catalog.num_plurals) -def python_format(catalog, message): +def python_format(catalog: Catalog | None, message: Message) -> None: """Verify the format string placeholders in the translation.""" if 'python-format' not in message.flags: return @@ -57,14 +59,14 @@ def python_format(catalog, message): _validate_format(msgid, msgstr) -def _validate_format(format, alternative): +def _validate_format(format: str, alternative: str) -> None: """Test format string `alternative` against `format`. `format` can be the msgid of a message and `alternative` one of the `msgstr`\\s. The two arguments are not interchangeable as `alternative` may contain less placeholders if `format` uses named placeholders. The behavior of this function is undefined if the string does not use - string formattings. + string formatting. If the string formatting of `alternative` is compatible to `format` the function returns `None`, otherwise a `TranslationError` is raised. 
@@ -89,8 +91,8 @@ def _validate_format(format, alternative): :raises TranslationError: on formatting errors """ - def _parse(string): - result = [] + def _parse(string: str) -> list[tuple[str, str]]: + result: list[tuple[str, str]] = [] for match in PYTHON_FORMAT.finditer(string): name, format, typechar = match.groups() if typechar == '%' and name is None: @@ -98,7 +100,7 @@ def _parse(string): result.append((name, str(typechar))) return result - def _compatible(a, b): + def _compatible(a: str, b: str) -> bool: if a == b: return True for set in _string_format_compatibilities: @@ -106,9 +108,9 @@ def _compatible(a, b): return True return False - def _check_positional(results): + def _check_positional(results: list[tuple[str, str]]) -> bool: positional = None - for name, char in results: + for name, _char in results: if positional is None: positional = name is None else: @@ -144,7 +146,7 @@ def _check_positional(results): type_map = dict(a) for name, typechar in b: if name not in type_map: - raise TranslationError('unknown named placeholder %r' % name) + raise TranslationError(f'unknown named placeholder {name!r}') elif not _compatible(typechar, type_map[name]): raise TranslationError('incompatible format for ' 'placeholder %r: ' @@ -152,8 +154,8 @@ def _check_positional(results): (name, typechar, type_map[name])) -def _find_checkers(): - checkers = [] +def _find_checkers() -> list[Callable[[Catalog | None, Message], object]]: + checkers: list[Callable[[Catalog | None, Message], object]] = [] try: from pkg_resources import working_set except ImportError: @@ -168,4 +170,4 @@ def _find_checkers(): return checkers -checkers = _find_checkers() +checkers: list[Callable[[Catalog | None, Message], object]] = _find_checkers() diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 74e57a181..5a34f645d 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -15,19 +15,66 @@ :copyright: (c) 2013-2022 by the Babel Team. 
:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import ast +import io import os -from os.path import relpath import sys -from tokenize import generate_tokens, COMMENT, NAME, OP, STRING +from collections.abc import ( + Callable, + Collection, + Generator, + Iterable, + Mapping, + MutableSequence, +) +from os.path import relpath +from textwrap import dedent +from tokenize import COMMENT, NAME, OP, STRING, generate_tokens +from typing import TYPE_CHECKING, Any from babel.util import parse_encoding, parse_future_flags, pathmatch -from textwrap import dedent +if TYPE_CHECKING: + from typing import IO, Protocol + + from _typeshed import SupportsItems, SupportsRead, SupportsReadline + from typing_extensions import Final, TypeAlias, TypedDict + + class _PyOptions(TypedDict, total=False): + encoding: str + + class _JSOptions(TypedDict, total=False): + encoding: str + jsx: bool + template_string: bool + parse_template_string: bool + + class _FileObj(SupportsRead[bytes], SupportsReadline[bytes], Protocol): + def seek(self, __offset: int, __whence: int = ...) -> int: ... + def tell(self) -> int: ... -GROUP_NAME = 'babel.extractors' + _Keyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] 
| None -DEFAULT_KEYWORDS = { + # 5-tuple of (filename, lineno, messages, comments, context) + _FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None] + + # 4-tuple of (lineno, message, comments, context) + _ExtractionResult: TypeAlias = tuple[int, str | tuple[str, ...], list[str], str | None] + + # Required arguments: fileobj, keywords, comment_tags, options + # Return value: Iterable of (lineno, message, comments, context) + _CallableExtractionMethod: TypeAlias = Callable[ + [_FileObj | IO[bytes], Mapping[str, _Keyword], Collection[str], Mapping[str, Any]], + Iterable[_ExtractionResult], + ] + + _ExtractionMethod: TypeAlias = _CallableExtractionMethod | str + +GROUP_NAME: Final[str] = 'babel.extractors' + +DEFAULT_KEYWORDS: dict[str, _Keyword] = { '_': None, 'gettext': None, 'ngettext': (1, 2), @@ -40,18 +87,14 @@ 'npgettext': ((1, 'c'), 2, 3) } -DEFAULT_MAPPING = [('**.py', 'python')] - -empty_msgid_warning = ( - '%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") ' - 'returns the header entry with meta information, not the empty string.') +DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')] -def _strip_comment_tags(comments, tags): +def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]): """Helper function for `extract` that strips comment tags from strings in a list of comment lines. This functions operates in-place. 
""" - def _strip(line): + def _strip(line: str): for tag in tags: if line.startswith(tag): return line[len(tag):].strip() @@ -59,22 +102,22 @@ def _strip(line): comments[:] = map(_strip, comments) -def default_directory_filter(dirpath): +def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool: subdir = os.path.basename(dirpath) # Legacy default behavior: ignore dot and underscore directories return not (subdir.startswith('.') or subdir.startswith('_')) def extract_from_dir( - dirname=None, - method_map=DEFAULT_MAPPING, - options_map=None, - keywords=DEFAULT_KEYWORDS, - comment_tags=(), - callback=None, - strip_comment_tags=False, - directory_filter=None, -): + dirname: str | os.PathLike[str] | None = None, + method_map: Iterable[tuple[str, str]] = DEFAULT_MAPPING, + options_map: SupportsItems[str, dict[str, Any]] | None = None, + keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS, + comment_tags: Collection[str] = (), + callback: Callable[[str, str, dict[str, Any]], object] | None = None, + strip_comment_tags: bool = False, + directory_filter: Callable[[str], bool] | None = None, +) -> Generator[_FileExtractionResult, None, None]: """Extract messages from any source files found in the given directory. 
This function generates tuples of the form ``(filename, lineno, message, @@ -174,9 +217,16 @@ def extract_from_dir( ) -def check_and_call_extract_file(filepath, method_map, options_map, - callback, keywords, comment_tags, - strip_comment_tags, dirpath=None): +def check_and_call_extract_file( + filepath: str | os.PathLike[str], + method_map: Iterable[tuple[str, str]], + options_map: SupportsItems[str, dict[str, Any]], + callback: Callable[[str, str, dict[str, Any]], object] | None, + keywords: Mapping[str, _Keyword], + comment_tags: Collection[str], + strip_comment_tags: bool, + dirpath: str | os.PathLike[str] | None = None, +) -> Generator[_FileExtractionResult, None, None]: """Checks if the given file matches an extraction method mapping, and if so, calls extract_from_file. Note that the extraction method mappings are based relative to dirpath. @@ -226,13 +276,19 @@ def check_and_call_extract_file(filepath, method_map, options_map, options=options, strip_comment_tags=strip_comment_tags ): - yield (filename, ) + message_tuple + yield (filename, *message_tuple) break -def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, - comment_tags=(), options=None, strip_comment_tags=False): +def extract_from_file( + method: _ExtractionMethod, + filename: str | os.PathLike[str], + keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS, + comment_tags: Collection[str] = (), + options: Mapping[str, Any] | None = None, + strip_comment_tags: bool = False, +) -> list[_ExtractionResult]: """Extract messages from a specific file. This function returns a list of tuples of the form ``(lineno, message, comments, context)``. 
@@ -259,8 +315,14 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, options, strip_comment_tags)) -def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), - options=None, strip_comment_tags=False): +def extract( + method: _ExtractionMethod, + fileobj: _FileObj, + keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS, + comment_tags: Collection[str] = (), + options: Mapping[str, Any] | None = None, + strip_comment_tags: bool = False, +) -> Generator[_ExtractionResult, None, None]: """Extract messages from the given file-like object using the specified extraction method. @@ -332,16 +394,13 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), func = builtin.get(method) if func is None: - raise ValueError('Unknown extraction method %r' % method) + raise ValueError(f"Unknown extraction method {method!r}") results = func(fileobj, keywords.keys(), comment_tags, options=options or {}) for lineno, funcname, messages, comments in results: - if funcname: - spec = keywords[funcname] or (1,) - else: - spec = (1,) + spec = keywords[funcname] or (1,) if funcname else (1,) if not isinstance(messages, (list, tuple)): messages = [messages] if not messages: @@ -377,9 +436,11 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), first_msg_index = spec[0] - 1 if not messages[first_msg_index]: # An empty string msgid isn't valid, emit a warning - where = '%s:%i' % (hasattr(fileobj, 'name') and - fileobj.name or '(unknown)', lineno) - sys.stderr.write((empty_msgid_warning % where) + '\n') + filename = (getattr(fileobj, "name", None) or "(unknown)") + sys.stderr.write( + f"{filename}:{lineno}: warning: Empty msgid. 
It is reserved by GNU gettext: gettext(\"\") " + f"returns the header entry with meta information, not the empty string.\n" + ) continue messages = tuple(msgs) @@ -391,14 +452,24 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), yield lineno, messages, comments, context -def extract_nothing(fileobj, keywords, comment_tags, options): +def extract_nothing( + fileobj: _FileObj, + keywords: Mapping[str, _Keyword], + comment_tags: Collection[str], + options: Mapping[str, Any], +) -> list[_ExtractionResult]: """Pseudo extractor that does not actually extract anything, but simply returns an empty list. """ return [] -def extract_python(fileobj, keywords, comment_tags, options): +def extract_python( + fileobj: IO[bytes], + keywords: Mapping[str, _Keyword], + comment_tags: Collection[str], + options: _PyOptions, +) -> Generator[_ExtractionResult, None, None]: """Extract messages from Python source code. It returns an iterator yielding tuples in the following form ``(lineno, @@ -466,12 +537,9 @@ def extract_python(fileobj, keywords, comment_tags, options): else: messages.append(None) - if len(messages) > 1: - messages = tuple(messages) - else: - messages = messages[0] - # Comments don't apply unless they immediately preceed the - # message + messages = tuple(messages) if len(messages) > 1 else messages[0] + # Comments don't apply unless they immediately + # precede the message if translator_comments and \ translator_comments[-1][0] < message_lineno - 1: translator_comments = [] @@ -511,7 +579,7 @@ def extract_python(fileobj, keywords, comment_tags, options): funcname = value -def _parse_python_string(value, encoding, future_flags): +def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None: # Unwrap quotes in a safe manner, maintaining the string's encoding # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470 code = compile( @@ -533,7 +601,13 @@ def _parse_python_string(value, encoding, 
future_flags): return None -def extract_javascript(fileobj, keywords, comment_tags, options): +def extract_javascript( + fileobj: _FileObj, + keywords: Mapping[str, _Keyword], + comment_tags: Collection[str], + options: _JSOptions, + lineno: int = 1, +) -> Generator[_ExtractionResult, None, None]: """Extract messages from JavaScript source code. :param fileobj: the seekable, file-like object the messages should be @@ -545,7 +619,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options): :param options: a dictionary of additional options (optional) Supported options are: * `jsx` -- set to false to disable JSX/E4X support. - * `template_string` -- set to false to disable ES6 template string support. + * `template_string` -- if `True`, supports gettext(`key`) + * `parse_template_string` -- if `True` will parse the + contents of javascript + template strings. + :param lineno: line number offset (for parsing embedded fragments) """ from babel.messages.jslexer import Token, tokenize, unquote_string funcname = message_lineno = None @@ -557,12 +635,12 @@ def extract_javascript(fileobj, keywords, comment_tags, options): last_token = None call_stack = -1 dotted = any('.' in kw for kw in keywords) - for token in tokenize( fileobj.read().decode(encoding), jsx=options.get("jsx", True), template_string=options.get("template_string", True), - dotted=dotted + dotted=dotted, + lineno=lineno ): if ( # Turn keyword`foo` expressions into keyword("foo") calls: funcname and # have a keyword... 
@@ -574,7 +652,10 @@ def extract_javascript(fileobj, keywords, comment_tags, options): call_stack = 0 token = Token('operator', ')', token.lineno) - if token.type == 'operator' and token.value == '(': + if options.get('parse_template_string') and not funcname and token.type == 'template_string': + yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno) + + elif token.type == 'operator' and token.value == '(': if funcname: message_lineno = token.lineno call_stack += 1 @@ -592,7 +673,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options): break elif token.type == 'multilinecomment': - # only one multi-line comment may preceed a translation + # only one multi-line comment may precede a translation translator_comments = [] value = token.value[2:-2].strip() for comment_tag in comment_tags: @@ -666,3 +747,46 @@ def extract_javascript(fileobj, keywords, comment_tags, options): funcname = token.value last_token = token + + +def parse_template_string( + template_string: str, + keywords: Mapping[str, _Keyword], + comment_tags: Collection[str], + options: _JSOptions, + lineno: int = 1, +) -> Generator[_ExtractionResult, None, None]: + """Parse JavaScript template string. + + :param template_string: the template string to be parsed + :param keywords: a list of keywords (i.e. 
function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :param lineno: starting line number (optional) + """ + from babel.messages.jslexer import line_re + prev_character = None + level = 0 + inside_str = False + expression_contents = '' + for character in template_string[1:-1]: + if not inside_str and character in ('"', "'", '`'): + inside_str = character + elif inside_str == character and prev_character != r'\\': + inside_str = False + if level: + expression_contents += character + if not inside_str: + if character == '{' and prev_character == '$': + level += 1 + elif level and character == '}': + level -= 1 + if level == 0 and expression_contents: + expression_contents = expression_contents[0:-1] + fake_file_obj = io.BytesIO(expression_contents.encode()) + yield from extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno) + lineno += len(line_re.findall(expression_contents)) + expression_contents = '' + prev_character = character diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 6e09d1095..5baefbbb3 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -8,6 +8,7 @@ :license: BSD, see LICENSE for more details. 
""" +import datetime import fnmatch import logging import optparse @@ -18,14 +19,19 @@ import tempfile from collections import OrderedDict from configparser import RawConfigParser -from datetime import datetime from io import StringIO +from typing import Iterable -from babel import __version__ as VERSION from babel import Locale, localedata +from babel import __version__ as VERSION from babel.core import UnknownLocaleError -from babel.messages.catalog import Catalog, DEFAULT_HEADER -from babel.messages.extract import DEFAULT_KEYWORDS, DEFAULT_MAPPING, check_and_call_extract_file, extract_from_dir +from babel.messages.catalog import DEFAULT_HEADER, Catalog +from babel.messages.extract import ( + DEFAULT_KEYWORDS, + DEFAULT_MAPPING, + check_and_call_extract_file, + extract_from_dir, +) from babel.messages.mofile import write_mo from babel.messages.pofile import read_po, write_po from babel.util import LOCALTZ @@ -38,15 +44,16 @@ distutils_log = log # "distutils.log → (no replacement yet)" try: - from setuptools.errors import OptionError, SetupError, BaseError + from setuptools.errors import BaseError, OptionError, SetupError except ImportError: # Error aliases only added in setuptools 59 (2021-11). 
OptionError = SetupError = BaseError = Exception except ImportError: from distutils import log as distutils_log from distutils.cmd import Command as _Command - from distutils.errors import DistutilsOptionError as OptionError, DistutilsSetupError as SetupError, DistutilsError as BaseError - + from distutils.errors import DistutilsError as BaseError + from distutils.errors import DistutilsOptionError as OptionError + from distutils.errors import DistutilsSetupError as SetupError def listify_value(arg, split=None): @@ -188,10 +195,10 @@ def finalize_options(self): def run(self): n_errors = 0 for domain in self.domain: - for catalog, errors in self._run_domain(domain).items(): + for errors in self._run_domain(domain).values(): n_errors += len(errors) if n_errors: - self.log.error('%d errors encountered.' % n_errors) + self.log.error('%d errors encountered.', n_errors) return (1 if n_errors else 0) def _run_domain(self, domain): @@ -203,19 +210,19 @@ def _run_domain(self, domain): po_files.append((self.locale, os.path.join(self.directory, self.locale, 'LC_MESSAGES', - domain + '.po'))) + f"{domain}.po"))) mo_files.append(os.path.join(self.directory, self.locale, 'LC_MESSAGES', - domain + '.mo')) + f"{domain}.mo")) else: for locale in os.listdir(self.directory): po_file = os.path.join(self.directory, locale, - 'LC_MESSAGES', domain + '.po') + 'LC_MESSAGES', f"{domain}.po") if os.path.exists(po_file): po_files.append((locale, po_file)) mo_files.append(os.path.join(self.directory, locale, 'LC_MESSAGES', - domain + '.mo')) + f"{domain}.mo")) else: po_files.append((self.locale, self.input_file)) if self.output_file: @@ -223,7 +230,7 @@ def _run_domain(self, domain): else: mo_files.append(os.path.join(self.directory, self.locale, 'LC_MESSAGES', - domain + '.mo')) + f"{domain}.mo")) if not po_files: raise OptionError('no message catalogs found') @@ -405,10 +412,7 @@ def finalize_options(self): 'input-dirs and input-paths are mutually exclusive' ) - if self.no_default_keywords: 
- keywords = {} - else: - keywords = DEFAULT_KEYWORDS.copy() + keywords = {} if self.no_default_keywords else DEFAULT_KEYWORDS.copy() keywords.update(parse_keywords(listify_value(self.keywords))) @@ -451,7 +455,7 @@ def finalize_options(self): for path in self.input_paths: if not os.path.exists(path): - raise OptionError("Input path: %s does not exist" % path) + raise OptionError(f"Input path: {path} does not exist") self.add_comments = listify_value(self.add_comments or (), ",") @@ -472,6 +476,27 @@ def finalize_options(self): else: self.directory_filter = None + def _build_callback(self, path: str): + def callback(filename: str, method: str, options: dict): + if method == 'ignore': + return + + # If we explicitly provide a full filepath, just use that. + # Otherwise, path will be the directory path and filename + # is the relative path from that dir to the file. + # So we can join those to get the full filepath. + if os.path.isfile(path): + filepath = path + else: + filepath = os.path.normpath(os.path.join(path, filename)) + + optstr = '' + if options: + opt_values = ", ".join(f'{k}="{v}"' for k, v in options.items()) + optstr = f" ({opt_values})" + self.log.info('extracting messages from %s%s', filepath, optstr) + return callback + def run(self): mappings = self._get_mappings() with open(self.output_file, 'wb') as outfile: @@ -483,25 +508,7 @@ def run(self): header_comment=(self.header_comment or DEFAULT_HEADER)) for path, method_map, options_map in mappings: - def callback(filename, method, options): - if method == 'ignore': - return - - # If we explicitly provide a full filepath, just use that. - # Otherwise, path will be the directory path and filename - # is the relative path from that dir to the file. - # So we can join those to get the full filepath. 
- if os.path.isfile(path): - filepath = path - else: - filepath = os.path.normpath(os.path.join(path, filename)) - - optstr = '' - if options: - optstr = ' (%s)' % ', '.join(['%s="%s"' % (k, v) for - k, v in options.items()]) - self.log.info('extracting messages from %s%s', filepath, optstr) - + callback = self._build_callback(path) if os.path.isfile(path): current_dir = os.getcwd() extracted = check_and_call_extract_file( @@ -640,7 +647,7 @@ def finalize_options(self): raise OptionError('you must specify the output directory') if not self.output_file: self.output_file = os.path.join(self.output_dir, self.locale, - 'LC_MESSAGES', self.domain + '.po') + 'LC_MESSAGES', f"{self.domain}.po") if not os.path.exists(os.path.dirname(self.output_file)): os.makedirs(os.path.dirname(self.output_file)) @@ -662,7 +669,7 @@ def run(self): catalog = read_po(infile, locale=self.locale) catalog.locale = self._locale - catalog.revision_date = datetime.now(LOCALTZ) + catalog.revision_date = datetime.datetime.now(LOCALTZ) catalog.fuzzy = False with open(self.output_file, 'wb') as outfile: @@ -782,12 +789,12 @@ def run(self): po_files.append((self.locale, os.path.join(self.output_dir, self.locale, 'LC_MESSAGES', - self.domain + '.po'))) + f"{self.domain}.po"))) else: for locale in os.listdir(self.output_dir): po_file = os.path.join(self.output_dir, locale, 'LC_MESSAGES', - self.domain + '.po') + f"{self.domain}.po") if os.path.exists(po_file): po_files.append((locale, po_file)) else: @@ -818,7 +825,7 @@ def run(self): catalog = read_po(infile, locale=self.locale) catalog.locale = self._locale - catalog.revision_date = datetime.now(LOCALTZ) + catalog.revision_date = datetime.datetime.now(LOCALTZ) catalog.fuzzy = False with open(filename, 'wb') as outfile: @@ -842,7 +849,7 @@ def run(self): omit_header=self.omit_header, ignore_obsolete=self.ignore_obsolete, include_previous=self.previous, width=self.width) - except: + except Exception: os.remove(tmpname) raise @@ -889,7 +896,7 @@ class 
CommandLineInterface: """ usage = '%%prog %s [options] %s' - version = '%%prog %s' % VERSION + version = f'%prog {VERSION}' commands = { 'compile': 'compile message catalogs to MO files', 'extract': 'extract messages from source files and generate a POT file', @@ -935,12 +942,10 @@ def run(self, argv=None): self._configure_logging(options.loglevel) if options.list_locales: identifiers = localedata.locale_identifiers() - longest = max(len(identifier) for identifier in identifiers) - identifiers.sort() - format = u'%%-%ds %%s' % (longest + 1) - for identifier in identifiers: + id_width = max(len(identifier) for identifier in identifiers) + 1 + for identifier in sorted(identifiers): locale = Locale.parse(identifier) - print(format % (identifier, locale.english_name)) + print(f"{identifier:<{id_width}} {locale.english_name}") return 0 if not args: @@ -949,7 +954,7 @@ def run(self, argv=None): cmdname = args[0] if cmdname not in self.commands: - self.parser.error('unknown command "%s"' % cmdname) + self.parser.error(f'unknown command "{cmdname}"') cmdinst = self._configure_command(cmdname, args[1:]) return cmdinst.run() @@ -972,11 +977,9 @@ def _configure_logging(self, loglevel): def _help(self): print(self.parser.format_help()) print("commands:") - longest = max(len(command) for command in self.commands) - format = " %%-%ds %%s" % max(8, longest + 1) - commands = sorted(self.commands.items()) - for name, description in commands: - print(format % (name, description)) + cmd_width = max(8, max(len(command) for command in self.commands) + 1) + for name, description in sorted(self.commands.items()): + print(f" {name:<{cmd_width}} {description}") def _configure_command(self, cmdname, argv): """ @@ -997,14 +1000,14 @@ def _configure_command(self, cmdname, argv): as_args = getattr(cmdclass, "as_args", ()) for long, short, help in cmdclass.user_options: name = long.strip("=") - default = getattr(cmdinst, name.replace('-', '_')) - strs = ["--%s" % name] + default = 
getattr(cmdinst, name.replace("-", "_")) + strs = [f"--{name}"] if short: - strs.append("-%s" % short) + strs.append(f"-{short}") strs.extend(cmdclass.option_aliases.get(name, ())) choices = cmdclass.option_choices.get(name, None) if name == as_args: - parser.usage += "<%s>" % name + parser.usage += f"<{name}>" elif name in cmdclass.boolean_options: parser.add_option(*strs, action="store_true", help=help) elif name in cmdclass.multiple_value_options: @@ -1105,7 +1108,7 @@ def parse_mapping(fileobj, filename=None): return method_map, options_map -def parse_keywords(strings=[]): +def parse_keywords(strings: Iterable[str] = ()): """Parse keywords specifications from the given list of strings. >>> kw = sorted(parse_keywords(['_', 'dgettext:2', 'dngettext:2,3', 'pgettext:1c,2']).items()) diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py index 1264b2dbc..0563f6221 100644 --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ babel.messages.jslexer ~~~~~~~~~~~~~~~~~~~~~~ @@ -9,17 +8,20 @@ :copyright: (c) 2013-2022 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" -from collections import namedtuple +from __future__ import annotations + import re +from collections.abc import Generator +from typing import NamedTuple -operators = sorted([ +operators: list[str] = sorted([ '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=', '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':' ], key=len, reverse=True) -escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} +escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE) dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE) @@ -30,9 +32,14 @@ uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}') hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}') -Token = namedtuple('Token', 'type value lineno') -_rules = [ +class Token(NamedTuple): + type: str + value: str + lineno: int + + +_rules: list[tuple[str | None, re.Pattern[str]]] = [ (None, re.compile(r'\s+', re.UNICODE)), (None, re.compile(r'