From 880b1d3b752760970ff7159805821fbfa92b631d Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Thu, 9 Apr 2026 22:13:08 +0200 Subject: [PATCH 1/2] Update some libs and tests to 3.14.4 --- Lib/_android_support.py | 7 ++++++ Lib/annotationlib.py | 19 +++++++++++--- Lib/argparse.py | 10 +++++--- Lib/glob.py | 27 +++++++++++++++++--- Lib/inspect.py | 3 ++- Lib/subprocess.py | 19 +++++++------- Lib/tempfile.py | 36 +++++++++++++-------------- Lib/test/test_annotationlib.py | 25 +++++++++++++++++++ Lib/test/test_argparse.py | 21 ++++++++++++++++ Lib/test/test_inspect/test_inspect.py | 3 +-- Lib/test/test_tempfile.py | 31 ++++++++++++++++++----- Lib/urllib/parse.py | 6 +++-- Lib/zipfile/__init__.py | 2 +- 13 files changed, 160 insertions(+), 49 deletions(-) diff --git a/Lib/_android_support.py b/Lib/_android_support.py index a439d03a144..320dab52acd 100644 --- a/Lib/_android_support.py +++ b/Lib/_android_support.py @@ -168,6 +168,13 @@ def write(self, prio, tag, message): # message. message = message.replace(b"\x00", b"\xc0\x80") + # On API level 30 and higher, Logcat will strip any number of leading + # newlines. This is visible in all `logcat` modes, even --binary. Work + # around this by adding a leading space, which shouldn't make any + # difference to the log's usability. + if message.startswith(b"\n"): + message = b" " + message + with self._lock: now = time() self._bucket_level += ( diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index 832d160de7f..9fee2564114 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -919,7 +919,7 @@ def get_annotations( does not exist, the __annotate__ function is called. The FORWARDREF format uses __annotations__ if it exists and can be evaluated, and otherwise falls back to calling the __annotate__ function. 
- The SOURCE format tries __annotate__ first, and falls back to + The STRING format tries __annotate__ first, and falls back to using __annotations__, stringified using annotations_to_string(). This function handles several details for you: @@ -1037,13 +1037,26 @@ def get_annotations( obj_globals = obj_locals = unwrap = None if unwrap is not None: + # Use an id-based visited set to detect cycles in the __wrapped__ + # and functools.partial.func chain (e.g. f.__wrapped__ = f). + # On cycle detection we stop and use whatever __globals__ we have + # found so far, mirroring the approach of inspect.unwrap(). + _seen_ids = {id(unwrap)} while True: if hasattr(unwrap, "__wrapped__"): - unwrap = unwrap.__wrapped__ + candidate = unwrap.__wrapped__ + if id(candidate) in _seen_ids: + break + _seen_ids.add(id(candidate)) + unwrap = candidate continue if functools := sys.modules.get("functools"): if isinstance(unwrap, functools.partial): - unwrap = unwrap.func + candidate = unwrap.func + if id(candidate) in _seen_ids: + break + _seen_ids.add(id(candidate)) + unwrap = candidate continue break if hasattr(unwrap, "__globals__"): diff --git a/Lib/argparse.py b/Lib/argparse.py index 1d7d34f9924..8cf85694300 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -149,6 +149,10 @@ def _copy_items(items): return copy.copy(items) +def _identity(value): + return value + + # =============== # Formatting Help # =============== @@ -200,7 +204,7 @@ def _set_color(self, color): self._decolor = decolor else: self._theme = get_theme(force_no_color=True).argparse - self._decolor = lambda text: text + self._decolor = _identity # =============================== # Section and indentation methods @@ -1903,9 +1907,7 @@ def __init__(self, self._subparsers = None # register types - def identity(string): - return string - self.register('type', None, identity) + self.register('type', None, _identity) # add help argument if necessary # (using explicit default to override global argument_default) diff --git 
a/Lib/glob.py b/Lib/glob.py index f1a87c82fc5..7ce3998c27c 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -15,7 +15,7 @@ def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, include_hidden=False): - """Return a list of paths matching a pathname pattern. + """Return a list of paths matching a `pathname` pattern. The pattern may contain simple shell-style wildcards a la fnmatch. Unlike fnmatch, filenames starting with a @@ -25,6 +25,15 @@ def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, The order of the returned list is undefined. Sort it if you need a particular order. + If `root_dir` is not None, it should be a path-like object specifying the + root directory for searching. It has the same effect as changing the + current directory before calling it (without actually + changing it). If pathname is relative, the result will contain + paths relative to `root_dir`. + + If `dir_fd` is not None, it should be a file descriptor referring to a + directory, and paths will then be relative to that directory. + If `include_hidden` is true, the patterns '*', '?', '**' will match hidden directories. @@ -36,7 +45,7 @@ def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, include_hidden=False): - """Return an iterator which yields the paths matching a pathname pattern. + """Return an iterator which yields the paths matching a `pathname` pattern. The pattern may contain simple shell-style wildcards a la fnmatch. However, unlike fnmatch, filenames starting with a @@ -46,7 +55,19 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, The order of the returned paths is undefined. Sort them if you need a particular order. - If recursive is true, the pattern '**' will match any files and + If `root_dir` is not None, it should be a path-like object specifying + the root directory for searching. 
It has the same effect as changing + the current directory before calling it (without actually + changing it). If pathname is relative, the result will contain + paths relative to `root_dir`. + + If `dir_fd` is not None, it should be a file descriptor referring to a + directory, and paths will then be relative to that directory. + + If `include_hidden` is true, the patterns '*', '?', '**' will match hidden + directories. + + If `recursive` is true, the pattern '**' will match any files and zero or more directories and subdirectories. """ sys.audit("glob.glob", pathname, recursive) diff --git a/Lib/inspect.py b/Lib/inspect.py index 3cee85f39a6..2d229051b4d 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -2660,11 +2660,12 @@ class Parameter: The annotation for the parameter if specified. If the parameter has no annotation, this attribute is set to `Parameter.empty`. - * kind : str + * kind Describes how argument values are bound to the parameter. Possible values: `Parameter.POSITIONAL_ONLY`, `Parameter.POSITIONAL_OR_KEYWORD`, `Parameter.VAR_POSITIONAL`, `Parameter.KEYWORD_ONLY`, `Parameter.VAR_KEYWORD`. + Every value has a `description` attribute describing its meaning. 
""" __slots__ = ('_name', '_kind', '_default', '_annotation') diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 578d7b95d05..52b7b711770 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -351,15 +351,16 @@ def _args_from_interpreter_flags(): # -X options if dev_mode: args.extend(('-X', 'dev')) - for opt in ('faulthandler', 'tracemalloc', 'importtime', - 'frozen_modules', 'showrefcount', 'utf8', 'gil'): - if opt in xoptions: - value = xoptions[opt] - if value is True: - arg = opt - else: - arg = '%s=%s' % (opt, value) - args.extend(('-X', arg)) + for opt in sorted(xoptions): + if opt == 'dev': + # handled above via sys.flags.dev_mode + continue + value = xoptions[opt] + if value is True: + arg = opt + else: + arg = '%s=%s' % (opt, value) + args.extend(('-X', arg)) return args diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 5e3ccab5f48..a34e062f839 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -57,10 +57,11 @@ if hasattr(_os, 'O_BINARY'): _bin_openflags |= _os.O_BINARY -if hasattr(_os, 'TMP_MAX'): - TMP_MAX = _os.TMP_MAX -else: - TMP_MAX = 10000 +# This is more than enough. +# Each name contains over 40 random bits. Even with a million temporary +# files, the chance of a conflict is less than 1 in a million, and with +# 20 attempts, it is less than 1e-120. +TMP_MAX = 20 # This variable _was_ unused for legacy reasons, see issue 10354. # But as of 3.5 we actually use it at runtime so changing it would @@ -196,8 +197,7 @@ def _get_default_tempdir(dirlist=None): for dir in dirlist: if dir != _os.curdir: dir = _os.path.abspath(dir) - # Try only a few names per directory. - for seq in range(100): + for seq in range(TMP_MAX): name = next(namer) filename = _os.path.join(dir, name) try: @@ -213,10 +213,8 @@ def _get_default_tempdir(dirlist=None): except FileExistsError: pass except PermissionError: - # This exception is thrown when a directory with the chosen name - # already exists on windows. 
- if (_os.name == 'nt' and _os.path.isdir(dir) and - _os.access(dir, _os.W_OK)): + # See the comment in mkdtemp(). + if _os.name == 'nt' and _os.path.isdir(dir): continue break # no point trying more names in this directory except OSError: @@ -258,10 +256,8 @@ def _mkstemp_inner(dir, pre, suf, flags, output_type): except FileExistsError: continue # try again except PermissionError: - # This exception is thrown when a directory with the chosen name - # already exists on windows. - if (_os.name == 'nt' and _os.path.isdir(dir) and - _os.access(dir, _os.W_OK)): + # See the comment in mkdtemp(). + if _os.name == 'nt' and _os.path.isdir(dir) and seq < TMP_MAX - 1: continue else: raise @@ -386,10 +382,14 @@ def mkdtemp(suffix=None, prefix=None, dir=None): except FileExistsError: continue # try again except PermissionError: - # This exception is thrown when a directory with the chosen name - # already exists on windows. - if (_os.name == 'nt' and _os.path.isdir(dir) and - _os.access(dir, _os.W_OK)): + # On Posix, this exception is raised when the user has no + # write access to the parent directory. + # On Windows, it is also raised when a directory with + # the chosen name already exists, or if the parent directory + # is not a directory. + # We cannot distinguish between "directory-exists-error" and + # "access-denied-error". + if _os.name == 'nt' and _os.path.isdir(dir) and seq < TMP_MAX - 1: continue else: raise diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py index e89d6c0b161..50cf8fcb6b4 100644 --- a/Lib/test/test_annotationlib.py +++ b/Lib/test/test_annotationlib.py @@ -646,6 +646,31 @@ def foo(): get_annotations(foo, format=Format.FORWARDREF, eval_str=True) get_annotations(foo, format=Format.STRING, eval_str=True) + def test_eval_str_wrapped_cycle_self(self): + # gh-146556: self-referential __wrapped__ cycle must not hang. + def f(x: 'int') -> 'str': ... 
+ f.__wrapped__ = f + # Cycle is detected and broken; globals from f itself are used. + result = get_annotations(f, eval_str=True) + self.assertEqual(result, {'x': int, 'return': str}) + + def test_eval_str_wrapped_cycle_mutual(self): + # gh-146556: mutual __wrapped__ cycle (a -> b -> a) must not hang. + def a(x: 'int'): ... + def b(): ... + a.__wrapped__ = b + b.__wrapped__ = a + result = get_annotations(a, eval_str=True) + self.assertEqual(result, {'x': int}) + + def test_eval_str_wrapped_chain_no_cycle(self): + # gh-146556: a valid (non-cyclic) __wrapped__ chain must still work. + def inner(x: 'int'): ... + def outer(x: 'int'): ... + outer.__wrapped__ = inner + result = get_annotations(outer, eval_str=True) + self.assertEqual(result, {'x': int}) + def test_stock_annotations(self): def foo(a: int, b: str): pass diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index f48fb765bb3..8331d021813 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -81,6 +81,27 @@ def test_skip_invalid_stdout(self): self.assertRegex(mocked_stderr.getvalue(), r'usage:') +class TestArgumentParserPickleable(unittest.TestCase): + + @force_not_colorized + def test_pickle_roundtrip(self): + import pickle + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('--foo', type=int, default=42) + parser.add_argument('bar', nargs='?', default='baz') + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=proto): + # Try to pickle and unpickle the parser + parser2 = pickle.loads(pickle.dumps(parser, protocol=proto)) + # Check that the round-tripped parser still works + ns = parser2.parse_args(['--foo', '123', 'quux']) + self.assertEqual(ns.foo, 123) + self.assertEqual(ns.bar, 'quux') + ns2 = parser2.parse_args([]) + self.assertEqual(ns2.foo, 42) + self.assertEqual(ns2.bar, 'baz') + + class TestCase(unittest.TestCase): def setUp(self): diff --git a/Lib/test/test_inspect/test_inspect.py 
b/Lib/test/test_inspect/test_inspect.py index 431c722482b..ea32d11b637 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -6234,8 +6234,7 @@ def test_operator_module_has_signatures(self): def test_os_module_has_signatures(self): unsupported_signature = {'chmod', 'utime'} unsupported_signature |= {name for name in - ['get_terminal_size', 'link', 'posix_spawn', 'posix_spawnp', - 'register_at_fork', 'startfile'] + ['get_terminal_size', 'link', 'register_at_fork', 'startfile'] if hasattr(os, name)} self._test_module_has_signatures(os, unsupported_signature=unsupported_signature) diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index f01e5dc7fb1..1ae8f6b3b25 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -330,17 +330,36 @@ def _mock_candidate_names(*names): class TestBadTempdir: def test_read_only_directory(self): with _inside_empty_temp_dir(): - oldmode = mode = os.stat(tempfile.tempdir).st_mode - mode &= ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH) - os.chmod(tempfile.tempdir, mode) + probe = os.path.join(tempfile.tempdir, 'probe') + if os.name == 'nt': + cmd = ['icacls', tempfile.tempdir, '/deny', 'Everyone:(W)'] + stdout = None if support.verbose > 1 else subprocess.DEVNULL + subprocess.run(cmd, check=True, stdout=stdout) + else: + oldmode = mode = os.stat(tempfile.tempdir).st_mode + mode &= ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH) + mode = stat.S_IREAD + os.chmod(tempfile.tempdir, mode) try: - if os.access(tempfile.tempdir, os.W_OK): + # Check that the directory is read-only. + try: + os.mkdir(probe) + except PermissionError: + pass + else: + os.rmdir(probe) self.skipTest("can't set the directory read-only") + # gh-66305: Now it takes a split second, but previously + # it took about 10 days on Windows. 
with self.assertRaises(PermissionError): self.make_temp() - self.assertEqual(os.listdir(tempfile.tempdir), []) finally: - os.chmod(tempfile.tempdir, oldmode) + if os.name == 'nt': + cmd = ['icacls', tempfile.tempdir, '/grant:r', 'Everyone:(M)'] + subprocess.run(cmd, check=True, stdout=stdout) + else: + os.chmod(tempfile.tempdir, oldmode) + self.assertEqual(os.listdir(tempfile.tempdir), []) def test_nonexisting_directory(self): with _inside_empty_temp_dir(): diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 67d9bbea0d3..a651e815ddc 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -1,6 +1,6 @@ """Parse (absolute and relative) URLs. -urlparse module is based upon the following RFC specifications. +urllib.parse module is based upon the following RFC specifications. RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding and L. Masinter, January 2005. @@ -20,7 +20,7 @@ McCahill, December 1994 RFC 3986 is considered the current standard and any future changes to -urlparse module should conform with it. The urlparse module is +urllib.parse module should conform with it. The urllib.parse module is currently not entirely compliant with this RFC due to defacto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. The testcases in @@ -390,6 +390,8 @@ def urlparse(url, scheme='', allow_fragments=True): path or query. Note that % escapes are not expanded. + + urlsplit() should generally be used instead of urlparse(). """ url, scheme, _coerce_result = _coerce_args(url, scheme) scheme, netloc, url, params, query, fragment = _urlparse(url, scheme, allow_fragments) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index ac2332e5846..19aea290b58 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -950,7 +950,7 @@ class ZipExtFile(io.BufferedIOBase): """ # Max size supported by decompressor. 
- MAX_N = 1 << 31 - 1 + MAX_N = (1 << 31) - 1 # Read from compressed files in 4k blocks. MIN_READ_SIZE = 4096 From c1c3e6e64d146c03e38640ac9ac2a5128fb2cfb3 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Thu, 9 Apr 2026 22:16:49 +0200 Subject: [PATCH 2/2] Update `pydoc_data` --- Lib/pydoc_data/module_docs.py | 2 +- Lib/pydoc_data/topics.py | 900 +++++++++++++++++++++------------- 2 files changed, 568 insertions(+), 334 deletions(-) diff --git a/Lib/pydoc_data/module_docs.py b/Lib/pydoc_data/module_docs.py index 2a6ede3aa14..d65837838d1 100644 --- a/Lib/pydoc_data/module_docs.py +++ b/Lib/pydoc_data/module_docs.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Tue Feb 3 17:32:13 2026 +# Autogenerated by Sphinx on Tue Apr 7 16:13:12 2026 # as part of the release process. module_docs = { diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 4e31cf08bb5..6dca99ce9b1 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Tue Feb 3 17:32:13 2026 +# Autogenerated by Sphinx on Tue Apr 7 16:13:12 2026 # as part of the release process. topics = { @@ -46,11 +46,10 @@ | "[" [target_list] "]" | attributeref | subscription - | slicing | "*" target -(See section Primaries for the syntax definitions for *attributeref*, -*subscription*, and *slicing*.) +(See section Primaries for the syntax definitions for *attributeref* +and *subscription*.) An assignment statement evaluates the expression list (remember that this can be a single expression or a comma-separated list, the latter @@ -59,12 +58,11 @@ Assignment is defined recursively depending on the form of the target (list). When a target is part of a mutable object (an attribute -reference, subscription or slicing), the mutable object must -ultimately perform the assignment and decide about its validity, and -may raise an exception if the assignment is unacceptable. 
The rules -observed by various types and the exceptions raised are given with the -definition of the object types (see section The standard type -hierarchy). +reference or subscription), the mutable object must ultimately perform +the assignment and decide about its validity, and may raise an +exception if the assignment is unacceptable. The rules observed by +various types and the exceptions raised are given with the definition +of the object types (see section The standard type hierarchy). Assignment of an object to a target list, optionally enclosed in parentheses or square brackets, is recursively defined as follows. @@ -130,9 +128,13 @@ class Cls: attributes, such as properties created with "property()". * If the target is a subscription: The primary expression in the - reference is evaluated. It should yield either a mutable sequence - object (such as a list) or a mapping object (such as a dictionary). - Next, the subscript expression is evaluated. + reference is evaluated. Next, the subscript expression is evaluated. + Then, the primary’s "__setitem__()" method is called with two + arguments: the subscript and the assigned object. + + Typically, "__setitem__()" is defined on mutable sequence objects + (such as lists) and mapping objects (such as dictionaries), and + behaves as follows. If the primary is a mutable sequence object (such as a list), the subscript must yield an integer. If it is negative, the sequence’s @@ -149,27 +151,17 @@ class Cls: existing key/value pair with the same key value, or insert a new key/value pair (if no key with the same value existed). - For user-defined objects, the "__setitem__()" method is called with - appropriate arguments. - -* If the target is a slicing: The primary expression in the reference - is evaluated. It should yield a mutable sequence object (such as a - list). The assigned object should be a sequence object of the same - type. 
Next, the lower and upper bound expressions are evaluated, - insofar they are present; defaults are zero and the sequence’s - length. The bounds should evaluate to integers. If either bound is - negative, the sequence’s length is added to it. The resulting - bounds are clipped to lie between zero and the sequence’s length, - inclusive. Finally, the sequence object is asked to replace the - slice with the items of the assigned sequence. The length of the - slice may be different from the length of the assigned sequence, - thus changing the length of the target sequence, if the target - sequence allows it. - -**CPython implementation detail:** In the current implementation, the -syntax for targets is taken to be the same as for expressions, and -invalid syntax is rejected during the code generation phase, causing -less detailed error messages. + If the target is a slicing: The primary expression should evaluate + to a mutable sequence object (such as a list). The assigned object + should be *iterable*. The slicing’s lower and upper bounds should be + integers; if they are "None" (or not present), the defaults are zero + and the sequence’s length. If either bound is negative, the + sequence’s length is added to it. The resulting bounds are clipped + to lie between zero and the sequence’s length, inclusive. Finally, + the sequence object is asked to replace the slice with the items of + the assigned sequence. The length of the slice may be different + from the length of the assigned sequence, thus changing the length + of the target sequence, if the target sequence allows it. 
Although the definition of assignment implies that overlaps between the left-hand side and the right-hand side are ‘simultaneous’ (for @@ -196,7 +188,7 @@ class Cls: binary operation and an assignment statement: augmented_assignment_stmt: augtarget augop (expression_list | yield_expression) - augtarget: identifier | attributeref | subscription | slicing + augtarget: identifier | attributeref | subscription augop: "+=" | "-=" | "*=" | "@=" | "/=" | "//=" | "%=" | "**=" | ">>=" | "<<=" | "&=" | "^=" | "|=" @@ -369,13 +361,12 @@ async def func(param1, param2): Is semantically equivalent to: - iter = (ITER) - iter = type(iter).__aiter__(iter) + iter = (ITER).__aiter__() running = True while running: try: - TARGET = await type(iter).__anext__(iter) + TARGET = await iter.__anext__() except StopAsyncIteration: running = False else: @@ -383,7 +374,8 @@ async def func(param1, param2): else: SUITE2 -See also "__aiter__()" and "__anext__()" for details. +except that implicit special method lookup is used for "__aiter__()" +and "__anext__()". It is a "SyntaxError" to use an "async for" statement outside the body of a coroutine function. @@ -405,9 +397,9 @@ async def func(param1, param2): is semantically equivalent to: manager = (EXPRESSION) - aenter = type(manager).__aenter__ - aexit = type(manager).__aexit__ - value = await aenter(manager) + aenter = manager.__aenter__ + aexit = manager.__aexit__ + value = await aenter() hit_except = False try: @@ -415,13 +407,14 @@ async def func(param1, param2): SUITE except: hit_except = True - if not await aexit(manager, *sys.exc_info()): + if not await aexit(*sys.exc_info()): raise finally: if not hit_except: - await aexit(manager, None, None, None) + await aexit(None, None, None) -See also "__aenter__()" and "__aexit__()" for details. +except that implicit special method lookup is used for "__aenter__()" +and "__aexit__()". It is a "SyntaxError" to use an "async with" statement outside the body of a coroutine function. 
@@ -489,16 +482,34 @@ async def func(param1, param2): 'atom-literals': r'''Literals ******** -Python supports string and bytes literals and various numeric -literals: +A *literal* is a textual representation of a value. Python supports +numeric, string and bytes literals. Format strings and template +strings are treated as string literals. + +Numeric literals consist of a single "NUMBER" token, which names an +integer, floating-point number, or an imaginary number. See the +Numeric literals section in Lexical analysis documentation for +details. + +String and bytes literals may consist of several tokens. See section +String literal concatenation for details. + +Note that negative and complex numbers, like "-3" or "3+4.2j", are +syntactically not literals, but unary or binary arithmetic operations +involving the "-" or "+" operator. + +Evaluation of a literal yields an object of the given type ("int", +"float", "complex", "str", "bytes", or "Template") with the given +value. The value may be approximated in the case of floating-point and +imaginary literals. + +The formal grammar for literals is: literal: strings | NUMBER -Evaluation of a literal yields an object of the given type (string, -bytes, integer, floating-point number, complex number) with the given -value. The value may be approximated in the case of floating-point -and imaginary (complex) literals. See section Literals for details. -See section String literal concatenation for details on "strings". + +Literals and object identity +============================ All literals correspond to immutable data types, and hence the object’s identity is less important than its value. Multiple @@ -506,21 +517,53 @@ async def func(param1, param2): occurrence in the program text or a different occurrence) may obtain the same object or a different object with the same value. 
+CPython implementation detail: For example, in CPython, *small* +integers with the same value evaluate to the same object: + + >>> x = 7 + >>> y = 7 + >>> x is y + True + +However, large integers evaluate to different objects: + + >>> x = 123456789 + >>> y = 123456789 + >>> x is y + False + +This behavior may change in future versions of CPython. In particular, +the boundary between “small” and “large” integers has already changed +in the past. CPython will emit a "SyntaxWarning" when you compare +literals using "is": + + >>> x = 7 + >>> x is 7 + :1: SyntaxWarning: "is" with 'int' literal. Did you mean "=="? + True + +See When can I rely on identity tests with the is operator? for more +information. + +Template strings are immutable but may reference mutable objects as +"Interpolation" values. For the purposes of this section, two +t-strings have the “same value” if both their structure and the +*identity* of the values match. + +**CPython implementation detail:** Currently, each evaluation of a +template string results in a different object. + String literal concatenation ============================ -Multiple adjacent string or bytes literals (delimited by whitespace), -possibly using different quoting conventions, are allowed, and their -meaning is the same as their concatenation: +Multiple adjacent string or bytes literals, possibly using different +quoting conventions, are allowed, and their meaning is the same as +their concatenation: >>> "hello" 'world' "helloworld" -Formally: - - strings: ( STRING | fstring)+ | tstring+ - This feature is defined at the syntactical level, so it only works with literals. To concatenate string expressions at run time, the ‘+’ operator may be used: @@ -551,6 +594,10 @@ async def func(param1, param2): >>> t"Hello" t"{name}!" 
Template(strings=('Hello', '!'), interpolations=(...)) + +Formally: + + strings: (STRING | fstring)+ | tstring+ ''', 'attribute-access': r'''Customizing attribute access **************************** @@ -916,7 +963,7 @@ class derived from a ""variable-length" built-in type" such as binary operation and an assignment statement: augmented_assignment_stmt: augtarget augop (expression_list | yield_expression) - augtarget: identifier | attributeref | subscription | slicing + augtarget: identifier | attributeref | subscription augop: "+=" | "-=" | "*=" | "@=" | "/=" | "//=" | "%=" | "**=" | ">>=" | "<<=" | "&=" | "^=" | "|=" @@ -1010,7 +1057,7 @@ class and instance attributes applies as for regular assignments. The "%" (modulo) operator yields the remainder from the division of the first argument by the second. The numeric arguments are first -converted to a common type. A zero right argument raises the +converted to a common type. A zero right argument raises the "ZeroDivisionError" exception. The arguments may be floating-point numbers, e.g., "3.14%0.7" equals "0.34" (since "3.14" equals "4*0.7 + 0.34".) The modulo operator always yields a result with the same sign @@ -2120,9 +2167,9 @@ def foo(): is semantically equivalent to: manager = (EXPRESSION) - enter = type(manager).__enter__ - exit = type(manager).__exit__ - value = enter(manager) + enter = manager.__enter__ + exit = manager.__exit__ + value = enter() hit_except = False try: @@ -2130,11 +2177,14 @@ def foo(): SUITE except: hit_except = True - if not exit(manager, *sys.exc_info()): + if not exit(*sys.exc_info()): raise finally: if not hit_except: - exit(manager, None, None, None) + exit(None, None, None) + +except that implicit special method lookup is used for "__enter__()" +and "__exit__()". 
With more than one item, the context managers are processed as if multiple "with" statements were nested: @@ -3066,13 +3116,12 @@ async def func(param1, param2): Is semantically equivalent to: - iter = (ITER) - iter = type(iter).__aiter__(iter) + iter = (ITER).__aiter__() running = True while running: try: - TARGET = await type(iter).__anext__(iter) + TARGET = await iter.__anext__() except StopAsyncIteration: running = False else: @@ -3080,7 +3129,8 @@ async def func(param1, param2): else: SUITE2 -See also "__aiter__()" and "__anext__()" for details. +except that implicit special method lookup is used for "__aiter__()" +and "__anext__()". It is a "SyntaxError" to use an "async for" statement outside the body of a coroutine function. @@ -3102,9 +3152,9 @@ async def func(param1, param2): is semantically equivalent to: manager = (EXPRESSION) - aenter = type(manager).__aenter__ - aexit = type(manager).__aexit__ - value = await aenter(manager) + aenter = manager.__aenter__ + aexit = manager.__aexit__ + value = await aenter() hit_except = False try: @@ -3112,13 +3162,14 @@ async def func(param1, param2): SUITE except: hit_except = True - if not await aexit(manager, *sys.exc_info()): + if not await aexit(*sys.exc_info()): raise finally: if not hit_except: - await aexit(manager, None, None, None) + await aexit(None, None, None) -See also "__aenter__()" and "__aexit__()" for details. +except that implicit special method lookup is used for "__aenter__()" +and "__aexit__()". It is a "SyntaxError" to use an "async with" statement outside the body of a coroutine function. @@ -3529,19 +3580,13 @@ def f() -> annotation: ... 
When a description of an arithmetic operator below uses the phrase “the numeric arguments are converted to a common real type”, this -means that the operator implementation for built-in types works as -follows: - -* If both arguments are complex numbers, no conversion is performed; - -* if either argument is a complex or a floating-point number, the - other is converted to a floating-point number; +means that the operator implementation for built-in numeric types +works as described in the Numeric Types section of the standard +library documentation. -* otherwise, both must be integers and no conversion is necessary. - -Some additional rules apply for certain operators (e.g., a string as a -left argument to the ‘%’ operator). Extensions must define their own -conversion behavior. +Some additional rules apply for certain operators and non-numeric +operands (for example, a string as a left argument to the "%" +operator). Extensions must define their own conversion behavior. ''', 'customization': r'''Basic customization ******************* @@ -3698,7 +3743,7 @@ def f() -> annotation: ... formatting to one of the built-in types, or use a similar formatting option syntax. - See Format Specification Mini-Language for a description of the + See Format specification mini-language for a description of the standard formatting syntax. The return value must be a string object. @@ -3835,7 +3880,7 @@ def __hash__(self): intended to provide protection against a denial-of-service caused by carefully chosen inputs that exploit the worst case performance of a dict insertion, *O*(*n*^2) complexity. See - http://ocert.org/advisories/ocert-2011-003.html for + https://ocert.org/advisories/ocert-2011-003.html for details.Changing hash values affects the iteration order of sets. Python has never made guarantees about this ordering (and it typically varies between 32-bit and 64-bit builds).See also @@ -4685,8 +4730,8 @@ def inner(x): statement in the same code block. 
Trying to delete an unbound name raises a "NameError" exception. -Deletion of attribute references, subscriptions and slicings is passed -to the primary object involved; deletion of a slicing is in general +Deletion of attribute references and subscriptions is passed to the +primary object involved; deletion of a slicing is in general equivalent to assignment of an empty slice of the right type (but even this is determined by the sliced object). @@ -5481,7 +5526,7 @@ class of the instance or a *non-virtual base class* thereof. The Changed in version 3.11: Starred elements are now allowed in the expression list. ''', - 'formatstrings': r'''Format String Syntax + 'formatstrings': r'''Format string syntax ******************** The "str.format()" method and the "Formatter" class share the same @@ -5516,7 +5561,7 @@ class of the instance or a *non-virtual base class* thereof. The preceded by a colon "':'". These specify a non-default format for the replacement value. -See also the Format Specification Mini-Language section. +See also the Format specification mini-language section. The *field_name* itself begins with an *arg_name* that is either a number or a keyword. If it’s a number, it refers to a positional @@ -5584,12 +5629,12 @@ class of the instance or a *non-virtual base class* thereof. The See the Format examples section for some examples. -Format Specification Mini-Language +Format specification mini-language ================================== “Format specifications” are used within replacement fields contained within a format string to define how individual values are presented -(see Format String Syntax, f-strings, and t-strings). They can also be +(see Format string syntax, f-strings, and t-strings). They can also be passed directly to the built-in "format()" function. Each formattable type may define how the format specification is to be interpreted. @@ -5996,8 +6041,8 @@ class of the instance or a *non-virtual base class* thereof. 
The Using type-specific formatting: - >>> import datetime - >>> d = datetime.datetime(2010, 7, 4, 12, 15, 58) + >>> import datetime as dt + >>> d = dt.datetime(2010, 7, 4, 12, 15, 58) >>> '{:%Y-%m-%d %H:%M:%S}'.format(d) '2010-07-04 12:15:58' @@ -6405,8 +6450,8 @@ def whats_on_the_telly(penguin=None): remaining characters must be in the “letter- and digit-like” set "xid_continue". -These sets based on the *XID_Start* and *XID_Continue* sets as defined -by the Unicode standard annex UAX-31. Python’s "xid_start" +These sets are based on the *XID_Start* and *XID_Continue* sets as +defined by the Unicode standard annex UAX-31. Python’s "xid_start" additionally includes the underscore ("_"). Note that Python does not necessarily conform to UAX-31. @@ -6614,7 +6659,9 @@ def whats_on_the_telly(penguin=None): The *public names* defined by a module are determined by checking the module’s namespace for a variable named "__all__"; if defined, it must be a sequence of strings which are names defined or imported by that -module. The names given in "__all__" are all considered public and +module. Names containing non-ASCII characters must be in the +normalization form NFKC; see Non-ASCII characters in names for +details. The names given in "__all__" are all considered public and are required to exist. If "__all__" is not defined, the set of public names includes all names found in the module’s namespace which do not begin with an underscore character ("'_'"). 
"__all__" should contain @@ -7620,8 +7667,8 @@ class that has an "__rsub__()" method, "type(y).__rsub__(y, x)" is | value...}", "{expressions...}" | list display, dictionary display, set | | | display | +-------------------------------------------------+---------------------------------------+ -| "x[index]", "x[index:index]", | Subscription, slicing, call, | -| "x(arguments...)", "x.attribute" | attribute reference | +| "x[index]", "x[index:index]" "x(arguments...)", | Subscription (including slicing), | +| "x.attribute" | call, attribute reference | +-------------------------------------------------+---------------------------------------+ | "await x" | Await expression | +-------------------------------------------------+---------------------------------------+ @@ -7738,8 +7785,8 @@ class C: pass # a class with no methods (yet) The power operator has the same semantics as the built-in "pow()" function, when called with two arguments: it yields its left argument -raised to the power of its right argument. The numeric arguments are -first converted to a common type, and the result is of that type. +raised to the power of its right argument. Numeric arguments are first +converted to a common type, and the result is of that type. For int operands, the result has the same type as the operands unless the second argument is negative; in that case, all arguments are @@ -7945,35 +7992,46 @@ class C: pass # a class with no methods (yet) Added in version 3.4. -Note: +object.__getitem__(self, subscript) - Slicing is done exclusively with the following three methods. A - call like + Called to implement *subscription*, that is, "self[subscript]". See + Subscriptions and slicings for details on the syntax. - a[1:2] = b + There are two types of built-in objects that support subscription + via "__getitem__()": - is translated to + * **sequences**, where *subscript* (also called *index*) should be + an integer or a "slice" object. 
See the sequence documentation + for the expected behavior, including handling "slice" objects and + negative indices. - a[slice(1, 2, None)] = b + * **mappings**, where *subscript* is also called the *key*. See + mapping documentation for the expected behavior. - and so forth. Missing slice items are always filled in with "None". + If *subscript* is of an inappropriate type, "__getitem__()" should + raise "TypeError". If *subscript* has an inappropriate value, + "__getitem__()" should raise a "LookupError" or one of its + subclasses ("IndexError" for sequences; "KeyError" for mappings). + + Note: -object.__getitem__(self, key) + Slicing is handled by "__getitem__()", "__setitem__()", and + "__delitem__()". A call like - Called to implement evaluation of "self[key]". For *sequence* - types, the accepted keys should be integers. Optionally, they may - support "slice" objects as well. Negative index support is also - optional. If *key* is of an inappropriate type, "TypeError" may be - raised; if *key* is a value outside the set of indexes for the - sequence (after any special interpretation of negative values), - "IndexError" should be raised. For *mapping* types, if *key* is - missing (not in the container), "KeyError" should be raised. + a[1:2] = b + + is translated to + + a[slice(1, 2, None)] = b + + and so forth. Missing slice items are always filled in with + "None". Note: - "for" loops expect that an "IndexError" will be raised for - illegal indexes to allow proper detection of the end of the - sequence. + The sequence iteration protocol (used, for example, in "for" + loops) expects that an "IndexError" will be raised for illegal + indexes to allow proper detection of the end of a sequence. Note: @@ -8063,37 +8121,40 @@ class C: pass # a class with no methods (yet) 'slicings': r'''Slicings ******** -A slicing selects a range of items in a sequence object (e.g., a -string, tuple or list).
Slicings may be used as expressions or as -targets in assignment or "del" statements. The syntax for a slicing: - - slicing: primary "[" slice_list "]" - slice_list: slice_item ("," slice_item)* [","] - slice_item: expression | proper_slice - proper_slice: [lower_bound] ":" [upper_bound] [ ":" [stride] ] - lower_bound: expression - upper_bound: expression - stride: expression - -There is ambiguity in the formal syntax here: anything that looks like -an expression list also looks like a slice list, so any subscription -can be interpreted as a slicing. Rather than further complicating the -syntax, this is disambiguated by defining that in this case the -interpretation as a subscription takes priority over the -interpretation as a slicing (this is the case if the slice list -contains no proper slice). - -The semantics for a slicing are as follows. The primary is indexed -(using the same "__getitem__()" method as normal subscription) with a -key that is constructed from the slice list, as follows. If the slice -list contains at least one comma, the key is a tuple containing the -conversion of the slice items; otherwise, the conversion of the lone -slice item is the key. The conversion of a slice item that is an -expression is that expression. The conversion of a proper slice is a -slice object (see section The standard type hierarchy) whose "start", -"stop" and "step" attributes are the values of the expressions given -as lower bound, upper bound and stride, respectively, substituting -"None" for missing expressions. +A more advanced form of subscription, *slicing*, is commonly used to +extract a portion of a sequence. In this form, the subscript is a +*slice*: up to three expressions separated by colons. 
Any of the +expressions may be omitted, but a slice must contain at least one +colon: + + >>> number_names = ['zero', 'one', 'two', 'three', 'four', 'five'] + >>> number_names[1:3] + ['one', 'two'] + >>> number_names[1:] + ['one', 'two', 'three', 'four', 'five'] + >>> number_names[:3] + ['zero', 'one', 'two'] + >>> number_names[:] + ['zero', 'one', 'two', 'three', 'four', 'five'] + >>> number_names[::2] + ['zero', 'two', 'four'] + >>> number_names[:-3] + ['zero', 'one', 'two'] + >>> del number_names[4:] + >>> number_names + ['zero', 'one', 'two', 'three'] + +When a slice is evaluated, the interpreter constructs a "slice" object +whose "start", "stop" and "step" attributes, respectively, are the +results of the expressions between the colons. Any missing expression +evaluates to "None". This "slice" object is then passed to the +"__getitem__()" or "__class_getitem__()" *special method*, as above. + + # continuing with the SubscriptionDemo instance defined above: + >>> demo[2:3] + subscripted with: slice(2, 3, None) + >>> demo[::'spam'] + subscripted with: slice(None, None, 'spam') ''', 'specialattrs': r'''Special Attributes ****************** @@ -8314,7 +8375,7 @@ class C: pass # a class with no methods (yet) formatting to one of the built-in types, or use a similar formatting option syntax. - See Format Specification Mini-Language for a description of the + See Format specification mini-language for a description of the standard formatting syntax. The return value must be a string object. @@ -8451,7 +8512,7 @@ def __hash__(self): intended to provide protection against a denial-of-service caused by carefully chosen inputs that exploit the worst case performance of a dict insertion, *O*(*n*^2) complexity. See - http://ocert.org/advisories/ocert-2011-003.html for + https://ocert.org/advisories/ocert-2011-003.html for details.Changing hash values affects the iteration order of sets. 
Python has never made guarantees about this ordering (and it typically varies between 32-bit and 64-bit builds).See also @@ -9310,35 +9371,46 @@ class of a class is known as that class’s *metaclass*, and most Added in version 3.4. -Note: +object.__getitem__(self, subscript) + + Called to implement *subscription*, that is, "self[subscript]". See + Subscriptions and slicings for details on the syntax. - Slicing is done exclusively with the following three methods. A - call like + There are two types of built-in objects that support subscription + via "__getitem__()": - a[1:2] = b + * **sequences**, where *subscript* (also called *index*) should be + an integer or a "slice" object. See the sequence documentation + for the expected behavior, including handling "slice" objects and + negative indices. + + * **mappings**, where *subscript* is also called the *key*. See + mapping documentation for the expected behavior. + + If *subscript* is of an inappropriate type, "__getitem__()" should + raise "TypeError". If *subscript* has an inappropriate value, + "__getitem__()" should raise a "LookupError" or one of its + subclasses ("IndexError" for sequences; "KeyError" for mappings). + + Note: - is translated to + Slicing is handled by "__getitem__()", "__setitem__()", and + "__delitem__()". A call like - a[slice(1, 2, None)] = b + a[1:2] = b - and so forth. Missing slice items are always filled in with "None". + is translated to -object.__getitem__(self, key) + a[slice(1, 2, None)] = b - Called to implement evaluation of "self[key]". For *sequence* - types, the accepted keys should be integers. Optionally, they may - support "slice" objects as well. Negative index support is also - optional. If *key* is of an inappropriate type, "TypeError" may be - raised; if *key* is a value outside the set of indexes for the - sequence (after any special interpretation of negative values), - "IndexError" should be raised.
For *mapping* types, if *key* is - missing (not in the container), "KeyError" should be raised. + and so forth. Missing slice items are always filled in with + "None". Note: - "for" loops expect that an "IndexError" will be raised for - illegal indexes to allow proper detection of the end of the - sequence. + The sequence iteration protocol (used, for example, in "for" + loops) expects that an "IndexError" will be raised for illegal + indexes to allow proper detection of the end of a sequence. Note: @@ -9656,14 +9728,27 @@ class is used in a class pattern with positional arguments, each "inspect.BufferFlags" provides a convenient way to interpret the flags. The method must return a "memoryview" object. + **Thread safety:** In *free-threaded* Python, implementations must + manage any internal export counter using atomic operations. The + method must be safe to call concurrently from multiple threads, and + the returned buffer’s underlying data must remain valid until the + corresponding "__release_buffer__()" call completes. See Thread + safety for memoryview objects for details. + object.__release_buffer__(self, buffer) Called when a buffer is no longer needed. The *buffer* argument is a "memoryview" object that was previously returned by "__buffer__()". The method must release any resources associated - with the buffer. This method should return "None". Buffer objects - that do not need to perform any cleanup are not required to - implement this method. + with the buffer. This method should return "None". + + **Thread safety:** In *free-threaded* Python, any export counter + decrement must use atomic operations. Resource cleanup must be + thread-safe, as the final release may race with concurrent releases + from other threads. + + Buffer objects that do not need to perform any cleanup are not + required to implement this method. Added in version 3.12.
@@ -9804,7 +9889,7 @@ class is used in a class pattern with positional arguments, each Strings also support two styles of string formatting, one providing a large degree of flexibility and customization (see "str.format()", -Format String Syntax and Custom String Formatting) and the other based +Format string syntax and Custom string formatting) and the other based on C "printf" style formatting that handles a narrower range of types and is slightly harder to use correctly, but is often faster for the cases it can handle (printf-style String Formatting). @@ -9990,7 +10075,7 @@ class is used in a class pattern with positional arguments, each >>> "{1} expects the {0} Inquisition!".format("Spanish", "Nobody") 'Nobody expects the Spanish Inquisition!' - See Format String Syntax for a description of the various + See Format string syntax for a description of the various formatting options that can be specified in format strings. Note: @@ -10045,6 +10130,16 @@ class is used in a class pattern with positional arguments, each there is at least one character, "False" otherwise. A character "c" is alphanumeric if one of the following returns "True": "c.isalpha()", "c.isdecimal()", "c.isdigit()", or "c.isnumeric()". + For example: + + >>> 'abc123'.isalnum() + True + >>> 'abc123!@#'.isalnum() + False + >>> ''.isalnum() + False + >>> ' '.isalnum() + False str.isalpha() @@ -10173,16 +10268,31 @@ class is used in a class pattern with positional arguments, each >>> '\t'.isprintable(), '\n'.isprintable() (False, False) + See also "isspace()". + str.isspace() Return "True" if there are only whitespace characters in the string and there is at least one character, "False" otherwise. 
+ For example: + + >>> ''.isspace() + False + >>> ' '.isspace() + True + >>> '\t\n'.isspace() # TAB and BREAK LINE + True + >>> '\u3000'.isspace() # IDEOGRAPHIC SPACE + True + A character is *whitespace* if in the Unicode character database (see "unicodedata"), either its general category is "Zs" (“Separator, space”), or its bidirectional class is one of "WS", "B", or "S". + See also "isprintable()". + str.istitle() Return "True" if the string is a titlecased string and there is at @@ -10306,6 +10416,17 @@ class is used in a class pattern with positional arguments, each found, return a 3-tuple containing the string itself, followed by two empty strings. + For example: + + >>> 'Monty Python'.partition(' ') + ('Monty', ' ', 'Python') + >>> "Monty Python's Flying Circus".partition(' ') + ('Monty', ' ', "Python's Flying Circus") + >>> 'Monty Python'.partition('-') + ('Monty Python', '', '') + + See also "rpartition()". + str.removeprefix(prefix, /) If the string starts with the *prefix* string, return @@ -10388,6 +10509,17 @@ class is used in a class pattern with positional arguments, each space). The original string is returned if *width* is less than or equal to "len(s)". + For example: + + >>> 'Python'.rjust(10) + ' Python' + >>> 'Python'.rjust(10, '.') + '....Python' + >>> 'Monty Python'.rjust(10, '.') + 'Monty Python' + + See also "ljust()" and "zfill()". + str.rpartition(sep, /) Split the string at the last occurrence of *sep*, and return a @@ -10422,21 +10554,23 @@ class is used in a class pattern with positional arguments, each *chars* argument is a string specifying the set of characters to be removed. If omitted or "None", the *chars* argument defaults to removing whitespace. The *chars* argument is not a suffix; rather, - all combinations of its values are stripped: + all combinations of its values are stripped. 
For example: >>> ' spacious '.rstrip() ' spacious' >>> 'mississippi'.rstrip('ipz') 'mississ' - See "str.removesuffix()" for a method that will remove a single - suffix string rather than all of a set of characters. For example: + See "removesuffix()" for a method that will remove a single suffix + string rather than all of a set of characters. For example: >>> 'Monty Python'.rstrip(' Python') 'M' >>> 'Monty Python'.removesuffix(' Python') 'Monty' + See also "strip()". + str.split(sep=None, maxsplit=-1) Return a list of the words in the string, using *sep* as the @@ -10561,6 +10695,17 @@ class is used in a class pattern with positional arguments, each With optional *start*, test string beginning at that position. With optional *end*, stop comparing string at that position. + For example: + + >>> 'Python'.startswith('Py') + True + >>> 'a tuple of prefixes'.startswith(('at', 'a')) + True + >>> 'Python is amazing'.startswith('is', 7) + True + + See also "endswith()" and "removeprefix()". + str.strip(chars=None, /) Return a copy of the string with the leading and trailing @@ -10568,7 +10713,9 @@ class is used in a class pattern with positional arguments, each set of characters to be removed. If omitted or "None", the *chars* argument defaults to removing whitespace. The *chars* argument is not a prefix or suffix; rather, all combinations of its values are - stripped: + stripped. + + For example: >>> ' spacious '.strip() 'spacious' @@ -10579,12 +10726,16 @@ class is used in a class pattern with positional arguments, each stripped from the string. Characters are removed from the leading end until reaching a string character that is not contained in the set of characters in *chars*. A similar action takes place on the - trailing end. For example: + trailing end. + + For example: >>> comment_string = '#....... Section 3.2.1 Issue #32 .......' >>> comment_string.strip('.#! ') 'Section 3.2.1 Issue #32' + See also "rstrip()". 
+ str.swapcase() Return a copy of the string with uppercase characters converted to @@ -10669,6 +10820,8 @@ class is used in a class pattern with positional arguments, each '00042' >>> "-42".zfill(5) '-0042' + + See also "rjust()". ''', 'strings': '''String and Bytes literals ************************* @@ -11235,62 +11388,168 @@ class is used in a class pattern with positional arguments, each ''', - 'subscriptions': r'''Subscriptions -************* + 'subscriptions': r'''Subscriptions and slicings +************************** + +The *subscription* syntax is usually used for selecting an element +from a container – for example, to get a value from a "dict": -The subscription of an instance of a container class will generally -select an element from the container. The subscription of a *generic -class* will generally return a GenericAlias object. + >>> digits_by_name = {'one': 1, 'two': 2} + >>> digits_by_name['two'] # Subscripting a dictionary using the key 'two' + 2 - subscription: primary "[" flexible_expression_list "]" +In the subscription syntax, the object being subscribed – a primary – +is followed by a *subscript* in square brackets. In the simplest case, +the subscript is a single expression. -When an object is subscripted, the interpreter will evaluate the -primary and the expression list. +Depending on the type of the object being subscribed, the subscript is +sometimes called a *key* (for mappings), *index* (for sequences), or +*type argument* (for *generic types*). Syntactically, these are all +equivalent: -The primary must evaluate to an object that supports subscription. An -object may support subscription through defining one or both of -"__getitem__()" and "__class_getitem__()". When the primary is -subscripted, the evaluated result of the expression list will be -passed to one of these methods. 
For more details on when -"__class_getitem__" is called instead of "__getitem__", see + >>> colors = ['red', 'blue', 'green', 'black'] + >>> colors[3] # Subscripting a list using the index 3 + 'black' + + >>> list[str] # Parameterizing the list type using the type argument str + list[str] + +At runtime, the interpreter will evaluate the primary and the +subscript, and call the primary’s "__getitem__()" or +"__class_getitem__()" *special method* with the subscript as argument. +For more details on which of these methods is called, see __class_getitem__ versus __getitem__. -If the expression list contains at least one comma, or if any of the -expressions are starred, the expression list will evaluate to a -"tuple" containing the items of the expression list. Otherwise, the -expression list will evaluate to the value of the list’s sole member. - -Changed in version 3.11: Expressions in an expression list may be -starred. See **PEP 646**. - -For built-in objects, there are two types of objects that support -subscription via "__getitem__()": - -1. Mappings. If the primary is a *mapping*, the expression list must - evaluate to an object whose value is one of the keys of the - mapping, and the subscription selects the value in the mapping that - corresponds to that key. An example of a builtin mapping class is - the "dict" class. - -2. Sequences. If the primary is a *sequence*, the expression list must - evaluate to an "int" or a "slice" (as discussed in the following - section). Examples of builtin sequence classes include the "str", - "list" and "tuple" classes. - -The formal syntax makes no special provision for negative indices in -*sequences*. However, built-in sequences all provide a "__getitem__()" -method that interprets negative indices by adding the length of the -sequence to the index so that, for example, "x[-1]" selects the last -item of "x". 
The resulting value must be a nonnegative integer less -than the number of items in the sequence, and the subscription selects -the item whose index is that value (counting from zero). Since the -support for negative indices and slicing occurs in the object’s -"__getitem__()" method, subclasses overriding this method will need to -explicitly add that support. - -A "string" is a special kind of sequence whose items are *characters*. -A character is not a separate data type but a string of exactly one -character. +To show how subscription works, we can define a custom object that +implements "__getitem__()" and prints out the value of the subscript: + + >>> class SubscriptionDemo: + ... def __getitem__(self, key): + ... print(f'subscripted with: {key!r}') + ... + >>> demo = SubscriptionDemo() + >>> demo[1] + subscripted with: 1 + >>> demo['a' * 3] + subscripted with: 'aaa' + +See "__getitem__()" documentation for how built-in types handle +subscription. + +Subscriptions may also be used as targets in assignment or deletion +statements. In these cases, the interpreter will call the subscripted +object’s "__setitem__()" or "__delitem__()" *special method*, +respectively, instead of "__getitem__()". + + >>> colors = ['red', 'blue', 'green', 'black'] + >>> colors[3] = 'white' # Setting item at index 3 + >>> colors + ['red', 'blue', 'green', 'white'] + >>> del colors[3] # Deleting item at index 3 + >>> colors + ['red', 'blue', 'green'] + +All advanced forms of *subscript* documented in the following sections +are also usable for assignment and deletion. + + +Slicings +======== + +A more advanced form of subscription, *slicing*, is commonly used to +extract a portion of a sequence. In this form, the subscript is a +*slice*: up to three expressions separated by colons.
Any of the +expressions may be omitted, but a slice must contain at least one +colon: + + >>> number_names = ['zero', 'one', 'two', 'three', 'four', 'five'] + >>> number_names[1:3] + ['one', 'two'] + >>> number_names[1:] + ['one', 'two', 'three', 'four', 'five'] + >>> number_names[:3] + ['zero', 'one', 'two'] + >>> number_names[:] + ['zero', 'one', 'two', 'three', 'four', 'five'] + >>> number_names[::2] + ['zero', 'two', 'four'] + >>> number_names[:-3] + ['zero', 'one', 'two'] + >>> del number_names[4:] + >>> number_names + ['zero', 'one', 'two', 'three'] + +When a slice is evaluated, the interpreter constructs a "slice" object +whose "start", "stop" and "step" attributes, respectively, are the +results of the expressions between the colons. Any missing expression +evaluates to "None". This "slice" object is then passed to the +"__getitem__()" or "__class_getitem__()" *special method*, as above. + + # continuing with the SubscriptionDemo instance defined above: + >>> demo[2:3] + subscripted with: slice(2, 3, None) + >>> demo[::'spam'] + subscripted with: slice(None, None, 'spam') + + +Comma-separated subscripts +========================== + +The subscript can also be given as two or more comma-separated +expressions or slices: + + # continuing with the SubscriptionDemo instance defined above: + >>> demo[1, 2, 3] + subscripted with: (1, 2, 3) + >>> demo[1:2, 3] + subscripted with: (slice(1, 2, None), 3) + +This form is commonly used with numerical libraries for slicing multi- +dimensional data. In this case, the interpreter constructs a "tuple" +of the results of the expressions or slices, and passes this tuple to +the "__getitem__()" or "__class_getitem__()" *special method*, as +above. 
+ +The subscript may also be given as a single expression or slice +followed by a comma, to specify a one-element tuple: + + >>> demo['spam',] + subscripted with: ('spam',) + + +“Starred” subscriptions +======================= + +Added in version 3.11: Expressions in *tuple_slices* may be starred. +See **PEP 646**. + +The subscript can also contain a starred expression. In this case, the +interpreter unpacks the result into a tuple, and passes this tuple to +"__getitem__()" or "__class_getitem__()": + + # continuing with the SubscriptionDemo instance defined above: + >>> demo[*range(10)] + subscripted with: (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + +Starred expressions may be combined with comma-separated expressions +and slices: + + >>> demo['a', 'b', *range(3), 'c'] + subscripted with: ('a', 'b', 0, 1, 2, 'c') + + +Formal subscription grammar +=========================== + + subscription: primary '[' subscript ']' + subscript: single_subscript | tuple_subscript + single_subscript: proper_slice | assignment_expression + proper_slice: [expression] ":" [expression] [ ":" [expression] ] + tuple_subscript: ','.(single_subscript | starred_expression)+ [','] + +Recall that the "|" operator denotes ordered choice. Specifically, in +"subscript", if both alternatives would match, the first +("single_subscript") has priority. ''', 'truth': r'''Truth Value Testing ******************* @@ -11696,10 +11955,19 @@ def foo(): "a[-2]" equals "a[n-2]", the second to last item of sequence a with length "n". -Sequences also support slicing: "a[i:j]" selects all items with index -*k* such that *i* "<=" *k* "<" *j*. When used as an expression, a -slice is a sequence of the same type. The comment above about negative -indexes also applies to negative slice positions. +The resulting value must be a nonnegative integer less than the number +of items in the sequence. If it is not, an "IndexError" is raised. 
+ +Sequences also support slicing: "a[start:stop]" selects all items with +index *k* such that *start* "<=" *k* "<" *stop*. When used as an +expression, a slice is a sequence of the same type. The comment above +about negative subscripts also applies to negative slice positions. +Note that no error is raised if a slice position is less than zero or +larger than the length of the sequence. + +If *start* is missing or "None", slicing behaves as if *start* was +zero. If *stop* is missing or "None", slicing behaves as if *stop* was +equal to the length of the sequence. Some sequences also support “extended slicing” with a third “step” parameter: "a[i:j:k]" selects all items of *a* with index *x* where "x @@ -11720,27 +11988,33 @@ def foo(): The following types are immutable sequences: Strings - A string is a sequence of values that represent Unicode code - points. All the code points in the range "U+0000 - U+10FFFF" can be - represented in a string. Python doesn’t have a char type; instead, - every code point in the string is represented as a string object - with length "1". The built-in function "ord()" converts a code - point from its string form to an integer in the range "0 - 10FFFF"; - "chr()" converts an integer in the range "0 - 10FFFF" to the - corresponding length "1" string object. "str.encode()" can be used - to convert a "str" to "bytes" using the given text encoding, and + A string ("str") is a sequence of values that represent + *characters*, or more formally, *Unicode code points*. All the code + points in the range "0" to "0x10FFFF" can be represented in a + string. + + Python doesn’t have a dedicated *character* type. Instead, every + code point in the string is represented as a string object with + length "1". + + The built-in function "ord()" converts a code point from its string + form to an integer in the range "0" to "0x10FFFF"; "chr()" converts + an integer in the range "0" to "0x10FFFF" to the corresponding + length "1" string object. 
"str.encode()" can be used to convert a + "str" to "bytes" using the given text encoding, and "bytes.decode()" can be used to achieve the opposite. Tuples - The items of a tuple are arbitrary Python objects. Tuples of two or - more items are formed by comma-separated lists of expressions. A - tuple of one item (a ‘singleton’) can be formed by affixing a comma - to an expression (an expression by itself does not create a tuple, - since parentheses must be usable for grouping of expressions). An - empty tuple can be formed by an empty pair of parentheses. + The items of a "tuple" are arbitrary Python objects. Tuples of two + or more items are formed by comma-separated lists of expressions. + A tuple of one item (a ‘singleton’) can be formed by affixing a + comma to an expression (an expression by itself does not create a + tuple, since parentheses must be usable for grouping of + expressions). An empty tuple can be formed by an empty pair of + parentheses. Bytes - A bytes object is an immutable array. The items are 8-bit bytes, + A "bytes" object is an immutable array. The items are 8-bit bytes, represented by integers in the range 0 <= x < 256. Bytes literals (like "b'abc'") and the built-in "bytes()" constructor can be used to create bytes objects. Also, bytes objects can be decoded to @@ -12510,11 +12784,28 @@ class instance has a namespace implemented as a dictionary which is socket objects (and perhaps by other functions or methods provided by extension modules). +File objects implement common methods, listed below, to simplify usage +in generic code. They are expected to be With Statement Context +Managers. + The objects "sys.stdin", "sys.stdout" and "sys.stderr" are initialized to file objects corresponding to the interpreter’s standard input, output and error streams; they are all open in text mode and therefore follow the interface defined by the "io.TextIOBase" abstract class. +file.read(size=-1, /) + + Retrieve up to *size* data from the file. 
As a convenience if + *size* is unspecified or -1 retrieve all data available. + +file.write(data, /) + + Store *data* to the file. + +file.close() + + Flush any buffers and close the underlying file. + Internal types ============== @@ -13202,6 +13493,11 @@ class dict(iterable, /, **kwargs) "types.MappingProxyType" can be used to create a read-only view of a "dict". +See also: + + For detailed information on thread-safety guarantees for "dict" + objects, see Thread safety for dict objects. + Dictionary view objects ======================= @@ -13535,7 +13831,7 @@ class dict(iterable, /, **kwargs) Return the total number of occurrences of *value* in *sequence*. -sequence.index(value[, start[, stop]) +sequence.index(value[, start[, stop]]) Return the index of the first occurrence of *value* in *sequence*. @@ -13625,7 +13921,7 @@ class dict(iterable, /, **kwargs) sequence.append(value, /) - Append *value* to the end of the sequence This is equivalent to + Append *value* to the end of the sequence. This is equivalent to writing "seq[len(seq):len(seq)] = [value]". sequence.clear() @@ -13754,75 +14050,10 @@ class list(iterable=(), /) empty for the duration, and raises "ValueError" if it can detect that the list has been mutated during a sort. -Thread safety: Reading a single element from a "list" is *atomic*: - - lst[i] # list.__getitem__ - -The following methods traverse the list and use *atomic* reads of each -item to perform their function. That means that they may return -results affected by concurrent modifications: - - item in lst - lst.index(item) - lst.count(item) - -All of the above methods/operations are also lock-free. They do not -block concurrent modifications. 
Other operations that hold a lock will -not block these from observing intermediate states.All other -operations from here on block using the per-object lock.Writing a -single item via "lst[i] = x" is safe to call from multiple threads and -will not corrupt the list.The following operations return new objects -and appear *atomic* to other threads: - - lst1 + lst2 # concatenates two lists into a new list - x * lst # repeats lst x times into a new list - lst.copy() # returns a shallow copy of the list - -Methods that only operate on a single elements with no shifting -required are *atomic*: - - lst.append(x) # append to the end of the list, no shifting required - lst.pop() # pop element from the end of the list, no shifting required - -The "clear()" method is also *atomic*. Other threads cannot observe -elements being removed.The "sort()" method is not *atomic*. Other -threads cannot observe intermediate states during sorting, but the -list appears empty for the duration of the sort.The following -operations may allow lock-free operations to observe intermediate -states since they modify multiple elements in place: - - lst.insert(idx, item) # shifts elements - lst.pop(idx) # idx not at the end of the list, shifts elements - lst *= x # copies elements in place - -The "remove()" method may allow concurrent modifications since element -comparison may execute arbitrary Python code (via -"__eq__()")."extend()" is safe to call from multiple threads. -However, its guarantees depend on the iterable passed to it. If it is -a "list", a "tuple", a "set", a "frozenset", a "dict" or a dictionary -view object (but not their subclasses), the "extend" operation is safe -from concurrent modifications to the iterable. Otherwise, an iterator -is created which can be concurrently modified by another thread. 
The -same applies to inplace concatenation of a list with other iterables -when using "lst += iterable".Similarly, assigning to a list slice with -"lst[i:j] = iterable" is safe to call from multiple threads, but -"iterable" is only locked when it is also a "list" (but not its -subclasses).Operations that involve multiple accesses, as well as -iteration, are never atomic. For example: - - # NOT atomic: read-modify-write - lst[i] = lst[i] + 1 - - # NOT atomic: check-then-act - if lst: - item = lst.pop() - - # NOT thread-safe: iteration while modifying - for item in lst: - process(item) # another thread may modify lst - -Consider external synchronization when sharing "list" instances across -threads. See Python support for free threading for more information. +See also: + + For detailed information on thread-safety guarantees for "list" + objects, see Thread safety for list objects. Tuples @@ -14043,7 +14274,7 @@ class range(start, stop, step=1, /) sequence.append(value, /) - Append *value* to the end of the sequence This is equivalent to + Append *value* to the end of the sequence. This is equivalent to writing "seq[len(seq):len(seq)] = [value]". sequence.clear() @@ -14194,9 +14425,9 @@ class range(start, stop, step=1, /) is semantically equivalent to: manager = (EXPRESSION) - enter = type(manager).__enter__ - exit = type(manager).__exit__ - value = enter(manager) + enter = manager.__enter__ + exit = manager.__exit__ + value = enter() hit_except = False try: @@ -14204,11 +14435,14 @@ class range(start, stop, step=1, /) SUITE except: hit_except = True - if not exit(manager, *sys.exc_info()): + if not exit(*sys.exc_info()): raise finally: if not hit_except: - exit(manager, None, None, None) + exit(None, None, None) + +except that implicit special method lookup is used for "__enter__()" +and "__exit__()". With more than one item, the context managers are processed as if multiple "with" statements were nested: