diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 73acb1fe8d4..10e1c936688 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -9,12 +9,15 @@ The function translate(PATTERN) returns a regular expression corresponding to PATTERN. (It does not compile it.) """ + +import functools +import itertools import os import posixpath import re -import functools -__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] +__all__ = ["filter", "filterfalse", "fnmatch", "fnmatchcase", "translate"] + def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -35,6 +38,7 @@ def fnmatch(name, pat): pat = os.path.normcase(pat) return fnmatchcase(name, pat) + @functools.lru_cache(maxsize=32768, typed=True) def _compile_pattern(pat): if isinstance(pat, bytes): @@ -45,6 +49,7 @@ def _compile_pattern(pat): res = translate(pat) return re.compile(res).match + def filter(names, pat): """Construct a list from those elements of the iterable NAMES that match PAT.""" result = [] @@ -61,6 +66,22 @@ def filter(names, pat): result.append(name) return result + +def filterfalse(names, pat): + """Construct a list from those elements of the iterable NAMES that do not match PAT.""" + pat = os.path.normcase(pat) + match = _compile_pattern(pat) + if os.path is posixpath: + # normcase on posix is NOP. Optimize it away from the loop. + return list(itertools.filterfalse(match, names)) + + result = [] + for name in names: + if match(os.path.normcase(name)) is None: + result.append(name) + return result + + def fnmatchcase(name, pat): """Test whether FILENAME matches PATTERN, including case. @@ -77,24 +98,32 @@ def translate(pat): There is no way to quote meta-characters. 
""" - STAR = object() - parts = _translate(pat, STAR, '.') - return _join_translated_parts(parts, STAR) + parts, star_indices = _translate(pat, '*', '.') + return _join_translated_parts(parts, star_indices) + +_re_setops_sub = re.compile(r'([&~|])').sub +_re_escape = functools.lru_cache(maxsize=512)(re.escape) -def _translate(pat, STAR, QUESTION_MARK): + +def _translate(pat, star, question_mark): res = [] add = res.append + star_indices = [] + i, n = 0, len(pat) while i < n: c = pat[i] i = i+1 if c == '*': + # store the position of the wildcard + star_indices.append(len(res)) + add(star) # compress consecutive `*` into one - if (not res) or res[-1] is not STAR: - add(STAR) + while i < n and pat[i] == '*': + i += 1 elif c == '?': - add(QUESTION_MARK) + add(question_mark) elif c == '[': j = i if j < n and pat[j] == '!': @@ -133,8 +162,6 @@ def _translate(pat, STAR, QUESTION_MARK): # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') for s in chunks) - # Escape set operations (&&, ~~ and ||). - stuff = re.sub(r'([&~|])', r'\\\1', stuff) i = j+1 if not stuff: # Empty range: never match. @@ -143,50 +170,40 @@ def _translate(pat, STAR, QUESTION_MARK): # Negated empty range: match any character. add('.') else: + # Escape set operations (&&, ~~ and ||). + stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff add(f'[{stuff}]') else: - add(re.escape(c)) - assert i == n - return res - - -def _join_translated_parts(inp, STAR): - # Deal with STARs. - res = [] - add = res.append - i, n = 0, len(inp) - # Fixed pieces at the start? - while i < n and inp[i] is not STAR: - add(inp[i]) - i += 1 - # Now deal with STAR fixed STAR fixed ... - # For an interior `STAR fixed` pairing, we want to do a minimal - # .*? match followed by `fixed`, with no possibility of backtracking. - # Atomic groups ("(?>...)") allow us to spell that directly. 
- # Note: people rely on the undocumented ability to join multiple - # translate() results together via "|" to build large regexps matching - # "one of many" shell patterns. - while i < n: - assert inp[i] is STAR - i += 1 - if i == n: - add(".*") - break - assert inp[i] is not STAR - fixed = [] - while i < n and inp[i] is not STAR: - fixed.append(inp[i]) - i += 1 - fixed = "".join(fixed) - if i == n: - add(".*") - add(fixed) - else: - add(f"(?>.*?{fixed})") + add(_re_escape(c)) assert i == n - res = "".join(res) - return fr'(?s:{res})\Z' + return res, star_indices + + +def _join_translated_parts(parts, star_indices): + if not star_indices: + return fr'(?s:{"".join(parts)})\z' + iter_star_indices = iter(star_indices) + j = next(iter_star_indices) + buffer = parts[:j] # fixed pieces at the start + append, extend = buffer.append, buffer.extend + i = j + 1 + for j in iter_star_indices: + # Now deal with STAR fixed STAR fixed ... + # For an interior `STAR fixed` pairing, we want to do a minimal + # .*? match followed by `fixed`, with no possibility of backtracking. + # Atomic groups ("(?>...)") allow us to spell that directly. + # Note: people rely on the undocumented ability to join multiple + # translate() results together via "|" to build large regexps matching + # "one of many" shell patterns. + append('(?>.*?') + extend(parts[i:j]) + append(')') + i = j + 1 + append('.*') + extend(parts[i:]) + res = ''.join(buffer) + return fr'(?s:{res})\z' diff --git a/Lib/glob.py b/Lib/glob.py index c506e0e2157..f1a87c82fc5 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -22,6 +22,9 @@ def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, dot are special cases that are not matched by '*' and '?' patterns by default. + The order of the returned list is undefined. Sort it if you need a + particular order. + If `include_hidden` is true, the patterns '*', '?', '**' will match hidden directories. 
@@ -40,6 +43,9 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, dot are special cases that are not matched by '*' and '?' patterns. + The order of the returned paths is undefined. Sort them if you need a + particular order. + If recursive is true, the pattern '**' will match any files and zero or more directories and subdirectories. """ @@ -312,24 +318,24 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): if part: if not include_hidden and part[0] in '*?': results.append(r'(?!\.)') - results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)) + results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0]) if idx < last_part_idx: results.append(any_sep) res = ''.join(results) - return fr'(?s:{res})\Z' + return fr'(?s:{res})\z' @functools.lru_cache(maxsize=512) -def _compile_pattern(pat, sep, case_sensitive, recursive=True): +def _compile_pattern(pat, seps, case_sensitive, recursive=True): """Compile given glob pattern to a re.Pattern object (observing case sensitivity).""" flags = re.NOFLAG if case_sensitive else re.IGNORECASE - regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) + regex = translate(pat, recursive=recursive, include_hidden=True, seps=seps) return re.compile(regex, flags=flags).match -class _Globber: - """Class providing shell-style pattern matching and globbing. +class _GlobberBase: + """Abstract class providing shell-style pattern matching and globbing. """ def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): @@ -338,34 +344,31 @@ def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): self.case_pedantic = case_pedantic self.recursive = recursive - # Low-level methods - - lstat = operator.methodcaller('lstat') - add_slash = operator.methodcaller('joinpath', '') + # Abstract methods @staticmethod - def scandir(path): - """Emulates os.scandir(), which returns an object that can be used as - a context manager. 
This method is called by walk() and glob(). + def lexists(path): + """Implements os.path.lexists(). """ - return contextlib.nullcontext(path.iterdir()) + raise NotImplementedError @staticmethod - def concat_path(path, text): - """Appends text to the given path. + def scandir(path): + """Like os.scandir(), but generates (entry, name, path) tuples. """ - return path.with_segments(path._raw_path + text) + raise NotImplementedError @staticmethod - def parse_entry(entry): - """Returns the path of an entry yielded from scandir(). + def concat_path(path, text): + """Implements path concatenation. """ - return entry + raise NotImplementedError # High-level methods - def compile(self, pat): - return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) + def compile(self, pat, altsep=None): + seps = (self.sep, altsep) if altsep else self.sep + return _compile_pattern(pat, seps, self.case_sensitive, self.recursive) def selector(self, parts): """Returns a function that selects from a given path, walking and @@ -387,10 +390,12 @@ def selector(self, parts): def special_selector(self, part, parts): """Returns a function that selects special children of the given path. """ + if parts: + part += self.sep select_next = self.selector(parts) def select_special(path, exists=False): - path = self.concat_path(self.add_slash(path), part) + path = self.concat_path(path, part) return select_next(path, exists) return select_special @@ -400,14 +405,16 @@ def literal_selector(self, part, parts): # Optimization: consume and join any subsequent literal parts here, # rather than leaving them for the next selector. This reduces the - # number of string concatenation operations and calls to add_slash(). + # number of string concatenation operations. 
while parts and magic_check.search(parts[-1]) is None: part += self.sep + parts.pop() + if parts: + part += self.sep select_next = self.selector(parts) def select_literal(path, exists=False): - path = self.concat_path(self.add_slash(path), part) + path = self.concat_path(path, part) return select_next(path, exists=False) return select_literal @@ -423,23 +430,19 @@ def wildcard_selector(self, part, parts): def select_wildcard(path, exists=False): try: - # We must close the scandir() object before proceeding to - # avoid exhausting file descriptors when globbing deep trees. - with self.scandir(path) as scandir_it: - entries = list(scandir_it) + entries = self.scandir(path) except OSError: pass else: - for entry in entries: - if match is None or match(entry.name): + for entry, entry_name, entry_path in entries: + if match is None or match(entry_name): if dir_only: try: if not entry.is_dir(): continue except OSError: continue - entry_path = self.parse_entry(entry) - if dir_only: + entry_path = self.concat_path(entry_path, self.sep) yield from select_next(entry_path, exists=True) else: yield entry_path @@ -469,7 +472,6 @@ def recursive_selector(self, part, parts): select_next = self.selector(parts) def select_recursive(path, exists=False): - path = self.add_slash(path) match_pos = len(str(path)) if match is None or match(str(path), match_pos): yield from select_next(path, exists) @@ -480,14 +482,11 @@ def select_recursive(path, exists=False): def select_recursive_step(stack, match_pos): path = stack.pop() try: - # We must close the scandir() object before proceeding to - # avoid exhausting file descriptors when globbing deep trees. 
- with self.scandir(path) as scandir_it: - entries = list(scandir_it) + entries = self.scandir(path) except OSError: pass else: - for entry in entries: + for entry, _entry_name, entry_path in entries: is_dir = False try: if entry.is_dir(follow_symlinks=follow_symlinks): @@ -496,8 +495,10 @@ def select_recursive_step(stack, match_pos): pass if is_dir or not dir_only: - entry_path = self.parse_entry(entry) - if match is None or match(str(entry_path), match_pos): + entry_path_str = str(entry_path) + if dir_only: + entry_path = self.concat_path(entry_path, self.sep) + if match is None or match(entry_path_str, match_pos): if dir_only: yield from select_next(entry_path, exists=True) else: @@ -516,30 +517,37 @@ def select_exists(self, path, exists=False): # Optimization: this path is already known to exist, e.g. because # it was returned from os.scandir(), so we skip calling lstat(). yield path - else: - try: - self.lstat(path) - yield path - except OSError: - pass + elif self.lexists(path): + yield path -class _StringGlobber(_Globber): - lstat = staticmethod(os.lstat) - scandir = staticmethod(os.scandir) - parse_entry = operator.attrgetter('path') +class _StringGlobber(_GlobberBase): + """Provides shell-style pattern matching and globbing for string paths. + """ + lexists = staticmethod(os.path.lexists) concat_path = operator.add - if os.name == 'nt': - @staticmethod - def add_slash(pathname): - tail = os.path.splitroot(pathname)[2] - if not tail or tail[-1] in '\\/': - return pathname - return f'{pathname}\\' - else: - @staticmethod - def add_slash(pathname): - if not pathname or pathname[-1] == '/': - return pathname - return f'{pathname}/' + @staticmethod + def scandir(path): + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. 
+ with os.scandir(path) as scandir_it: + entries = list(scandir_it) + return ((entry, entry.name, entry.path) for entry in entries) + + +class _PathGlobber(_GlobberBase): + """Provides shell-style pattern matching and globbing for pathlib paths. + """ + + @staticmethod + def lexists(path): + return path.info.exists(follow_symlinks=False) + + @staticmethod + def scandir(path): + return ((child.info, child.name, child) for child in path.iterdir()) + + @staticmethod + def concat_path(path, text): + return path.with_segments(str(path) + text) diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index 357d127c090..9005f1ef865 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -1475,8 +1475,6 @@ class Logger(Filterer): level, and "input.csv", "input.xls" and "input.gnu" for the sub-levels. There is no arbitrary limit to the depth of nesting. """ - _tls = threading.local() - def __init__(self, name, level=NOTSET): """ Initialize the logger with a name and an optional level. @@ -1673,19 +1671,14 @@ def handle(self, record): This method is used for unpickled records received from a socket, as well as those created locally. Logger-level filtering is applied. """ - if self._is_disabled(): + if self.disabled: return - - self._tls.in_progress = True - try: - maybe_record = self.filter(record) - if not maybe_record: - return - if isinstance(maybe_record, LogRecord): - record = maybe_record - self.callHandlers(record) - finally: - self._tls.in_progress = False + maybe_record = self.filter(record) + if not maybe_record: + return + if isinstance(maybe_record, LogRecord): + record = maybe_record + self.callHandlers(record) def addHandler(self, hdlr): """ @@ -1773,7 +1766,7 @@ def isEnabledFor(self, level): """ Is this logger enabled for level 'level'? 
""" - if self._is_disabled(): + if self.disabled: return False try: @@ -1823,11 +1816,6 @@ def _hierlevel(logger): if isinstance(item, Logger) and item.parent is self and _hierlevel(item) == 1 + _hierlevel(item.parent)) - def _is_disabled(self): - # We need to use getattr as it will only be set the first time a log - # message is recorded on any given thread - return self.disabled or getattr(self._tls, 'in_progress', False) - def __repr__(self): level = getLevelName(self.getEffectiveLevel()) return '<%s %s (%s)>' % (self.__class__.__name__, self.name, level) @@ -1864,9 +1852,9 @@ class LoggerAdapter(object): def __init__(self, logger, extra=None, merge_extra=False): """ - Initialize the adapter with a logger and a dict-like object which - provides contextual information. This constructor signature allows - easy stacking of LoggerAdapters, if so desired. + Initialize the adapter with a logger and an optional dict-like object + which provides contextual information. This constructor signature + allows easy stacking of LoggerAdapters, if so desired. You can effectively pass keyword arguments as shown in the following example: @@ -1897,8 +1885,9 @@ def process(self, msg, kwargs): Normally, you'll only need to override this one method in a LoggerAdapter subclass for your specific needs. """ - if self.merge_extra and "extra" in kwargs: - kwargs["extra"] = {**self.extra, **kwargs["extra"]} + if self.merge_extra and kwargs.get("extra") is not None: + if self.extra is not None: + kwargs["extra"] = {**self.extra, **kwargs["extra"]} else: kwargs["extra"] = self.extra return msg, kwargs diff --git a/Lib/logging/config.py b/Lib/logging/config.py index 190b4f92259..3d9aa00fa52 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -865,6 +865,8 @@ def configure_handler(self, config): else: factory = klass kwargs = {k: config[k] for k in config if (k != '.' 
and valid_ident(k))} + # When deprecation ends for using the 'strm' parameter, remove the + # "except TypeError ..." try: result = factory(**kwargs) except TypeError as te: @@ -876,6 +878,15 @@ def configure_handler(self, config): #(e.g. by Django) kwargs['strm'] = kwargs.pop('stream') result = factory(**kwargs) + + import warnings + warnings.warn( + "Support for custom logging handlers with the 'strm' argument " + "is deprecated and scheduled for removal in Python 3.16. " + "Define handlers with the 'stream' argument instead.", + DeprecationWarning, + stacklevel=2, + ) if formatter: result.setFormatter(formatter) if level is not None: @@ -1006,7 +1017,8 @@ class ConfigSocketReceiver(ThreadingTCPServer): A simple TCP socket-based logging config receiver. """ - allow_reuse_address = 1 + allow_reuse_address = True + allow_reuse_port = False def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT, handler=None, ready=None, verify=None): diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py index d3ea06c731e..2748b5941ea 100644 --- a/Lib/logging/handlers.py +++ b/Lib/logging/handlers.py @@ -855,7 +855,7 @@ class SysLogHandler(logging.Handler): } def __init__(self, address=('localhost', SYSLOG_UDP_PORT), - facility=LOG_USER, socktype=None): + facility=LOG_USER, socktype=None, timeout=None): """ Initialize a handler. 
@@ -872,6 +872,7 @@ def __init__(self, address=('localhost', SYSLOG_UDP_PORT), self.address = address self.facility = facility self.socktype = socktype + self.timeout = timeout self.socket = None self.createSocket() @@ -933,6 +934,8 @@ def createSocket(self): err = sock = None try: sock = socket.socket(af, socktype, proto) + if self.timeout: + sock.settimeout(self.timeout) if socktype == socket.SOCK_STREAM: sock.connect(sa) break @@ -1529,6 +1532,19 @@ def __init__(self, queue, *handlers, respect_handler_level=False): self._thread = None self.respect_handler_level = respect_handler_level + def __enter__(self): + """ + For use as a context manager. Starts the listener. + """ + self.start() + return self + + def __exit__(self, *args): + """ + For use as a context manager. Stops the listener. + """ + self.stop() + def dequeue(self, block): """ Dequeue a record and return it, optionally blocking. diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 954bb0a7453..7d0f4c1fd40 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -23,10 +23,11 @@ read_mime_types(file) -- parse one file, return a dictionary or None """ -import os -import sys -import posixpath -import urllib.parse +try: + from _winapi import _mimetypes_read_windows_registry +except ImportError: + _mimetypes_read_windows_registry = None + try: import winreg as _winreg except ImportError: @@ -34,7 +35,7 @@ __all__ = [ "knownfiles", "inited", "MimeTypes", - "guess_type", "guess_all_extensions", "guess_extension", + "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension", "add_type", "init", "read_mime_types", "suffix_map", "encodings_map", "types_map", "common_types" ] @@ -88,7 +89,21 @@ def add_type(self, type, ext, strict=True): If strict is true, information will be added to list of standard types, else to the list of non-standard types. + + Valid extensions are empty or start with a '.'. 
""" + if ext and not ext.startswith('.'): + from warnings import _deprecated + + _deprecated( + "Undotted extensions", + "Using undotted extensions is deprecated and " + "will raise a ValueError in Python {remove}", + remove=(3, 16), + ) + + if not type: + return self.types_map[strict][ext] = type exts = self.types_map_inv[strict].setdefault(type, []) if ext not in exts: @@ -110,11 +125,21 @@ def guess_type(self, url, strict=True): mapped to '.tar.gz'. (This is table-driven too, using the dictionary suffix_map.) - Optional `strict' argument when False adds a bunch of commonly found, + Optional 'strict' argument when False adds a bunch of commonly found, but non-standard types. """ + # Lazy import to improve module import time + import os + import urllib.parse + + # TODO: Deprecate accepting file paths (in particular path-like objects). url = os.fspath(url) - scheme, url = urllib.parse._splittype(url) + p = urllib.parse.urlparse(url) + if p.scheme and len(p.scheme) > 1: + scheme = p.scheme + url = p.path + else: + return self.guess_file_type(url, strict=strict) if scheme == 'data': # syntax of data URLs: # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data @@ -134,26 +159,43 @@ def guess_type(self, url, strict=True): if '=' in type or '/' not in type: type = 'text/plain' return type, None # never compressed, so encoding is None - base, ext = posixpath.splitext(url) - while ext in self.suffix_map: - base, ext = posixpath.splitext(base + self.suffix_map[ext]) + + # Lazy import to improve module import time + import posixpath + + return self._guess_file_type(url, strict, posixpath.splitext) + + def guess_file_type(self, path, *, strict=True): + """Guess the type of a file based on its path. + + Similar to guess_type(), but takes file path instead of URL. 
+ """ + # Lazy import to improve module import time + import os + + path = os.fsdecode(path) + path = os.path.splitdrive(path)[1] + return self._guess_file_type(path, strict, os.path.splitext) + + def _guess_file_type(self, path, strict, splitext): + base, ext = splitext(path) + while (ext_lower := ext.lower()) in self.suffix_map: + base, ext = splitext(base + self.suffix_map[ext_lower]) + # encodings_map is case sensitive if ext in self.encodings_map: encoding = self.encodings_map[ext] - base, ext = posixpath.splitext(base) + base, ext = splitext(base) else: encoding = None + ext = ext.lower() types_map = self.types_map[True] if ext in types_map: return types_map[ext], encoding - elif ext.lower() in types_map: - return types_map[ext.lower()], encoding elif strict: return None, encoding types_map = self.types_map[False] if ext in types_map: return types_map[ext], encoding - elif ext.lower() in types_map: - return types_map[ext.lower()], encoding else: return None, encoding @@ -163,13 +205,13 @@ def guess_all_extensions(self, type, strict=True): Return value is a list of strings giving the possible filename extensions, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data stream, - but would be mapped to the MIME type `type' by guess_type(). + but would be mapped to the MIME type 'type' by guess_type(). - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ type = type.lower() - extensions = self.types_map_inv[True].get(type, []) + extensions = list(self.types_map_inv[True].get(type, [])) if not strict: for ext in self.types_map_inv[False].get(type, []): if ext not in extensions: @@ -182,11 +224,11 @@ def guess_extension(self, type, strict=True): Return value is a string giving a filename extension, including the leading dot ('.'). 
The extension is not guaranteed to have been associated with any particular data - stream, but would be mapped to the MIME type `type' by - guess_type(). If no extension can be guessed for `type', None + stream, but would be mapped to the MIME type 'type' by + guess_type(). If no extension can be guessed for 'type', None is returned. - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ extensions = self.guess_all_extensions(type, strict) @@ -213,10 +255,7 @@ def readfp(self, fp, strict=True): list of standard types, else to the list of non-standard types. """ - while 1: - line = fp.readline() - if not line: - break + while line := fp.readline(): words = line.split() for i in range(len(words)): if words[i][0] == '#': @@ -237,10 +276,21 @@ def read_windows_registry(self, strict=True): types. """ - # Windows only - if not _winreg: + if not _mimetypes_read_windows_registry and not _winreg: return + add_type = self.add_type + if strict: + add_type = lambda type, ext: self.add_type(type, ext, True) + + # Accelerated function if it is available + if _mimetypes_read_windows_registry: + _mimetypes_read_windows_registry(add_type) + elif _winreg: + self._read_windows_registry(add_type) + + @classmethod + def _read_windows_registry(cls, add_type): def enum_types(mimedb): i = 0 while True: @@ -265,7 +315,7 @@ def enum_types(mimedb): subkey, 'Content Type') if datatype != _winreg.REG_SZ: continue - self.add_type(mimetype, subkeyname, strict) + add_type(mimetype, subkeyname) except OSError: continue @@ -284,7 +334,7 @@ def guess_type(url, strict=True): to ".tar.gz". (This is table-driven too, using the dictionary suffix_map). - Optional `strict' argument when false adds a bunch of commonly found, but + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. 
""" if _db is None: @@ -292,17 +342,27 @@ def guess_type(url, strict=True): return _db.guess_type(url, strict) +def guess_file_type(path, *, strict=True): + """Guess the type of a file based on its path. + + Similar to guess_type(), but takes file path instead of URL. + """ + if _db is None: + init() + return _db.guess_file_type(path, strict=strict) + + def guess_all_extensions(type, strict=True): """Guess the extensions for a file based on its MIME type. Return value is a list of strings giving the possible filename extensions, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data - stream, but would be mapped to the MIME type `type' by - guess_type(). If no extension can be guessed for `type', None + stream, but would be mapped to the MIME type 'type' by + guess_type(). If no extension can be guessed for 'type', None is returned. - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ if _db is None: @@ -315,10 +375,10 @@ def guess_extension(type, strict=True): Return value is a string giving a filename extension, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data stream, but would be mapped to the - MIME type `type' by guess_type(). If no extension can be guessed for - `type', None is returned. + MIME type 'type' by guess_type(). If no extension can be guessed for + 'type', None is returned. - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. 
""" if _db is None: @@ -349,8 +409,8 @@ def init(files=None): if files is None or _db is None: db = MimeTypes() - if _winreg: - db.read_windows_registry() + # Quick return if not supported + db.read_windows_registry() if files is None: files = knownfiles @@ -359,6 +419,9 @@ def init(files=None): else: db = _db + # Lazy import to improve module import time + import os + for file in files: if os.path.isfile(file): db.read(file) @@ -401,23 +464,28 @@ def _default_mime_types(): '.Z': 'compress', '.bz2': 'bzip2', '.xz': 'xz', + '.br': 'br', } # Before adding new types, make sure they are either registered with IANA, - # at http://www.iana.org/assignments/media-types + # at https://www.iana.org/assignments/media-types/media-types.xhtml # or extensions, i.e. using the x- prefix # If you add to these, please keep them sorted by mime type. # Make sure the entry with the preferred file extension for a particular mime type # appears before any others of the same mimetype. types_map = _types_map_default = { - '.js' : 'application/javascript', - '.mjs' : 'application/javascript', + '.js' : 'text/javascript', + '.mjs' : 'text/javascript', + '.epub' : 'application/epub+zip', + '.gz' : 'application/gzip', '.json' : 'application/json', '.webmanifest': 'application/manifest+json', '.doc' : 'application/msword', '.dot' : 'application/msword', '.wiz' : 'application/msword', + '.nq' : 'application/n-quads', + '.nt' : 'application/n-triples', '.bin' : 'application/octet-stream', '.a' : 'application/octet-stream', '.dll' : 'application/octet-stream', @@ -426,24 +494,37 @@ def _default_mime_types(): '.obj' : 'application/octet-stream', '.so' : 'application/octet-stream', '.oda' : 'application/oda', + '.ogx' : 'application/ogg', '.pdf' : 'application/pdf', '.p7c' : 'application/pkcs7-mime', '.ps' : 'application/postscript', '.ai' : 'application/postscript', '.eps' : 'application/postscript', + '.trig' : 'application/trig', '.m3u' : 'application/vnd.apple.mpegurl', '.m3u8' : 
'application/vnd.apple.mpegurl', '.xls' : 'application/vnd.ms-excel', '.xlb' : 'application/vnd.ms-excel', + '.eot' : 'application/vnd.ms-fontobject', '.ppt' : 'application/vnd.ms-powerpoint', '.pot' : 'application/vnd.ms-powerpoint', '.ppa' : 'application/vnd.ms-powerpoint', '.pps' : 'application/vnd.ms-powerpoint', '.pwz' : 'application/vnd.ms-powerpoint', + '.odg' : 'application/vnd.oasis.opendocument.graphics', + '.odp' : 'application/vnd.oasis.opendocument.presentation', + '.ods' : 'application/vnd.oasis.opendocument.spreadsheet', + '.odt' : 'application/vnd.oasis.opendocument.text', + '.pptx' : 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.xlsx' : 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.docx' : 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.rar' : 'application/vnd.rar', '.wasm' : 'application/wasm', + '.7z' : 'application/x-7z-compressed', '.bcpio' : 'application/x-bcpio', '.cpio' : 'application/x-cpio', '.csh' : 'application/x-csh', + '.deb' : 'application/x-debian-package', '.dvi' : 'application/x-dvi', '.gtar' : 'application/x-gtar', '.hdf' : 'application/x-hdf', @@ -453,10 +534,12 @@ def _default_mime_types(): '.cdf' : 'application/x-netcdf', '.nc' : 'application/x-netcdf', '.p12' : 'application/x-pkcs12', + '.php' : 'application/x-httpd-php', '.pfx' : 'application/x-pkcs12', '.ram' : 'application/x-pn-realaudio', '.pyc' : 'application/x-python-code', '.pyo' : 'application/x-python-code', + '.rpm' : 'application/x-rpm', '.sh' : 'application/x-sh', '.shar' : 'application/x-shar', '.swf' : 'application/x-shockwave-flash', @@ -479,29 +562,61 @@ def _default_mime_types(): '.rdf' : 'application/xml', '.wsdl' : 'application/xml', '.xpdl' : 'application/xml', + '.yaml' : 'application/yaml', + '.yml' : 'application/yaml', '.zip' : 'application/zip', + '.3gp' : 'audio/3gpp', + '.3gpp' : 'audio/3gpp', + '.3g2' : 'audio/3gpp2', + '.3gpp2' : 'audio/3gpp2', + '.aac' : 
'audio/aac', + '.adts' : 'audio/aac', + '.loas' : 'audio/aac', + '.ass' : 'audio/aac', '.au' : 'audio/basic', '.snd' : 'audio/basic', + '.flac' : 'audio/flac', + '.mka' : 'audio/matroska', + '.m4a' : 'audio/mp4', '.mp3' : 'audio/mpeg', '.mp2' : 'audio/mpeg', + '.ogg' : 'audio/ogg', + '.opus' : 'audio/opus', '.aif' : 'audio/x-aiff', '.aifc' : 'audio/x-aiff', '.aiff' : 'audio/x-aiff', '.ra' : 'audio/x-pn-realaudio', - '.wav' : 'audio/x-wav', + '.wav' : 'audio/vnd.wave', + '.otf' : 'font/otf', + '.ttf' : 'font/ttf', + '.weba' : 'audio/webm', + '.woff' : 'font/woff', + '.woff2' : 'font/woff2', + '.avif' : 'image/avif', '.bmp' : 'image/bmp', + '.emf' : 'image/emf', + '.fits' : 'image/fits', + '.g3' : 'image/g3fax', '.gif' : 'image/gif', '.ief' : 'image/ief', + '.jp2' : 'image/jp2', '.jpg' : 'image/jpeg', '.jpe' : 'image/jpeg', '.jpeg' : 'image/jpeg', + '.jpm' : 'image/jpm', + '.jpx' : 'image/jpx', + '.heic' : 'image/heic', + '.heif' : 'image/heif', '.png' : 'image/png', '.svg' : 'image/svg+xml', + '.t38' : 'image/t38', '.tiff' : 'image/tiff', '.tif' : 'image/tiff', + '.tfx' : 'image/tiff-fx', '.ico' : 'image/vnd.microsoft.icon', + '.webp' : 'image/webp', + '.wmf' : 'image/wmf', '.ras' : 'image/x-cmu-raster', - '.bmp' : 'image/x-ms-bmp', '.pnm' : 'image/x-portable-anymap', '.pbm' : 'image/x-portable-bitmap', '.pgm' : 'image/x-portable-graymap', @@ -514,34 +629,49 @@ def _default_mime_types(): '.mht' : 'message/rfc822', '.mhtml' : 'message/rfc822', '.nws' : 'message/rfc822', + '.gltf' : 'model/gltf+json', + '.glb' : 'model/gltf-binary', + '.stl' : 'model/stl', '.css' : 'text/css', '.csv' : 'text/csv', '.html' : 'text/html', '.htm' : 'text/html', + '.md' : 'text/markdown', + '.markdown': 'text/markdown', + '.n3' : 'text/n3', '.txt' : 'text/plain', '.bat' : 'text/plain', '.c' : 'text/plain', '.h' : 'text/plain', '.ksh' : 'text/plain', '.pl' : 'text/plain', + '.srt' : 'text/plain', '.rtx' : 'text/richtext', + '.rtf' : 'text/rtf', '.tsv' : 'text/tab-separated-values', + 
'.vtt' : 'text/vtt', '.py' : 'text/x-python', + '.rst' : 'text/x-rst', '.etx' : 'text/x-setext', '.sgm' : 'text/x-sgml', '.sgml' : 'text/x-sgml', '.vcf' : 'text/x-vcard', '.xml' : 'text/xml', + '.mkv' : 'video/matroska', + '.mk3d' : 'video/matroska-3d', '.mp4' : 'video/mp4', '.mpeg' : 'video/mpeg', '.m1v' : 'video/mpeg', '.mpa' : 'video/mpeg', '.mpe' : 'video/mpeg', '.mpg' : 'video/mpeg', + '.ogv' : 'video/ogg', '.mov' : 'video/quicktime', '.qt' : 'video/quicktime', '.webm' : 'video/webm', - '.avi' : 'video/x-msvideo', + '.avi' : 'video/vnd.avi', + '.m4v' : 'video/x-m4v', + '.wmv' : 'video/x-ms-wmv', '.movie' : 'video/x-sgi-movie', } @@ -551,6 +681,7 @@ def _default_mime_types(): # Please sort these too common_types = _common_types_default = { '.rtf' : 'application/rtf', + '.apk' : 'application/vnd.android.package-archive', '.midi': 'audio/midi', '.mid' : 'audio/midi', '.jpg' : 'image/jpg', @@ -564,51 +695,53 @@ def _default_mime_types(): _default_mime_types() -def _main(): - import getopt - - USAGE = """\ -Usage: mimetypes.py [options] type - -Options: - --help / -h -- print this message and exit - --lenient / -l -- additionally search of some common, but non-standard - types. - --extension / -e -- guess extension instead of type - -More than one type argument may be given. 
-""" - - def usage(code, msg=''): - print(USAGE) - if msg: print(msg) - sys.exit(code) - - try: - opts, args = getopt.getopt(sys.argv[1:], 'hle', - ['help', 'lenient', 'extension']) - except getopt.error as msg: - usage(1, msg) - - strict = 1 - extension = 0 - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-l', '--lenient'): - strict = 0 - elif opt in ('-e', '--extension'): - extension = 1 - for gtype in args: - if extension: - guess = guess_extension(gtype, strict) - if not guess: print("I don't know anything about type", gtype) - else: print(guess) - else: - guess, encoding = guess_type(gtype, strict) - if not guess: print("I don't know anything about type", gtype) - else: print('type:', guess, 'encoding:', encoding) +def _parse_args(args): + from argparse import ArgumentParser + + parser = ArgumentParser( + description='map filename extensions to MIME types', color=True + ) + parser.add_argument( + '-e', '--extension', + action='store_true', + help='guess extension instead of type' + ) + parser.add_argument( + '-l', '--lenient', + action='store_true', + help='additionally search for common but non-standard types' + ) + parser.add_argument('type', nargs='+', help='a type to search') + args = parser.parse_args(args) + return args, parser.format_help() + + +def _main(args=None): + """Run the mimetypes command-line interface and return a text to print.""" + args, help_text = _parse_args(args) + + results = [] + if args.extension: + for gtype in args.type: + guess = guess_extension(gtype, not args.lenient) + if guess: + results.append(str(guess)) + else: + results.append(f"error: unknown type {gtype}") + return results + else: + for gtype in args.type: + guess, encoding = guess_type(gtype, not args.lenient) + if guess: + results.append(f"type: {guess} encoding: {encoding}") + else: + results.append(f"error: media type unknown for {gtype}") + return results if __name__ == '__main__': - _main() + import sys + + results = _main() + 
print("\n".join(results)) + sys.exit(any(result.startswith("error: ") for result in results)) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 4b3edf535a6..0d763d1f0dc 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -5,8 +5,1303 @@ operating systems. """ -from ._abc import * -from ._local import * +import io +import ntpath +import operator +import os +import posixpath +import sys +from errno import * +from glob import _StringGlobber, _no_recurse_symlinks +from itertools import chain +from stat import S_ISDIR, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +from _collections_abc import Sequence -__all__ = (_abc.__all__ + - _local.__all__) +try: + import pwd +except ImportError: + pwd = None +try: + import grp +except ImportError: + grp = None + +from pathlib._os import ( + PathInfo, DirEntryInfo, + ensure_different_files, ensure_distinct_paths, + copyfile2, copyfileobj, magic_open, copy_info, +) + + +__all__ = [ + "UnsupportedOperation", + "PurePath", "PurePosixPath", "PureWindowsPath", + "Path", "PosixPath", "WindowsPath", + ] + + +class UnsupportedOperation(NotImplementedError): + """An exception that is raised when an unsupported operation is attempted. + """ + pass + + +class _PathParents(Sequence): + """This object provides sequence-like access to the logical ancestors + of a path. 
Don't try to construct it yourself.""" + __slots__ = ('_path', '_drv', '_root', '_tail') + + def __init__(self, path): + self._path = path + self._drv = path.drive + self._root = path.root + self._tail = path._tail + + def __len__(self): + return len(self._tail) + + def __getitem__(self, idx): + if isinstance(idx, slice): + return tuple(self[i] for i in range(*idx.indices(len(self)))) + + if idx >= len(self) or idx < -len(self): + raise IndexError(idx) + if idx < 0: + idx += len(self) + return self._path._from_parsed_parts(self._drv, self._root, + self._tail[:-idx - 1]) + + def __repr__(self): + return "<{}.parents>".format(type(self._path).__name__) + + +class PurePath: + """Base class for manipulating paths without I/O. + + PurePath represents a filesystem path and offers operations which + don't imply any actual filesystem I/O. Depending on your system, + instantiating a PurePath will return either a PurePosixPath or a + PureWindowsPath object. You can also instantiate either of these classes + directly, regardless of your system. + """ + + __slots__ = ( + # The `_raw_paths` slot stores unjoined string paths. This is set in + # the `__init__()` method. + '_raw_paths', + + # The `_drv`, `_root` and `_tail_cached` slots store parsed and + # normalized parts of the path. They are set when any of the `drive`, + # `root` or `_tail` properties are accessed for the first time. The + # three-part division corresponds to the result of + # `os.path.splitroot()`, except that the tail is further split on path + # separators (i.e. it is a list of strings), and that the root and + # tail are normalized. + '_drv', '_root', '_tail_cached', + + # The `_str` slot stores the string representation of the path, + # computed from the drive, root and tail when `__str__()` is called + # for the first time. It's used to implement `_str_normcase` + '_str', + + # The `_str_normcase_cached` slot stores the string path with + # normalized case. 
It is set when the `_str_normcase` property is + # accessed for the first time. It's used to implement `__eq__()` + # `__hash__()`, and `_parts_normcase` + '_str_normcase_cached', + + # The `_parts_normcase_cached` slot stores the case-normalized + # string path after splitting on path separators. It's set when the + # `_parts_normcase` property is accessed for the first time. It's used + # to implement comparison methods like `__lt__()`. + '_parts_normcase_cached', + + # The `_hash` slot stores the hash of the case-normalized string + # path. It's set when `__hash__()` is called for the first time. + '_hash', + ) + parser = os.path + + def __new__(cls, *args, **kwargs): + """Construct a PurePath from one or several strings and or existing + PurePath objects. The strings and path objects are combined so as + to yield a canonicalized path, which is incorporated into the + new PurePath object. + """ + if cls is PurePath: + cls = PureWindowsPath if os.name == 'nt' else PurePosixPath + return object.__new__(cls) + + def __init__(self, *args): + paths = [] + for arg in args: + if isinstance(arg, PurePath): + if arg.parser is not self.parser: + # GH-103631: Convert separators for backwards compatibility. + paths.append(arg.as_posix()) + else: + paths.extend(arg._raw_paths) + else: + try: + path = os.fspath(arg) + except TypeError: + path = arg + if not isinstance(path, str): + raise TypeError( + "argument should be a str or an os.PathLike " + "object where __fspath__ returns a str, " + f"not {type(path).__name__!r}") + paths.append(path) + self._raw_paths = paths + + def with_segments(self, *pathsegments): + """Construct a new path object from any number of path-like objects. + Subclasses may override this method to customize how new path objects + are created from methods like `iterdir()`. 
+ """ + return type(self)(*pathsegments) + + def joinpath(self, *pathsegments): + """Combine this path with one or several arguments, and return a + new path representing either a subpath (if all arguments are relative + paths) or a totally different path (if one of the arguments is + anchored). + """ + return self.with_segments(self, *pathsegments) + + def __truediv__(self, key): + try: + return self.with_segments(self, key) + except TypeError: + return NotImplemented + + def __rtruediv__(self, key): + try: + return self.with_segments(key, self) + except TypeError: + return NotImplemented + + def __reduce__(self): + return self.__class__, tuple(self._raw_paths) + + def __repr__(self): + return "{}({!r})".format(self.__class__.__name__, self.as_posix()) + + def __fspath__(self): + return str(self) + + def __bytes__(self): + """Return the bytes representation of the path. This is only + recommended to use under Unix.""" + return os.fsencode(self) + + @property + def _str_normcase(self): + # String with normalized case, for hashing and equality checks + try: + return self._str_normcase_cached + except AttributeError: + if self.parser is posixpath: + self._str_normcase_cached = str(self) + else: + self._str_normcase_cached = str(self).lower() + return self._str_normcase_cached + + def __hash__(self): + try: + return self._hash + except AttributeError: + self._hash = hash(self._str_normcase) + return self._hash + + def __eq__(self, other): + if not isinstance(other, PurePath): + return NotImplemented + return self._str_normcase == other._str_normcase and self.parser is other.parser + + @property + def _parts_normcase(self): + # Cached parts with normalized case, for comparisons. 
+ try: + return self._parts_normcase_cached + except AttributeError: + self._parts_normcase_cached = self._str_normcase.split(self.parser.sep) + return self._parts_normcase_cached + + def __lt__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase < other._parts_normcase + + def __le__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase <= other._parts_normcase + + def __gt__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase > other._parts_normcase + + def __ge__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase >= other._parts_normcase + + def __str__(self): + """Return the string representation of the path, suitable for + passing to system calls.""" + try: + return self._str + except AttributeError: + self._str = self._format_parsed_parts(self.drive, self.root, + self._tail) or '.' + return self._str + + @classmethod + def _format_parsed_parts(cls, drv, root, tail): + if drv or root: + return drv + root + cls.parser.sep.join(tail) + elif tail and cls.parser.splitdrive(tail[0])[0]: + tail = ['.'] + tail + return cls.parser.sep.join(tail) + + def _from_parsed_parts(self, drv, root, tail): + path = self._from_parsed_string(self._format_parsed_parts(drv, root, tail)) + path._drv = drv + path._root = root + path._tail_cached = tail + return path + + def _from_parsed_string(self, path_str): + path = self.with_segments(path_str) + path._str = path_str or '.' 
+ return path + + @classmethod + def _parse_path(cls, path): + if not path: + return '', '', [] + sep = cls.parser.sep + altsep = cls.parser.altsep + if altsep: + path = path.replace(altsep, sep) + drv, root, rel = cls.parser.splitroot(path) + if not root and drv.startswith(sep) and not drv.endswith(sep): + drv_parts = drv.split(sep) + if len(drv_parts) == 4 and drv_parts[2] not in '?.': + # e.g. //server/share + root = sep + elif len(drv_parts) == 6: + # e.g. //?/unc/server/share + root = sep + return drv, root, [x for x in rel.split(sep) if x and x != '.'] + + @classmethod + def _parse_pattern(cls, pattern): + """Parse a glob pattern to a list of parts. This is much like + _parse_path, except: + + - Rather than normalizing and returning the drive and root, we raise + NotImplementedError if either are present. + - If the path has no real parts, we raise ValueError. + - If the path ends in a slash, then a final empty part is added. + """ + drv, root, rel = cls.parser.splitroot(pattern) + if root or drv: + raise NotImplementedError("Non-relative patterns are unsupported") + sep = cls.parser.sep + altsep = cls.parser.altsep + if altsep: + rel = rel.replace(altsep, sep) + parts = [x for x in rel.split(sep) if x and x != '.'] + if not parts: + raise ValueError(f"Unacceptable pattern: {str(pattern)!r}") + elif rel.endswith(sep): + # GH-65238: preserve trailing slash in glob patterns. + parts.append('') + return parts + + def as_posix(self): + """Return the string representation of the path with forward (/) + slashes.""" + return str(self).replace(self.parser.sep, '/') + + @property + def _raw_path(self): + paths = self._raw_paths + if len(paths) == 1: + return paths[0] + elif paths: + # Join path segments from the initializer. 
+ return self.parser.join(*paths) + else: + return '' + + @property + def drive(self): + """The drive prefix (letter or UNC path), if any.""" + try: + return self._drv + except AttributeError: + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + return self._drv + + @property + def root(self): + """The root of the path, if any.""" + try: + return self._root + except AttributeError: + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + return self._root + + @property + def _tail(self): + try: + return self._tail_cached + except AttributeError: + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + return self._tail_cached + + @property + def anchor(self): + """The concatenation of the drive and root, or ''.""" + return self.drive + self.root + + @property + def parts(self): + """An object providing sequence-like access to the + components in the filesystem path.""" + if self.drive or self.root: + return (self.drive + self.root,) + tuple(self._tail) + else: + return tuple(self._tail) + + @property + def parent(self): + """The logical parent of the path.""" + drv = self.drive + root = self.root + tail = self._tail + if not tail: + return self + return self._from_parsed_parts(drv, root, tail[:-1]) + + @property + def parents(self): + """A sequence of this path's logical parents.""" + # The value of this property should not be cached on the path object, + # as doing so would introduce a reference cycle. 
+ return _PathParents(self) + + @property + def name(self): + """The final path component, if any.""" + tail = self._tail + if not tail: + return '' + return tail[-1] + + def with_name(self, name): + """Return a new path with the file name changed.""" + p = self.parser + if not name or p.sep in name or (p.altsep and p.altsep in name) or name == '.': + raise ValueError(f"Invalid name {name!r}") + tail = self._tail.copy() + if not tail: + raise ValueError(f"{self!r} has an empty name") + tail[-1] = name + return self._from_parsed_parts(self.drive, self.root, tail) + + def with_stem(self, stem): + """Return a new path with the stem changed.""" + suffix = self.suffix + if not suffix: + return self.with_name(stem) + elif not stem: + # If the suffix is non-empty, we can't make the stem empty. + raise ValueError(f"{self!r} has a non-empty suffix") + else: + return self.with_name(stem + suffix) + + def with_suffix(self, suffix): + """Return a new path with the file suffix changed. If the path + has no suffix, add given suffix. If the given suffix is an empty + string, remove the suffix from the path. + """ + stem = self.stem + if not stem: + # If the stem is empty, we can't make the suffix non-empty. + raise ValueError(f"{self!r} has an empty name") + elif suffix and not suffix.startswith('.'): + raise ValueError(f"Invalid suffix {suffix!r}") + else: + return self.with_name(stem + suffix) + + @property + def stem(self): + """The final path component, minus its last suffix.""" + name = self.name + i = name.rfind('.') + if i != -1: + stem = name[:i] + # Stem must contain at least one non-dot character. + if stem.lstrip('.'): + return stem + return name + + @property + def suffix(self): + """ + The final component's last suffix, if any. + + This includes the leading period. 
For example: '.txt' + """ + name = self.name.lstrip('.') + i = name.rfind('.') + if i != -1: + return name[i:] + return '' + + @property + def suffixes(self): + """ + A list of the final component's suffixes, if any. + + These include the leading periods. For example: ['.tar', '.gz'] + """ + return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]] + + def relative_to(self, other, *, walk_up=False): + """Return the relative path to another path identified by the passed + arguments. If the operation is not possible (because this is not + related to the other path), raise ValueError. + + The *walk_up* parameter controls whether `..` may be used to resolve + the path. + """ + if not hasattr(other, 'with_segments'): + other = self.with_segments(other) + for step, path in enumerate(chain([other], other.parents)): + if path == self or path in self.parents: + break + elif not walk_up: + raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") + elif path.name == '..': + raise ValueError(f"'..' segment in {str(other)!r} cannot be walked") + else: + raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") + parts = ['..'] * step + self._tail[len(path._tail):] + return self._from_parsed_parts('', '', parts) + + def is_relative_to(self, other): + """Return True if the path is relative to another path or False. + """ + if not hasattr(other, 'with_segments'): + other = self.with_segments(other) + return other == self or other in self.parents + + def is_absolute(self): + """True if the path is absolute (has both a root and, if applicable, + a drive).""" + if self.parser is posixpath: + # Optimization: work with raw paths on POSIX. 
+ for path in self._raw_paths: + if path.startswith('/'): + return True + return False + return self.parser.isabs(self) + + def is_reserved(self): + """Return True if the path contains one of the special names reserved + by the system, if any.""" + import warnings + msg = ("pathlib.PurePath.is_reserved() is deprecated and scheduled " + "for removal in Python 3.15. Use os.path.isreserved() to " + "detect reserved paths on Windows.") + warnings._deprecated("pathlib.PurePath.is_reserved", msg, remove=(3, 15)) + if self.parser is ntpath: + return self.parser.isreserved(self) + return False + + def as_uri(self): + """Return the path as a URI.""" + import warnings + msg = ("pathlib.PurePath.as_uri() is deprecated and scheduled " + "for removal in Python 3.19. Use pathlib.Path.as_uri().") + warnings._deprecated("pathlib.PurePath.as_uri", msg, remove=(3, 19)) + if not self.is_absolute(): + raise ValueError("relative path can't be expressed as a file URI") + + drive = self.drive + if len(drive) == 2 and drive[1] == ':': + # It's a path on a local drive => 'file:///c:/a/b' + prefix = 'file:///' + drive + path = self.as_posix()[2:] + elif drive: + # It's a path on a network drive => 'file://host/share/a/b' + prefix = 'file:' + path = self.as_posix() + else: + # It's a posix path => 'file:///etc/hosts' + prefix = 'file://' + path = str(self) + from urllib.parse import quote_from_bytes + return prefix + quote_from_bytes(os.fsencode(path)) + + def full_match(self, pattern, *, case_sensitive=None): + """ + Return True if this path matches the given glob-style pattern. The + pattern is matched against the entire path. + """ + if not hasattr(pattern, 'with_segments'): + pattern = self.with_segments(pattern) + if case_sensitive is None: + case_sensitive = self.parser is posixpath + + # The string representation of an empty path is a single dot ('.'). Empty + # paths shouldn't match wildcards, so we change it to the empty string. 
+ path = str(self) if self.parts else '' + pattern = str(pattern) if pattern.parts else '' + globber = _StringGlobber(self.parser.sep, case_sensitive, recursive=True) + return globber.compile(pattern)(path) is not None + + def match(self, path_pattern, *, case_sensitive=None): + """ + Return True if this path matches the given pattern. If the pattern is + relative, matching is done from the right; otherwise, the entire path + is matched. The recursive wildcard '**' is *not* supported by this + method. + """ + if not hasattr(path_pattern, 'with_segments'): + path_pattern = self.with_segments(path_pattern) + if case_sensitive is None: + case_sensitive = self.parser is posixpath + path_parts = self.parts[::-1] + pattern_parts = path_pattern.parts[::-1] + if not pattern_parts: + raise ValueError("empty pattern") + if len(path_parts) < len(pattern_parts): + return False + if len(path_parts) > len(pattern_parts) and path_pattern.anchor: + return False + globber = _StringGlobber(self.parser.sep, case_sensitive) + for path_part, pattern_part in zip(path_parts, pattern_parts): + match = globber.compile(pattern_part) + if match(path_part) is None: + return False + return True + +# Subclassing os.PathLike makes isinstance() checks slower, +# which in turn makes Path construction slower. Register instead! +os.PathLike.register(PurePath) + + +class PurePosixPath(PurePath): + """PurePath subclass for non-Windows systems. + + On a POSIX system, instantiating a PurePath should return this object. + However, you can also instantiate it directly on any system. + """ + parser = posixpath + __slots__ = () + + +class PureWindowsPath(PurePath): + """PurePath subclass for Windows systems. + + On a Windows system, instantiating a PurePath should return this object. + However, you can also instantiate it directly on any system. + """ + parser = ntpath + __slots__ = () + + +class Path(PurePath): + """PurePath subclass that can make system calls. 
+ + Path represents a filesystem path but unlike PurePath, also offers + methods to do system calls on path objects. Depending on your system, + instantiating a Path will return either a PosixPath or a WindowsPath + object. You can also instantiate a PosixPath or WindowsPath directly, + but cannot instantiate a WindowsPath on a POSIX system or vice versa. + """ + __slots__ = ('_info',) + + def __new__(cls, *args, **kwargs): + if cls is Path: + cls = WindowsPath if os.name == 'nt' else PosixPath + return object.__new__(cls) + + @property + def info(self): + """ + A PathInfo object that exposes the file type and other file attributes + of this path. + """ + try: + return self._info + except AttributeError: + self._info = PathInfo(self) + return self._info + + def stat(self, *, follow_symlinks=True): + """ + Return the result of the stat() system call on this path, like + os.stat() does. + """ + return os.stat(self, follow_symlinks=follow_symlinks) + + def lstat(self): + """ + Like stat(), except if the path points to a symlink, the symlink's + status information is returned, rather than its target's. + """ + return os.lstat(self) + + def exists(self, *, follow_symlinks=True): + """ + Whether this path exists. + + This method normally follows symlinks; to check whether a symlink exists, + add the argument follow_symlinks=False. + """ + if follow_symlinks: + return os.path.exists(self) + return os.path.lexists(self) + + def is_dir(self, *, follow_symlinks=True): + """ + Whether this path is a directory. + """ + if follow_symlinks: + return os.path.isdir(self) + try: + return S_ISDIR(self.stat(follow_symlinks=follow_symlinks).st_mode) + except (OSError, ValueError): + return False + + def is_file(self, *, follow_symlinks=True): + """ + Whether this path is a regular file (also True for symlinks pointing + to regular files). 
+ """ + if follow_symlinks: + return os.path.isfile(self) + try: + return S_ISREG(self.stat(follow_symlinks=follow_symlinks).st_mode) + except (OSError, ValueError): + return False + + def is_mount(self): + """ + Check if this path is a mount point + """ + return os.path.ismount(self) + + def is_symlink(self): + """ + Whether this path is a symbolic link. + """ + return os.path.islink(self) + + def is_junction(self): + """ + Whether this path is a junction. + """ + return os.path.isjunction(self) + + def is_block_device(self): + """ + Whether this path is a block device. + """ + try: + return S_ISBLK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_char_device(self): + """ + Whether this path is a character device. + """ + try: + return S_ISCHR(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_fifo(self): + """ + Whether this path is a FIFO. + """ + try: + return S_ISFIFO(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_socket(self): + """ + Whether this path is a socket. + """ + try: + return S_ISSOCK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def samefile(self, other_path): + """Return whether other_path is the same or not as this file + (as returned by os.path.samefile()). + """ + st = self.stat() + try: + other_st = other_path.stat() + except AttributeError: + other_st = self.with_segments(other_path).stat() + return (st.st_ino == other_st.st_ino and + st.st_dev == other_st.st_dev) + + def open(self, mode='r', buffering=-1, encoding=None, + errors=None, newline=None): + """ + Open the file pointed to by this path and return a file object, as + the built-in open() function does. + """ + if "b" not in mode: + encoding = io.text_encoding(encoding) + return io.open(self, mode, buffering, encoding, errors, newline) + + def read_bytes(self): + """ + Open the file in bytes mode, read it, and close the file. 
+ """ + with self.open(mode='rb', buffering=0) as f: + return f.read() + + def read_text(self, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, read it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = io.text_encoding(encoding) + with self.open(mode='r', encoding=encoding, errors=errors, newline=newline) as f: + return f.read() + + def write_bytes(self, data): + """ + Open the file in bytes mode, write to it, and close the file. + """ + # type-check for the buffer interface before truncating the file + view = memoryview(data) + with self.open(mode='wb') as f: + return f.write(view) + + def write_text(self, data, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, write to it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = io.text_encoding(encoding) + if not isinstance(data, str): + raise TypeError('data must be str, not %s' % + data.__class__.__name__) + with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: + return f.write(data) + + _remove_leading_dot = operator.itemgetter(slice(2, None)) + _remove_trailing_slash = operator.itemgetter(slice(-1)) + + def _filter_trailing_slash(self, paths): + sep = self.parser.sep + anchor_len = len(self.anchor) + for path_str in paths: + if len(path_str) > anchor_len and path_str[-1] == sep: + path_str = path_str[:-1] + yield path_str + + def _from_dir_entry(self, dir_entry, path_str): + path = self.with_segments(path_str) + path._str = path_str + path._info = DirEntryInfo(dir_entry) + return path + + def iterdir(self): + """Yield path objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. 
+ """ + root_dir = str(self) + with os.scandir(root_dir) as scandir_it: + entries = list(scandir_it) + if root_dir == '.': + return (self._from_dir_entry(e, e.name) for e in entries) + else: + return (self._from_dir_entry(e, e.path) for e in entries) + + def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): + """Iterate over this subtree and yield all existing files (of any + kind, including directories) matching the given relative pattern. + """ + sys.audit("pathlib.Path.glob", self, pattern) + if case_sensitive is None: + case_sensitive = self.parser is posixpath + case_pedantic = False + else: + # The user has expressed a case sensitivity choice, but we don't + # know the case sensitivity of the underlying filesystem, so we + # must use scandir() for everything, including non-wildcard parts. + case_pedantic = True + parts = self._parse_pattern(pattern) + recursive = True if recurse_symlinks else _no_recurse_symlinks + globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive) + select = globber.selector(parts[::-1]) + root = str(self) + paths = select(self.parser.join(root, '')) + + # Normalize results + if root == '.': + paths = map(self._remove_leading_dot, paths) + if parts[-1] == '': + paths = map(self._remove_trailing_slash, paths) + elif parts[-1] == '**': + paths = self._filter_trailing_slash(paths) + paths = map(self._from_parsed_string, paths) + return paths + + def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): + """Recursively yield all existing files (of any kind, including + directories) matching the given relative pattern, anywhere in + this subtree. 
+ """ + sys.audit("pathlib.Path.rglob", self, pattern) + pattern = self.parser.join('**', pattern) + return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks) + + def walk(self, top_down=True, on_error=None, follow_symlinks=False): + """Walk the directory tree from this directory, similar to os.walk().""" + sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) + root_dir = str(self) + if not follow_symlinks: + follow_symlinks = os._walk_symlinks_as_files + results = os.walk(root_dir, top_down, on_error, follow_symlinks) + for path_str, dirnames, filenames in results: + if root_dir == '.': + path_str = path_str[2:] + yield self._from_parsed_string(path_str), dirnames, filenames + + def absolute(self): + """Return an absolute version of this path + No normalization or symlink resolution is performed. + + Use resolve() to resolve symlinks and remove '..' segments. + """ + if self.is_absolute(): + return self + if self.root: + drive = os.path.splitroot(os.getcwd())[0] + return self._from_parsed_parts(drive, self.root, self._tail) + if self.drive: + # There is a CWD on each drive-letter drive. + cwd = os.path.abspath(self.drive) + else: + cwd = os.getcwd() + if not self._tail: + # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). + # We pass only one argument to with_segments() to avoid the cost + # of joining, and we exploit the fact that getcwd() returns a + # fully-normalized string by storing it in _str. This is used to + # implement Path.cwd(). 
+ return self._from_parsed_string(cwd) + drive, root, rel = os.path.splitroot(cwd) + if not rel: + return self._from_parsed_parts(drive, root, self._tail) + tail = rel.split(self.parser.sep) + tail.extend(self._tail) + return self._from_parsed_parts(drive, root, tail) + + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory.""" + cwd = os.getcwd() + path = cls(cwd) + path._str = cwd # getcwd() returns a normalized path + return path + + def resolve(self, strict=False): + """ + Make the path absolute, resolving all symlinks on the way and also + normalizing it. + """ + + return self.with_segments(os.path.realpath(self, strict=strict)) + + if pwd: + def owner(self, *, follow_symlinks=True): + """ + Return the login name of the file owner. + """ + uid = self.stat(follow_symlinks=follow_symlinks).st_uid + return pwd.getpwuid(uid).pw_name + else: + def owner(self, *, follow_symlinks=True): + """ + Return the login name of the file owner. + """ + f = f"{type(self).__name__}.owner()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + if grp: + def group(self, *, follow_symlinks=True): + """ + Return the group name of the file gid. + """ + gid = self.stat(follow_symlinks=follow_symlinks).st_gid + return grp.getgrgid(gid).gr_name + else: + def group(self, *, follow_symlinks=True): + """ + Return the group name of the file gid. + """ + f = f"{type(self).__name__}.group()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + if hasattr(os, "readlink"): + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + return self.with_segments(os.readlink(self)) + else: + def readlink(self): + """ + Return the path to which the symbolic link points. 
+ """ + f = f"{type(self).__name__}.readlink()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + def touch(self, mode=0o666, exist_ok=True): + """ + Create this file with the given access mode, if it doesn't exist. + """ + + if exist_ok: + # First try to bump modification time + # Implementation note: GNU touch uses the UTIME_NOW option of + # the utimensat() / futimens() functions. + try: + os.utime(self, None) + except OSError: + # Avoid exception chaining + pass + else: + return + flags = os.O_CREAT | os.O_WRONLY + if not exist_ok: + flags |= os.O_EXCL + fd = os.open(self, flags, mode) + os.close(fd) + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + """ + Create a new directory at this given path. + """ + try: + os.mkdir(self, mode) + except FileNotFoundError: + if not parents or self.parent == self: + raise + self.parent.mkdir(parents=True, exist_ok=True) + self.mkdir(mode, parents=False, exist_ok=exist_ok) + except OSError: + # Cannot rely on checking for EEXIST, since the operating system + # could give priority to other errors like EACCES or EROFS + if not exist_ok or not self.is_dir(): + raise + + def chmod(self, mode, *, follow_symlinks=True): + """ + Change the permissions of the path, like os.chmod(). + """ + os.chmod(self, mode, follow_symlinks=follow_symlinks) + + def lchmod(self, mode): + """ + Like chmod(), except if the path points to a symlink, the symlink's + permissions are changed, rather than its target's. + """ + self.chmod(mode, follow_symlinks=False) + + def unlink(self, missing_ok=False): + """ + Remove this file or link. + If the path is a directory, use rmdir() instead. + """ + try: + os.unlink(self) + except FileNotFoundError: + if not missing_ok: + raise + + def rmdir(self): + """ + Remove this directory. The directory must be empty. + """ + os.rmdir(self) + + def _delete(self): + """ + Delete this file or directory (including all sub-directories). 
+ """ + if self.is_symlink() or self.is_junction(): + self.unlink() + elif self.is_dir(): + # Lazy import to improve module import time + import shutil + shutil.rmtree(self) + else: + self.unlink() + + def rename(self, target): + """ + Rename this path to the target path. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + os.rename(self, target) + if not hasattr(target, 'with_segments'): + target = self.with_segments(target) + return target + + def replace(self, target): + """ + Rename this path to the target path, overwriting if that path exists. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + os.replace(self, target) + if not hasattr(target, 'with_segments'): + target = self.with_segments(target) + return target + + def copy(self, target, **kwargs): + """ + Recursively copy this file or directory tree to the given destination. + """ + if not hasattr(target, 'with_segments'): + target = self.with_segments(target) + ensure_distinct_paths(self, target) + target._copy_from(self, **kwargs) + return target.joinpath() # Empty join to ensure fresh metadata. + + def copy_into(self, target_dir, **kwargs): + """ + Copy this file or directory tree into the given existing directory. + """ + name = self.name + if not name: + raise ValueError(f"{self!r} has an empty name") + elif hasattr(target_dir, 'with_segments'): + target = target_dir / name + else: + target = self.with_segments(target_dir, name) + return self.copy(target, **kwargs) + + def _copy_from(self, source, follow_symlinks=True, preserve_metadata=False): + """ + Recursively copy the given path to this path. 
+ """ + if not follow_symlinks and source.info.is_symlink(): + self._copy_from_symlink(source, preserve_metadata) + elif source.info.is_dir(): + children = source.iterdir() + os.mkdir(self) + for child in children: + self.joinpath(child.name)._copy_from( + child, follow_symlinks, preserve_metadata) + if preserve_metadata: + copy_info(source.info, self) + else: + self._copy_from_file(source, preserve_metadata) + + def _copy_from_file(self, source, preserve_metadata=False): + ensure_different_files(source, self) + with magic_open(source, 'rb') as source_f: + with open(self, 'wb') as target_f: + copyfileobj(source_f, target_f) + if preserve_metadata: + copy_info(source.info, self) + + if copyfile2: + # Use fast OS routine for local file copying where available. + _copy_from_file_fallback = _copy_from_file + def _copy_from_file(self, source, preserve_metadata=False): + try: + source = os.fspath(source) + except TypeError: + pass + else: + copyfile2(source, str(self)) + return + self._copy_from_file_fallback(source, preserve_metadata) + + if os.name == 'nt': + # If a directory-symlink is copied *before* its target, then + # os.symlink() incorrectly creates a file-symlink on Windows. Avoid + # this by passing *target_is_dir* to os.symlink() on Windows. + def _copy_from_symlink(self, source, preserve_metadata=False): + os.symlink(str(source.readlink()), self, source.info.is_dir()) + if preserve_metadata: + copy_info(source.info, self, follow_symlinks=False) + else: + def _copy_from_symlink(self, source, preserve_metadata=False): + os.symlink(str(source.readlink()), self) + if preserve_metadata: + copy_info(source.info, self, follow_symlinks=False) + + def move(self, target): + """ + Recursively move this file or directory tree to the given destination. + """ + # Use os.replace() if the target is os.PathLike and on the same FS. 
+ try: + target = self.with_segments(target) + except TypeError: + pass + else: + ensure_different_files(self, target) + try: + os.replace(self, target) + except OSError as err: + if err.errno != EXDEV: + raise + else: + return target.joinpath() # Empty join to ensure fresh metadata. + # Fall back to copy+delete. + target = self.copy(target, follow_symlinks=False, preserve_metadata=True) + self._delete() + return target + + def move_into(self, target_dir): + """ + Move this file or directory tree into the given existing directory. + """ + name = self.name + if not name: + raise ValueError(f"{self!r} has an empty name") + elif hasattr(target_dir, 'with_segments'): + target = target_dir / name + else: + target = self.with_segments(target_dir, name) + return self.move(target) + + if hasattr(os, "symlink"): + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + os.symlink(target, self, target_is_directory) + else: + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + f = f"{type(self).__name__}.symlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + if hasattr(os, "link"): + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. + """ + os.link(target, self) + else: + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. 
+ """ + f = f"{type(self).__name__}.hardlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + def expanduser(self): + """ Return a new path with expanded ~ and ~user constructs + (as returned by os.path.expanduser) + """ + if (not (self.drive or self.root) and + self._tail and self._tail[0][:1] == '~'): + homedir = os.path.expanduser(self._tail[0]) + if homedir[:1] == "~": + raise RuntimeError("Could not determine home directory.") + drv, root, tail = self._parse_path(homedir) + return self._from_parsed_parts(drv, root, tail + self._tail[1:]) + + return self + + @classmethod + def home(cls): + """Return a new path pointing to expanduser('~'). + """ + homedir = os.path.expanduser("~") + if homedir == "~": + raise RuntimeError("Could not determine home directory.") + return cls(homedir) + + def as_uri(self): + """Return the path as a URI.""" + if not self.is_absolute(): + raise ValueError("relative paths can't be expressed as file URIs") + from urllib.request import pathname2url + return pathname2url(str(self), add_scheme=True) + + @classmethod + def from_uri(cls, uri): + """Return a new path from the given 'file' URI.""" + from urllib.error import URLError + from urllib.request import url2pathname + try: + path = cls(url2pathname(uri, require_scheme=True)) + except URLError as exc: + raise ValueError(exc.reason) from None + if not path.is_absolute(): + raise ValueError(f"URI is not absolute: {uri!r}") + return path + + +class PosixPath(Path, PurePosixPath): + """Path subclass for non-Windows systems. + + On a POSIX system, instantiating a Path should return this object. + """ + __slots__ = () + + if os.name == 'nt': + def __new__(cls, *args, **kwargs): + raise UnsupportedOperation( + f"cannot instantiate {cls.__name__!r} on your system") + +class WindowsPath(Path, PureWindowsPath): + """Path subclass for Windows systems. + + On a Windows system, instantiating a Path should return this object. 
+ """ + __slots__ = () + + if os.name != 'nt': + def __new__(cls, *args, **kwargs): + raise UnsupportedOperation( + f"cannot instantiate {cls.__name__!r} on your system") diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py deleted file mode 100644 index 4d24146aa53..00000000000 --- a/Lib/pathlib/_abc.py +++ /dev/null @@ -1,930 +0,0 @@ -""" -Abstract base classes for rich path objects. - -This module is published as a PyPI package called "pathlib-abc". - -This module is also a *PRIVATE* part of the Python standard library, where -it's developed alongside pathlib. If it finds success and maturity as a PyPI -package, it could become a public part of the standard library. - -Two base classes are defined here -- PurePathBase and PathBase -- that -resemble pathlib's PurePath and Path respectively. -""" - -import functools -from glob import _Globber, _no_recurse_symlinks -from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL -from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO - - -__all__ = ["UnsupportedOperation"] - -# -# Internals -# - -_WINERROR_NOT_READY = 21 # drive exists but is not accessible -_WINERROR_INVALID_NAME = 123 # fix for bpo-35306 -_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself - -# EBADF - guard against macOS `stat` throwing EBADF -_IGNORED_ERRNOS = (ENOENT, ENOTDIR, EBADF, ELOOP) - -_IGNORED_WINERRORS = ( - _WINERROR_NOT_READY, - _WINERROR_INVALID_NAME, - _WINERROR_CANT_RESOLVE_FILENAME) - -def _ignore_error(exception): - return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or - getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) - - -@functools.cache -def _is_case_sensitive(parser): - return parser.normcase('Aa') == 'Aa' - - -class UnsupportedOperation(NotImplementedError): - """An exception that is raised when an unsupported operation is called on - a path object. - """ - pass - - -class ParserBase: - """Base class for path parsers, which do low-level path manipulation. 
- - Path parsers provide a subset of the os.path API, specifically those - functions needed to provide PurePathBase functionality. Each PurePathBase - subclass references its path parser via a 'parser' class attribute. - - Every method in this base class raises an UnsupportedOperation exception. - """ - - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported" - - @property - def sep(self): - """The character used to separate path components.""" - raise UnsupportedOperation(self._unsupported_msg('sep')) - - def join(self, path, *paths): - """Join path segments.""" - raise UnsupportedOperation(self._unsupported_msg('join()')) - - def split(self, path): - """Split the path into a pair (head, tail), where *head* is everything - before the final path separator, and *tail* is everything after. - Either part may be empty. - """ - raise UnsupportedOperation(self._unsupported_msg('split()')) - - def splitdrive(self, path): - """Split the path into a 2-item tuple (drive, tail), where *drive* is - a device name or mount point, and *tail* is everything after the - drive. Either part may be empty.""" - raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) - - def normcase(self, path): - """Normalize the case of the path.""" - raise UnsupportedOperation(self._unsupported_msg('normcase()')) - - def isabs(self, path): - """Returns whether the path is absolute, i.e. unaffected by the - current directory or drive.""" - raise UnsupportedOperation(self._unsupported_msg('isabs()')) - - -class PurePathBase: - """Base class for pure path objects. - - This class *does not* provide several magic methods that are defined in - its subclass PurePath. They are: __fspath__, __bytes__, __reduce__, - __hash__, __eq__, __lt__, __le__, __gt__, __ge__. Its initializer and path - joining methods accept only strings, not os.PathLike objects more broadly. - """ - - __slots__ = ( - # The `_raw_path` slot store a joined string path. 
This is set in the - # `__init__()` method. - '_raw_path', - - # The '_resolving' slot stores a boolean indicating whether the path - # is being processed by `PathBase.resolve()`. This prevents duplicate - # work from occurring when `resolve()` calls `stat()` or `readlink()`. - '_resolving', - ) - parser = ParserBase() - _globber = _Globber - - def __init__(self, path, *paths): - self._raw_path = self.parser.join(path, *paths) if paths else path - if not isinstance(self._raw_path, str): - raise TypeError( - f"path should be a str, not {type(self._raw_path).__name__!r}") - self._resolving = False - - def with_segments(self, *pathsegments): - """Construct a new path object from any number of path-like objects. - Subclasses may override this method to customize how new path objects - are created from methods like `iterdir()`. - """ - return type(self)(*pathsegments) - - def __str__(self): - """Return the string representation of the path, suitable for - passing to system calls.""" - return self._raw_path - - def as_posix(self): - """Return the string representation of the path with forward (/) - slashes.""" - return str(self).replace(self.parser.sep, '/') - - @property - def drive(self): - """The drive prefix (letter or UNC path), if any.""" - return self.parser.splitdrive(self.anchor)[0] - - @property - def root(self): - """The root of the path, if any.""" - return self.parser.splitdrive(self.anchor)[1] - - @property - def anchor(self): - """The concatenation of the drive and root, or ''.""" - return self._stack[0] - - @property - def name(self): - """The final path component, if any.""" - return self.parser.split(self._raw_path)[1] - - @property - def suffix(self): - """ - The final component's last suffix, if any. - - This includes the leading period. 
For example: '.txt' - """ - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[i:] - else: - return '' - - @property - def suffixes(self): - """ - A list of the final component's suffixes, if any. - - These include the leading periods. For example: ['.tar', '.gz'] - """ - name = self.name - if name.endswith('.'): - return [] - name = name.lstrip('.') - return ['.' + suffix for suffix in name.split('.')[1:]] - - @property - def stem(self): - """The final path component, minus its last suffix.""" - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[:i] - else: - return name - - def with_name(self, name): - """Return a new path with the file name changed.""" - split = self.parser.split - if split(name)[0]: - raise ValueError(f"Invalid name {name!r}") - return self.with_segments(split(self._raw_path)[0], name) - - def with_stem(self, stem): - """Return a new path with the stem changed.""" - suffix = self.suffix - if not suffix: - return self.with_name(stem) - elif not stem: - # If the suffix is non-empty, we can't make the stem empty. - raise ValueError(f"{self!r} has a non-empty suffix") - else: - return self.with_name(stem + suffix) - - def with_suffix(self, suffix): - """Return a new path with the file suffix changed. If the path - has no suffix, add given suffix. If the given suffix is an empty - string, remove the suffix from the path. - """ - stem = self.stem - if not stem: - # If the stem is empty, we can't make the suffix non-empty. - raise ValueError(f"{self!r} has an empty name") - elif suffix and not (suffix.startswith('.') and len(suffix) > 1): - raise ValueError(f"Invalid suffix {suffix!r}") - else: - return self.with_name(stem + suffix) - - def relative_to(self, other, *, walk_up=False): - """Return the relative path to another path identified by the passed - arguments. If the operation is not possible (because this is not - related to the other path), raise ValueError. 
- - The *walk_up* parameter controls whether `..` may be used to resolve - the path. - """ - if not isinstance(other, PurePathBase): - other = self.with_segments(other) - anchor0, parts0 = self._stack - anchor1, parts1 = other._stack - if anchor0 != anchor1: - raise ValueError(f"{self._raw_path!r} and {other._raw_path!r} have different anchors") - while parts0 and parts1 and parts0[-1] == parts1[-1]: - parts0.pop() - parts1.pop() - for part in parts1: - if not part or part == '.': - pass - elif not walk_up: - raise ValueError(f"{self._raw_path!r} is not in the subpath of {other._raw_path!r}") - elif part == '..': - raise ValueError(f"'..' segment in {other._raw_path!r} cannot be walked") - else: - parts0.append('..') - return self.with_segments('', *reversed(parts0)) - - def is_relative_to(self, other): - """Return True if the path is relative to another path or False. - """ - if not isinstance(other, PurePathBase): - other = self.with_segments(other) - anchor0, parts0 = self._stack - anchor1, parts1 = other._stack - if anchor0 != anchor1: - return False - while parts0 and parts1 and parts0[-1] == parts1[-1]: - parts0.pop() - parts1.pop() - for part in parts1: - if part and part != '.': - return False - return True - - @property - def parts(self): - """An object providing sequence-like access to the - components in the filesystem path.""" - anchor, parts = self._stack - if anchor: - parts.append(anchor) - return tuple(reversed(parts)) - - def joinpath(self, *pathsegments): - """Combine this path with one or several arguments, and return a - new path representing either a subpath (if all arguments are relative - paths) or a totally different path (if one of the arguments is - anchored). 
- """ - return self.with_segments(self._raw_path, *pathsegments) - - def __truediv__(self, key): - try: - return self.with_segments(self._raw_path, key) - except TypeError: - return NotImplemented - - def __rtruediv__(self, key): - try: - return self.with_segments(key, self._raw_path) - except TypeError: - return NotImplemented - - @property - def _stack(self): - """ - Split the path into a 2-tuple (anchor, parts), where *anchor* is the - uppermost parent of the path (equivalent to path.parents[-1]), and - *parts* is a reversed list of parts following the anchor. - """ - split = self.parser.split - path = self._raw_path - parent, name = split(path) - names = [] - while path != parent: - names.append(name) - path = parent - parent, name = split(path) - return path, names - - @property - def parent(self): - """The logical parent of the path.""" - path = self._raw_path - parent = self.parser.split(path)[0] - if path != parent: - parent = self.with_segments(parent) - parent._resolving = self._resolving - return parent - return self - - @property - def parents(self): - """A sequence of this path's logical parents.""" - split = self.parser.split - path = self._raw_path - parent = split(path)[0] - parents = [] - while path != parent: - parents.append(self.with_segments(parent)) - path = parent - parent = split(path)[0] - return tuple(parents) - - def is_absolute(self): - """True if the path is absolute (has both a root and, if applicable, - a drive).""" - return self.parser.isabs(self._raw_path) - - @property - def _pattern_str(self): - """The path expressed as a string, for use in pattern-matching.""" - return str(self) - - def match(self, path_pattern, *, case_sensitive=None): - """ - Return True if this path matches the given pattern. If the pattern is - relative, matching is done from the right; otherwise, the entire path - is matched. The recursive wildcard '**' is *not* supported by this - method. 
- """ - if not isinstance(path_pattern, PurePathBase): - path_pattern = self.with_segments(path_pattern) - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self.parser) - sep = path_pattern.parser.sep - path_parts = self.parts[::-1] - pattern_parts = path_pattern.parts[::-1] - if not pattern_parts: - raise ValueError("empty pattern") - if len(path_parts) < len(pattern_parts): - return False - if len(path_parts) > len(pattern_parts) and path_pattern.anchor: - return False - globber = self._globber(sep, case_sensitive) - for path_part, pattern_part in zip(path_parts, pattern_parts): - match = globber.compile(pattern_part) - if match(path_part) is None: - return False - return True - - def full_match(self, pattern, *, case_sensitive=None): - """ - Return True if this path matches the given glob-style pattern. The - pattern is matched against the entire path. - """ - if not isinstance(pattern, PurePathBase): - pattern = self.with_segments(pattern) - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self.parser) - globber = self._globber(pattern.parser.sep, case_sensitive, recursive=True) - match = globber.compile(pattern._pattern_str) - return match(self._pattern_str) is not None - - - -class PathBase(PurePathBase): - """Base class for concrete path objects. - - This class provides dummy implementations for many methods that derived - classes can override selectively; the default implementations raise - UnsupportedOperation. The most basic methods, such as stat() and open(), - directly raise UnsupportedOperation; these basic methods are called by - other methods such as is_dir() and read_text(). - - The Path class derives this class to implement local filesystem paths. - Users may derive their own classes to implement virtual filesystem paths, - such as paths in archive files or on remote storage systems. 
- """ - __slots__ = () - - # Maximum number of symlinks to follow in resolve() - _max_symlinks = 40 - - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported" - - def stat(self, *, follow_symlinks=True): - """ - Return the result of the stat() system call on this path, like - os.stat() does. - """ - raise UnsupportedOperation(self._unsupported_msg('stat()')) - - def lstat(self): - """ - Like stat(), except if the path points to a symlink, the symlink's - status information is returned, rather than its target's. - """ - return self.stat(follow_symlinks=False) - - - # Convenience functions for querying the stat results - - def exists(self, *, follow_symlinks=True): - """ - Whether this path exists. - - This method normally follows symlinks; to check whether a symlink exists, - add the argument follow_symlinks=False. - """ - try: - self.stat(follow_symlinks=follow_symlinks) - except OSError as e: - if not _ignore_error(e): - raise - return False - except ValueError: - # Non-encodable path - return False - return True - - def is_dir(self, *, follow_symlinks=True): - """ - Whether this path is a directory. - """ - try: - return S_ISDIR(self.stat(follow_symlinks=follow_symlinks).st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_file(self, *, follow_symlinks=True): - """ - Whether this path is a regular file (also True for symlinks pointing - to regular files). 
- """ - try: - return S_ISREG(self.stat(follow_symlinks=follow_symlinks).st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_mount(self): - """ - Check if this path is a mount point - """ - # Need to exist and be a dir - if not self.exists() or not self.is_dir(): - return False - - try: - parent_dev = self.parent.stat().st_dev - except OSError: - return False - - dev = self.stat().st_dev - if dev != parent_dev: - return True - ino = self.stat().st_ino - parent_ino = self.parent.stat().st_ino - return ino == parent_ino - - def is_symlink(self): - """ - Whether this path is a symbolic link. - """ - try: - return S_ISLNK(self.lstat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist - return False - except ValueError: - # Non-encodable path - return False - - def is_junction(self): - """ - Whether this path is a junction. - """ - # Junctions are a Windows-only feature, not present in POSIX nor the - # majority of virtual filesystems. There is no cross-platform idiom - # to check for junctions (using stat().st_mode). - return False - - def is_block_device(self): - """ - Whether this path is a block device. - """ - try: - return S_ISBLK(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_char_device(self): - """ - Whether this path is a character device. 
- """ - try: - return S_ISCHR(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_fifo(self): - """ - Whether this path is a FIFO. - """ - try: - return S_ISFIFO(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_socket(self): - """ - Whether this path is a socket. - """ - try: - return S_ISSOCK(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def samefile(self, other_path): - """Return whether other_path is the same or not as this file - (as returned by os.path.samefile()). - """ - st = self.stat() - try: - other_st = other_path.stat() - except AttributeError: - other_st = self.with_segments(other_path).stat() - return (st.st_ino == other_st.st_ino and - st.st_dev == other_st.st_dev) - - def open(self, mode='r', buffering=-1, encoding=None, - errors=None, newline=None): - """ - Open the file pointed to by this path and return a file object, as - the built-in open() function does. - """ - raise UnsupportedOperation(self._unsupported_msg('open()')) - - def read_bytes(self): - """ - Open the file in bytes mode, read it, and close the file. 
- """ - with self.open(mode='rb') as f: - return f.read() - - def read_text(self, encoding=None, errors=None, newline=None): - """ - Open the file in text mode, read it, and close the file. - """ - with self.open(mode='r', encoding=encoding, errors=errors, newline=newline) as f: - return f.read() - - def write_bytes(self, data): - """ - Open the file in bytes mode, write to it, and close the file. - """ - # type-check for the buffer interface before truncating the file - view = memoryview(data) - with self.open(mode='wb') as f: - return f.write(view) - - def write_text(self, data, encoding=None, errors=None, newline=None): - """ - Open the file in text mode, write to it, and close the file. - """ - if not isinstance(data, str): - raise TypeError('data must be str, not %s' % - data.__class__.__name__) - with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: - return f.write(data) - - def iterdir(self): - """Yield path objects of the directory contents. - - The children are yielded in arbitrary order, and the - special entries '.' and '..' are not included. - """ - raise UnsupportedOperation(self._unsupported_msg('iterdir()')) - - def _glob_selector(self, parts, case_sensitive, recurse_symlinks): - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self.parser) - case_pedantic = False - else: - # The user has expressed a case sensitivity choice, but we don't - # know the case sensitivity of the underlying filesystem, so we - # must use scandir() for everything, including non-wildcard parts. - case_pedantic = True - recursive = True if recurse_symlinks else _no_recurse_symlinks - globber = self._globber(self.parser.sep, case_sensitive, case_pedantic, recursive) - return globber.selector(parts) - - def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): - """Iterate over this subtree and yield all existing files (of any - kind, including directories) matching the given relative pattern. 
- """ - if not isinstance(pattern, PurePathBase): - pattern = self.with_segments(pattern) - anchor, parts = pattern._stack - if anchor: - raise NotImplementedError("Non-relative patterns are unsupported") - select = self._glob_selector(parts, case_sensitive, recurse_symlinks) - return select(self) - - def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): - """Recursively yield all existing files (of any kind, including - directories) matching the given relative pattern, anywhere in - this subtree. - """ - if not isinstance(pattern, PurePathBase): - pattern = self.with_segments(pattern) - pattern = '**' / pattern - return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks) - - def walk(self, top_down=True, on_error=None, follow_symlinks=False): - """Walk the directory tree from this directory, similar to os.walk().""" - paths = [self] - while paths: - path = paths.pop() - if isinstance(path, tuple): - yield path - continue - dirnames = [] - filenames = [] - if not top_down: - paths.append((path, dirnames, filenames)) - try: - for child in path.iterdir(): - try: - if child.is_dir(follow_symlinks=follow_symlinks): - if not top_down: - paths.append(child) - dirnames.append(child.name) - else: - filenames.append(child.name) - except OSError: - filenames.append(child.name) - except OSError as error: - if on_error is not None: - on_error(error) - if not top_down: - while not isinstance(paths.pop(), tuple): - pass - continue - if top_down: - yield path, dirnames, filenames - paths += [path.joinpath(d) for d in reversed(dirnames)] - - def absolute(self): - """Return an absolute version of this path - No normalization or symlink resolution is performed. - - Use resolve() to resolve symlinks and remove '..' segments. 
- """ - raise UnsupportedOperation(self._unsupported_msg('absolute()')) - - @classmethod - def cwd(cls): - """Return a new path pointing to the current working directory.""" - # We call 'absolute()' rather than using 'os.getcwd()' directly to - # enable users to replace the implementation of 'absolute()' in a - # subclass and benefit from the new behaviour here. This works because - # os.path.abspath('.') == os.getcwd(). - return cls('').absolute() - - def expanduser(self): - """ Return a new path with expanded ~ and ~user constructs - (as returned by os.path.expanduser) - """ - raise UnsupportedOperation(self._unsupported_msg('expanduser()')) - - @classmethod - def home(cls): - """Return a new path pointing to expanduser('~'). - """ - return cls("~").expanduser() - - def readlink(self): - """ - Return the path to which the symbolic link points. - """ - raise UnsupportedOperation(self._unsupported_msg('readlink()')) - readlink._supported = False - - def resolve(self, strict=False): - """ - Make the path absolute, resolving all symlinks on the way and also - normalizing it. - """ - if self._resolving: - return self - path_root, parts = self._stack - path = self.with_segments(path_root) - try: - path = path.absolute() - except UnsupportedOperation: - path_tail = [] - else: - path_root, path_tail = path._stack - path_tail.reverse() - - # If the user has *not* overridden the `readlink()` method, then symlinks are unsupported - # and (in non-strict mode) we can improve performance by not calling `stat()`. - querying = strict or getattr(self.readlink, '_supported', True) - link_count = 0 - while parts: - part = parts.pop() - if not part or part == '.': - continue - if part == '..': - if not path_tail: - if path_root: - # Delete '..' segment immediately following root - continue - elif path_tail[-1] != '..': - # Delete '..' 
segment and its predecessor - path_tail.pop() - continue - path_tail.append(part) - if querying and part != '..': - path = self.with_segments(path_root + self.parser.sep.join(path_tail)) - path._resolving = True - try: - st = path.stat(follow_symlinks=False) - if S_ISLNK(st.st_mode): - # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are - # encountered during resolution. - link_count += 1 - if link_count >= self._max_symlinks: - raise OSError(ELOOP, "Too many symbolic links in path", self._raw_path) - target_root, target_parts = path.readlink()._stack - # If the symlink target is absolute (like '/etc/hosts'), set the current - # path to its uppermost parent (like '/'). - if target_root: - path_root = target_root - path_tail.clear() - else: - path_tail.pop() - # Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to - # the stack of unresolved path parts. - parts.extend(target_parts) - continue - elif parts and not S_ISDIR(st.st_mode): - raise NotADirectoryError(ENOTDIR, "Not a directory", self._raw_path) - except OSError: - if strict: - raise - else: - querying = False - return self.with_segments(path_root + self.parser.sep.join(path_tail)) - - def symlink_to(self, target, target_is_directory=False): - """ - Make this path a symlink pointing to the target path. - Note the order of arguments (link, target) is the reverse of os.symlink. - """ - raise UnsupportedOperation(self._unsupported_msg('symlink_to()')) - - def hardlink_to(self, target): - """ - Make this path a hard link pointing to the same file as *target*. - - Note the order of arguments (self, target) is the reverse of os.link's. - """ - raise UnsupportedOperation(self._unsupported_msg('hardlink_to()')) - - def touch(self, mode=0o666, exist_ok=True): - """ - Create this file with the given access mode, if it doesn't exist. 
- """ - raise UnsupportedOperation(self._unsupported_msg('touch()')) - - def mkdir(self, mode=0o777, parents=False, exist_ok=False): - """ - Create a new directory at this given path. - """ - raise UnsupportedOperation(self._unsupported_msg('mkdir()')) - - def rename(self, target): - """ - Rename this path to the target path. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('rename()')) - - def replace(self, target): - """ - Rename this path to the target path, overwriting if that path exists. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('replace()')) - - def chmod(self, mode, *, follow_symlinks=True): - """ - Change the permissions of the path, like os.chmod(). - """ - raise UnsupportedOperation(self._unsupported_msg('chmod()')) - - def lchmod(self, mode): - """ - Like chmod(), except if the path points to a symlink, the symlink's - permissions are changed, rather than its target's. - """ - self.chmod(mode, follow_symlinks=False) - - def unlink(self, missing_ok=False): - """ - Remove this file or link. - If the path is a directory, use rmdir() instead. - """ - raise UnsupportedOperation(self._unsupported_msg('unlink()')) - - def rmdir(self): - """ - Remove this directory. The directory must be empty. - """ - raise UnsupportedOperation(self._unsupported_msg('rmdir()')) - - def owner(self, *, follow_symlinks=True): - """ - Return the login name of the file owner. 
- """ - raise UnsupportedOperation(self._unsupported_msg('owner()')) - - def group(self, *, follow_symlinks=True): - """ - Return the group name of the file gid. - """ - raise UnsupportedOperation(self._unsupported_msg('group()')) - - @classmethod - def from_uri(cls, uri): - """Return a new path from the given 'file' URI.""" - raise UnsupportedOperation(cls._unsupported_msg('from_uri()')) - - def as_uri(self): - """Return the path as a URI.""" - raise UnsupportedOperation(self._unsupported_msg('as_uri()')) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 0188e7c7722..58e137f2a92 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -1,861 +1,12 @@ -import io -import ntpath -import operator -import os -import posixpath -import sys -import warnings -from glob import _StringGlobber -from itertools import chain -from _collections_abc import Sequence - -try: - import pwd -except ImportError: - pwd = None -try: - import grp -except ImportError: - grp = None - -from ._abc import UnsupportedOperation, PurePathBase, PathBase +""" +This module exists so that pathlib objects pickled under Python 3.13 can be +unpickled in 3.14+. +""" +from pathlib import * __all__ = [ + "UnsupportedOperation", "PurePath", "PurePosixPath", "PureWindowsPath", "Path", "PosixPath", "WindowsPath", - ] - - -class _PathParents(Sequence): - """This object provides sequence-like access to the logical ancestors - of a path. 
Don't try to construct it yourself.""" - __slots__ = ('_path', '_drv', '_root', '_tail') - - def __init__(self, path): - self._path = path - self._drv = path.drive - self._root = path.root - self._tail = path._tail - - def __len__(self): - return len(self._tail) - - def __getitem__(self, idx): - if isinstance(idx, slice): - return tuple(self[i] for i in range(*idx.indices(len(self)))) - - if idx >= len(self) or idx < -len(self): - raise IndexError(idx) - if idx < 0: - idx += len(self) - return self._path._from_parsed_parts(self._drv, self._root, - self._tail[:-idx - 1]) - - def __repr__(self): - return "<{}.parents>".format(type(self._path).__name__) - - -class PurePath(PurePathBase): - """Base class for manipulating paths without I/O. - - PurePath represents a filesystem path and offers operations which - don't imply any actual filesystem I/O. Depending on your system, - instantiating a PurePath will return either a PurePosixPath or a - PureWindowsPath object. You can also instantiate either of these classes - directly, regardless of your system. - """ - - __slots__ = ( - # The `_raw_paths` slot stores unnormalized string paths. This is set - # in the `__init__()` method. - '_raw_paths', - - # The `_drv`, `_root` and `_tail_cached` slots store parsed and - # normalized parts of the path. They are set when any of the `drive`, - # `root` or `_tail` properties are accessed for the first time. The - # three-part division corresponds to the result of - # `os.path.splitroot()`, except that the tail is further split on path - # separators (i.e. it is a list of strings), and that the root and - # tail are normalized. - '_drv', '_root', '_tail_cached', - - # The `_str` slot stores the string representation of the path, - # computed from the drive, root and tail when `__str__()` is called - # for the first time. It's used to implement `_str_normcase` - '_str', - - # The `_str_normcase_cached` slot stores the string path with - # normalized case. 
It is set when the `_str_normcase` property is - # accessed for the first time. It's used to implement `__eq__()` - # `__hash__()`, and `_parts_normcase` - '_str_normcase_cached', - - # The `_parts_normcase_cached` slot stores the case-normalized - # string path after splitting on path separators. It's set when the - # `_parts_normcase` property is accessed for the first time. It's used - # to implement comparison methods like `__lt__()`. - '_parts_normcase_cached', - - # The `_hash` slot stores the hash of the case-normalized string - # path. It's set when `__hash__()` is called for the first time. - '_hash', - ) - parser = os.path - _globber = _StringGlobber - - def __new__(cls, *args, **kwargs): - """Construct a PurePath from one or several strings and or existing - PurePath objects. The strings and path objects are combined so as - to yield a canonicalized path, which is incorporated into the - new PurePath object. - """ - if cls is PurePath: - cls = PureWindowsPath if os.name == 'nt' else PurePosixPath - return object.__new__(cls) - - def __init__(self, *args): - paths = [] - for arg in args: - if isinstance(arg, PurePath): - if arg.parser is not self.parser: - # GH-103631: Convert separators for backwards compatibility. - paths.append(arg.as_posix()) - else: - paths.extend(arg._raw_paths) - else: - try: - path = os.fspath(arg) - except TypeError: - path = arg - if not isinstance(path, str): - raise TypeError( - "argument should be a str or an os.PathLike " - "object where __fspath__ returns a str, " - f"not {type(path).__name__!r}") - paths.append(path) - # Avoid calling super().__init__, as an optimisation - self._raw_paths = paths - - def joinpath(self, *pathsegments): - """Combine this path with one or several arguments, and return a - new path representing either a subpath (if all arguments are relative - paths) or a totally different path (if one of the arguments is - anchored). 
- """ - return self.with_segments(self, *pathsegments) - - def __truediv__(self, key): - try: - return self.with_segments(self, key) - except TypeError: - return NotImplemented - - def __rtruediv__(self, key): - try: - return self.with_segments(key, self) - except TypeError: - return NotImplemented - - def __reduce__(self): - return self.__class__, tuple(self._raw_paths) - - def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, self.as_posix()) - - def __fspath__(self): - return str(self) - - def __bytes__(self): - """Return the bytes representation of the path. This is only - recommended to use under Unix.""" - return os.fsencode(self) - - @property - def _str_normcase(self): - # String with normalized case, for hashing and equality checks - try: - return self._str_normcase_cached - except AttributeError: - if self.parser is posixpath: - self._str_normcase_cached = str(self) - else: - self._str_normcase_cached = str(self).lower() - return self._str_normcase_cached - - def __hash__(self): - try: - return self._hash - except AttributeError: - self._hash = hash(self._str_normcase) - return self._hash - - def __eq__(self, other): - if not isinstance(other, PurePath): - return NotImplemented - return self._str_normcase == other._str_normcase and self.parser is other.parser - - @property - def _parts_normcase(self): - # Cached parts with normalized case, for comparisons. 
- try: - return self._parts_normcase_cached - except AttributeError: - self._parts_normcase_cached = self._str_normcase.split(self.parser.sep) - return self._parts_normcase_cached - - def __lt__(self, other): - if not isinstance(other, PurePath) or self.parser is not other.parser: - return NotImplemented - return self._parts_normcase < other._parts_normcase - - def __le__(self, other): - if not isinstance(other, PurePath) or self.parser is not other.parser: - return NotImplemented - return self._parts_normcase <= other._parts_normcase - - def __gt__(self, other): - if not isinstance(other, PurePath) or self.parser is not other.parser: - return NotImplemented - return self._parts_normcase > other._parts_normcase - - def __ge__(self, other): - if not isinstance(other, PurePath) or self.parser is not other.parser: - return NotImplemented - return self._parts_normcase >= other._parts_normcase - - def __str__(self): - """Return the string representation of the path, suitable for - passing to system calls.""" - try: - return self._str - except AttributeError: - self._str = self._format_parsed_parts(self.drive, self.root, - self._tail) or '.' - return self._str - - @classmethod - def _format_parsed_parts(cls, drv, root, tail): - if drv or root: - return drv + root + cls.parser.sep.join(tail) - elif tail and cls.parser.splitdrive(tail[0])[0]: - tail = ['.'] + tail - return cls.parser.sep.join(tail) - - def _from_parsed_parts(self, drv, root, tail): - path = self._from_parsed_string(self._format_parsed_parts(drv, root, tail)) - path._drv = drv - path._root = root - path._tail_cached = tail - return path - - def _from_parsed_string(self, path_str): - path = self.with_segments(path_str) - path._str = path_str or '.' 
- return path - - @classmethod - def _parse_path(cls, path): - if not path: - return '', '', [] - sep = cls.parser.sep - altsep = cls.parser.altsep - if altsep: - path = path.replace(altsep, sep) - drv, root, rel = cls.parser.splitroot(path) - if not root and drv.startswith(sep) and not drv.endswith(sep): - drv_parts = drv.split(sep) - if len(drv_parts) == 4 and drv_parts[2] not in '?.': - # e.g. //server/share - root = sep - elif len(drv_parts) == 6: - # e.g. //?/unc/server/share - root = sep - parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] - return drv, root, parsed - - @property - def _raw_path(self): - """The joined but unnormalized path.""" - paths = self._raw_paths - if len(paths) == 0: - path = '' - elif len(paths) == 1: - path = paths[0] - else: - path = self.parser.join(*paths) - return path - - @property - def drive(self): - """The drive prefix (letter or UNC path), if any.""" - try: - return self._drv - except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) - return self._drv - - @property - def root(self): - """The root of the path, if any.""" - try: - return self._root - except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) - return self._root - - @property - def _tail(self): - try: - return self._tail_cached - except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) - return self._tail_cached - - @property - def anchor(self): - """The concatenation of the drive and root, or ''.""" - return self.drive + self.root - - @property - def parts(self): - """An object providing sequence-like access to the - components in the filesystem path.""" - if self.drive or self.root: - return (self.drive + self.root,) + tuple(self._tail) - else: - return tuple(self._tail) - - @property - def parent(self): - """The logical parent of the path.""" - drv = self.drive - root = self.root - tail = self._tail - if not 
tail: - return self - return self._from_parsed_parts(drv, root, tail[:-1]) - - @property - def parents(self): - """A sequence of this path's logical parents.""" - # The value of this property should not be cached on the path object, - # as doing so would introduce a reference cycle. - return _PathParents(self) - - @property - def name(self): - """The final path component, if any.""" - tail = self._tail - if not tail: - return '' - return tail[-1] - - def with_name(self, name): - """Return a new path with the file name changed.""" - p = self.parser - if not name or p.sep in name or (p.altsep and p.altsep in name) or name == '.': - raise ValueError(f"Invalid name {name!r}") - tail = self._tail.copy() - if not tail: - raise ValueError(f"{self!r} has an empty name") - tail[-1] = name - return self._from_parsed_parts(self.drive, self.root, tail) - - def relative_to(self, other, /, *_deprecated, walk_up=False): - """Return the relative path to another path identified by the passed - arguments. If the operation is not possible (because this is not - related to the other path), raise ValueError. - - The *walk_up* parameter controls whether `..` may be used to resolve - the path. - """ - if _deprecated: - msg = ("support for supplying more than one positional argument " - "to pathlib.PurePath.relative_to() is deprecated and " - "scheduled for removal in Python 3.14") - warnings.warn(msg, DeprecationWarning, stacklevel=2) - other = self.with_segments(other, *_deprecated) - elif not isinstance(other, PurePath): - other = self.with_segments(other) - for step, path in enumerate(chain([other], other.parents)): - if path == self or path in self.parents: - break - elif not walk_up: - raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") - elif path.name == '..': - raise ValueError(f"'..' 
segment in {str(other)!r} cannot be walked") - else: - raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") - parts = ['..'] * step + self._tail[len(path._tail):] - return self._from_parsed_parts('', '', parts) - - def is_relative_to(self, other, /, *_deprecated): - """Return True if the path is relative to another path or False. - """ - if _deprecated: - msg = ("support for supplying more than one argument to " - "pathlib.PurePath.is_relative_to() is deprecated and " - "scheduled for removal in Python 3.14") - warnings.warn(msg, DeprecationWarning, stacklevel=2) - other = self.with_segments(other, *_deprecated) - elif not isinstance(other, PurePath): - other = self.with_segments(other) - return other == self or other in self.parents - - def is_absolute(self): - """True if the path is absolute (has both a root and, if applicable, - a drive).""" - if self.parser is posixpath: - # Optimization: work with raw paths on POSIX. - for path in self._raw_paths: - if path.startswith('/'): - return True - return False - return self.parser.isabs(self) - - def is_reserved(self): - """Return True if the path contains one of the special names reserved - by the system, if any.""" - msg = ("pathlib.PurePath.is_reserved() is deprecated and scheduled " - "for removal in Python 3.15. 
Use os.path.isreserved() to " - "detect reserved paths on Windows.") - warnings.warn(msg, DeprecationWarning, stacklevel=2) - if self.parser is ntpath: - return self.parser.isreserved(self) - return False - - def as_uri(self): - """Return the path as a URI.""" - if not self.is_absolute(): - raise ValueError("relative path can't be expressed as a file URI") - - drive = self.drive - if len(drive) == 2 and drive[1] == ':': - # It's a path on a local drive => 'file:///c:/a/b' - prefix = 'file:///' + drive - path = self.as_posix()[2:] - elif drive: - # It's a path on a network drive => 'file://host/share/a/b' - prefix = 'file:' - path = self.as_posix() - else: - # It's a posix path => 'file:///etc/hosts' - prefix = 'file://' - path = str(self) - from urllib.parse import quote_from_bytes - return prefix + quote_from_bytes(os.fsencode(path)) - - @property - def _pattern_str(self): - """The path expressed as a string, for use in pattern-matching.""" - # The string representation of an empty path is a single dot ('.'). Empty - # paths shouldn't match wildcards, so we change it to the empty string. - path_str = str(self) - return '' if path_str == '.' else path_str - -# Subclassing os.PathLike makes isinstance() checks slower, -# which in turn makes Path construction slower. Register instead! -os.PathLike.register(PurePath) - - -class PurePosixPath(PurePath): - """PurePath subclass for non-Windows systems. - - On a POSIX system, instantiating a PurePath should return this object. - However, you can also instantiate it directly on any system. - """ - parser = posixpath - __slots__ = () - - -class PureWindowsPath(PurePath): - """PurePath subclass for Windows systems. - - On a Windows system, instantiating a PurePath should return this object. - However, you can also instantiate it directly on any system. - """ - parser = ntpath - __slots__ = () - - -class Path(PathBase, PurePath): - """PurePath subclass that can make system calls. 
- - Path represents a filesystem path but unlike PurePath, also offers - methods to do system calls on path objects. Depending on your system, - instantiating a Path will return either a PosixPath or a WindowsPath - object. You can also instantiate a PosixPath or WindowsPath directly, - but cannot instantiate a WindowsPath on a POSIX system or vice versa. - """ - __slots__ = () - as_uri = PurePath.as_uri - - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported on this system" - - def __init__(self, *args, **kwargs): - if kwargs: - msg = ("support for supplying keyword arguments to pathlib.PurePath " - "is deprecated and scheduled for removal in Python {remove}") - warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) - super().__init__(*args) - - def __new__(cls, *args, **kwargs): - if cls is Path: - cls = WindowsPath if os.name == 'nt' else PosixPath - return object.__new__(cls) - - def stat(self, *, follow_symlinks=True): - """ - Return the result of the stat() system call on this path, like - os.stat() does. - """ - return os.stat(self, follow_symlinks=follow_symlinks) - - def is_mount(self): - """ - Check if this path is a mount point - """ - return os.path.ismount(self) - - def is_junction(self): - """ - Whether this path is a junction. - """ - return os.path.isjunction(self) - - def open(self, mode='r', buffering=-1, encoding=None, - errors=None, newline=None): - """ - Open the file pointed to by this path and return a file object, as - the built-in open() function does. - """ - if "b" not in mode: - encoding = io.text_encoding(encoding) - return io.open(self, mode, buffering, encoding, errors, newline) - - def read_text(self, encoding=None, errors=None, newline=None): - """ - Open the file in text mode, read it, and close the file. - """ - # Call io.text_encoding() here to ensure any warning is raised at an - # appropriate stack level. 
- encoding = io.text_encoding(encoding) - return PathBase.read_text(self, encoding, errors, newline) - - def write_text(self, data, encoding=None, errors=None, newline=None): - """ - Open the file in text mode, write to it, and close the file. - """ - # Call io.text_encoding() here to ensure any warning is raised at an - # appropriate stack level. - encoding = io.text_encoding(encoding) - return PathBase.write_text(self, data, encoding, errors, newline) - - _remove_leading_dot = operator.itemgetter(slice(2, None)) - _remove_trailing_slash = operator.itemgetter(slice(-1)) - - def _filter_trailing_slash(self, paths): - sep = self.parser.sep - anchor_len = len(self.anchor) - for path_str in paths: - if len(path_str) > anchor_len and path_str[-1] == sep: - path_str = path_str[:-1] - yield path_str - - def iterdir(self): - """Yield path objects of the directory contents. - - The children are yielded in arbitrary order, and the - special entries '.' and '..' are not included. - """ - root_dir = str(self) - with os.scandir(root_dir) as scandir_it: - paths = [entry.path for entry in scandir_it] - if root_dir == '.': - paths = map(self._remove_leading_dot, paths) - return map(self._from_parsed_string, paths) - - def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): - """Iterate over this subtree and yield all existing files (of any - kind, including directories) matching the given relative pattern. - """ - sys.audit("pathlib.Path.glob", self, pattern) - if not isinstance(pattern, PurePath): - pattern = self.with_segments(pattern) - if pattern.anchor: - raise NotImplementedError("Non-relative patterns are unsupported") - parts = pattern._tail.copy() - if not parts: - raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - raw = pattern._raw_path - if raw[-1] in (self.parser.sep, self.parser.altsep): - # GH-65238: pathlib doesn't preserve trailing slash. Add it back. 
- parts.append('') - select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks) - root = str(self) - paths = select(root) - - # Normalize results - if root == '.': - paths = map(self._remove_leading_dot, paths) - if parts[-1] == '': - paths = map(self._remove_trailing_slash, paths) - elif parts[-1] == '**': - paths = self._filter_trailing_slash(paths) - paths = map(self._from_parsed_string, paths) - return paths - - def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): - """Recursively yield all existing files (of any kind, including - directories) matching the given relative pattern, anywhere in - this subtree. - """ - sys.audit("pathlib.Path.rglob", self, pattern) - if not isinstance(pattern, PurePath): - pattern = self.with_segments(pattern) - pattern = '**' / pattern - return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks) - - def walk(self, top_down=True, on_error=None, follow_symlinks=False): - """Walk the directory tree from this directory, similar to os.walk().""" - sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - root_dir = str(self) - if not follow_symlinks: - follow_symlinks = os._walk_symlinks_as_files - results = os.walk(root_dir, top_down, on_error, follow_symlinks) - for path_str, dirnames, filenames in results: - if root_dir == '.': - path_str = path_str[2:] - yield self._from_parsed_string(path_str), dirnames, filenames - - def absolute(self): - """Return an absolute version of this path - No normalization or symlink resolution is performed. - - Use resolve() to resolve symlinks and remove '..' segments. - """ - if self.is_absolute(): - return self - if self.root: - drive = os.path.splitroot(os.getcwd())[0] - return self._from_parsed_parts(drive, self.root, self._tail) - if self.drive: - # There is a CWD on each drive-letter drive. - cwd = os.path.abspath(self.drive) - else: - cwd = os.getcwd() - if not self._tail: - # Fast path for "empty" paths, e.g. 
Path("."), Path("") or Path(). - # We pass only one argument to with_segments() to avoid the cost - # of joining, and we exploit the fact that getcwd() returns a - # fully-normalized string by storing it in _str. This is used to - # implement Path.cwd(). - return self._from_parsed_string(cwd) - drive, root, rel = os.path.splitroot(cwd) - if not rel: - return self._from_parsed_parts(drive, root, self._tail) - tail = rel.split(self.parser.sep) - tail.extend(self._tail) - return self._from_parsed_parts(drive, root, tail) - - def resolve(self, strict=False): - """ - Make the path absolute, resolving all symlinks on the way and also - normalizing it. - """ - - return self.with_segments(os.path.realpath(self, strict=strict)) - - if pwd: - def owner(self, *, follow_symlinks=True): - """ - Return the login name of the file owner. - """ - uid = self.stat(follow_symlinks=follow_symlinks).st_uid - return pwd.getpwuid(uid).pw_name - - if grp: - def group(self, *, follow_symlinks=True): - """ - Return the group name of the file gid. - """ - gid = self.stat(follow_symlinks=follow_symlinks).st_gid - return grp.getgrgid(gid).gr_name - - if hasattr(os, "readlink"): - def readlink(self): - """ - Return the path to which the symbolic link points. - """ - return self.with_segments(os.readlink(self)) - - def touch(self, mode=0o666, exist_ok=True): - """ - Create this file with the given access mode, if it doesn't exist. - """ - - if exist_ok: - # First try to bump modification time - # Implementation note: GNU touch uses the UTIME_NOW option of - # the utimensat() / futimens() functions. - try: - os.utime(self, None) - except OSError: - # Avoid exception chaining - pass - else: - return - flags = os.O_CREAT | os.O_WRONLY - if not exist_ok: - flags |= os.O_EXCL - fd = os.open(self, flags, mode) - os.close(fd) - - def mkdir(self, mode=0o777, parents=False, exist_ok=False): - """ - Create a new directory at this given path. 
- """ - try: - os.mkdir(self, mode) - except FileNotFoundError: - if not parents or self.parent == self: - raise - self.parent.mkdir(parents=True, exist_ok=True) - self.mkdir(mode, parents=False, exist_ok=exist_ok) - except OSError: - # Cannot rely on checking for EEXIST, since the operating system - # could give priority to other errors like EACCES or EROFS - if not exist_ok or not self.is_dir(): - raise - - def chmod(self, mode, *, follow_symlinks=True): - """ - Change the permissions of the path, like os.chmod(). - """ - os.chmod(self, mode, follow_symlinks=follow_symlinks) - - def unlink(self, missing_ok=False): - """ - Remove this file or link. - If the path is a directory, use rmdir() instead. - """ - try: - os.unlink(self) - except FileNotFoundError: - if not missing_ok: - raise - - def rmdir(self): - """ - Remove this directory. The directory must be empty. - """ - os.rmdir(self) - - def rename(self, target): - """ - Rename this path to the target path. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - os.rename(self, target) - return self.with_segments(target) - - def replace(self, target): - """ - Rename this path to the target path, overwriting if that path exists. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - os.replace(self, target) - return self.with_segments(target) - - if hasattr(os, "symlink"): - def symlink_to(self, target, target_is_directory=False): - """ - Make this path a symlink pointing to the target path. - Note the order of arguments (link, target) is the reverse of os.symlink. 
- """ - os.symlink(target, self, target_is_directory) - - if hasattr(os, "link"): - def hardlink_to(self, target): - """ - Make this path a hard link pointing to the same file as *target*. - - Note the order of arguments (self, target) is the reverse of os.link's. - """ - os.link(target, self) - - def expanduser(self): - """ Return a new path with expanded ~ and ~user constructs - (as returned by os.path.expanduser) - """ - if (not (self.drive or self.root) and - self._tail and self._tail[0][:1] == '~'): - homedir = os.path.expanduser(self._tail[0]) - if homedir[:1] == "~": - raise RuntimeError("Could not determine home directory.") - drv, root, tail = self._parse_path(homedir) - return self._from_parsed_parts(drv, root, tail + self._tail[1:]) - - return self - - @classmethod - def from_uri(cls, uri): - """Return a new path from the given 'file' URI.""" - if not uri.startswith('file:'): - raise ValueError(f"URI does not start with 'file:': {uri!r}") - path = uri[5:] - if path[:3] == '///': - # Remove empty authority - path = path[2:] - elif path[:12] == '//localhost/': - # Remove 'localhost' authority - path = path[11:] - if path[:3] == '///' or (path[:1] == '/' and path[2:3] in ':|'): - # Remove slash before DOS device/UNC path - path = path[1:] - if path[1:2] == '|': - # Replace bar with colon in DOS drive - path = path[:1] + ':' + path[2:] - from urllib.parse import unquote_to_bytes - path = cls(os.fsdecode(unquote_to_bytes(path))) - if not path.is_absolute(): - raise ValueError(f"URI is not absolute: {uri!r}") - return path - - -class PosixPath(Path, PurePosixPath): - """Path subclass for non-Windows systems. - - On a POSIX system, instantiating a Path should return this object. - """ - __slots__ = () - - if os.name == 'nt': - def __new__(cls, *args, **kwargs): - raise UnsupportedOperation( - f"cannot instantiate {cls.__name__!r} on your system") - -class WindowsPath(Path, PureWindowsPath): - """Path subclass for Windows systems. 
- - On a Windows system, instantiating a Path should return this object. - """ - __slots__ = () - - if os.name != 'nt': - def __new__(cls, *args, **kwargs): - raise UnsupportedOperation( - f"cannot instantiate {cls.__name__!r} on your system") +] diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py new file mode 100644 index 00000000000..039836941dd --- /dev/null +++ b/Lib/pathlib/_os.py @@ -0,0 +1,530 @@ +""" +Low-level OS functionality wrappers used by pathlib. +""" + +from errno import * +from io import TextIOWrapper, text_encoding +from stat import S_ISDIR, S_ISREG, S_ISLNK, S_IMODE +import os +import sys +try: + import fcntl +except ImportError: + fcntl = None +try: + import posix +except ImportError: + posix = None +try: + import _winapi +except ImportError: + _winapi = None + + +def _get_copy_blocksize(infd): + """Determine blocksize for fastcopying on Linux. + Hopefully the whole file will be copied in a single call. + The copying itself should be performed in a loop 'till EOF is + reached (0 return) so a blocksize smaller or bigger than the actual + file size should not make any difference, also in case the file + content changes while being copied. + """ + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB + except OSError: + blocksize = 2 ** 27 # 128 MiB + # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, + # see gh-82500. + if sys.maxsize < 2 ** 32: + blocksize = min(blocksize, 2 ** 30) + return blocksize + + +if fcntl and hasattr(fcntl, 'FICLONE'): + def _ficlone(source_fd, target_fd): + """ + Perform a lightweight copy of two files, where the data blocks are + copied only when modified. This is known as Copy on Write (CoW), + instantaneous copy or reflink. + """ + fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd) +else: + _ficlone = None + + +if posix and hasattr(posix, '_fcopyfile'): + def _fcopyfile(source_fd, target_fd): + """ + Copy a regular file content using high-performance fcopyfile(3) + syscall (macOS). 
+ """ + posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) +else: + _fcopyfile = None + + +if hasattr(os, 'copy_file_range'): + def _copy_file_range(source_fd, target_fd): + """ + Copy data from one regular mmap-like fd to another by using a + high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side + copy. + This should work on Linux >= 4.5 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.copy_file_range(source_fd, target_fd, blocksize, + offset_dst=offset) + if sent == 0: + break # EOF + offset += sent +else: + _copy_file_range = None + + +if hasattr(os, 'sendfile'): + def _sendfile(source_fd, target_fd): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.sendfile(target_fd, source_fd, offset, blocksize) + if sent == 0: + break # EOF + offset += sent +else: + _sendfile = None + + +if _winapi and hasattr(_winapi, 'CopyFile2'): + def copyfile2(source, target): + """ + Copy from one file to another using CopyFile2 (Windows only). + """ + _winapi.CopyFile2(source, target, 0) +else: + copyfile2 = None + + +def copyfileobj(source_f, target_f): + """ + Copy data from file-like object source_f to file-like object target_f. + """ + try: + source_fd = source_f.fileno() + target_fd = target_f.fileno() + except Exception: + pass # Fall through to generic code. + else: + try: + # Use OS copy-on-write where available. + if _ficlone: + try: + _ficlone(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV): + raise err + + # Use OS copy where available. 
+ if _fcopyfile: + try: + _fcopyfile(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (EINVAL, ENOTSUP): + raise err + if _copy_file_range: + try: + _copy_file_range(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (ETXTBSY, EXDEV): + raise err + if _sendfile: + try: + _sendfile(source_fd, target_fd) + return + except OSError as err: + if err.errno != ENOTSOCK: + raise err + except OSError as err: + # Produce more useful error messages. + err.filename = source_f.name + err.filename2 = target_f.name + raise err + + # Last resort: copy with fileobj read() and write(). + read_source = source_f.read + write_target = target_f.write + while buf := read_source(1024 * 1024): + write_target(buf) + + +def magic_open(path, mode='r', buffering=-1, encoding=None, errors=None, + newline=None): + """ + Open the file pointed to by this path and return a file object, as + the built-in open() function does. + """ + text = 'b' not in mode + if text: + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. 
+ encoding = text_encoding(encoding) + try: + return open(path, mode, buffering, encoding, errors, newline) + except TypeError: + pass + cls = type(path) + mode = ''.join(sorted(c for c in mode if c not in 'bt')) + if text: + try: + attr = getattr(cls, f'__open_{mode}__') + except AttributeError: + pass + else: + return attr(path, buffering, encoding, errors, newline) + elif encoding is not None: + raise ValueError("binary mode doesn't take an encoding argument") + elif errors is not None: + raise ValueError("binary mode doesn't take an errors argument") + elif newline is not None: + raise ValueError("binary mode doesn't take a newline argument") + + try: + attr = getattr(cls, f'__open_{mode}b__') + except AttributeError: + pass + else: + stream = attr(path, buffering) + if text: + stream = TextIOWrapper(stream, encoding, errors, newline) + return stream + + raise TypeError(f"{cls.__name__} can't be opened with mode {mode!r}") + + +def ensure_distinct_paths(source, target): + """ + Raise OSError(EINVAL) if the other path is within this path. + """ + # Note: there is no straightforward, foolproof algorithm to determine + # if one directory is within another (a particularly perverse example + # would be a single network share mounted in one location via NFS, and + # in another location via CIFS), so we simply checks whether the + # other path is lexically equal to, or within, this path. + if source == target: + err = OSError(EINVAL, "Source and target are the same path") + elif source in target.parents: + err = OSError(EINVAL, "Source path is a parent of target path") + else: + return + err.filename = str(source) + err.filename2 = str(target) + raise err + + +def ensure_different_files(source, target): + """ + Raise OSError(EINVAL) if both paths refer to the same file. 
+ """ + try: + source_file_id = source.info._file_id + target_file_id = target.info._file_id + except AttributeError: + if source != target: + return + else: + try: + if source_file_id() != target_file_id(): + return + except (OSError, ValueError): + return + err = OSError(EINVAL, "Source and target are the same file") + err.filename = str(source) + err.filename2 = str(target) + raise err + + +def copy_info(info, target, follow_symlinks=True): + """Copy metadata from the given PathInfo to the given local path.""" + copy_times_ns = ( + hasattr(info, '_access_time_ns') and + hasattr(info, '_mod_time_ns') and + (follow_symlinks or os.utime in os.supports_follow_symlinks)) + if copy_times_ns: + t0 = info._access_time_ns(follow_symlinks=follow_symlinks) + t1 = info._mod_time_ns(follow_symlinks=follow_symlinks) + os.utime(target, ns=(t0, t1), follow_symlinks=follow_symlinks) + + # We must copy extended attributes before the file is (potentially) + # chmod()'ed read-only, otherwise setxattr() will error with -EACCES. 
+ copy_xattrs = ( + hasattr(info, '_xattrs') and + hasattr(os, 'setxattr') and + (follow_symlinks or os.setxattr in os.supports_follow_symlinks)) + if copy_xattrs: + xattrs = info._xattrs(follow_symlinks=follow_symlinks) + for attr, value in xattrs: + try: + os.setxattr(target, attr, value, follow_symlinks=follow_symlinks) + except OSError as e: + if e.errno not in (EPERM, ENOTSUP, ENODATA, EINVAL, EACCES): + raise + + copy_posix_permissions = ( + hasattr(info, '_posix_permissions') and + (follow_symlinks or os.chmod in os.supports_follow_symlinks)) + if copy_posix_permissions: + posix_permissions = info._posix_permissions(follow_symlinks=follow_symlinks) + try: + os.chmod(target, posix_permissions, follow_symlinks=follow_symlinks) + except NotImplementedError: + # if we got a NotImplementedError, it's because + # * follow_symlinks=False, + # * lchown() is unavailable, and + # * either + # * fchownat() is unavailable or + # * fchownat() doesn't implement AT_SYMLINK_NOFOLLOW. + # (it returned ENOSUP.) + # therefore we're out of options--we simply cannot chown the + # symlink. give up, suppress the error. + # (which is what shutil always did in this circumstance.) 
+ pass + + copy_bsd_flags = ( + hasattr(info, '_bsd_flags') and + hasattr(os, 'chflags') and + (follow_symlinks or os.chflags in os.supports_follow_symlinks)) + if copy_bsd_flags: + bsd_flags = info._bsd_flags(follow_symlinks=follow_symlinks) + try: + os.chflags(target, bsd_flags, follow_symlinks=follow_symlinks) + except OSError as why: + if why.errno not in (EOPNOTSUPP, ENOTSUP): + raise + + +class _PathInfoBase: + __slots__ = ('_path', '_stat_result', '_lstat_result') + + def __init__(self, path): + self._path = str(path) + + def __repr__(self): + path_type = "WindowsPath" if os.name == "nt" else "PosixPath" + return f"<{path_type}.info>" + + def _stat(self, *, follow_symlinks=True, ignore_errors=False): + """Return the status as an os.stat_result, or None if stat() fails and + ignore_errors is true.""" + if follow_symlinks: + try: + result = self._stat_result + except AttributeError: + pass + else: + if ignore_errors or result is not None: + return result + try: + self._stat_result = os.stat(self._path) + except (OSError, ValueError): + self._stat_result = None + if not ignore_errors: + raise + return self._stat_result + else: + try: + result = self._lstat_result + except AttributeError: + pass + else: + if ignore_errors or result is not None: + return result + try: + self._lstat_result = os.lstat(self._path) + except (OSError, ValueError): + self._lstat_result = None + if not ignore_errors: + raise + return self._lstat_result + + def _posix_permissions(self, *, follow_symlinks=True): + """Return the POSIX file permissions.""" + return S_IMODE(self._stat(follow_symlinks=follow_symlinks).st_mode) + + def _file_id(self, *, follow_symlinks=True): + """Returns the identifier of the file.""" + st = self._stat(follow_symlinks=follow_symlinks) + return st.st_dev, st.st_ino + + def _access_time_ns(self, *, follow_symlinks=True): + """Return the access time in nanoseconds.""" + return self._stat(follow_symlinks=follow_symlinks).st_atime_ns + + def _mod_time_ns(self, *, 
follow_symlinks=True): + """Return the modify time in nanoseconds.""" + return self._stat(follow_symlinks=follow_symlinks).st_mtime_ns + + if hasattr(os.stat_result, 'st_flags'): + def _bsd_flags(self, *, follow_symlinks=True): + """Return the flags.""" + return self._stat(follow_symlinks=follow_symlinks).st_flags + + if hasattr(os, 'listxattr'): + def _xattrs(self, *, follow_symlinks=True): + """Return the xattrs as a list of (attr, value) pairs, or an empty + list if extended attributes aren't supported.""" + try: + return [ + (attr, os.getxattr(self._path, attr, follow_symlinks=follow_symlinks)) + for attr in os.listxattr(self._path, follow_symlinks=follow_symlinks)] + except OSError as err: + if err.errno not in (EPERM, ENOTSUP, ENODATA, EINVAL, EACCES): + raise + return [] + + +class _WindowsPathInfo(_PathInfoBase): + """Implementation of pathlib.types.PathInfo that provides status + information for Windows paths. Don't try to construct it yourself.""" + __slots__ = ('_exists', '_is_dir', '_is_file', '_is_symlink') + + def exists(self, *, follow_symlinks=True): + """Whether this path exists.""" + if not follow_symlinks and self.is_symlink(): + return True + try: + return self._exists + except AttributeError: + if os.path.exists(self._path): + self._exists = True + return True + else: + self._exists = self._is_dir = self._is_file = False + return False + + def is_dir(self, *, follow_symlinks=True): + """Whether this path is a directory.""" + if not follow_symlinks and self.is_symlink(): + return False + try: + return self._is_dir + except AttributeError: + if os.path.isdir(self._path): + self._is_dir = self._exists = True + return True + else: + self._is_dir = False + return False + + def is_file(self, *, follow_symlinks=True): + """Whether this path is a regular file.""" + if not follow_symlinks and self.is_symlink(): + return False + try: + return self._is_file + except AttributeError: + if os.path.isfile(self._path): + self._is_file = self._exists = True + 
return True + else: + self._is_file = False + return False + + def is_symlink(self): + """Whether this path is a symbolic link.""" + try: + return self._is_symlink + except AttributeError: + self._is_symlink = os.path.islink(self._path) + return self._is_symlink + + +class _PosixPathInfo(_PathInfoBase): + """Implementation of pathlib.types.PathInfo that provides status + information for POSIX paths. Don't try to construct it yourself.""" + __slots__ = () + + def exists(self, *, follow_symlinks=True): + """Whether this path exists.""" + st = self._stat(follow_symlinks=follow_symlinks, ignore_errors=True) + if st is None: + return False + return True + + def is_dir(self, *, follow_symlinks=True): + """Whether this path is a directory.""" + st = self._stat(follow_symlinks=follow_symlinks, ignore_errors=True) + if st is None: + return False + return S_ISDIR(st.st_mode) + + def is_file(self, *, follow_symlinks=True): + """Whether this path is a regular file.""" + st = self._stat(follow_symlinks=follow_symlinks, ignore_errors=True) + if st is None: + return False + return S_ISREG(st.st_mode) + + def is_symlink(self): + """Whether this path is a symbolic link.""" + st = self._stat(follow_symlinks=False, ignore_errors=True) + if st is None: + return False + return S_ISLNK(st.st_mode) + + +PathInfo = _WindowsPathInfo if os.name == 'nt' else _PosixPathInfo + + +class DirEntryInfo(_PathInfoBase): + """Implementation of pathlib.types.PathInfo that provides status + information by querying a wrapped os.DirEntry object. 
Don't try to + construct it yourself.""" + __slots__ = ('_entry',) + + def __init__(self, entry): + super().__init__(entry.path) + self._entry = entry + + def _stat(self, *, follow_symlinks=True, ignore_errors=False): + try: + return self._entry.stat(follow_symlinks=follow_symlinks) + except OSError: + if not ignore_errors: + raise + return None + + def exists(self, *, follow_symlinks=True): + """Whether this path exists.""" + if not follow_symlinks: + return True + return self._stat(ignore_errors=True) is not None + + def is_dir(self, *, follow_symlinks=True): + """Whether this path is a directory.""" + try: + return self._entry.is_dir(follow_symlinks=follow_symlinks) + except OSError: + return False + + def is_file(self, *, follow_symlinks=True): + """Whether this path is a regular file.""" + try: + return self._entry.is_file(follow_symlinks=follow_symlinks) + except OSError: + return False + + def is_symlink(self): + """Whether this path is a symbolic link.""" + try: + return self._entry.is_symlink() + except OSError: + return False diff --git a/Lib/pathlib/types.py b/Lib/pathlib/types.py new file mode 100644 index 00000000000..d8f5c34a1a7 --- /dev/null +++ b/Lib/pathlib/types.py @@ -0,0 +1,430 @@ +""" +Protocols for supporting classes in pathlib. +""" + +# This module also provides abstract base classes for rich path objects. +# These ABCs are a *private* part of the Python standard library, but they're +# made available as a PyPI package called "pathlib-abc". It's possible they'll +# become an official part of the standard library in future. 
+# +# Three ABCs are provided -- _JoinablePath, _ReadablePath and _WritablePath + + +from abc import ABC, abstractmethod +from glob import _PathGlobber +from io import text_encoding +from pathlib._os import magic_open, ensure_distinct_paths, ensure_different_files, copyfileobj +from pathlib import PurePath, Path +from typing import Optional, Protocol, runtime_checkable + + +def _explode_path(path, split): + """ + Split the path into a 2-tuple (anchor, parts), where *anchor* is the + uppermost parent of the path (equivalent to path.parents[-1]), and + *parts* is a reversed list of parts following the anchor. + """ + parent, name = split(path) + names = [] + while path != parent: + names.append(name) + path = parent + parent, name = split(path) + return path, names + + +@runtime_checkable +class _PathParser(Protocol): + """Protocol for path parsers, which do low-level path manipulation. + + Path parsers provide a subset of the os.path API, specifically those + functions needed to provide JoinablePath functionality. Each JoinablePath + subclass references its path parser via a 'parser' class attribute. + """ + + sep: str + altsep: Optional[str] + def split(self, path: str) -> tuple[str, str]: ... + def splitext(self, path: str) -> tuple[str, str]: ... + def normcase(self, path: str) -> str: ... + + +@runtime_checkable +class PathInfo(Protocol): + """Protocol for path info objects, which support querying the file type. + Methods may return cached results. + """ + def exists(self, *, follow_symlinks: bool = True) -> bool: ... + def is_dir(self, *, follow_symlinks: bool = True) -> bool: ... + def is_file(self, *, follow_symlinks: bool = True) -> bool: ... + def is_symlink(self) -> bool: ... + + +class _JoinablePath(ABC): + """Abstract base class for pure path objects. + + This class *does not* provide several magic methods that are defined in + its implementation PurePath. 
They are: __init__, __fspath__, __bytes__, + __reduce__, __hash__, __eq__, __lt__, __le__, __gt__, __ge__. + """ + __slots__ = () + + @property + @abstractmethod + def parser(self): + """Implementation of pathlib._types.Parser used for low-level path + parsing and manipulation. + """ + raise NotImplementedError + + @abstractmethod + def with_segments(self, *pathsegments): + """Construct a new path object from any number of path-like objects. + Subclasses may override this method to customize how new path objects + are created from methods like `iterdir()`. + """ + raise NotImplementedError + + @abstractmethod + def __str__(self): + """Return the string representation of the path, suitable for + passing to system calls.""" + raise NotImplementedError + + @property + def anchor(self): + """The concatenation of the drive and root, or ''.""" + return _explode_path(str(self), self.parser.split)[0] + + @property + def name(self): + """The final path component, if any.""" + return self.parser.split(str(self))[1] + + @property + def suffix(self): + """ + The final component's last suffix, if any. + + This includes the leading period. For example: '.txt' + """ + return self.parser.splitext(self.name)[1] + + @property + def suffixes(self): + """ + A list of the final component's suffixes, if any. + + These include the leading periods. 
For example: ['.tar', '.gz'] + """ + split = self.parser.splitext + stem, suffix = split(self.name) + suffixes = [] + while suffix: + suffixes.append(suffix) + stem, suffix = split(stem) + return suffixes[::-1] + + @property + def stem(self): + """The final path component, minus its last suffix.""" + return self.parser.splitext(self.name)[0] + + def with_name(self, name): + """Return a new path with the file name changed.""" + split = self.parser.split + if split(name)[0]: + raise ValueError(f"Invalid name {name!r}") + path = str(self) + path = path.removesuffix(split(path)[1]) + name + return self.with_segments(path) + + def with_stem(self, stem): + """Return a new path with the stem changed.""" + suffix = self.suffix + if not suffix: + return self.with_name(stem) + elif not stem: + # If the suffix is non-empty, we can't make the stem empty. + raise ValueError(f"{self!r} has a non-empty suffix") + else: + return self.with_name(stem + suffix) + + def with_suffix(self, suffix): + """Return a new path with the file suffix changed. If the path + has no suffix, add given suffix. If the given suffix is an empty + string, remove the suffix from the path. + """ + stem = self.stem + if not stem: + # If the stem is empty, we can't make the suffix non-empty. + raise ValueError(f"{self!r} has an empty name") + elif suffix and not suffix.startswith('.'): + raise ValueError(f"Invalid suffix {suffix!r}") + else: + return self.with_name(stem + suffix) + + @property + def parts(self): + """An object providing sequence-like access to the + components in the filesystem path.""" + anchor, parts = _explode_path(str(self), self.parser.split) + if anchor: + parts.append(anchor) + return tuple(reversed(parts)) + + def joinpath(self, *pathsegments): + """Combine this path with one or several arguments, and return a + new path representing either a subpath (if all arguments are relative + paths) or a totally different path (if one of the arguments is + anchored). 
+ """ + return self.with_segments(str(self), *pathsegments) + + def __truediv__(self, key): + try: + return self.with_segments(str(self), key) + except TypeError: + return NotImplemented + + def __rtruediv__(self, key): + try: + return self.with_segments(key, str(self)) + except TypeError: + return NotImplemented + + @property + def parent(self): + """The logical parent of the path.""" + path = str(self) + parent = self.parser.split(path)[0] + if path != parent: + return self.with_segments(parent) + return self + + @property + def parents(self): + """A sequence of this path's logical parents.""" + split = self.parser.split + path = str(self) + parent = split(path)[0] + parents = [] + while path != parent: + parents.append(self.with_segments(parent)) + path = parent + parent = split(path)[0] + return tuple(parents) + + def full_match(self, pattern): + """ + Return True if this path matches the given glob-style pattern. The + pattern is matched against the entire path. + """ + case_sensitive = self.parser.normcase('Aa') == 'Aa' + globber = _PathGlobber(self.parser.sep, case_sensitive, recursive=True) + match = globber.compile(pattern, altsep=self.parser.altsep) + return match(str(self)) is not None + + +class _ReadablePath(_JoinablePath): + """Abstract base class for readable path objects. + + The Path class implements this ABC for local filesystem paths. Users may + create subclasses to implement readable virtual filesystem paths, such as + paths in archive files or on remote storage systems. + """ + __slots__ = () + + @property + @abstractmethod + def info(self): + """ + A PathInfo object that exposes the file type and other file attributes + of this path. + """ + raise NotImplementedError + + @abstractmethod + def __open_rb__(self, buffering=-1): + """ + Open the file pointed to by this path for reading in binary mode and + return a file object, like open(mode='rb'). 
+ """ + raise NotImplementedError + + def read_bytes(self): + """ + Open the file in bytes mode, read it, and close the file. + """ + with magic_open(self, mode='rb', buffering=0) as f: + return f.read() + + def read_text(self, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, read it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = text_encoding(encoding) + with magic_open(self, mode='r', encoding=encoding, errors=errors, newline=newline) as f: + return f.read() + + @abstractmethod + def iterdir(self): + """Yield path objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + raise NotImplementedError + + def glob(self, pattern, *, recurse_symlinks=True): + """Iterate over this subtree and yield all existing files (of any + kind, including directories) matching the given relative pattern. 
+ """ + anchor, parts = _explode_path(pattern, self.parser.split) + if anchor: + raise NotImplementedError("Non-relative patterns are unsupported") + elif not parts: + raise ValueError(f"Unacceptable pattern: {pattern!r}") + elif not recurse_symlinks: + raise NotImplementedError("recurse_symlinks=False is unsupported") + case_sensitive = self.parser.normcase('Aa') == 'Aa' + globber = _PathGlobber(self.parser.sep, case_sensitive, recursive=True) + select = globber.selector(parts) + return select(self.joinpath('')) + + def walk(self, top_down=True, on_error=None, follow_symlinks=False): + """Walk the directory tree from this directory, similar to os.walk().""" + paths = [self] + while paths: + path = paths.pop() + if isinstance(path, tuple): + yield path + continue + dirnames = [] + filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) + try: + for child in path.iterdir(): + if child.info.is_dir(follow_symlinks=follow_symlinks): + if not top_down: + paths.append(child) + dirnames.append(child.name) + else: + filenames.append(child.name) + except OSError as error: + if on_error is not None: + on_error(error) + if not top_down: + while not isinstance(paths.pop(), tuple): + pass + continue + if top_down: + yield path, dirnames, filenames + paths += [path.joinpath(d) for d in reversed(dirnames)] + + @abstractmethod + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + raise NotImplementedError + + def copy(self, target, **kwargs): + """ + Recursively copy this file or directory tree to the given destination. + """ + ensure_distinct_paths(self, target) + target._copy_from(self, **kwargs) + return target.joinpath() # Empty join to ensure fresh metadata. + + def copy_into(self, target_dir, **kwargs): + """ + Copy this file or directory tree into the given existing directory. 
+ """ + name = self.name + if not name: + raise ValueError(f"{self!r} has an empty name") + return self.copy(target_dir / name, **kwargs) + + +class _WritablePath(_JoinablePath): + """Abstract base class for writable path objects. + + The Path class implements this ABC for local filesystem paths. Users may + create subclasses to implement writable virtual filesystem paths, such as + paths in archive files or on remote storage systems. + """ + __slots__ = () + + @abstractmethod + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + raise NotImplementedError + + @abstractmethod + def mkdir(self): + """ + Create a new directory at this given path. + """ + raise NotImplementedError + + @abstractmethod + def __open_wb__(self, buffering=-1): + """ + Open the file pointed to by this path for writing in binary mode and + return a file object, like open(mode='wb'). + """ + raise NotImplementedError + + def write_bytes(self, data): + """ + Open the file in bytes mode, write to it, and close the file. + """ + # type-check for the buffer interface before truncating the file + view = memoryview(data) + with magic_open(self, mode='wb') as f: + return f.write(view) + + def write_text(self, data, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, write to it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = text_encoding(encoding) + if not isinstance(data, str): + raise TypeError('data must be str, not %s' % + data.__class__.__name__) + with magic_open(self, mode='w', encoding=encoding, errors=errors, newline=newline) as f: + return f.write(data) + + def _copy_from(self, source, follow_symlinks=True): + """ + Recursively copy the given path to this path. 
+ """ + stack = [(source, self)] + while stack: + src, dst = stack.pop() + if not follow_symlinks and src.info.is_symlink(): + dst.symlink_to(str(src.readlink()), src.info.is_dir()) + elif src.info.is_dir(): + children = src.iterdir() + dst.mkdir() + for child in children: + stack.append((child, dst.joinpath(child.name))) + else: + ensure_different_files(src, dst) + with magic_open(src, 'rb') as source_f: + with magic_open(dst, 'wb') as target_f: + copyfileobj(source_f, target_f) + + +_JoinablePath.register(PurePath) +_ReadablePath.register(Path) +_WritablePath.register(Path) diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index 7e8abbf6ffe..af2808a77da 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -61,7 +61,7 @@ resulting RE will match the second character. \number Matches the contents of the group of the same number. \A Matches only at the start of the string. - \Z Matches only at the end of the string. + \z Matches only at the end of the string. \b Matches the empty string, but only at the start or end of a word. \B Matches the empty string, but not at the start or end of a word. 
\d Matches any decimal digit; equivalent to the set [0-9] in diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index 1b1aaa7714b..20dd561d1c1 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -28,6 +28,8 @@ POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE), } +_CHARSET_ALL = [(NEGATE, None)] + def _combine_flags(flags, add_flags, del_flags, TYPE_FLAGS=_parser.TYPE_FLAGS): if add_flags & TYPE_FLAGS: @@ -84,17 +86,22 @@ def _compile(code, pattern, flags): code[skip] = _len(code) - skip elif op is IN: charset, hascased = _optimize_charset(av, iscased, tolower, fixes) - if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE: - emit(IN_LOC_IGNORE) - elif not hascased: - emit(IN) - elif not fixes: # ascii - emit(IN_IGNORE) + if not charset: + emit(FAILURE) + elif charset == _CHARSET_ALL: + emit(ANY_ALL) else: - emit(IN_UNI_IGNORE) - skip = _len(code); emit(0) - _compile_charset(charset, flags, code) - code[skip] = _len(code) - skip + if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE: + emit(IN_LOC_IGNORE) + elif not hascased: + emit(IN) + elif not fixes: # ascii + emit(IN_IGNORE) + else: + emit(IN_UNI_IGNORE) + skip = _len(code); emit(0) + _compile_charset(charset, flags, code) + code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(ANY_ALL) @@ -277,6 +284,10 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): charmap[i] = 1 elif op is NEGATE: out.append((op, av)) + elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail: + # Optimize [\s\S] etc. 
+ out = [] if out else _CHARSET_ALL + return out, False else: tail.append((op, av)) except IndexError: @@ -524,13 +535,18 @@ def _compile_info(code, pattern, flags): # look for a literal prefix prefix = [] prefix_skip = 0 - charset = [] # not used + charset = None # not used if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE): # look for literal prefix prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags) # if no prefix, look for charset prefix if not prefix: charset = _get_charset_prefix(pattern, flags) + if charset: + charset, hascased = _optimize_charset(charset) + assert not hascased + if charset == _CHARSET_ALL: + charset = None ## if prefix: ## print("*** PREFIX", prefix, prefix_skip) ## if charset: @@ -565,8 +581,6 @@ def _compile_info(code, pattern, flags): # generate overlap table code.extend(_generate_overlap_table(prefix)) elif charset: - charset, hascased = _optimize_charset(charset) - assert not hascased _compile_charset(charset, flags, code) code[skip] = len(code) - skip diff --git a/Lib/re/_constants.py b/Lib/re/_constants.py index 9c3c294ba44..d6f32302d37 100644 --- a/Lib/re/_constants.py +++ b/Lib/re/_constants.py @@ -15,7 +15,7 @@ MAGIC = 20230612 -from _sre import MAXREPEAT, MAXGROUPS +from _sre import MAXREPEAT, MAXGROUPS # noqa: F401 # SRE standard exception (access as sre.error) # should this really be here? 
@@ -206,6 +206,8 @@ def _makecodes(*names): CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK } +CH_NEGATE = dict(zip(CHCODES[::2] + CHCODES[1::2], CHCODES[1::2] + CHCODES[::2])) + # flags SRE_FLAG_IGNORECASE = 2 # case insensitive SRE_FLAG_LOCALE = 4 # honour system locale diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index f3c779340fe..35ab7ede2a7 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -49,7 +49,8 @@ r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), - r"\Z": (AT, AT_END_STRING), # end of string + r"\z": (AT, AT_END_STRING), # end of string + r"\Z": (AT, AT_END_STRING), # end of string (obsolete) } FLAGS = { @@ -807,14 +808,6 @@ def _parse(source, state, verbose, nested, first=False): state.grouprefpos[condgroup] = ( source.tell() - len(condname) - 1 ) - if not (condname.isdecimal() and condname.isascii()): - import warnings - warnings.warn( - "bad character in group name %s at position %d" % - (repr(condname) if source.istext else ascii(condname), - source.tell() - len(condname) - 1), - DeprecationWarning, stacklevel=nested + 6 - ) state.checklookbehindgroup(condgroup, source) item_yes = _parse(source, state, verbose, nested + 1) if source.match("|"): @@ -1038,14 +1031,6 @@ def addgroup(index, pos): if index >= MAXGROUPS: raise s.error("invalid group reference %d" % index, len(name) + 1) - if not (name.isdecimal() and name.isascii()): - import warnings - warnings.warn( - "bad character in group name %s at position %d" % - (repr(name) if s.istext else ascii(name), - s.tell() - len(name) - 1), - DeprecationWarning, stacklevel=5 - ) addgroup(index, len(name) + 1) elif c == "0": if s.next in OCTDIGITS: diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 10ed496d4e2..5daaf3b3fdd 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -1,11 +1,15 @@ """Test cases for the fnmatch module.""" -import unittest import os import 
string +import unittest import warnings +from fnmatch import fnmatch, fnmatchcase, translate, filter, filterfalse + + +IGNORECASE = os.path.normcase('P') == os.path.normcase('p') +NORMSEP = os.path.normcase('\\') == os.path.normcase('/') -from fnmatch import fnmatch, fnmatchcase, translate, filter class FnmatchTestCase(unittest.TestCase): @@ -77,23 +81,20 @@ def test_bytes(self): self.check_match(b'foo\nbar', b'foo*') def test_case(self): - ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') check = self.check_match check('abc', 'abc') - check('AbC', 'abc', ignorecase) - check('abc', 'AbC', ignorecase) + check('AbC', 'abc', IGNORECASE) + check('abc', 'AbC', IGNORECASE) check('AbC', 'AbC') def test_sep(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match check('usr/bin', 'usr/bin') - check('usr\\bin', 'usr/bin', normsep) - check('usr/bin', 'usr\\bin', normsep) + check('usr\\bin', 'usr/bin', NORMSEP) + check('usr/bin', 'usr\\bin', NORMSEP) check('usr\\bin', 'usr\\bin') def test_char_set(self): - ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') check = self.check_match tescases = string.ascii_lowercase + string.digits + string.punctuation for c in tescases: @@ -101,11 +102,11 @@ def test_char_set(self): check(c, '[!az]', c not in 'az') # Case insensitive. for c in tescases: - check(c, '[AZ]', (c in 'az') and ignorecase) - check(c, '[!AZ]', (c not in 'az') or not ignorecase) + check(c, '[AZ]', (c in 'az') and IGNORECASE) + check(c, '[!AZ]', (c not in 'az') or not IGNORECASE) for c in string.ascii_uppercase: - check(c, '[az]', (c in 'AZ') and ignorecase) - check(c, '[!az]', (c not in 'AZ') or not ignorecase) + check(c, '[az]', (c in 'AZ') and IGNORECASE) + check(c, '[!az]', (c not in 'AZ') or not IGNORECASE) # Repeated same character. 
for c in tescases: check(c, '[aa]', c == 'a') @@ -120,8 +121,6 @@ def test_char_set(self): check('[!]', '[!]') def test_range(self): - ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match tescases = string.ascii_lowercase + string.digits + string.punctuation for c in tescases: @@ -131,11 +130,11 @@ def test_range(self): check(c, '[!b-dx-z]', c not in 'bcdxyz') # Case insensitive. for c in tescases: - check(c, '[B-D]', (c in 'bcd') and ignorecase) - check(c, '[!B-D]', (c not in 'bcd') or not ignorecase) + check(c, '[B-D]', (c in 'bcd') and IGNORECASE) + check(c, '[!B-D]', (c not in 'bcd') or not IGNORECASE) for c in string.ascii_uppercase: - check(c, '[b-d]', (c in 'BCD') and ignorecase) - check(c, '[!b-d]', (c not in 'BCD') or not ignorecase) + check(c, '[b-d]', (c in 'BCD') and IGNORECASE) + check(c, '[!b-d]', (c not in 'BCD') or not IGNORECASE) # Upper bound == lower bound. for c in tescases: check(c, '[b-b]', c == 'b') @@ -144,7 +143,7 @@ def test_range(self): check(c, '[!-#]', c not in '-#') check(c, '[!--.]', c not in '-.') check(c, '[^-`]', c in '^_`') - if not (normsep and c == '/'): + if not (NORMSEP and c == '/'): check(c, '[[-^]', c in r'[\]^') check(c, r'[\-^]', c in r'\]^') check(c, '[b-]', c in '-b') @@ -160,47 +159,45 @@ def test_range(self): check(c, '[d-bx-z]', c in 'xyz') check(c, '[!d-bx-z]', c not in 'xyz') check(c, '[d-b^-`]', c in '^_`') - if not (normsep and c == '/'): + if not (NORMSEP and c == '/'): check(c, '[d-b[-^]', c in r'[\]^') def test_sep_in_char_set(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match check('/', r'[/]') check('\\', r'[\]') - check('/', r'[\]', normsep) - check('\\', r'[/]', normsep) + check('/', r'[\]', NORMSEP) + check('\\', r'[/]', NORMSEP) check('[/]', r'[/]', False) check(r'[\\]', r'[/]', False) check('\\', r'[\t]') - check('/', r'[\t]', normsep) + check('/', r'[\t]', NORMSEP) 
check('t', r'[\t]') check('\t', r'[\t]', False) def test_sep_in_range(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match - check('a/b', 'a[.-0]b', not normsep) + check('a/b', 'a[.-0]b', not NORMSEP) check('a\\b', 'a[.-0]b', False) - check('a\\b', 'a[Z-^]b', not normsep) + check('a\\b', 'a[Z-^]b', not NORMSEP) check('a/b', 'a[Z-^]b', False) - check('a/b', 'a[/-0]b', not normsep) + check('a/b', 'a[/-0]b', not NORMSEP) check(r'a\b', 'a[/-0]b', False) check('a[/-0]b', 'a[/-0]b', False) check(r'a[\-0]b', 'a[/-0]b', False) check('a/b', 'a[.-/]b') - check(r'a\b', 'a[.-/]b', normsep) + check(r'a\b', 'a[.-/]b', NORMSEP) check('a[.-/]b', 'a[.-/]b', False) check(r'a[.-\]b', 'a[.-/]b', False) check(r'a\b', r'a[\-^]b') - check('a/b', r'a[\-^]b', normsep) + check('a/b', r'a[\-^]b', NORMSEP) check(r'a[\-^]b', r'a[\-^]b', False) check('a[/-^]b', r'a[\-^]b', False) - check(r'a\b', r'a[Z-\]b', not normsep) + check(r'a\b', r'a[Z-\]b', not NORMSEP) check('a/b', r'a[Z-\]b', False) check(r'a[Z-\]b', r'a[Z-\]b', False) check('a[Z-/]b', r'a[Z-\]b', False) @@ -221,24 +218,24 @@ class TranslateTestCase(unittest.TestCase): def test_translate(self): import re - self.assertEqual(translate('*'), r'(?s:.*)\Z') - self.assertEqual(translate('?'), r'(?s:.)\Z') - self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z') - self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z') - self.assertEqual(translate('[]]'), r'(?s:[]])\Z') - self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z') - self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z') - self.assertEqual(translate('[x'), r'(?s:\[x)\Z') + self.assertEqual(translate('*'), r'(?s:.*)\z') + self.assertEqual(translate('?'), r'(?s:.)\z') + self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\z') + self.assertEqual(translate('[abc]'), r'(?s:[abc])\z') + self.assertEqual(translate('[]]'), r'(?s:[]])\z') + self.assertEqual(translate('[!x]'), r'(?s:[^x])\z') + self.assertEqual(translate('[^x]'), r'(?s:[\^x])\z') + 
self.assertEqual(translate('[x'), r'(?s:\[x)\z') # from the docs - self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') + self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\z') # squash consecutive stars - self.assertEqual(translate('*********'), r'(?s:.*)\Z') - self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') - self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') - self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + self.assertEqual(translate('*********'), r'(?s:.*)\z') + self.assertEqual(translate('A*********'), r'(?s:A.*)\z') + self.assertEqual(translate('*********A'), r'(?s:.*A)\z') + self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\z') # fancy translation to prevent exponential-time match failure t = translate('**a*a****a') - self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z') + self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\z') # and try pasting multiple translate results - it's an undocumented # feature that this works r1 = translate('**a**a**a*') @@ -250,6 +247,75 @@ def test_translate(self): self.assertTrue(re.match(fatre, 'cbabcaxc')) self.assertFalse(re.match(fatre, 'dabccbad')) + def test_translate_wildcards(self): + for pattern, expect in [ + ('ab*', r'(?s:ab.*)\z'), + ('ab*cd', r'(?s:ab.*cd)\z'), + ('ab*cd*', r'(?s:ab(?>.*?cd).*)\z'), + ('ab*cd*12', r'(?s:ab(?>.*?cd).*12)\z'), + ('ab*cd*12*', r'(?s:ab(?>.*?cd)(?>.*?12).*)\z'), + ('ab*cd*12*34', r'(?s:ab(?>.*?cd)(?>.*?12).*34)\z'), + ('ab*cd*12*34*', r'(?s:ab(?>.*?cd)(?>.*?12)(?>.*?34).*)\z'), + ]: + with self.subTest(pattern): + translated = translate(pattern) + self.assertEqual(translated, expect, pattern) + + for pattern, expect in [ + ('*ab', r'(?s:.*ab)\z'), + ('*ab*', r'(?s:(?>.*?ab).*)\z'), + ('*ab*cd', r'(?s:(?>.*?ab).*cd)\z'), + ('*ab*cd*', r'(?s:(?>.*?ab)(?>.*?cd).*)\z'), + ('*ab*cd*12', r'(?s:(?>.*?ab)(?>.*?cd).*12)\z'), + ('*ab*cd*12*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*)\z'), + ('*ab*cd*12*34', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*34)\z'), + 
('*ab*cd*12*34*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12)(?>.*?34).*)\z'), + ]: + with self.subTest(pattern): + translated = translate(pattern) + self.assertEqual(translated, expect, pattern) + + def test_translate_expressions(self): + for pattern, expect in [ + ('[', r'(?s:\[)\z'), + ('[!', r'(?s:\[!)\z'), + ('[]', r'(?s:\[\])\z'), + ('[abc', r'(?s:\[abc)\z'), + ('[!abc', r'(?s:\[!abc)\z'), + ('[abc]', r'(?s:[abc])\z'), + ('[!abc]', r'(?s:[^abc])\z'), + ('[!abc][!def]', r'(?s:[^abc][^def])\z'), + # with [[ + ('[[', r'(?s:\[\[)\z'), + ('[[a', r'(?s:\[\[a)\z'), + ('[[]', r'(?s:[\[])\z'), + ('[[]a', r'(?s:[\[]a)\z'), + ('[[]]', r'(?s:[\[]\])\z'), + ('[[]a]', r'(?s:[\[]a\])\z'), + ('[[a]', r'(?s:[\[a])\z'), + ('[[a]]', r'(?s:[\[a]\])\z'), + ('[[a]b', r'(?s:[\[a]b)\z'), + # backslashes + ('[\\', r'(?s:\[\\)\z'), + (r'[\]', r'(?s:[\\])\z'), + (r'[\\]', r'(?s:[\\\\])\z'), + ]: + with self.subTest(pattern): + translated = translate(pattern) + self.assertEqual(translated, expect, pattern) + + def test_star_indices_locations(self): + from fnmatch import _translate + + blocks = ['a^b', '***', '?', '?', '[a-z]', '[1-9]', '*', '++', '[[a'] + parts, star_indices = _translate(''.join(blocks), '*', '.') + expect_parts = ['a', r'\^', 'b', '*', + '.', '.', '[a-z]', '[1-9]', '*', + r'\+', r'\+', r'\[', r'\[', 'a'] + self.assertListEqual(parts, expect_parts) + self.assertListEqual(star_indices, [3, 8]) + + class FilterTestCase(unittest.TestCase): def test_filter(self): @@ -263,18 +329,41 @@ def test_mix_bytes_str(self): self.assertRaises(TypeError, filter, [b'test'], '*') def test_case(self): - ignorecase = os.path.normcase('P') == os.path.normcase('p') self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'), - ['Test.py', 'Test.PL'] if ignorecase else ['Test.py']) + ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.py']) self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'), - ['Test.py', 'Test.PL'] if ignorecase else ['Test.PL']) + ['Test.py', 'Test.PL'] if 
IGNORECASE else ['Test.PL']) def test_sep(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'), - ['usr/bin', 'usr\\lib'] if normsep else ['usr/bin']) + ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr/bin']) self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'), - ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib']) + ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr\\lib']) + + +class FilterFalseTestCase(unittest.TestCase): + + def test_filterfalse(self): + actual = filterfalse(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*') + self.assertListEqual(actual, ['Ruby', 'Tcl']) + actual = filterfalse([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*') + self.assertListEqual(actual, [b'Ruby', b'Tcl']) + + def test_mix_bytes_str(self): + self.assertRaises(TypeError, filterfalse, ['test'], b'*') + self.assertRaises(TypeError, filterfalse, [b'test'], '*') + + def test_case(self): + self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'), + ['Test.rb'] if IGNORECASE else ['Test.rb', 'Test.PL']) + self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'), + ['Test.rb'] if IGNORECASE else ['Test.py', 'Test.rb',]) + + def test_sep(self): + self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'), + ['usr'] if NORMSEP else ['usr', 'usr\\lib']) + self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'), + ['usr'] if NORMSEP else ['usr/bin', 'usr']) if __name__ == "__main__": diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index c3fb8939a69..d0ed5129253 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -459,111 +459,59 @@ def test_translate_matching(self): def test_translate(self): def fn(pat): return glob.translate(pat, seps='/') - self.assertEqual(fn('foo'), r'(?s:foo)\Z') - self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\Z') - self.assertEqual(fn('*'), r'(?s:[^/.][^/]*)\Z') - self.assertEqual(fn('?'), r'(?s:(?!\.)[^/])\Z') - 
self.assertEqual(fn('a*'), r'(?s:a[^/]*)\Z') - self.assertEqual(fn('*a'), r'(?s:(?!\.)[^/]*a)\Z') - self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\Z') - self.assertEqual(fn('?aa'), r'(?s:(?!\.)[^/]aa)\Z') - self.assertEqual(fn('aa?'), r'(?s:aa[^/])\Z') - self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\Z') - self.assertEqual(fn('**'), r'(?s:(?!\.)[^/]*)\Z') - self.assertEqual(fn('***'), r'(?s:(?!\.)[^/]*)\Z') - self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z') - self.assertEqual(fn('**b'), r'(?s:(?!\.)[^/]*b)\Z') + self.assertEqual(fn('foo'), r'(?s:foo)\z') + self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\z') + self.assertEqual(fn('*'), r'(?s:[^/.][^/]*)\z') + self.assertEqual(fn('?'), r'(?s:(?!\.)[^/])\z') + self.assertEqual(fn('a*'), r'(?s:a[^/]*)\z') + self.assertEqual(fn('*a'), r'(?s:(?!\.)[^/]*a)\z') + self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\z') + self.assertEqual(fn('?aa'), r'(?s:(?!\.)[^/]aa)\z') + self.assertEqual(fn('aa?'), r'(?s:aa[^/])\z') + self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\z') + self.assertEqual(fn('**'), r'(?s:(?!\.)[^/]*)\z') + self.assertEqual(fn('***'), r'(?s:(?!\.)[^/]*)\z') + self.assertEqual(fn('a**'), r'(?s:a[^/]*)\z') + self.assertEqual(fn('**b'), r'(?s:(?!\.)[^/]*b)\z') self.assertEqual(fn('/**/*/*.*/**'), - r'(?s:/(?!\.)[^/]*/[^/.][^/]*/(?!\.)[^/]*\.[^/]*/(?!\.)[^/]*)\Z') + r'(?s:/(?!\.)[^/]*/[^/.][^/]*/(?!\.)[^/]*\.[^/]*/(?!\.)[^/]*)\z') def test_translate_include_hidden(self): def fn(pat): return glob.translate(pat, include_hidden=True, seps='/') - self.assertEqual(fn('foo'), r'(?s:foo)\Z') - self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\Z') - self.assertEqual(fn('*'), r'(?s:[^/]+)\Z') - self.assertEqual(fn('?'), r'(?s:[^/])\Z') - self.assertEqual(fn('a*'), r'(?s:a[^/]*)\Z') - self.assertEqual(fn('*a'), r'(?s:[^/]*a)\Z') - self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\Z') - self.assertEqual(fn('?aa'), r'(?s:[^/]aa)\Z') - self.assertEqual(fn('aa?'), r'(?s:aa[^/])\Z') - self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\Z') - 
self.assertEqual(fn('**'), r'(?s:[^/]*)\Z') - self.assertEqual(fn('***'), r'(?s:[^/]*)\Z') - self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z') - self.assertEqual(fn('**b'), r'(?s:[^/]*b)\Z') - self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/[^/]*/[^/]+/[^/]*\.[^/]*/[^/]*)\Z') + self.assertEqual(fn('foo'), r'(?s:foo)\z') + self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\z') + self.assertEqual(fn('*'), r'(?s:[^/]+)\z') + self.assertEqual(fn('?'), r'(?s:[^/])\z') + self.assertEqual(fn('a*'), r'(?s:a[^/]*)\z') + self.assertEqual(fn('*a'), r'(?s:[^/]*a)\z') + self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\z') + self.assertEqual(fn('?aa'), r'(?s:[^/]aa)\z') + self.assertEqual(fn('aa?'), r'(?s:aa[^/])\z') + self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\z') + self.assertEqual(fn('**'), r'(?s:[^/]*)\z') + self.assertEqual(fn('***'), r'(?s:[^/]*)\z') + self.assertEqual(fn('a**'), r'(?s:a[^/]*)\z') + self.assertEqual(fn('**b'), r'(?s:[^/]*b)\z') + self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/[^/]*/[^/]+/[^/]*\.[^/]*/[^/]*)\z') def test_translate_recursive(self): def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps='/') - self.assertEqual(fn('*'), r'(?s:[^/]+)\Z') - self.assertEqual(fn('?'), r'(?s:[^/])\Z') - self.assertEqual(fn('**'), r'(?s:.*)\Z') - self.assertEqual(fn('**/**'), r'(?s:.*)\Z') - self.assertEqual(fn('***'), r'(?s:[^/]*)\Z') - self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z') - self.assertEqual(fn('**b'), r'(?s:[^/]*b)\Z') - self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:.+/)?[^/]+/[^/]*\.[^/]*/.*)\Z') + self.assertEqual(fn('*'), r'(?s:[^/]+)\z') + self.assertEqual(fn('?'), r'(?s:[^/])\z') + self.assertEqual(fn('**'), r'(?s:.*)\z') + self.assertEqual(fn('**/**'), r'(?s:.*)\z') + self.assertEqual(fn('***'), r'(?s:[^/]*)\z') + self.assertEqual(fn('a**'), r'(?s:a[^/]*)\z') + self.assertEqual(fn('**b'), r'(?s:[^/]*b)\z') + self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:.+/)?[^/]+/[^/]*\.[^/]*/.*)\z') def test_translate_seps(self): def fn(pat): return 
glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\']) - self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') - self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - - -@skip_unless_symlink -class SymlinkLoopGlobTests(unittest.TestCase): - - # gh-109959: On Linux, glob._isdir() and glob._lexists() can return False - # randomly when checking the "link/" symbolic link. - # https://github.com/python/cpython/issues/109959#issuecomment-2577550700 - @unittest.skip("flaky test") - def test_selflink(self): - tempdir = TESTFN + "_dir" - os.makedirs(tempdir) - self.addCleanup(shutil.rmtree, tempdir) - with change_cwd(tempdir): - os.makedirs('dir') - create_empty_file(os.path.join('dir', 'file')) - os.symlink(os.curdir, os.path.join('dir', 'link')) - - results = glob.glob('**', recursive=True) - self.assertEqual(len(results), len(set(results))) - results = set(results) - depth = 0 - while results: - path = os.path.join(*(['dir'] + ['link'] * depth)) - self.assertIn(path, results) - results.remove(path) - if not results: - break - path = os.path.join(path, 'file') - self.assertIn(path, results) - results.remove(path) - depth += 1 - - results = glob.glob(os.path.join('**', 'file'), recursive=True) - self.assertEqual(len(results), len(set(results))) - results = set(results) - depth = 0 - while results: - path = os.path.join(*(['dir'] + ['link'] * depth + ['file'])) - self.assertIn(path, results) - results.remove(path) - depth += 1 - - results = glob.glob(os.path.join('**', ''), recursive=True) - self.assertEqual(len(results), len(set(results))) - results = set(results) - depth = 0 - while results: - path = os.path.join(*(['dir'] + ['link'] * depth + [''])) - self.assertIn(path, results) - results.remove(path) - depth += 1 + self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\z') + self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\z') if __name__ == "__main__": diff --git a/Lib/test/test_httplib.py 
b/Lib/test/test_httplib.py index 275578d53cb..5267d2fe011 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -2098,7 +2098,6 @@ def test_local_unknown_cert(self): h.request('GET', '/') self.assertEqual(exc_info.exception.reason, 'CERTIFICATE_VERIFY_FAILED') - @unittest.expectedFailure # TODO: RUSTPYTHON http.client.RemoteDisconnected: Remote end closed connection without response def test_local_good_hostname(self): # The (valid) cert validates the HTTPS hostname import ssl @@ -2112,7 +2111,6 @@ def test_local_good_hostname(self): self.addCleanup(resp.close) self.assertEqual(resp.status, 404) - @unittest.expectedFailure # TODO: RUSTPYTHON http.client.RemoteDisconnected: Remote end closed connection without response def test_local_bad_hostname(self): # The (valid) cert doesn't validate the HTTPS hostname import ssl diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 63b778d8b97..f402a34fbdf 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -498,7 +498,6 @@ def test_list_dir_nonascii_dirname(self): @unittest.skipUnless(os_helper.TESTFN_NONASCII, 'need os_helper.TESTFN_NONASCII') - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_list_dir_nonascii_filename(self): filename = os_helper.TESTFN_NONASCII + '.txt' self.check_list_dir_filename(filename) @@ -519,7 +518,6 @@ def test_list_dir_undecodable_dirname(self): 'undecodable name cannot be decoded on win32') @unittest.skipUnless(os_helper.TESTFN_UNDECODABLE, 'need os_helper.TESTFN_UNDECODABLE') - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_list_dir_undecodable_filename(self): filename = os.fsdecode(os_helper.TESTFN_UNDECODABLE) + '.txt' self.check_list_dir_filename(filename) @@ -536,7 +534,6 @@ def test_list_dir_unencodable_dirname(self): 
@unittest.skipUnless(os_helper.TESTFN_UNENCODABLE, 'need os_helper.TESTFN_UNENCODABLE') - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_list_dir_unencodable_filename(self): filename = os_helper.TESTFN_UNENCODABLE + '.txt' self.check_list_dir_filename(filename) @@ -550,7 +547,6 @@ def test_list_dir_escape_dirname(self): self.check_list_dir_dirname(dirname, quotedname=urllib.parse.quote(dirname, safe='&<>\'"')) - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_list_dir_escape_filename(self): # Characters that need special treating in URL or HTML. for name in ('q?', 'f#', '&', '&', '', '"dq"', "'sq'", @@ -618,7 +614,6 @@ def test_get_dir_redirect_location_domain_injection_bug(self): # follows that isn't important in this Location: header. self.assertStartsWith(location, 'https://pypi.org/') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') @@ -670,7 +665,6 @@ def test_get(self): finally: os.chmod(self.tempdir, 0o755) - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') @@ -680,7 +674,6 @@ def test_head(self): self.assertEqual(response.getheader('content-type'), 'application/octet-stream') - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_browser_cache(self): """Check that when a request to /test is sent with the request header If-Modified-Since set to date of last modification, the server returns @@ -699,7 +692,6 @@ def test_browser_cache(self): response = self.request(self.base_url + '/test', headers=headers) 
self.check_status_and_reason(response, HTTPStatus.NOT_MODIFIED) - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_browser_cache_file_changed(self): # with If-Modified-Since earlier than Last-Modified, must return 200 dt = self.last_modif_datetime @@ -711,7 +703,6 @@ def test_browser_cache_file_changed(self): response = self.request(self.base_url + '/test', headers=headers) self.check_status_and_reason(response, HTTPStatus.OK) - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_browser_cache_with_If_None_Match_header(self): # if If-None-Match header is present, ignore If-Modified-Since @@ -730,7 +721,6 @@ def test_invalid_requests(self): response = self.request('/', method='GETs') self.check_status_and_reason(response, HTTPStatus.NOT_IMPLEMENTED) - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_last_modified(self): """Checks that the datetime returned in Last-Modified response header is the actual datetime of last modification, rounded to the second @@ -740,7 +730,6 @@ def test_last_modified(self): last_modif_header = response.headers['Last-modified'] self.assertEqual(last_modif_header, self.last_modif_header) - @unittest.expectedFailure # TODO: RUSTPYTHON; http.client.RemoteDisconnected: Remote end closed connection without response def test_path_without_leading_slash(self): response = self.request(self.tempdir_name + '/test') self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 12b61e76423..25d5f91eb1e 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -22,6 +22,7 @@ import logging.handlers import logging.config + import codecs import configparser import copy @@ -680,9 +681,6 @@ def 
test_pathlike_objects(self): os.unlink(fn) @unittest.skipIf(os.name == 'nt', 'WatchedFileHandler not appropriate for Windows.') - @unittest.skipIf( - support.is_emscripten, "Emscripten cannot fstat unlinked files." - ) @threading_helper.requires_working_threading() @support.requires_resource('walltime') def test_race(self): @@ -1039,6 +1037,7 @@ class TestTCPServer(ControlMixin, ThreadingTCPServer): """ allow_reuse_address = True + allow_reuse_port = False def __init__(self, addr, handler, poll_interval=0.5, bind_and_activate=True): @@ -1137,7 +1136,7 @@ def test_basic(self): self.assertEqual(mailfrom, 'me') self.assertEqual(rcpttos, ['you']) self.assertIn('\nSubject: Log\n', data) - self.assertTrue(data.endswith('\n\nHello \u2713')) + self.assertEndsWith(data, '\n\nHello \u2713') h.close() def process_message(self, *args): @@ -2098,6 +2097,18 @@ def test_udp_reconnection(self): self.handled.wait(support.LONG_TIMEOUT) self.assertEqual(self.log_output, b'<11>sp\xc3\xa4m\x00') + @patch('socket.socket') + def test_tcp_timeout(self, mock_socket): + instance_mock_sock = mock_socket.return_value + instance_mock_sock.connect.side_effect = socket.timeout + + with self.assertRaises(socket.timeout): + logging.handlers.SysLogHandler(address=('localhost', 514), + socktype=socket.SOCK_STREAM, + timeout=1) + + instance_mock_sock.close.assert_called() + @unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required") class UnixSysLogHandlerTest(SysLogHandlerTest): @@ -3271,6 +3282,37 @@ def format(self, record): } } + # Remove when deprecation ends. 
+ class DeprecatedStrmHandler(logging.StreamHandler): + def __init__(self, strm=None): + super().__init__(stream=strm) + + config_custom_handler_with_deprecated_strm_arg = { + "version": 1, + "formatters": { + "form1": { + "format": "%(levelname)s ++ %(message)s", + }, + }, + "handlers": { + "hand1": { + "class": DeprecatedStrmHandler, + "formatter": "form1", + "level": "NOTSET", + "stream": "ext://sys.stdout", + }, + }, + "loggers": { + "compiler.parser": { + "level": "DEBUG", + "handlers": ["hand1"], + }, + }, + "root": { + "level": "WARNING", + }, + } + def apply_config(self, conf): logging.config.dictConfig(conf) @@ -3360,6 +3402,15 @@ def test_config5_ok(self): self.test_config1_ok(config=self.config5) self.check_handler('hand1', CustomHandler) + def test_deprecation_warning_custom_handler_with_strm_arg(self): + msg = ( + "Support for custom logging handlers with the 'strm' argument " + "is deprecated and scheduled for removal in Python 3.16. " + "Define handlers with the 'stream' argument instead." 
+ ) + with self.assertWarnsRegex(DeprecationWarning, msg): + self.test_config1_ok(config=self.config_custom_handler_with_deprecated_strm_arg) + def test_config6_failure(self): self.assertRaises(Exception, self.apply_config, self.config6) @@ -3527,7 +3578,7 @@ def test_config14_ok(self): self.assertEqual(h.foo, 'bar') self.assertEqual(h.terminator, '!\n') logging.warning('Exclamation') - self.assertTrue(output.getvalue().endswith('Exclamation!\n')) + self.assertEndsWith(output.getvalue(), 'Exclamation!\n') def test_config15_ok(self): @@ -4057,9 +4108,9 @@ def _mpinit_issue121723(qspec, message_to_log): # log a message (this creates a record put in the queue) logging.getLogger().info(message_to_log) - @unittest.skip('TODO: RUSTPYTHON, flaky EOFError') + @unittest.skip('TODO: RUSTPYTHON; flaky EOFError') # TODO: RUSTPYTHON - SemLock not implemented on Windows - @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON") + @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON') @skip_if_tsan_fork @support.requires_subprocess() def test_multiprocessing_queues(self): @@ -4120,7 +4171,7 @@ def test_90195(self): self.assertFalse(logger.disabled) # TODO: RUSTPYTHON - SemLock not implemented on Windows - @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON") + @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON') def test_111615(self): # See gh-111615 import_helper.import_module('_multiprocessing') # see gh-113692 @@ -4289,7 +4340,7 @@ def test_queue_handler(self): msg = self.next_message() self.que_logger.warning(msg) data = self.queue.get_nowait() - self.assertTrue(isinstance(data, logging.LogRecord)) + self.assertIsInstance(data, logging.LogRecord) self.assertEqual(data.name, self.que_logger.name) self.assertEqual((data.msg, data.args), (msg, None)) @@ -4340,17 +4391,27 @@ def test_queue_listener(self): self.assertTrue(handler.matches(levelno=logging.CRITICAL, message='6')) handler.close() + def test_queue_listener_context_manager(self): + handler = 
TestHandler(support.Matcher()) + with logging.handlers.QueueListener(self.queue, handler) as listener: + self.assertIsInstance(listener, logging.handlers.QueueListener) + self.assertIsNotNone(listener._thread) + self.assertIsNone(listener._thread) + # doesn't hurt to call stop() more than once. listener.stop() self.assertIsNone(listener._thread) def test_queue_listener_multi_start(self): handler = TestHandler(support.Matcher()) - listener = logging.handlers.QueueListener(self.queue, handler) + with logging.handlers.QueueListener(self.queue, handler) as listener: + self.assertRaises(RuntimeError, listener.start) + + with listener: + self.assertRaises(RuntimeError, listener.start) + listener.start() - self.assertRaises(RuntimeError, listener.start) listener.stop() - self.assertIsNone(listener._thread) def test_queue_listener_with_StreamHandler(self): # Test that traceback and stack-info only appends once (bpo-34334, bpo-46755). @@ -4889,19 +4950,19 @@ def test_formatting(self): r.addHandler(h) try: raise RuntimeError('deliberate mistake') - except: + except RuntimeError: logging.exception('failed', stack_info=True) r.removeHandler(h) h.close() r = h.records[0] - self.assertTrue(r.exc_text.startswith('Traceback (most recent ' - 'call last):\n')) - self.assertTrue(r.exc_text.endswith('\nRuntimeError: ' - 'deliberate mistake')) - self.assertTrue(r.stack_info.startswith('Stack (most recent ' - 'call last):\n')) - self.assertTrue(r.stack_info.endswith('logging.exception(\'failed\', ' - 'stack_info=True)')) + self.assertStartsWith(r.exc_text, + 'Traceback (most recent call last):\n') + self.assertEndsWith(r.exc_text, + '\nRuntimeError: deliberate mistake') + self.assertStartsWith(r.stack_info, + 'Stack (most recent call last):\n') + self.assertEndsWith(r.stack_info, + "logging.exception('failed', stack_info=True)") class LastResortTest(BaseTest): @@ -5246,8 +5307,8 @@ class LogRecordTest(BaseTest): def test_str_rep(self): r = logging.makeLogRecord({}) s = str(r) - 
self.assertTrue(s.startswith('')) + self.assertStartsWith(s, '') def test_dict_arg(self): h = RecordingHandler() @@ -5359,6 +5420,8 @@ async def _make_record_async(self, assertion): r = logging.makeLogRecord({}) assertion(r.taskName) + # TODO: RUSTPYTHON + @unittest.expectedFailure @support.requires_working_socket() def test_taskName_with_asyncio_imported(self): try: @@ -5369,8 +5432,10 @@ def test_taskName_with_asyncio_imported(self): logging.logAsyncioTasks = False runner.run(make_record(self.assertIsNone)) finally: - asyncio.set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) + # TODO: RUSTPYTHON + @unittest.expectedFailure @support.requires_working_socket() def test_taskName_without_asyncio_imported(self): try: @@ -5381,7 +5446,7 @@ def test_taskName_without_asyncio_imported(self): logging.logAsyncioTasks = False runner.run(make_record(self.assertIsNone)) finally: - asyncio.set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class BasicConfigTest(unittest.TestCase): @@ -5660,6 +5725,8 @@ def dummy_handle_error(record): # didn't write anything due to the encoding error self.assertEqual(data, r'') + # TODO: RUSTPYTHON + @unittest.expectedFailure @support.requires_working_socket() def test_log_taskName(self): async def log_record(): @@ -5685,7 +5752,7 @@ async def log_record(): data = f.read().strip() self.assertRegex(data, r'Task-\d+ - hello world') finally: - asyncio.set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) if handler: handler.close() @@ -5748,7 +5815,7 @@ def cleanup(): self.addCleanup(cleanup) self.addCleanup(logging.shutdown) - self.adapter = logging.LoggerAdapter(logger=self.logger, extra=None) + self.adapter = logging.LoggerAdapter(logger=self.logger) def test_exception(self): msg = 'testing exception: %r' @@ -5897,14 +5964,14 @@ def test_extra_in_records(self): self.adapter.critical('foo should be here') self.assertEqual(len(self.recording.records), 1) record = 
self.recording.records[0] - self.assertTrue(hasattr(record, 'foo')) + self.assertHasAttr(record, 'foo') self.assertEqual(record.foo, '1') def test_extra_not_merged_by_default(self): self.adapter.critical('foo should NOT be here', extra={'foo': 'nope'}) self.assertEqual(len(self.recording.records), 1) record = self.recording.records[0] - self.assertFalse(hasattr(record, 'foo')) + self.assertNotHasAttr(record, 'foo') def test_extra_merged(self): self.adapter = logging.LoggerAdapter(logger=self.logger, @@ -5914,11 +5981,23 @@ def test_extra_merged(self): self.adapter.critical('foo and bar should be here', extra={'bar': '2'}) self.assertEqual(len(self.recording.records), 1) record = self.recording.records[0] - self.assertTrue(hasattr(record, 'foo')) - self.assertTrue(hasattr(record, 'bar')) + self.assertHasAttr(record, 'foo') + self.assertHasAttr(record, 'bar') self.assertEqual(record.foo, '1') self.assertEqual(record.bar, '2') + self.adapter.critical('no extra') # should not fail + self.assertEqual(len(self.recording.records), 2) + record = self.recording.records[-1] + self.assertEqual(record.foo, '1') + self.assertNotHasAttr(record, 'bar') + + self.adapter.critical('none extra', extra=None) # should not fail + self.assertEqual(len(self.recording.records), 3) + record = self.recording.records[-1] + self.assertEqual(record.foo, '1') + self.assertNotHasAttr(record, 'bar') + def test_extra_merged_log_call_has_precedence(self): self.adapter = logging.LoggerAdapter(logger=self.logger, extra={'foo': '1'}, @@ -5927,9 +6006,28 @@ def test_extra_merged_log_call_has_precedence(self): self.adapter.critical('foo shall be min', extra={'foo': '2'}) self.assertEqual(len(self.recording.records), 1) record = self.recording.records[0] - self.assertTrue(hasattr(record, 'foo')) + self.assertHasAttr(record, 'foo') self.assertEqual(record.foo, '2') + def test_extra_merged_without_extra(self): + self.adapter = logging.LoggerAdapter(logger=self.logger, + merge_extra=True) + + 
self.adapter.critical('foo should be here', extra={'foo': '1'}) + self.assertEqual(len(self.recording.records), 1) + record = self.recording.records[-1] + self.assertEqual(record.foo, '1') + + self.adapter.critical('no extra') # should not fail + self.assertEqual(len(self.recording.records), 2) + record = self.recording.records[-1] + self.assertNotHasAttr(record, 'foo') + + self.adapter.critical('none extra', extra=None) # should not fail + self.assertEqual(len(self.recording.records), 3) + record = self.recording.records[-1] + self.assertNotHasAttr(record, 'foo') + class PrefixAdapter(logging.LoggerAdapter): prefix = 'Adapter' @@ -6641,18 +6739,19 @@ def namer(filename): p = '%s.log.' % prefix for c in candidates: d, fn = os.path.split(c) - self.assertTrue(fn.startswith(p)) + self.assertStartsWith(fn, p) elif prefix.startswith('d.e'): for c in candidates: d, fn = os.path.split(c) - self.assertTrue(fn.endswith('.log'), fn) - self.assertTrue(fn.startswith(prefix + '.') and - fn[len(prefix) + 2].isdigit()) + self.assertEndsWith(fn, '.log') + self.assertStartsWith(fn, prefix + '.') + self.assertTrue(fn[len(prefix) + 2].isdigit()) elif prefix == 'g': for c in candidates: d, fn = os.path.split(c) - self.assertTrue(fn.endswith('.oldlog')) - self.assertTrue(fn.startswith('g') and fn[1].isdigit()) + self.assertEndsWith(fn, '.oldlog') + self.assertStartsWith(fn, 'g') + self.assertTrue(fn[1].isdigit()) def test_compute_files_to_delete_same_filename_different_extensions(self): # See GH-93205 for background @@ -6687,10 +6786,10 @@ def test_compute_files_to_delete_same_filename_different_extensions(self): rotator = rotators[i] candidates = rotator.getFilesToDelete() self.assertEqual(len(candidates), n_files - backupCount, candidates) - matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\Z") + matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\z") for c in candidates: d, fn = os.path.split(c) - self.assertTrue(fn.startswith(prefix+'.')) + 
self.assertStartsWith(fn, prefix+'.') suffix = fn[(len(prefix)+1):] self.assertRegex(suffix, matcher) diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 23092ffd0f3..c1806b1c133 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -1,12 +1,18 @@ import io -import locale import mimetypes -import pathlib +import os +import shlex import sys -import unittest - -from test import support +import unittest.mock from platform import win32_edition +from test import support +from test.support import cpython_only, force_not_colorized, os_helper +from test.support.import_helper import ensure_lazy_imports + +try: + import _winapi +except ImportError: + _winapi = None def setUpModule(): @@ -28,15 +34,30 @@ class MimeTypesTestCase(unittest.TestCase): def setUp(self): self.db = mimetypes.MimeTypes() + def test_case_sensitivity(self): + eq = self.assertEqual + eq(self.db.guess_file_type("foobar.html"), ("text/html", None)) + eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None)) + eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None)) + eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None)) + eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip")) + eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip")) + eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress")) + eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress")) + eq(self.db.guess_file_type("foobar.tar.z"), (None, None)) + eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None)) + def test_default_data(self): eq = self.assertEqual - eq(self.db.guess_type("foo.html"), ("text/html", None)) - eq(self.db.guess_type("foo.HTML"), ("text/html", None)) - eq(self.db.guess_type("foo.tgz"), ("application/x-tar", "gzip")) - 
eq(self.db.guess_type("foo.tar.gz"), ("application/x-tar", "gzip")) - eq(self.db.guess_type("foo.tar.Z"), ("application/x-tar", "compress")) - eq(self.db.guess_type("foo.tar.bz2"), ("application/x-tar", "bzip2")) - eq(self.db.guess_type("foo.tar.xz"), ("application/x-tar", "xz")) + eq(self.db.guess_file_type("foo.html"), ("text/html", None)) + eq(self.db.guess_file_type("foo.HTML"), ("text/html", None)) + eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress")) + eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2")) + eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz")) def test_data_urls(self): eq = self.assertEqual @@ -50,12 +71,10 @@ def test_file_parsing(self): eq = self.assertEqual sio = io.StringIO("x-application/x-unittest pyunit\n") self.db.readfp(sio) - eq(self.db.guess_type("foo.pyunit"), + eq(self.db.guess_file_type("foo.pyunit"), ("x-application/x-unittest", None)) eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_read_mime_types(self): eq = self.assertEqual @@ -64,32 +83,40 @@ def test_read_mime_types(self): with os_helper.temp_dir() as directory: data = "x-application/x-unittest pyunit\n" - file = pathlib.Path(directory, "sample.mimetype") - file.write_text(data) + file = os.path.join(directory, "sample.mimetype") + with open(file, 'w', encoding="utf-8") as f: + f.write(data) mime_dict = mimetypes.read_mime_types(file) eq(mime_dict[".pyunit"], "x-application/x-unittest") + data = "x-application/x-unittest2 pyunit2\n" + file = os.path.join(directory, "sample2.mimetype") + with open(file, 'w', encoding="utf-8") as f: + f.write(data) + mime_dict = mimetypes.read_mime_types(os_helper.FakePath(file)) + eq(mime_dict[".pyunit2"], "x-application/x-unittest2") + # bpo-41048: 
read_mime_types should read the rule file with 'utf-8' encoding. # Not with locale encoding. _bootlocale has been imported because io.open(...) # uses it. - with os_helper.temp_dir() as directory: - data = "application/no-mans-land Fran\u00E7ais" - file = pathlib.Path(directory, "sample.mimetype") - file.write_text(data, encoding='utf-8') - import _bootlocale - with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'): - mime_dict = mimetypes.read_mime_types(file) - eq(mime_dict[".Français"], "application/no-mans-land") + data = "application/no-mans-land Fran\u00E7ais" + filename = "filename" + fp = io.StringIO(data) + with unittest.mock.patch.object(mimetypes, 'open', + return_value=fp) as mock_open: + mime_dict = mimetypes.read_mime_types(filename) + mock_open.assert_called_with(filename, encoding='utf-8') + eq(mime_dict[".Français"], "application/no-mans-land") def test_non_standard_types(self): eq = self.assertEqual # First try strict - eq(self.db.guess_type('foo.xul', strict=True), (None, None)) + eq(self.db.guess_file_type('foo.xul', strict=True), (None, None)) eq(self.db.guess_extension('image/jpg', strict=True), None) # And then non-strict - eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None)) - eq(self.db.guess_type('foo.XUL', strict=False), ('text/xul', None)) - eq(self.db.guess_type('foo.invalid', strict=False), (None, None)) + eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None)) + eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None)) + eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None)) eq(self.db.guess_extension('image/jpg', strict=False), '.jpg') eq(self.db.guess_extension('image/JPG', strict=False), '.jpg') @@ -99,37 +126,77 @@ def test_filename_with_url_delimiters(self): # compared to when interpreted as filename because of the semicolon. 
eq = self.assertEqual gzip_expected = ('application/x-tar', 'gzip') - eq(self.db.guess_type(";1.tar.gz"), gzip_expected) - eq(self.db.guess_type("?1.tar.gz"), gzip_expected) - eq(self.db.guess_type("#1.tar.gz"), gzip_expected) - eq(self.db.guess_type("#1#.tar.gz"), gzip_expected) - eq(self.db.guess_type(";1#.tar.gz"), gzip_expected) - eq(self.db.guess_type(";&1=123;?.tar.gz"), gzip_expected) - eq(self.db.guess_type("?k1=v1&k2=v2.tar.gz"), gzip_expected) + for name in ( + ';1.tar.gz', + '?1.tar.gz', + '#1.tar.gz', + '#1#.tar.gz', + ';1#.tar.gz', + ';&1=123;?.tar.gz', + '?k1=v1&k2=v2.tar.gz', + ): + for prefix in ('', '/', '\\', + 'c:', 'c:/', 'c:\\', 'c:/d/', 'c:\\d\\', + '//share/server/', '\\\\share\\server\\'): + path = prefix + name + with self.subTest(path=path): + eq(self.db.guess_file_type(path), gzip_expected) + eq(self.db.guess_type(path), gzip_expected) + expected = (None, None) if os.name == 'nt' else gzip_expected + for prefix in ('//', '\\\\', '//share/', '\\\\share\\'): + path = prefix + name + with self.subTest(path=path): + eq(self.db.guess_file_type(path), expected) + eq(self.db.guess_type(path), expected) + eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) + eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip')) + eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip')) + expected = (None, 'gzip') if os.name == 'nt' else gzip_expected + eq(self.db.guess_file_type(r'foo\.tar.gz'), expected) + eq(self.db.guess_type(r'foo\.tar.gz'), expected) + eq(self.db.guess_type(r'scheme:foo\.tar.gz'), gzip_expected) + + def test_url(self): + result = self.db.guess_type('http://example.com/host.html') + result = self.db.guess_type('http://host.html') + msg = 'URL only has a host name, not a file' + self.assertSequenceEqual(result, (None, None), msg) + result = self.db.guess_type('http://example.com/host.html') + msg = 'Should be text/html' + self.assertSequenceEqual(result, 
('text/html', None), msg) + result = self.db.guess_type('http://example.com/host.html#x.tar') + self.assertSequenceEqual(result, ('text/html', None)) + result = self.db.guess_type('http://example.com/host.html?q=x.tar') + self.assertSequenceEqual(result, ('text/html', None)) + def test_guess_all_types(self): - eq = self.assertEqual - unless = self.assertTrue # First try strict. Use a set here for testing the results because if # test_urllib2 is run before test_mimetypes, global state is modified # such that the 'all' set will have more items in it. - all = set(self.db.guess_all_extensions('text/plain', strict=True)) - unless(all >= set(['.bat', '.c', '.h', '.ksh', '.pl', '.txt'])) + all = self.db.guess_all_extensions('text/plain', strict=True) + self.assertTrue(set(all) >= {'.bat', '.c', '.h', '.ksh', '.pl', '.txt'}) + self.assertEqual(len(set(all)), len(all)) # no duplicates # And now non-strict all = self.db.guess_all_extensions('image/jpg', strict=False) - all.sort() - eq(all, ['.jpg']) + self.assertEqual(all, ['.jpg']) # And now for no hits all = self.db.guess_all_extensions('image/jpg', strict=True) - eq(all, []) + self.assertEqual(all, []) + # And now for type existing in both strict and non-strict mappings. 
+ self.db.add_type('test-type', '.strict-ext') + self.db.add_type('test-type', '.non-strict-ext', strict=False) + all = self.db.guess_all_extensions('test-type', strict=False) + self.assertEqual(all, ['.strict-ext', '.non-strict-ext']) + all = self.db.guess_all_extensions('test-type') + self.assertEqual(all, ['.strict-ext']) + # Test that changing the result list does not affect the global state + all.append('.no-such-ext') + all = self.db.guess_all_extensions('test-type') + self.assertNotIn('.no-such-ext', all) def test_encoding(self): - getpreferredencoding = locale.getpreferredencoding - self.addCleanup(setattr, locale, 'getpreferredencoding', - getpreferredencoding) - locale.getpreferredencoding = lambda: 'ascii' - filename = support.findfile("mime.types") mimes = mimetypes.MimeTypes([filename]) exts = mimes.guess_all_extensions('application/vnd.geocube+xml', @@ -146,29 +213,110 @@ def test_init_reinitializes(self): # Poison should be gone. self.assertEqual(mimetypes.guess_extension('foo/bar'), None) + @unittest.skipIf(sys.platform.startswith("win"), "Non-Windows only") + def test_guess_known_extensions(self): + # Issue 37529 + # The test fails on Windows because Windows adds mime types from the Registry + # and that creates some duplicates. 
+ from mimetypes import types_map + for v in types_map.values(): + self.assertIsNotNone(mimetypes.guess_extension(v)) + def test_preferred_extension(self): def check_extensions(): - self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin') - self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps') - self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u') - self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls') - self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt') - self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi') - self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff') - self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl') - self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3') - self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg') - self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff') - self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml') - self.assertEqual(mimetypes.guess_extension('text/html'), '.html') - self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt') - self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg') - self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov') + for mime_type, ext in ( + ("application/epub+zip", ".epub"), + ("application/octet-stream", ".bin"), + ("application/gzip", ".gz"), + ("application/ogg", ".ogx"), + ("application/postscript", ".ps"), + ("application/vnd.apple.mpegurl", ".m3u"), + ("application/vnd.ms-excel", ".xls"), + ("application/vnd.ms-fontobject", ".eot"), + ("application/vnd.ms-powerpoint", ".ppt"), + ("application/vnd.oasis.opendocument.graphics", ".odg"), + ("application/vnd.oasis.opendocument.presentation", ".odp"), + ("application/vnd.oasis.opendocument.spreadsheet", ".ods"), + ("application/vnd.oasis.opendocument.text", ".odt"), 
+ ("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx"), + ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx"), + ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"), + ("application/vnd.rar", ".rar"), + ("application/x-7z-compressed", ".7z"), + ("application/x-debian-package", ".deb"), + ("application/x-httpd-php", ".php"), + ("application/x-rpm", ".rpm"), + ("application/x-texinfo", ".texi"), + ("application/x-troff", ".roff"), + ("application/xml", ".xsl"), + ("application/yaml", ".yaml"), + ("audio/flac", ".flac"), + ("audio/matroska", ".mka"), + ("audio/mp4", ".m4a"), + ("audio/mpeg", ".mp3"), + ("audio/ogg", ".ogg"), + ("audio/vnd.wave", ".wav"), + ("audio/webm", ".weba"), + ("font/otf", ".otf"), + ("font/ttf", ".ttf"), + ("font/woff", ".woff"), + ("font/woff2", ".woff2"), + ("image/avif", ".avif"), + ("image/emf", ".emf"), + ("image/fits", ".fits"), + ("image/g3fax", ".g3"), + ("image/jp2", ".jp2"), + ("image/jpeg", ".jpg"), + ("image/jpm", ".jpm"), + ("image/t38", ".t38"), + ("image/tiff", ".tiff"), + ("image/tiff-fx", ".tfx"), + ("image/webp", ".webp"), + ("image/wmf", ".wmf"), + ("message/rfc822", ".eml"), + ("model/gltf+json", ".gltf"), + ("model/gltf-binary", ".glb"), + ("model/stl", ".stl"), + ("text/html", ".html"), + ("text/plain", ".txt"), + ("text/rtf", ".rtf"), + ("text/x-rst", ".rst"), + ("video/matroska", ".mkv"), + ("video/matroska-3d", ".mk3d"), + ("video/mpeg", ".mpeg"), + ("video/ogg", ".ogv"), + ("video/quicktime", ".mov"), + ("video/vnd.avi", ".avi"), + ("video/x-m4v", ".m4v"), + ("video/x-ms-wmv", ".wmv"), + ): + with self.subTest(mime_type=mime_type, ext=ext): + self.assertEqual(mimetypes.guess_extension(mime_type), ext) check_extensions() mimetypes.init() check_extensions() + def test_guess_file_type(self): + def check_file_type(): + for mime_type, ext in ( + ("application/yaml", ".yaml"), + ("application/yaml", ".yml"), + ("audio/mpeg", 
".mp2"), + ("audio/mpeg", ".mp3"), + ("video/mpeg", ".m1v"), + ("video/mpeg", ".mpe"), + ("video/mpeg", ".mpeg"), + ("video/mpeg", ".mpg"), + ): + with self.subTest(mime_type=mime_type, ext=ext): + result, _ = mimetypes.guess_file_type(f"filename{ext}") + self.assertEqual(result, mime_type) + + check_file_type() + mimetypes.init() + check_file_type() + def test_init_stability(self): mimetypes.init() @@ -189,27 +337,59 @@ def test_init_stability(self): def test_path_like_ob(self): filename = "LICENSE.txt" - filepath = pathlib.Path(filename) - filepath_with_abs_dir = pathlib.Path('/dir/'+filename) - filepath_relative = pathlib.Path('../dir/'+filename) - path_dir = pathlib.Path('./') + filepath = os_helper.FakePath(filename) + filepath_with_abs_dir = os_helper.FakePath('/dir/'+filename) + filepath_relative = os_helper.FakePath('../dir/'+filename) + path_dir = os_helper.FakePath('./') - expected = self.db.guess_type(filename) + expected = self.db.guess_file_type(filename) + self.assertEqual(self.db.guess_file_type(filepath), expected) self.assertEqual(self.db.guess_type(filepath), expected) + self.assertEqual(self.db.guess_file_type( + filepath_with_abs_dir), expected) self.assertEqual(self.db.guess_type( filepath_with_abs_dir), expected) + self.assertEqual(self.db.guess_file_type(filepath_relative), expected) self.assertEqual(self.db.guess_type(filepath_relative), expected) + + self.assertEqual(self.db.guess_file_type(path_dir), (None, None)) self.assertEqual(self.db.guess_type(path_dir), (None, None)) + def test_bytes_path(self): + self.assertEqual(self.db.guess_file_type(b'foo.html'), + self.db.guess_file_type('foo.html')) + self.assertEqual(self.db.guess_file_type(b'foo.tar.gz'), + self.db.guess_file_type('foo.tar.gz')) + self.assertEqual(self.db.guess_file_type(b'foo.tgz'), + self.db.guess_file_type('foo.tgz')) + def test_keywords_args_api(self): + self.assertEqual(self.db.guess_file_type( + path="foo.html", strict=True), ("text/html", None)) 
self.assertEqual(self.db.guess_type( - url="foo.html", strict=True), ("text/html", None)) + url="scheme:foo.html", strict=True), ("text/html", None)) self.assertEqual(self.db.guess_all_extensions( type='image/jpg', strict=True), []) self.assertEqual(self.db.guess_extension( type='image/jpg', strict=False), '.jpg') + def test_added_types_are_used(self): + mimetypes.add_type('testing/default-type', '') + mime_type, _ = mimetypes.guess_type('') + self.assertEqual(mime_type, 'testing/default-type') + + mime_type, _ = mimetypes.guess_type('test.myext') + self.assertEqual(mime_type, None) + + mimetypes.add_type('testing/type', '.myext') + mime_type, _ = mimetypes.guess_type('test.myext') + self.assertEqual(mime_type, 'testing/type') + + def test_add_type_with_undotted_extension_deprecated(self): + with self.assertWarns(DeprecationWarning): + mimetypes.add_type("testing/type", "undotted") + @unittest.skipUnless(sys.platform.startswith("win"), "Windows only") class Win32MimeTypesTestCase(unittest.TestCase): @@ -236,58 +416,94 @@ def test_registry_parsing(self): eq(self.db.guess_type("image.jpg"), ("image/jpeg", None)) eq(self.db.guess_type("image.png"), ("image/png", None)) + @unittest.skipIf(not hasattr(_winapi, "_mimetypes_read_windows_registry"), + "read_windows_registry accelerator unavailable") + def test_registry_accelerator(self): + from_accel = {} + from_reg = {} + _winapi._mimetypes_read_windows_registry( + lambda v, k: from_accel.setdefault(k, set()).add(v) + ) + mimetypes.MimeTypes._read_windows_registry( + lambda v, k: from_reg.setdefault(k, set()).add(v) + ) + self.assertEqual(list(from_reg), list(from_accel)) + for k in from_reg: + self.assertEqual(from_reg[k], from_accel[k]) + class MiscTestCase(unittest.TestCase): def test__all__(self): support.check__all__(self, mimetypes) - -class MimetypesCliTestCase(unittest.TestCase): - - def mimetypes_cmd(self, *args, **kwargs): - support.patch(self, sys, "argv", [sys.executable, *args]) - with 
support.captured_stdout() as output: - mimetypes._main() - return output.getvalue().strip() - - def test_help_option(self): - support.patch(self, sys, "argv", [sys.executable, "-h"]) - with support.captured_stdout() as output: - with self.assertRaises(SystemExit) as cm: - mimetypes._main() - - self.assertIn("Usage: mimetypes.py", output.getvalue()) - self.assertEqual(cm.exception.code, 0) - - def test_invalid_option(self): - support.patch(self, sys, "argv", [sys.executable, "--invalid"]) - with support.captured_stdout() as output: - with self.assertRaises(SystemExit) as cm: - mimetypes._main() - - self.assertIn("Usage: mimetypes.py", output.getvalue()) - self.assertEqual(cm.exception.code, 1) - - def test_guess_extension(self): - eq = self.assertEqual - - extension = self.mimetypes_cmd("-l", "-e", "image/jpg") - eq(extension, ".jpg") - - extension = self.mimetypes_cmd("-e", "image/jpg") - eq(extension, "I don't know anything about type image/jpg") - - extension = self.mimetypes_cmd("-e", "image/jpeg") - eq(extension, ".jpg") - - def test_guess_type(self): - eq = self.assertEqual - - type_info = self.mimetypes_cmd("-l", "foo.pic") - eq(type_info, "type: image/pict encoding: None") - - type_info = self.mimetypes_cmd("foo.pic") - eq(type_info, "I don't know anything about type foo.pic") + @cpython_only + def test_lazy_import(self): + ensure_lazy_imports("mimetypes", {"os", "posixpath", "urllib.parse", "argparse"}) + + +class CommandLineTest(unittest.TestCase): + @force_not_colorized + def test_parse_args(self): + args, help_text = mimetypes._parse_args("-h") + self.assertTrue(help_text.startswith("usage: ")) + + args, help_text = mimetypes._parse_args("--invalid") + self.assertTrue(help_text.startswith("usage: ")) + + args, _ = mimetypes._parse_args(shlex.split("-l -e image/jpg")) + self.assertTrue(args.extension) + self.assertTrue(args.lenient) + self.assertEqual(args.type, ["image/jpg"]) + + args, _ = mimetypes._parse_args(shlex.split("-e image/jpg")) + 
self.assertTrue(args.extension) + self.assertFalse(args.lenient) + self.assertEqual(args.type, ["image/jpg"]) + + args, _ = mimetypes._parse_args(shlex.split("-l foo.webp")) + self.assertFalse(args.extension) + self.assertTrue(args.lenient) + self.assertEqual(args.type, ["foo.webp"]) + + args, _ = mimetypes._parse_args(shlex.split("foo.pic")) + self.assertFalse(args.extension) + self.assertFalse(args.lenient) + self.assertEqual(args.type, ["foo.pic"]) + + def test_multiple_inputs(self): + result = "\n".join(mimetypes._main(shlex.split("foo.pdf foo.png"))) + self.assertEqual( + result, + "type: application/pdf encoding: None\n" + "type: image/png encoding: None" + ) + + def test_multiple_inputs_error(self): + result = "\n".join(mimetypes._main(shlex.split("foo.pdf foo.bar_ext"))) + self.assertEqual( + result, + "type: application/pdf encoding: None\n" + "error: media type unknown for foo.bar_ext" + ) + + + def test_invocation(self): + for command, expected in [ + ("-l -e image/jpg", ".jpg"), + ("-e image/jpeg", ".jpg"), + ("-l foo.webp", "type: image/webp encoding: None"), + ]: + result = "\n".join(mimetypes._main(shlex.split(command))) + self.assertEqual(result, expected) + + def test_invocation_error(self): + for command, expected in [ + ("-e image/jpg", "error: unknown type image/jpg"), + ("foo.bar_ext", "error: media type unknown for foo.bar_ext"), + ]: + with self.subTest(command=command): + result = "\n".join(mimetypes._main(shlex.split(command))) + self.assertEqual(result, expected) if __name__ == "__main__": diff --git a/Lib/test/test_pathlib/support/__init__.py b/Lib/test/test_pathlib/support/__init__.py new file mode 100644 index 00000000000..dcaef654d77 --- /dev/null +++ b/Lib/test/test_pathlib/support/__init__.py @@ -0,0 +1,2 @@ +# Set to 'True' if the tests are run against the pathlib-abc PyPI package. 
+is_pypi = False diff --git a/Lib/test/test_pathlib/support/lexical_path.py b/Lib/test/test_pathlib/support/lexical_path.py new file mode 100644 index 00000000000..f29a521af9b --- /dev/null +++ b/Lib/test/test_pathlib/support/lexical_path.py @@ -0,0 +1,51 @@ +""" +Simple implementation of JoinablePath, for use in pathlib tests. +""" + +import ntpath +import os.path +import posixpath + +from . import is_pypi + +if is_pypi: + from pathlib_abc import _JoinablePath +else: + from pathlib.types import _JoinablePath + + +class LexicalPath(_JoinablePath): + __slots__ = ('_segments',) + parser = os.path + + def __init__(self, *pathsegments): + self._segments = pathsegments + + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if not isinstance(other, LexicalPath): + return NotImplemented + return str(self) == str(other) + + def __str__(self): + if not self._segments: + return '' + return self.parser.join(*self._segments) + + def __repr__(self): + return f'{type(self).__name__}({str(self)!r})' + + def with_segments(self, *pathsegments): + return type(self)(*pathsegments) + + +class LexicalPosixPath(LexicalPath): + __slots__ = () + parser = posixpath + + +class LexicalWindowsPath(LexicalPath): + __slots__ = () + parser = ntpath diff --git a/Lib/test/test_pathlib/support/local_path.py b/Lib/test/test_pathlib/support/local_path.py new file mode 100644 index 00000000000..d481fd45ead --- /dev/null +++ b/Lib/test/test_pathlib/support/local_path.py @@ -0,0 +1,177 @@ +""" +Implementations of ReadablePath and WritablePath for local paths, for use in +pathlib tests. + +LocalPathGround is also defined here. It helps establish the "ground truth" +about local paths in tests. +""" + +import os + +from . 
import is_pypi +from .lexical_path import LexicalPath + +if is_pypi: + from shutil import rmtree + from pathlib_abc import PathInfo, _ReadablePath, _WritablePath + can_symlink = True + testfn = "TESTFN" +else: + from pathlib.types import PathInfo, _ReadablePath, _WritablePath + from test.support import os_helper + can_symlink = os_helper.can_symlink() + testfn = os_helper.TESTFN + rmtree = os_helper.rmtree + + +class LocalPathGround: + can_symlink = can_symlink + + def __init__(self, path_cls): + self.path_cls = path_cls + + def setup(self, local_suffix=""): + root = self.path_cls(testfn + local_suffix) + os.mkdir(root) + return root + + def teardown(self, root): + rmtree(root) + + def create_file(self, p, data=b''): + with open(p, 'wb') as f: + f.write(data) + + def create_dir(self, p): + os.mkdir(p) + + def create_symlink(self, p, target): + os.symlink(target, p) + + def create_hierarchy(self, p): + os.mkdir(os.path.join(p, 'dirA')) + os.mkdir(os.path.join(p, 'dirB')) + os.mkdir(os.path.join(p, 'dirC')) + os.mkdir(os.path.join(p, 'dirC', 'dirD')) + with open(os.path.join(p, 'fileA'), 'wb') as f: + f.write(b"this is file A\n") + with open(os.path.join(p, 'dirB', 'fileB'), 'wb') as f: + f.write(b"this is file B\n") + with open(os.path.join(p, 'dirC', 'fileC'), 'wb') as f: + f.write(b"this is file C\n") + with open(os.path.join(p, 'dirC', 'novel.txt'), 'wb') as f: + f.write(b"this is a novel\n") + with open(os.path.join(p, 'dirC', 'dirD', 'fileD'), 'wb') as f: + f.write(b"this is file D\n") + if self.can_symlink: + # Relative symlinks. + os.symlink('fileA', os.path.join(p, 'linkA')) + os.symlink('non-existing', os.path.join(p, 'brokenLink')) + os.symlink('dirB', + os.path.join(p, 'linkB'), + target_is_directory=True) + os.symlink(os.path.join('..', 'dirB'), + os.path.join(p, 'dirA', 'linkC'), + target_is_directory=True) + # Broken symlink (pointing to itself). 
+ os.symlink('brokenLinkLoop', os.path.join(p, 'brokenLinkLoop')) + + isdir = staticmethod(os.path.isdir) + isfile = staticmethod(os.path.isfile) + islink = staticmethod(os.path.islink) + readlink = staticmethod(os.readlink) + + def readtext(self, p): + with open(p, 'r', encoding='utf-8') as f: + return f.read() + + def readbytes(self, p): + with open(p, 'rb') as f: + return f.read() + + +class LocalPathInfo(PathInfo): + """ + Simple implementation of PathInfo for a local path + """ + __slots__ = ('_path', '_exists', '_is_dir', '_is_file', '_is_symlink') + + def __init__(self, path): + self._path = str(path) + self._exists = None + self._is_dir = None + self._is_file = None + self._is_symlink = None + + def exists(self, *, follow_symlinks=True): + """Whether this path exists.""" + if not follow_symlinks and self.is_symlink(): + return True + if self._exists is None: + self._exists = os.path.exists(self._path) + return self._exists + + def is_dir(self, *, follow_symlinks=True): + """Whether this path is a directory.""" + if not follow_symlinks and self.is_symlink(): + return False + if self._is_dir is None: + self._is_dir = os.path.isdir(self._path) + return self._is_dir + + def is_file(self, *, follow_symlinks=True): + """Whether this path is a regular file.""" + if not follow_symlinks and self.is_symlink(): + return False + if self._is_file is None: + self._is_file = os.path.isfile(self._path) + return self._is_file + + def is_symlink(self): + """Whether this path is a symbolic link.""" + if self._is_symlink is None: + self._is_symlink = os.path.islink(self._path) + return self._is_symlink + + +class ReadableLocalPath(_ReadablePath, LexicalPath): + """ + Simple implementation of a ReadablePath class for local filesystem paths. 
+ """ + __slots__ = ('info',) + + def __init__(self, *pathsegments): + super().__init__(*pathsegments) + self.info = LocalPathInfo(self) + + def __fspath__(self): + return str(self) + + def __open_rb__(self, buffering=-1): + return open(self, 'rb') + + def iterdir(self): + return (self / name for name in os.listdir(self)) + + def readlink(self): + return self.with_segments(os.readlink(self)) + + +class WritableLocalPath(_WritablePath, LexicalPath): + """ + Simple implementation of a WritablePath class for local filesystem paths. + """ + + __slots__ = () + + def __fspath__(self): + return str(self) + + def __open_wb__(self, buffering=-1): + return open(self, 'wb') + + def mkdir(self, mode=0o777): + os.mkdir(self, mode) + + def symlink_to(self, target, target_is_directory=False): + os.symlink(target, self, target_is_directory) diff --git a/Lib/test/test_pathlib/support/zip_path.py b/Lib/test/test_pathlib/support/zip_path.py new file mode 100644 index 00000000000..2905260c9df --- /dev/null +++ b/Lib/test/test_pathlib/support/zip_path.py @@ -0,0 +1,336 @@ +""" +Implementations of ReadablePath and WritablePath for zip file members, for use +in pathlib tests. + +ZipPathGround is also defined here. It helps establish the "ground truth" +about zip file members in tests. +""" + +import errno +import io +import posixpath +import stat +import zipfile +from stat import S_IFMT, S_ISDIR, S_ISREG, S_ISLNK + +from . 
import is_pypi + +if is_pypi: + from pathlib_abc import PathInfo, _ReadablePath, _WritablePath +else: + from pathlib.types import PathInfo, _ReadablePath, _WritablePath + + +class ZipPathGround: + can_symlink = True + + def __init__(self, path_cls): + self.path_cls = path_cls + + def setup(self, local_suffix=""): + return self.path_cls(zip_file=zipfile.ZipFile(io.BytesIO(), "w")) + + def teardown(self, root): + root.zip_file.close() + + def create_file(self, path, data=b''): + path.zip_file.writestr(str(path), data) + + def create_dir(self, path): + zip_info = zipfile.ZipInfo(str(path) + '/') + zip_info.external_attr |= stat.S_IFDIR << 16 + zip_info.external_attr |= stat.FILE_ATTRIBUTE_DIRECTORY + path.zip_file.writestr(zip_info, '') + + def create_symlink(self, path, target): + zip_info = zipfile.ZipInfo(str(path)) + zip_info.external_attr = stat.S_IFLNK << 16 + path.zip_file.writestr(zip_info, target.encode()) + + def create_hierarchy(self, p): + # Add regular files + self.create_file(p.joinpath('fileA'), b'this is file A\n') + self.create_file(p.joinpath('dirB/fileB'), b'this is file B\n') + self.create_file(p.joinpath('dirC/fileC'), b'this is file C\n') + self.create_file(p.joinpath('dirC/dirD/fileD'), b'this is file D\n') + self.create_file(p.joinpath('dirC/novel.txt'), b'this is a novel\n') + # Add symlinks + self.create_symlink(p.joinpath('linkA'), 'fileA') + self.create_symlink(p.joinpath('linkB'), 'dirB') + self.create_symlink(p.joinpath('dirA/linkC'), '../dirB') + self.create_symlink(p.joinpath('brokenLink'), 'non-existing') + self.create_symlink(p.joinpath('brokenLinkLoop'), 'brokenLinkLoop') + + def readtext(self, p): + with p.zip_file.open(str(p), 'r') as f: + f = io.TextIOWrapper(f, encoding='utf-8') + return f.read() + + def readbytes(self, p): + with p.zip_file.open(str(p), 'r') as f: + return f.read() + + readlink = readtext + + def isdir(self, p): + path_str = str(p) + "/" + return path_str in p.zip_file.NameToInfo + + def isfile(self, p): + info 
= p.zip_file.NameToInfo.get(str(p)) + if info is None: + return False + return not stat.S_ISLNK(info.external_attr >> 16) + + def islink(self, p): + info = p.zip_file.NameToInfo.get(str(p)) + if info is None: + return False + return stat.S_ISLNK(info.external_attr >> 16) + + +class MissingZipPathInfo(PathInfo): + """ + PathInfo implementation that is used when a zip file member is missing. + """ + __slots__ = () + + def exists(self, follow_symlinks=True): + return False + + def is_dir(self, follow_symlinks=True): + return False + + def is_file(self, follow_symlinks=True): + return False + + def is_symlink(self): + return False + + def resolve(self): + return self + + +missing_zip_path_info = MissingZipPathInfo() + + +class ZipPathInfo(PathInfo): + """ + PathInfo implementation for an existing zip file member. + """ + __slots__ = ('zip_file', 'zip_info', 'parent', 'children') + + def __init__(self, zip_file, parent=None): + self.zip_file = zip_file + self.zip_info = None + self.parent = parent or self + self.children = {} + + def exists(self, follow_symlinks=True): + if follow_symlinks and self.is_symlink(): + return self.resolve().exists() + return True + + def is_dir(self, follow_symlinks=True): + if follow_symlinks and self.is_symlink(): + return self.resolve().is_dir() + elif self.zip_info is None: + return True + elif fmt := S_IFMT(self.zip_info.external_attr >> 16): + return S_ISDIR(fmt) + else: + return self.zip_info.filename.endswith('/') + + def is_file(self, follow_symlinks=True): + if follow_symlinks and self.is_symlink(): + return self.resolve().is_file() + elif self.zip_info is None: + return False + elif fmt := S_IFMT(self.zip_info.external_attr >> 16): + return S_ISREG(fmt) + else: + return not self.zip_info.filename.endswith('/') + + def is_symlink(self): + if self.zip_info is None: + return False + elif fmt := S_IFMT(self.zip_info.external_attr >> 16): + return S_ISLNK(fmt) + else: + return False + + def resolve(self, path=None, create=False, 
follow_symlinks=True): + """ + Traverse zip hierarchy (parents, children and symlinks) starting + from this PathInfo. This is called from three places: + + - When a zip file member is added to ZipFile.filelist, this method + populates the ZipPathInfo tree (using create=True). + - When ReadableZipPath.info is accessed, this method is finds a + ZipPathInfo entry for the path without resolving any final symlink + (using follow_symlinks=False) + - When ZipPathInfo methods are called with follow_symlinks=True, this + method resolves any symlink in the final path position. + """ + link_count = 0 + stack = path.split('/')[::-1] if path else [] + info = self + while True: + if info.is_symlink() and (follow_symlinks or stack): + link_count += 1 + if link_count >= 40: + return missing_zip_path_info # Symlink loop! + path = info.zip_file.read(info.zip_info).decode() + stack += path.split('/')[::-1] if path else [] + info = info.parent + + if stack: + name = stack.pop() + else: + return info + + if name == '..': + info = info.parent + elif name and name != '.': + if name not in info.children: + if create: + info.children[name] = ZipPathInfo(info.zip_file, info) + else: + return missing_zip_path_info # No such child! + info = info.children[name] + + +class ZipFileList: + """ + `list`-like object that we inject as `ZipFile.filelist`. We maintain a + tree of `ZipPathInfo` objects representing the zip file members. + """ + + __slots__ = ('tree', '_items') + + def __init__(self, zip_file): + self.tree = ZipPathInfo(zip_file) + self._items = [] + for item in zip_file.filelist: + self.append(item) + + def __len__(self): + return len(self._items) + + def __iter__(self): + return iter(self._items) + + def append(self, item): + self._items.append(item) + self.tree.resolve(item.filename, create=True).zip_info = item + + +class ReadableZipPath(_ReadablePath): + """ + Simple implementation of a ReadablePath class for .zip files. 
+ """ + + __slots__ = ('_segments', 'zip_file') + parser = posixpath + + def __init__(self, *pathsegments, zip_file): + self._segments = pathsegments + self.zip_file = zip_file + if not isinstance(zip_file.filelist, ZipFileList): + zip_file.filelist = ZipFileList(zip_file) + + def __hash__(self): + return hash((str(self), self.zip_file)) + + def __eq__(self, other): + if not isinstance(other, ReadableZipPath): + return NotImplemented + return str(self) == str(other) and self.zip_file is other.zip_file + + def __str__(self): + if not self._segments: + return '' + return self.parser.join(*self._segments) + + def __repr__(self): + return f'{type(self).__name__}({str(self)!r}, zip_file={self.zip_file!r})' + + def with_segments(self, *pathsegments): + return type(self)(*pathsegments, zip_file=self.zip_file) + + @property + def info(self): + tree = self.zip_file.filelist.tree + return tree.resolve(str(self), follow_symlinks=False) + + def __open_rb__(self, buffering=-1): + info = self.info.resolve() + if not info.exists(): + raise FileNotFoundError(errno.ENOENT, "File not found", self) + elif info.is_dir(): + raise IsADirectoryError(errno.EISDIR, "Is a directory", self) + return self.zip_file.open(info.zip_info, 'r') + + def iterdir(self): + info = self.info.resolve() + if not info.exists(): + raise FileNotFoundError(errno.ENOENT, "File not found", self) + elif not info.is_dir(): + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", self) + return (self / name for name in info.children) + + def readlink(self): + info = self.info + if not info.exists(): + raise FileNotFoundError(errno.ENOENT, "File not found", self) + elif not info.is_symlink(): + raise OSError(errno.EINVAL, "Not a symlink", self) + return self.with_segments(self.zip_file.read(info.zip_info).decode()) + + +class WritableZipPath(_WritablePath): + """ + Simple implementation of a WritablePath class for .zip files. 
+ """ + + __slots__ = ('_segments', 'zip_file') + parser = posixpath + + def __init__(self, *pathsegments, zip_file): + self._segments = pathsegments + self.zip_file = zip_file + + def __hash__(self): + return hash((str(self), self.zip_file)) + + def __eq__(self, other): + if not isinstance(other, WritableZipPath): + return NotImplemented + return str(self) == str(other) and self.zip_file is other.zip_file + + def __str__(self): + if not self._segments: + return '' + return self.parser.join(*self._segments) + + def __repr__(self): + return f'{type(self).__name__}({str(self)!r}, zip_file={self.zip_file!r})' + + def with_segments(self, *pathsegments): + return type(self)(*pathsegments, zip_file=self.zip_file) + + def __open_wb__(self, buffering=-1): + return self.zip_file.open(str(self), 'w') + + def mkdir(self, mode=0o777): + zinfo = zipfile.ZipInfo(str(self) + '/') + zinfo.external_attr |= stat.S_IFDIR << 16 + zinfo.external_attr |= stat.FILE_ATTRIBUTE_DIRECTORY + self.zip_file.writestr(zinfo, '') + + def symlink_to(self, target, target_is_directory=False): + zinfo = zipfile.ZipInfo(str(self)) + zinfo.external_attr = stat.S_IFLNK << 16 + if target_is_directory: + zinfo.external_attr |= 0x10 + self.zip_file.writestr(zinfo, str(target)) diff --git a/Lib/test/test_pathlib/test_copy.py b/Lib/test/test_pathlib/test_copy.py new file mode 100644 index 00000000000..5f4cf82a031 --- /dev/null +++ b/Lib/test/test_pathlib/test_copy.py @@ -0,0 +1,174 @@ +""" +Tests for copying from pathlib.types._ReadablePath to _WritablePath. 
+""" + +import contextlib +import unittest + +from .support import is_pypi +from .support.local_path import LocalPathGround +from .support.zip_path import ZipPathGround, ReadableZipPath, WritableZipPath + + +class CopyTestBase: + def setUp(self): + self.source_root = self.source_ground.setup() + self.source_ground.create_hierarchy(self.source_root) + self.target_root = self.target_ground.setup(local_suffix="_target") + + def tearDown(self): + self.source_ground.teardown(self.source_root) + self.target_ground.teardown(self.target_root) + + def test_copy_file(self): + source = self.source_root / 'fileA' + target = self.target_root / 'copyA' + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(self.target_ground.isfile(target)) + self.assertEqual(self.source_ground.readbytes(source), + self.target_ground.readbytes(result)) + + def test_copy_file_empty(self): + source = self.source_root / 'empty' + target = self.target_root / 'copyA' + self.source_ground.create_file(source, b'') + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(self.target_ground.isfile(target)) + self.assertEqual(self.target_ground.readbytes(result), b'') + + def test_copy_file_to_existing_file(self): + source = self.source_root / 'fileA' + target = self.target_root / 'copyA' + self.target_ground.create_file(target, b'this is a copy\n') + with contextlib.ExitStack() as stack: + if isinstance(target, WritableZipPath): + stack.enter_context(self.assertWarns(UserWarning)) + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(self.target_ground.isfile(target)) + self.assertEqual(self.source_ground.readbytes(source), + self.target_ground.readbytes(result)) + + def test_copy_file_to_directory(self): + if isinstance(self.target_root, WritableZipPath): + self.skipTest('needs local target') + source = self.source_root / 'fileA' + target = self.target_root / 'copyA' + self.target_ground.create_dir(target) + 
self.assertRaises(OSError, source.copy, target) + + def test_copy_file_to_itself(self): + source = self.source_root / 'fileA' + self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, source, follow_symlinks=False) + + def test_copy_dir(self): + source = self.source_root / 'dirC' + target = self.target_root / 'copyC' + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(self.target_ground.isdir(target)) + self.assertTrue(self.target_ground.isfile(target / 'fileC')) + self.assertEqual(self.target_ground.readtext(target / 'fileC'), 'this is file C\n') + self.assertTrue(self.target_ground.isdir(target / 'dirD')) + self.assertTrue(self.target_ground.isfile(target / 'dirD' / 'fileD')) + self.assertEqual(self.target_ground.readtext(target / 'dirD' / 'fileD'), 'this is file D\n') + + def test_copy_dir_follow_symlinks_true(self): + if not self.source_ground.can_symlink: + self.skipTest('needs symlink support on source') + source = self.source_root / 'dirC' + target = self.target_root / 'copyC' + self.source_ground.create_symlink(source / 'linkC', 'fileC') + self.source_ground.create_symlink(source / 'linkD', 'dirD') + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(self.target_ground.isdir(target)) + self.assertFalse(self.target_ground.islink(target / 'linkC')) + self.assertTrue(self.target_ground.isfile(target / 'linkC')) + self.assertEqual(self.target_ground.readtext(target / 'linkC'), 'this is file C\n') + self.assertFalse(self.target_ground.islink(target / 'linkD')) + self.assertTrue(self.target_ground.isdir(target / 'linkD')) + self.assertTrue(self.target_ground.isfile(target / 'linkD' / 'fileD')) + self.assertEqual(self.target_ground.readtext(target / 'linkD' / 'fileD'), 'this is file D\n') + + def test_copy_dir_follow_symlinks_false(self): + if not self.source_ground.can_symlink: + self.skipTest('needs symlink support on source') + if not self.target_ground.can_symlink: 
+ self.skipTest('needs symlink support on target') + source = self.source_root / 'dirC' + target = self.target_root / 'copyC' + self.source_ground.create_symlink(source / 'linkC', 'fileC') + self.source_ground.create_symlink(source / 'linkD', 'dirD') + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(self.target_ground.isdir(target)) + self.assertTrue(self.target_ground.islink(target / 'linkC')) + self.assertEqual(self.target_ground.readlink(target / 'linkC'), 'fileC') + self.assertTrue(self.target_ground.islink(target / 'linkD')) + self.assertEqual(self.target_ground.readlink(target / 'linkD'), 'dirD') + + def test_copy_dir_to_existing_directory(self): + if isinstance(self.target_root, WritableZipPath): + self.skipTest('needs local target') + source = self.source_root / 'dirC' + target = self.target_root / 'copyC' + self.target_ground.create_dir(target) + self.assertRaises(FileExistsError, source.copy, target) + + def test_copy_dir_to_itself(self): + source = self.source_root / 'dirC' + self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, source, follow_symlinks=False) + + def test_copy_dir_into_itself(self): + source = self.source_root / 'dirC' + target = self.source_root / 'dirC' / 'dirD' / 'copyC' + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + def test_copy_into(self): + source = self.source_root / 'fileA' + target_dir = self.target_root / 'dirA' + self.target_ground.create_dir(target_dir) + result = source.copy_into(target_dir) + self.assertEqual(result, target_dir / 'fileA') + self.assertTrue(self.target_ground.isfile(result)) + self.assertEqual(self.source_ground.readbytes(source), + self.target_ground.readbytes(result)) + + def test_copy_into_empty_name(self): + source = self.source_root.with_segments() + target_dir = self.target_root / 'dirA' + self.target_ground.create_dir(target_dir) 
+ self.assertRaises(ValueError, source.copy_into, target_dir) + + +class ZipToZipPathCopyTest(CopyTestBase, unittest.TestCase): + source_ground = ZipPathGround(ReadableZipPath) + target_ground = ZipPathGround(WritableZipPath) + + +if not is_pypi: + from pathlib import Path + + class ZipToLocalPathCopyTest(CopyTestBase, unittest.TestCase): + source_ground = ZipPathGround(ReadableZipPath) + target_ground = LocalPathGround(Path) + + + class LocalToZipPathCopyTest(CopyTestBase, unittest.TestCase): + source_ground = LocalPathGround(Path) + target_ground = ZipPathGround(WritableZipPath) + + + class LocalToLocalPathCopyTest(CopyTestBase, unittest.TestCase): + source_ground = LocalPathGround(Path) + target_ground = LocalPathGround(Path) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_pathlib/test_join.py b/Lib/test/test_pathlib/test_join.py new file mode 100644 index 00000000000..6b51a09e5ac --- /dev/null +++ b/Lib/test/test_pathlib/test_join.py @@ -0,0 +1,395 @@ +""" +Tests for pathlib.types._JoinablePath +""" + +import unittest +import threading +from test.support import threading_helper + +from .support import is_pypi +from .support.lexical_path import LexicalPath + +if is_pypi: + from pathlib_abc import _PathParser, _JoinablePath +else: + from pathlib.types import _PathParser, _JoinablePath + + +class JoinTestBase: + def test_is_joinable(self): + p = self.cls() + self.assertIsInstance(p, _JoinablePath) + + def test_parser(self): + self.assertIsInstance(self.cls.parser, _PathParser) + + def test_constructor(self): + P = self.cls + p = P('a') + self.assertIsInstance(p, P) + P() + P('a', 'b', 'c') + P('/a', 'b', 'c') + P('a/b/c') + P('/a/b/c') + + def test_with_segments(self): + class P(self.cls): + def __init__(self, *pathsegments, session_id): + super().__init__(*pathsegments) + self.session_id = session_id + + def with_segments(self, *pathsegments): + return type(self)(*pathsegments, session_id=self.session_id) + p = P('foo', 'bar', 
session_id=42) + self.assertEqual(42, (p / 'foo').session_id) + self.assertEqual(42, ('foo' / p).session_id) + self.assertEqual(42, p.joinpath('foo').session_id) + self.assertEqual(42, p.with_name('foo').session_id) + self.assertEqual(42, p.with_stem('foo').session_id) + self.assertEqual(42, p.with_suffix('.foo').session_id) + self.assertEqual(42, p.with_segments('foo').session_id) + self.assertEqual(42, p.parent.session_id) + for parent in p.parents: + self.assertEqual(42, parent.session_id) + + def test_join(self): + P = self.cls + sep = self.cls.parser.sep + p = P(f'a{sep}b') + pp = p.joinpath('c') + self.assertEqual(pp, P(f'a{sep}b{sep}c')) + self.assertIs(type(pp), type(p)) + pp = p.joinpath('c', 'd') + self.assertEqual(pp, P(f'a{sep}b{sep}c{sep}d')) + pp = p.joinpath(f'{sep}c') + self.assertEqual(pp, P(f'{sep}c')) + + def test_div(self): + # Basically the same as joinpath(). + P = self.cls + sep = self.cls.parser.sep + p = P(f'a{sep}b') + pp = p / 'c' + self.assertEqual(pp, P(f'a{sep}b{sep}c')) + self.assertIs(type(pp), type(p)) + pp = p / f'c{sep}d' + self.assertEqual(pp, P(f'a{sep}b{sep}c{sep}d')) + pp = p / 'c' / 'd' + self.assertEqual(pp, P(f'a{sep}b{sep}c{sep}d')) + pp = 'c' / p / 'd' + self.assertEqual(pp, P(f'c{sep}a{sep}b{sep}d')) + pp = p/ f'{sep}c' + self.assertEqual(pp, P(f'{sep}c')) + + def test_full_match(self): + P = self.cls + # Simple relative pattern. + self.assertTrue(P('b.py').full_match('b.py')) + self.assertFalse(P('a/b.py').full_match('b.py')) + self.assertFalse(P('/a/b.py').full_match('b.py')) + self.assertFalse(P('a.py').full_match('b.py')) + self.assertFalse(P('b/py').full_match('b.py')) + self.assertFalse(P('/a.py').full_match('b.py')) + self.assertFalse(P('b.py/c').full_match('b.py')) + # Wildcard relative pattern. 
+ self.assertTrue(P('b.py').full_match('*.py')) + self.assertFalse(P('a/b.py').full_match('*.py')) + self.assertFalse(P('/a/b.py').full_match('*.py')) + self.assertFalse(P('b.pyc').full_match('*.py')) + self.assertFalse(P('b./py').full_match('*.py')) + self.assertFalse(P('b.py/c').full_match('*.py')) + # Multi-part relative pattern. + self.assertTrue(P('ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('/d/ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('a.py').full_match('a*/*.py')) + self.assertFalse(P('/dab/c.py').full_match('a*/*.py')) + self.assertFalse(P('ab/c.py/d').full_match('a*/*.py')) + # Absolute pattern. + self.assertTrue(P('/b.py').full_match('/*.py')) + self.assertFalse(P('b.py').full_match('/*.py')) + self.assertFalse(P('a/b.py').full_match('/*.py')) + self.assertFalse(P('/a/b.py').full_match('/*.py')) + # Multi-part absolute pattern. + self.assertTrue(P('/a/b.py').full_match('/a/*.py')) + self.assertFalse(P('/ab.py').full_match('/a/*.py')) + self.assertFalse(P('/a/b/c.py').full_match('/a/*.py')) + # Multi-part glob-style pattern. 
+ self.assertTrue(P('a').full_match('**')) + self.assertTrue(P('c.py').full_match('**')) + self.assertTrue(P('a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('/**')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**')) + self.assertTrue(P('/a/b/c.py').full_match('**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/b/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/**/**/**/*.py')) + self.assertFalse(P('c.py').full_match('**/a.py')) + self.assertFalse(P('c.py').full_match('c/**')) + self.assertFalse(P('a/b/c.py').full_match('**/a')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c.')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('/a/b/c.py/**')) + self.assertFalse(P('a/b/c.py').full_match('/**/a/b/c.py')) + # Matching against empty path + self.assertFalse(P('').full_match('*')) + self.assertTrue(P('').full_match('**')) + self.assertFalse(P('').full_match('**/*')) + # Matching with empty pattern + self.assertTrue(P('').full_match('')) + self.assertTrue(P('.').full_match('.')) + self.assertFalse(P('/').full_match('')) + self.assertFalse(P('/').full_match('.')) + self.assertFalse(P('foo').full_match('')) + self.assertFalse(P('foo').full_match('.')) + + def test_parts(self): + # `parts` returns a tuple. + sep = self.cls.parser.sep + P = self.cls + p = P(f'a{sep}b') + parts = p.parts + self.assertEqual(parts, ('a', 'b')) + # When the path is absolute, the anchor is a separate part. 
+ p = P(f'{sep}a{sep}b') + parts = p.parts + self.assertEqual(parts, (sep, 'a', 'b')) + + @threading_helper.requires_working_threading() + def test_parts_multithreaded(self): + P = self.cls + + NUM_THREADS = 10 + NUM_ITERS = 10 + + for _ in range(NUM_ITERS): + b = threading.Barrier(NUM_THREADS) + path = P('a') / 'b' / 'c' / 'd' / 'e' + expected = ('a', 'b', 'c', 'd', 'e') + + def check_parts(): + b.wait() + self.assertEqual(path.parts, expected) + + threads = [threading.Thread(target=check_parts) for _ in range(NUM_THREADS)] + with threading_helper.start_threads(threads): + pass + + def test_parent(self): + # Relative + P = self.cls + p = P('a/b/c') + self.assertEqual(p.parent, P('a/b')) + self.assertEqual(p.parent.parent, P('a')) + self.assertEqual(p.parent.parent.parent, P('')) + self.assertEqual(p.parent.parent.parent.parent, P('')) + # Anchored + p = P('/a/b/c') + self.assertEqual(p.parent, P('/a/b')) + self.assertEqual(p.parent.parent, P('/a')) + self.assertEqual(p.parent.parent.parent, P('/')) + self.assertEqual(p.parent.parent.parent.parent, P('/')) + + def test_parents(self): + # Relative + P = self.cls + p = P('a/b/c') + par = p.parents + self.assertEqual(len(par), 3) + self.assertEqual(par[0], P('a/b')) + self.assertEqual(par[1], P('a')) + self.assertEqual(par[2], P('')) + self.assertEqual(par[-1], P('')) + self.assertEqual(par[-2], P('a')) + self.assertEqual(par[-3], P('a/b')) + self.assertEqual(par[0:1], (P('a/b'),)) + self.assertEqual(par[:2], (P('a/b'), P('a'))) + self.assertEqual(par[:-1], (P('a/b'), P('a'))) + self.assertEqual(par[1:], (P('a'), P(''))) + self.assertEqual(par[::2], (P('a/b'), P(''))) + self.assertEqual(par[::-1], (P(''), P('a'), P('a/b'))) + self.assertEqual(list(par), [P('a/b'), P('a'), P('')]) + with self.assertRaises(IndexError): + par[-4] + with self.assertRaises(IndexError): + par[3] + with self.assertRaises(TypeError): + par[0] = p + # Anchored + p = P('/a/b/c') + par = p.parents + self.assertEqual(len(par), 3) + 
self.assertEqual(par[0], P('/a/b')) + self.assertEqual(par[1], P('/a')) + self.assertEqual(par[2], P('/')) + self.assertEqual(par[-1], P('/')) + self.assertEqual(par[-2], P('/a')) + self.assertEqual(par[-3], P('/a/b')) + self.assertEqual(par[0:1], (P('/a/b'),)) + self.assertEqual(par[:2], (P('/a/b'), P('/a'))) + self.assertEqual(par[:-1], (P('/a/b'), P('/a'))) + self.assertEqual(par[1:], (P('/a'), P('/'))) + self.assertEqual(par[::2], (P('/a/b'), P('/'))) + self.assertEqual(par[::-1], (P('/'), P('/a'), P('/a/b'))) + self.assertEqual(list(par), [P('/a/b'), P('/a'), P('/')]) + with self.assertRaises(IndexError): + par[-4] + with self.assertRaises(IndexError): + par[3] + + def test_anchor(self): + P = self.cls + sep = self.cls.parser.sep + self.assertEqual(P('').anchor, '') + self.assertEqual(P(f'a{sep}b').anchor, '') + self.assertEqual(P(sep).anchor, sep) + self.assertEqual(P(f'{sep}a{sep}b').anchor, sep) + + def test_name(self): + P = self.cls + self.assertEqual(P('').name, '') + self.assertEqual(P('/').name, '') + self.assertEqual(P('a/b').name, 'b') + self.assertEqual(P('/a/b').name, 'b') + self.assertEqual(P('a/b.py').name, 'b.py') + self.assertEqual(P('/a/b.py').name, 'b.py') + + def test_suffix(self): + P = self.cls + self.assertEqual(P('').suffix, '') + self.assertEqual(P('.').suffix, '') + self.assertEqual(P('..').suffix, '') + self.assertEqual(P('/').suffix, '') + self.assertEqual(P('a/b').suffix, '') + self.assertEqual(P('/a/b').suffix, '') + self.assertEqual(P('/a/b/.').suffix, '') + self.assertEqual(P('a/b.py').suffix, '.py') + self.assertEqual(P('/a/b.py').suffix, '.py') + self.assertEqual(P('a/.hgrc').suffix, '') + self.assertEqual(P('/a/.hgrc').suffix, '') + self.assertEqual(P('a/.hg.rc').suffix, '.rc') + self.assertEqual(P('/a/.hg.rc').suffix, '.rc') + self.assertEqual(P('a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('a/trailing.dot.').suffix, '.') + self.assertEqual(P('/a/trailing.dot.').suffix, 
'.') + self.assertEqual(P('a/..d.o.t..').suffix, '.') + self.assertEqual(P('a/inn.er..dots').suffix, '.dots') + self.assertEqual(P('photo').suffix, '') + self.assertEqual(P('photo.jpg').suffix, '.jpg') + + def test_suffixes(self): + P = self.cls + self.assertEqual(P('').suffixes, []) + self.assertEqual(P('.').suffixes, []) + self.assertEqual(P('/').suffixes, []) + self.assertEqual(P('a/b').suffixes, []) + self.assertEqual(P('/a/b').suffixes, []) + self.assertEqual(P('/a/b/.').suffixes, []) + self.assertEqual(P('a/b.py').suffixes, ['.py']) + self.assertEqual(P('/a/b.py').suffixes, ['.py']) + self.assertEqual(P('a/.hgrc').suffixes, []) + self.assertEqual(P('/a/.hgrc').suffixes, []) + self.assertEqual(P('a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.']) + self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots']) + self.assertEqual(P('photo').suffixes, []) + self.assertEqual(P('photo.jpg').suffixes, ['.jpg']) + + def test_stem(self): + P = self.cls + self.assertEqual(P('..').stem, '..') + self.assertEqual(P('').stem, '') + self.assertEqual(P('/').stem, '') + self.assertEqual(P('a/b').stem, 'b') + self.assertEqual(P('a/b.py').stem, 'b') + self.assertEqual(P('a/.hgrc').stem, '.hgrc') + self.assertEqual(P('a/.hg.rc').stem, '.hg') + self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') + self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot') + self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.') + self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.') + self.assertEqual(P('photo').stem, 'photo') + self.assertEqual(P('photo.jpg').stem, 'photo') + + def test_with_name(self): + P = self.cls + 
self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) + self.assertEqual(P('/a/b').with_name('d.xml'), P('/a/d.xml')) + self.assertEqual(P('a/b.py').with_name('d.xml'), P('a/d.xml')) + self.assertEqual(P('/a/b.py').with_name('d.xml'), P('/a/d.xml')) + self.assertEqual(P('a/Dot ending.').with_name('d.xml'), P('a/d.xml')) + self.assertEqual(P('/a/Dot ending.').with_name('d.xml'), P('/a/d.xml')) + self.assertRaises(ValueError, P('a/b').with_name, '/c') + self.assertRaises(ValueError, P('a/b').with_name, 'c/') + self.assertRaises(ValueError, P('a/b').with_name, 'c/d') + + def test_with_stem(self): + P = self.cls + self.assertEqual(P('a/b').with_stem('d'), P('a/d')) + self.assertEqual(P('/a/b').with_stem('d'), P('/a/d')) + self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py')) + self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py')) + self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) + self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.')) + self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.')) + self.assertRaises(ValueError, P('foo.gz').with_stem, '') + self.assertRaises(ValueError, P('/a/b/foo.gz').with_stem, '') + self.assertRaises(ValueError, P('a/b').with_stem, '/c') + self.assertRaises(ValueError, P('a/b').with_stem, 'c/') + self.assertRaises(ValueError, P('a/b').with_stem, 'c/d') + + def test_with_suffix(self): + P = self.cls + self.assertEqual(P('a/b').with_suffix('.gz'), P('a/b.gz')) + self.assertEqual(P('/a/b').with_suffix('.gz'), P('/a/b.gz')) + self.assertEqual(P('a/b.py').with_suffix('.gz'), P('a/b.gz')) + self.assertEqual(P('/a/b.py').with_suffix('.gz'), P('/a/b.gz')) + # Stripping suffix. 
+ self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) + self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) + # Single dot + self.assertEqual(P('a/b').with_suffix('.'), P('a/b.')) + self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.')) + self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.')) + self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.')) + # Path doesn't have a "filename" component. + self.assertRaises(ValueError, P('').with_suffix, '.gz') + self.assertRaises(ValueError, P('/').with_suffix, '.gz') + # Invalid suffix. + self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') + self.assertRaises(ValueError, P('a/b').with_suffix, '/') + self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') + self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') + self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') + self.assertRaises(ValueError, P('a/b').with_suffix, './.d') + self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') + self.assertRaises(TypeError, P('a/b').with_suffix, None) + + +class LexicalPathJoinTest(JoinTestBase, unittest.TestCase): + cls = LexicalPath + + +if not is_pypi: + from pathlib import PurePath, Path + + class PurePathJoinTest(JoinTestBase, unittest.TestCase): + cls = PurePath + + class PathJoinTest(JoinTestBase, unittest.TestCase): + cls = Path + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_pathlib/test_join_posix.py b/Lib/test/test_pathlib/test_join_posix.py new file mode 100644 index 00000000000..d24fb1087c9 --- /dev/null +++ b/Lib/test/test_pathlib/test_join_posix.py @@ -0,0 +1,51 @@ +""" +Tests for Posix-flavoured pathlib.types._JoinablePath +""" + +import os +import unittest + +from .support import is_pypi +from .support.lexical_path import LexicalPosixPath + + +class JoinTestBase: + def test_join(self): + P = self.cls + p = P('//a') + pp = p.joinpath('b') + self.assertEqual(pp, P('//a/b')) + pp = P('/a').joinpath('//c') + self.assertEqual(pp, P('//c')) + pp = 
P('//a').joinpath('/c') + self.assertEqual(pp, P('/c')) + + def test_div(self): + # Basically the same as joinpath(). + P = self.cls + p = P('//a') + pp = p / 'b' + self.assertEqual(pp, P('//a/b')) + pp = P('/a') / '//c' + self.assertEqual(pp, P('//c')) + pp = P('//a') / '/c' + self.assertEqual(pp, P('/c')) + + +class LexicalPosixPathJoinTest(JoinTestBase, unittest.TestCase): + cls = LexicalPosixPath + + +if not is_pypi: + from pathlib import PurePosixPath, PosixPath + + class PurePosixPathJoinTest(JoinTestBase, unittest.TestCase): + cls = PurePosixPath + + if os.name != 'nt': + class PosixPathJoinTest(JoinTestBase, unittest.TestCase): + cls = PosixPath + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_pathlib/test_join_windows.py b/Lib/test/test_pathlib/test_join_windows.py new file mode 100644 index 00000000000..2cc634f25ef --- /dev/null +++ b/Lib/test/test_pathlib/test_join_windows.py @@ -0,0 +1,290 @@ +""" +Tests for Windows-flavoured pathlib.types._JoinablePath +""" + +import os +import unittest + +from .support import is_pypi +from .support.lexical_path import LexicalWindowsPath + + +class JoinTestBase: + def test_join(self): + P = self.cls + p = P('C:/a/b') + pp = p.joinpath('x/y') + self.assertEqual(pp, P(r'C:/a/b\x/y')) + pp = p.joinpath('/x/y') + self.assertEqual(pp, P('C:/x/y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + pp = p.joinpath('D:x/y') + self.assertEqual(pp, P('D:x/y')) + pp = p.joinpath('D:/x/y') + self.assertEqual(pp, P('D:/x/y')) + pp = p.joinpath('//host/share/x/y') + self.assertEqual(pp, P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. 
+ pp = p.joinpath('c:x/y') + self.assertEqual(pp, P(r'c:/a/b\x/y')) + pp = p.joinpath('c:/x/y') + self.assertEqual(pp, P('c:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + pp = p.joinpath('./d:s') + self.assertEqual(pp, P(r'C:/a/b\./d:s')) + pp = p.joinpath('./dd:s') + self.assertEqual(pp, P(r'C:/a/b\./dd:s')) + pp = p.joinpath('E:d:s') + self.assertEqual(pp, P('E:d:s')) + # Joining onto a UNC path with no root + pp = P('//server').joinpath('share') + self.assertEqual(pp, P(r'//server\share')) + pp = P('//./BootPartition').joinpath('Windows') + self.assertEqual(pp, P(r'//./BootPartition\Windows')) + + def test_div(self): + # Basically the same as joinpath(). + P = self.cls + p = P('C:/a/b') + self.assertEqual(p / 'x/y', P(r'C:/a/b\x/y')) + self.assertEqual(p / 'x' / 'y', P(r'C:/a/b\x\y')) + self.assertEqual(p / '/x/y', P('C:/x/y')) + self.assertEqual(p / '/x' / 'y', P(r'C:/x\y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + self.assertEqual(p / 'D:x/y', P('D:x/y')) + self.assertEqual(p / 'D:' / 'x/y', P('D:x/y')) + self.assertEqual(p / 'D:/x/y', P('D:/x/y')) + self.assertEqual(p / 'D:' / '/x/y', P('D:/x/y')) + self.assertEqual(p / '//host/share/x/y', P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. 
+ self.assertEqual(p / 'c:x/y', P(r'c:/a/b\x/y')) + self.assertEqual(p / 'c:/x/y', P('c:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + self.assertEqual(p / './d:s', P(r'C:/a/b\./d:s')) + self.assertEqual(p / './dd:s', P(r'C:/a/b\./dd:s')) + self.assertEqual(p / 'E:d:s', P('E:d:s')) + + def test_str(self): + p = self.cls(r'a\b\c') + self.assertEqual(str(p), 'a\\b\\c') + p = self.cls(r'c:\a\b\c') + self.assertEqual(str(p), 'c:\\a\\b\\c') + p = self.cls('\\\\a\\b\\') + self.assertEqual(str(p), '\\\\a\\b\\') + p = self.cls(r'\\a\b\c') + self.assertEqual(str(p), '\\\\a\\b\\c') + p = self.cls(r'\\a\b\c\d') + self.assertEqual(str(p), '\\\\a\\b\\c\\d') + + def test_parts(self): + P = self.cls + p = P(r'c:a\b') + parts = p.parts + self.assertEqual(parts, ('c:', 'a', 'b')) + p = P(r'c:\a\b') + parts = p.parts + self.assertEqual(parts, ('c:\\', 'a', 'b')) + p = P(r'\\a\b\c\d') + parts = p.parts + self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) + + def test_parent(self): + # Anchored + P = self.cls + p = P('z:a/b/c') + self.assertEqual(p.parent, P('z:a/b')) + self.assertEqual(p.parent.parent, P('z:a')) + self.assertEqual(p.parent.parent.parent, P('z:')) + self.assertEqual(p.parent.parent.parent.parent, P('z:')) + p = P('z:/a/b/c') + self.assertEqual(p.parent, P('z:/a/b')) + self.assertEqual(p.parent.parent, P('z:/a')) + self.assertEqual(p.parent.parent.parent, P('z:/')) + self.assertEqual(p.parent.parent.parent.parent, P('z:/')) + p = P('//a/b/c/d') + self.assertEqual(p.parent, P('//a/b/c')) + self.assertEqual(p.parent.parent, P('//a/b/')) + self.assertEqual(p.parent.parent.parent, P('//a/b/')) + + def test_parents(self): + # Anchored + P = self.cls + p = P('z:a/b') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:a')) + self.assertEqual(par[1], P('z:')) + self.assertEqual(par[0:1], (P('z:a'),)) + self.assertEqual(par[:-1], (P('z:a'),)) + self.assertEqual(par[:2], (P('z:a'), 
P('z:'))) + self.assertEqual(par[1:], (P('z:'),)) + self.assertEqual(par[::2], (P('z:a'),)) + self.assertEqual(par[::-1], (P('z:'), P('z:a'))) + self.assertEqual(list(par), [P('z:a'), P('z:')]) + with self.assertRaises(IndexError): + par[2] + p = P('z:/a/b') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:/a')) + self.assertEqual(par[1], P('z:/')) + self.assertEqual(par[0:1], (P('z:/a'),)) + self.assertEqual(par[0:-1], (P('z:/a'),)) + self.assertEqual(par[:2], (P('z:/a'), P('z:/'))) + self.assertEqual(par[1:], (P('z:/'),)) + self.assertEqual(par[::2], (P('z:/a'),)) + self.assertEqual(par[::-1], (P('z:/'), P('z:/a'),)) + self.assertEqual(list(par), [P('z:/a'), P('z:/')]) + with self.assertRaises(IndexError): + par[2] + p = P('//a/b/c/d') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('//a/b/c')) + self.assertEqual(par[1], P('//a/b/')) + self.assertEqual(par[0:1], (P('//a/b/c'),)) + self.assertEqual(par[0:-1], (P('//a/b/c'),)) + self.assertEqual(par[:2], (P('//a/b/c'), P('//a/b/'))) + self.assertEqual(par[1:], (P('//a/b/'),)) + self.assertEqual(par[::2], (P('//a/b/c'),)) + self.assertEqual(par[::-1], (P('//a/b/'), P('//a/b/c'))) + self.assertEqual(list(par), [P('//a/b/c'), P('//a/b/')]) + with self.assertRaises(IndexError): + par[2] + + def test_anchor(self): + P = self.cls + self.assertEqual(P('c:').anchor, 'c:') + self.assertEqual(P('c:a/b').anchor, 'c:') + self.assertEqual(P('c:\\').anchor, 'c:\\') + self.assertEqual(P('c:\\a\\b\\').anchor, 'c:\\') + self.assertEqual(P('\\\\a\\b\\').anchor, '\\\\a\\b\\') + self.assertEqual(P('\\\\a\\b\\c\\d').anchor, '\\\\a\\b\\') + + def test_name(self): + P = self.cls + self.assertEqual(P('c:').name, '') + self.assertEqual(P('c:/').name, '') + self.assertEqual(P('c:a/b').name, 'b') + self.assertEqual(P('c:/a/b').name, 'b') + self.assertEqual(P('c:a/b.py').name, 'b.py') + self.assertEqual(P('c:/a/b.py').name, 'b.py') + self.assertEqual(P('//My.py/Share.php').name, 
'') + self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') + + def test_stem(self): + P = self.cls + self.assertEqual(P('c:').stem, '') + self.assertEqual(P('c:..').stem, '..') + self.assertEqual(P('c:/').stem, '') + self.assertEqual(P('c:a/b').stem, 'b') + self.assertEqual(P('c:a/b.py').stem, 'b') + self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') + self.assertEqual(P('c:a/.hg.rc').stem, '.hg') + self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') + self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot') + + def test_suffix(self): + P = self.cls + self.assertEqual(P('c:').suffix, '') + self.assertEqual(P('c:/').suffix, '') + self.assertEqual(P('c:a/b').suffix, '') + self.assertEqual(P('c:/a/b').suffix, '') + self.assertEqual(P('c:a/b.py').suffix, '.py') + self.assertEqual(P('c:/a/b.py').suffix, '.py') + self.assertEqual(P('c:a/.hgrc').suffix, '') + self.assertEqual(P('c:/a/.hgrc').suffix, '') + self.assertEqual(P('c:a/.hg.rc').suffix, '.rc') + self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') + self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:a/trailing.dot.').suffix, '.') + self.assertEqual(P('c:/a/trailing.dot.').suffix, '.') + self.assertEqual(P('//My.py/Share.php').suffix, '') + self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') + + def test_suffixes(self): + P = self.cls + self.assertEqual(P('c:').suffixes, []) + self.assertEqual(P('c:/').suffixes, []) + self.assertEqual(P('c:a/b').suffixes, []) + self.assertEqual(P('c:/a/b').suffixes, []) + self.assertEqual(P('c:a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:a/.hgrc').suffixes, []) + self.assertEqual(P('c:/a/.hgrc').suffixes, []) + self.assertEqual(P('c:a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:/a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) + 
self.assertEqual(P('//My.py/Share.php').suffixes, []) + self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) + self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.']) + + def test_with_name(self): + P = self.cls + self.assertEqual(P(r'c:a\b').with_name('d.xml'), P(r'c:a\d.xml')) + self.assertEqual(P(r'c:\a\b').with_name('d.xml'), P(r'c:\a\d.xml')) + self.assertEqual(P(r'c:a\Dot ending.').with_name('d.xml'), P(r'c:a\d.xml')) + self.assertEqual(P(r'c:\a\Dot ending.').with_name('d.xml'), P(r'c:\a\d.xml')) + self.assertRaises(ValueError, P(r'c:a\b').with_name, r'd:\e') + self.assertRaises(ValueError, P(r'c:a\b').with_name, r'\\My\Share') + + def test_with_stem(self): + P = self.cls + self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) + self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) + self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.')) + self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.')) + self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') + self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') + + def test_with_suffix(self): + P = self.cls + self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) + self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) + # Path doesn't have a "filename" component. + self.assertRaises(ValueError, P('').with_suffix, '.gz') + self.assertRaises(ValueError, P('/').with_suffix, '.gz') + self.assertRaises(ValueError, P('//My/Share').with_suffix, '.gz') + # Invalid suffix. 
+ self.assertRaises(ValueError, P('c:a/b').with_suffix, 'gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c\\d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') + self.assertRaises(TypeError, P('c:a/b').with_suffix, None) + + +class LexicalWindowsPathJoinTest(JoinTestBase, unittest.TestCase): + cls = LexicalWindowsPath + + +if not is_pypi: + from pathlib import PureWindowsPath, WindowsPath + + class PureWindowsPathJoinTest(JoinTestBase, unittest.TestCase): + cls = PureWindowsPath + + if os.name == 'nt': + class WindowsPathJoinTest(JoinTestBase, unittest.TestCase): + cls = WindowsPath + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 6cbc15d7675..d107c718ba5 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1,5 +1,7 @@ # TODO: RUSTPYTHON # Has not been tested with Windows style paths +import collections +import contextlib import io import os import sys @@ -16,28 +18,82 @@ from urllib.request import pathname2url from test.support import import_helper -from test.support import is_emscripten, is_wasi +from test.support import cpython_only +from test.support import is_emscripten, is_wasi, is_wasm32 from test.support import infinite_recursion from test.support import os_helper from test.support.os_helper import TESTFN, FS_NONASCII, FakePath -from test.test_pathlib import test_pathlib_abc -from 
test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows, needs_symlinks - +try: + import fcntl +except ImportError: + fcntl = None try: import grp, pwd except ImportError: grp = pwd = None +try: + import posix +except ImportError: + posix = None root_in_posix = False if hasattr(os, 'geteuid'): root_in_posix = (os.geteuid() == 0) + +def patch_replace(old_test): + def new_replace(self, target): + raise OSError(errno.EXDEV, "Cross-device link", self, target) + + def new_test(self): + old_replace = self.cls.replace + self.cls.replace = new_replace + try: + old_test(self) + finally: + self.cls.replace = old_replace + return new_test + + +_tests_needing_posix = set() +_tests_needing_windows = set() +_tests_needing_symlinks = set() + +def needs_posix(fn): + """Decorator that marks a test as requiring a POSIX-flavoured path class.""" + _tests_needing_posix.add(fn.__name__) + return fn + +def needs_windows(fn): + """Decorator that marks a test as requiring a Windows-flavoured path class.""" + _tests_needing_windows.add(fn.__name__) + return fn + +def needs_symlinks(fn): + """Decorator that marks a test as requiring a path class that supports symlinks.""" + _tests_needing_symlinks.add(fn.__name__) + return fn + + + +class UnsupportedOperationTest(unittest.TestCase): + def test_is_notimplemented(self): + self.assertIsSubclass(pathlib.UnsupportedOperation, NotImplementedError) + self.assertIsInstance(pathlib.UnsupportedOperation(), NotImplementedError) + + +class LazyImportTest(unittest.TestCase): + @cpython_only + def test_lazy_import(self): + import_helper.ensure_lazy_imports("pathlib", {"shutil"}) + + # # Tests for the pure classes. # -class PurePathTest(test_pathlib_abc.DummyPurePathTest): +class PurePathTest(unittest.TestCase): cls = pathlib.PurePath # Make sure any symbolic links in the base test path are resolved. 
@@ -59,6 +115,72 @@ class PurePathTest(test_pathlib_abc.DummyPurePathTest): ], } + def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_posix and self.cls.parser is not posixpath: + self.skipTest('requires POSIX-flavoured path class') + if name in _tests_needing_windows and self.cls.parser is posixpath: + self.skipTest('requires Windows-flavoured path class') + p = self.cls('a') + self.parser = p.parser + self.sep = self.parser.sep + self.altsep = self.parser.altsep + + def _check_str_subclass(self, *args): + # Issue #21127: it should be possible to construct a PurePath object + # from a str subclass instance, and it then gets converted to + # a pure str object. + class StrSubclass(str): + pass + P = self.cls + p = P(*(StrSubclass(x) for x in args)) + self.assertEqual(p, P(*args)) + for part in p.parts: + self.assertIs(type(part), str) + + def test_str_subclass_common(self): + self._check_str_subclass('') + self._check_str_subclass('.') + self._check_str_subclass('a') + self._check_str_subclass('a/b.txt') + self._check_str_subclass('/a/b.txt') + + @needs_windows + def test_str_subclass_windows(self): + self._check_str_subclass('.\\a:b') + self._check_str_subclass('c:') + self._check_str_subclass('c:a') + self._check_str_subclass('c:a\\b.txt') + self._check_str_subclass('c:\\') + self._check_str_subclass('c:\\a') + self._check_str_subclass('c:\\a\\b.txt') + self._check_str_subclass('\\\\some\\share') + self._check_str_subclass('\\\\some\\share\\a') + self._check_str_subclass('\\\\some\\share\\a\\b.txt') + + def _check_str(self, expected, args): + p = self.cls(*args) + self.assertEqual(str(p), expected.replace('/', self.sep)) + + def test_str_common(self): + # Canonicalized paths roundtrip. + for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + self._check_str(pathstr, (pathstr,)) + # Other tests for str() are in test_equivalences(). 
+ + @needs_windows + def test_str_windows(self): + p = self.cls('a/b/c') + self.assertEqual(str(p), 'a\\b\\c') + p = self.cls('c:/a/b/c') + self.assertEqual(str(p), 'c:\\a\\b\\c') + p = self.cls('//a/b') + self.assertEqual(str(p), '\\\\a\\b\\') + p = self.cls('//a/b/c') + self.assertEqual(str(p), '\\\\a\\b\\c') + p = self.cls('//a/b/c/d') + self.assertEqual(str(p), '\\\\a\\b\\c\\d') + def test_concrete_class(self): if self.cls is pathlib.PurePath: expected = pathlib.PureWindowsPath if os.name == 'nt' else pathlib.PurePosixPath @@ -150,15 +272,6 @@ def test_empty_path(self): # Special case for the empty path. self._check_str('.', ('',)) - def test_parts_interning(self): - P = self.cls - p = P('/usr/bin/foo') - q = P('/usr/local/bin') - # 'usr' - self.assertIs(p.parts[1], q.parts[1]) - # 'bin' - self.assertIs(p.parts[2], q.parts[3]) - def test_join_nested(self): P = self.cls p = P('a/b').joinpath(P('c')) @@ -182,6 +295,12 @@ def test_pickling_common(self): self.assertEqual(hash(pp), hash(p)) self.assertEqual(str(pp), str(p)) + def test_unpicking_3_13(self): + data = (b"\x80\x04\x95'\x00\x00\x00\x00\x00\x00\x00\x8c\x0e" + b"pathlib._local\x94\x8c\rPurePosixPath\x94\x93\x94)R\x94.") + p = pickle.loads(data) + self.assertIsInstance(p, pathlib.PurePosixPath) + def test_repr_common(self): for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): with self.subTest(pathstr=pathstr): @@ -189,8 +308,8 @@ def test_repr_common(self): clsname = p.__class__.__name__ r = repr(p) # The repr() is in the form ClassName("forward-slashes path"). 
- self.assertTrue(r.startswith(clsname + '('), r) - self.assertTrue(r.endswith(')'), r) + self.assertStartsWith(r, clsname + '(') + self.assertEndsWith(r, ')') inner = r[len(clsname) + 1 : -1] self.assertEqual(eval(inner), p.as_posix()) @@ -200,6 +319,31 @@ def test_fspath_common(self): self._check_str(p.__fspath__(), ('a/b',)) self._check_str(os.fspath(p), ('a/b',)) + def test_bytes(self): + P = self.cls + with self.assertRaises(TypeError): + P(b'a') + with self.assertRaises(TypeError): + P(b'a', 'b') + with self.assertRaises(TypeError): + P('a', b'b') + with self.assertRaises(TypeError): + P('a').joinpath(b'b') + with self.assertRaises(TypeError): + P('a') / b'b' + with self.assertRaises(TypeError): + b'a' / P('b') + with self.assertRaises(TypeError): + P('a').match(b'b') + with self.assertRaises(TypeError): + P('a').relative_to(b'b') + with self.assertRaises(TypeError): + P('a').with_name(b'b') + with self.assertRaises(TypeError): + P('a').with_stem(b'b') + with self.assertRaises(TypeError): + P('a').with_suffix(b'b') + def test_bytes_exc_message(self): P = self.cls message = (r"argument should be a str or an os\.PathLike object " @@ -216,6 +360,12 @@ def test_as_bytes_common(self): P = self.cls self.assertEqual(bytes(P('a/b')), b'a' + sep + b'b') + def test_as_posix_common(self): + P = self.cls + for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + self.assertEqual(P(pathstr).as_posix(), pathstr) + # Other tests for as_posix() are in test_equivalences(). 
+ def test_eq_common(self): P = self.cls self.assertEqual(P('a/b'), P('a/b')) @@ -277,12 +427,18 @@ def assertLess(a, b): with self.assertRaises(TypeError): P() < {} + def make_uri(self, path): + if isinstance(path, pathlib.Path): + return path.as_uri() + with self.assertWarns(DeprecationWarning): + return path.as_uri() + def test_as_uri_common(self): P = self.cls with self.assertRaises(ValueError): - P('a').as_uri() + self.make_uri(P('a')) with self.assertRaises(ValueError): - P().as_uri() + self.make_uri(P()) def test_repr_roundtrips(self): for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): @@ -295,6 +451,51 @@ def test_repr_roundtrips(self): self.assertEqual(q, p) self.assertEqual(repr(q), r) + def test_drive_common(self): + P = self.cls + self.assertEqual(P('a/b').drive, '') + self.assertEqual(P('/a/b').drive, '') + self.assertEqual(P('').drive, '') + + @needs_windows + def test_drive_windows(self): + P = self.cls + self.assertEqual(P('c:').drive, 'c:') + self.assertEqual(P('c:a/b').drive, 'c:') + self.assertEqual(P('c:/').drive, 'c:') + self.assertEqual(P('c:/a/b/').drive, 'c:') + self.assertEqual(P('//a/b').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') + self.assertEqual(P('./c:a').drive, '') + + + def test_root_common(self): + P = self.cls + sep = self.sep + self.assertEqual(P('').root, '') + self.assertEqual(P('a/b').root, '') + self.assertEqual(P('/').root, sep) + self.assertEqual(P('/a/b').root, sep) + + @needs_posix + def test_root_posix(self): + P = self.cls + self.assertEqual(P('/a/b').root, '/') + # POSIX special case for two leading slashes. 
+ self.assertEqual(P('//a/b').root, '//') + + @needs_windows + def test_root_windows(self): + P = self.cls + self.assertEqual(P('c:').root, '') + self.assertEqual(P('c:a/b').root, '') + self.assertEqual(P('c:/').root, '\\') + self.assertEqual(P('c:/a/b/').root, '\\') + self.assertEqual(P('//a/b').root, '\\') + self.assertEqual(P('//a/b/').root, '\\') + self.assertEqual(P('//a/b/c/d').root, '\\') + def test_name_empty(self): P = self.cls self.assertEqual(P('').name, '') @@ -306,6 +507,18 @@ def test_stem_empty(self): self.assertEqual(P('').stem, '') self.assertEqual(P('.').stem, '') + @needs_windows + def test_with_name_windows(self): + P = self.cls + self.assertRaises(ValueError, P(r'c:').with_name, 'd.xml') + self.assertRaises(ValueError, P(r'c:\\').with_name, 'd.xml') + self.assertRaises(ValueError, P(r'\\My\Share').with_name, 'd.xml') + # NTFS alternate data streams + self.assertEqual(str(P('a').with_name('d:')), '.\\d:') + self.assertEqual(str(P('a').with_name('d:e')), '.\\d:e') + self.assertEqual(P(r'c:a\b').with_name('d:'), P(r'c:a\d:')) + self.assertEqual(P(r'c:a\b').with_name('d:e'), P(r'c:a\d:e')) + def test_with_name_empty(self): P = self.cls self.assertRaises(ValueError, P('').with_name, 'd.xml') @@ -314,6 +527,18 @@ def test_with_name_empty(self): self.assertRaises(ValueError, P('a/b').with_name, '') self.assertRaises(ValueError, P('a/b').with_name, '.') + @needs_windows + def test_with_stem_windows(self): + P = self.cls + self.assertRaises(ValueError, P('c:').with_stem, 'd') + self.assertRaises(ValueError, P('c:/').with_stem, 'd') + self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') + # NTFS alternate data streams + self.assertEqual(str(P('a').with_stem('d:')), '.\\d:') + self.assertEqual(str(P('a').with_stem('d:e')), '.\\d:e') + self.assertEqual(P('c:a/b').with_stem('d:'), P('c:a/d:')) + self.assertEqual(P('c:a/b').with_stem('d:e'), P('c:a/d:e')) + def test_with_stem_empty(self): P = self.cls self.assertRaises(ValueError, P('').with_stem, 
'd') @@ -322,30 +547,102 @@ def test_with_stem_empty(self): self.assertRaises(ValueError, P('a/b').with_stem, '') self.assertRaises(ValueError, P('a/b').with_stem, '.') - def test_relative_to_several_args(self): - P = self.cls - p = P('a/b') - with self.assertWarns(DeprecationWarning): - p.relative_to('a', 'b') - p.relative_to('a', 'b', walk_up=True) - - def test_is_relative_to_several_args(self): - P = self.cls - p = P('a/b') - with self.assertWarns(DeprecationWarning): - p.is_relative_to('a', 'b') - def test_is_reserved_deprecated(self): P = self.cls p = P('a/b') with self.assertWarns(DeprecationWarning): p.is_reserved() + def test_full_match_case_sensitive(self): + P = self.cls + self.assertFalse(P('A.py').full_match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').full_match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').full_match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').full_match('/A/*/*.Py', case_sensitive=False)) + def test_match_empty(self): P = self.cls self.assertRaises(ValueError, P('a').match, '') self.assertRaises(ValueError, P('a').match, '.') + def test_match_common(self): + P = self.cls + # Simple relative pattern. + self.assertTrue(P('b.py').match('b.py')) + self.assertTrue(P('a/b.py').match('b.py')) + self.assertTrue(P('/a/b.py').match('b.py')) + self.assertFalse(P('a.py').match('b.py')) + self.assertFalse(P('b/py').match('b.py')) + self.assertFalse(P('/a.py').match('b.py')) + self.assertFalse(P('b.py/c').match('b.py')) + # Wildcard relative pattern. + self.assertTrue(P('b.py').match('*.py')) + self.assertTrue(P('a/b.py').match('*.py')) + self.assertTrue(P('/a/b.py').match('*.py')) + self.assertFalse(P('b.pyc').match('*.py')) + self.assertFalse(P('b./py').match('*.py')) + self.assertFalse(P('b.py/c').match('*.py')) + # Multi-part relative pattern. 
+ self.assertTrue(P('ab/c.py').match('a*/*.py')) + self.assertTrue(P('/d/ab/c.py').match('a*/*.py')) + self.assertFalse(P('a.py').match('a*/*.py')) + self.assertFalse(P('/dab/c.py').match('a*/*.py')) + self.assertFalse(P('ab/c.py/d').match('a*/*.py')) + # Absolute pattern. + self.assertTrue(P('/b.py').match('/*.py')) + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('a/b.py').match('/*.py')) + self.assertFalse(P('/a/b.py').match('/*.py')) + # Multi-part absolute pattern. + self.assertTrue(P('/a/b.py').match('/a/*.py')) + self.assertFalse(P('/ab.py').match('/a/*.py')) + self.assertFalse(P('/a/b/c.py').match('/a/*.py')) + # Multi-part glob-style pattern. + self.assertFalse(P('/a/b/c.py').match('/**/*.py')) + self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) + # Case-sensitive flag + self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) + # Matching against empty path + self.assertFalse(P('').match('*')) + self.assertFalse(P('').match('**')) + self.assertFalse(P('').match('**/*')) + + @needs_posix + def test_match_posix(self): + P = self.cls + self.assertFalse(P('A.py').match('a.PY')) + + @needs_windows + def test_match_windows(self): + P = self.cls + # Absolute patterns. + self.assertTrue(P('c:/b.py').match('*:/*.py')) + self.assertTrue(P('c:/b.py').match('c:/*.py')) + self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('b.py').match('c:*.py')) + self.assertFalse(P('b.py').match('c:/*.py')) + self.assertFalse(P('c:b.py').match('/*.py')) + self.assertFalse(P('c:b.py').match('c:/*.py')) + self.assertFalse(P('/b.py').match('c:*.py')) + self.assertFalse(P('/b.py').match('c:/*.py')) + # UNC patterns. 
+ self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) + self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) + # Case-insensitivity. + self.assertTrue(P('B.py').match('b.PY')) + self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) + self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) + # Path anchor doesn't match pattern anchor + self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' + self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' + self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' + @needs_posix def test_parse_path_posix(self): check = self._check_parse_path @@ -369,9 +666,9 @@ def test_eq_posix(self): @needs_posix def test_as_uri_posix(self): P = self.cls - self.assertEqual(P('/').as_uri(), 'file:///') - self.assertEqual(P('/a/b.c').as_uri(), 'file:///a/b.c') - self.assertEqual(P('/a/b%#c').as_uri(), 'file:///a/b%25%23c') + self.assertEqual(self.make_uri(P('/')), 'file:///') + self.assertEqual(self.make_uri(P('/a/b.c')), 'file:///a/b.c') + self.assertEqual(self.make_uri(P('/a/b%#c')), 'file:///a/b%25%23c') @needs_posix def test_as_uri_non_ascii(self): @@ -381,7 +678,7 @@ def test_as_uri_non_ascii(self): os.fsencode('\xe9') except UnicodeEncodeError: self.skipTest("\\xe9 cannot be encoded to the filesystem encoding") - self.assertEqual(P('/a/b\xe9').as_uri(), + self.assertEqual(self.make_uri(P('/a/b\xe9')), 'file:///a/b' + quote_from_bytes(os.fsencode('\xe9'))) @needs_posix @@ -475,21 +772,21 @@ def test_eq_windows(self): def test_as_uri_windows(self): P = self.cls with self.assertRaises(ValueError): - P('/a/b').as_uri() + self.make_uri(P('/a/b')) with self.assertRaises(ValueError): - P('c:a/b').as_uri() - self.assertEqual(P('c:/').as_uri(), 'file:///c:/') - self.assertEqual(P('c:/a/b.c').as_uri(), 'file:///c:/a/b.c') - 
self.assertEqual(P('c:/a/b%#c').as_uri(), 'file:///c:/a/b%25%23c') - self.assertEqual(P('//some/share/').as_uri(), 'file://some/share/') - self.assertEqual(P('//some/share/a/b.c').as_uri(), + self.make_uri(P('c:a/b')) + self.assertEqual(self.make_uri(P('c:/')), 'file:///c:/') + self.assertEqual(self.make_uri(P('c:/a/b.c')), 'file:///c:/a/b.c') + self.assertEqual(self.make_uri(P('c:/a/b%#c')), 'file:///c:/a/b%25%23c') + self.assertEqual(self.make_uri(P('//some/share/')), 'file://some/share/') + self.assertEqual(self.make_uri(P('//some/share/a/b.c')), 'file://some/share/a/b.c') from urllib.parse import quote_from_bytes QUOTED_FS_NONASCII = quote_from_bytes(os.fsencode(FS_NONASCII)) - self.assertEqual(P('c:/a/b' + FS_NONASCII).as_uri(), + self.assertEqual(self.make_uri(P('c:/a/b' + FS_NONASCII)), 'file:///c:/a/b' + QUOTED_FS_NONASCII) - self.assertEqual(P('//some/share/a/b%#c' + FS_NONASCII).as_uri(), + self.assertEqual(self.make_uri(P('//some/share/a/b%#c' + FS_NONASCII)), 'file://some/share/a/b%25%23c' + QUOTED_FS_NONASCII) @needs_windows @@ -510,6 +807,311 @@ def assertOrderedEqual(a, b): self.assertFalse(p < q) self.assertFalse(p > q) + @needs_posix + def test_is_absolute_posix(self): + P = self.cls + self.assertFalse(P('').is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertTrue(P('/').is_absolute()) + self.assertTrue(P('/a').is_absolute()) + self.assertTrue(P('/a/b/').is_absolute()) + self.assertTrue(P('//a').is_absolute()) + self.assertTrue(P('//a/b').is_absolute()) + + @needs_windows + def test_is_absolute_windows(self): + P = self.cls + # Under NT, only paths with both a drive and a root are absolute. 
+ self.assertFalse(P().is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertFalse(P('/').is_absolute()) + self.assertFalse(P('/a').is_absolute()) + self.assertFalse(P('/a/b/').is_absolute()) + self.assertFalse(P('c:').is_absolute()) + self.assertFalse(P('c:a').is_absolute()) + self.assertFalse(P('c:a/b/').is_absolute()) + self.assertTrue(P('c:/').is_absolute()) + self.assertTrue(P('c:/a').is_absolute()) + self.assertTrue(P('c:/a/b/').is_absolute()) + # UNC paths are absolute by definition. + self.assertTrue(P('//').is_absolute()) + self.assertTrue(P('//a').is_absolute()) + self.assertTrue(P('//a/b').is_absolute()) + self.assertTrue(P('//a/b/').is_absolute()) + self.assertTrue(P('//a/b/c').is_absolute()) + self.assertTrue(P('//a/b/c/d').is_absolute()) + self.assertTrue(P('//?/UNC/').is_absolute()) + self.assertTrue(P('//?/UNC/spam').is_absolute()) + + def test_relative_to_common(self): + P = self.cls + p = P('a/b') + self.assertRaises(TypeError, p.relative_to) + self.assertRaises(TypeError, p.relative_to, b'a') + self.assertEqual(p.relative_to(P('')), P('a/b')) + self.assertEqual(p.relative_to(''), P('a/b')) + self.assertEqual(p.relative_to(P('a')), P('b')) + self.assertEqual(p.relative_to('a'), P('b')) + self.assertEqual(p.relative_to('a/'), P('b')) + self.assertEqual(p.relative_to(P('a/b')), P('')) + self.assertEqual(p.relative_to('a/b'), P('')) + self.assertEqual(p.relative_to(P(''), walk_up=True), P('a/b')) + self.assertEqual(p.relative_to('', walk_up=True), P('a/b')) + self.assertEqual(p.relative_to(P('a'), walk_up=True), P('b')) + self.assertEqual(p.relative_to('a', walk_up=True), P('b')) + self.assertEqual(p.relative_to('a/', walk_up=True), P('b')) + self.assertEqual(p.relative_to(P('a/b'), walk_up=True), P('')) + self.assertEqual(p.relative_to('a/b', walk_up=True), P('')) + self.assertEqual(p.relative_to(P('a/c'), walk_up=True), P('../b')) + self.assertEqual(p.relative_to('a/c', walk_up=True), 
P('../b')) + self.assertEqual(p.relative_to(P('a/b/c'), walk_up=True), P('..')) + self.assertEqual(p.relative_to('a/b/c', walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('c'), walk_up=True), P('../a/b')) + self.assertEqual(p.relative_to('c', walk_up=True), P('../a/b')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, P('c')) + self.assertRaises(ValueError, p.relative_to, P('a/b/c')) + self.assertRaises(ValueError, p.relative_to, P('a/c')) + self.assertRaises(ValueError, p.relative_to, P('/a')) + self.assertRaises(ValueError, p.relative_to, P("../a")) + self.assertRaises(ValueError, p.relative_to, P("a/..")) + self.assertRaises(ValueError, p.relative_to, P("/a/..")) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/a'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) + p = P('/a/b') + self.assertEqual(p.relative_to(P('/')), P('a/b')) + self.assertEqual(p.relative_to('/'), P('a/b')) + self.assertEqual(p.relative_to(P('/a')), P('b')) + self.assertEqual(p.relative_to('/a'), P('b')) + self.assertEqual(p.relative_to('/a/'), P('b')) + self.assertEqual(p.relative_to(P('/a/b')), P('')) + self.assertEqual(p.relative_to('/a/b'), P('')) + self.assertEqual(p.relative_to(P('/'), walk_up=True), P('a/b')) + self.assertEqual(p.relative_to('/', walk_up=True), P('a/b')) + self.assertEqual(p.relative_to(P('/a'), walk_up=True), P('b')) + self.assertEqual(p.relative_to('/a', walk_up=True), P('b')) + self.assertEqual(p.relative_to('/a/', walk_up=True), P('b')) + self.assertEqual(p.relative_to(P('/a/b'), walk_up=True), P('')) + self.assertEqual(p.relative_to('/a/b', walk_up=True), P('')) + self.assertEqual(p.relative_to(P('/a/c'), walk_up=True), P('../b')) + self.assertEqual(p.relative_to('/a/c', walk_up=True), 
P('../b')) + self.assertEqual(p.relative_to(P('/a/b/c'), walk_up=True), P('..')) + self.assertEqual(p.relative_to('/a/b/c', walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('/c'), walk_up=True), P('../a/b')) + self.assertEqual(p.relative_to('/c', walk_up=True), P('../a/b')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, P('/c')) + self.assertRaises(ValueError, p.relative_to, P('/a/b/c')) + self.assertRaises(ValueError, p.relative_to, P('/a/c')) + self.assertRaises(ValueError, p.relative_to, P('')) + self.assertRaises(ValueError, p.relative_to, '') + self.assertRaises(ValueError, p.relative_to, P('a')) + self.assertRaises(ValueError, p.relative_to, P("../a")) + self.assertRaises(ValueError, p.relative_to, P("a/..")) + self.assertRaises(ValueError, p.relative_to, P("/a/..")) + self.assertRaises(ValueError, p.relative_to, P(''), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('a'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) + + @needs_windows + def test_relative_to_windows(self): + P = self.cls + p = P('C:Foo/Bar') + self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO')), P('Bar')) + self.assertEqual(p.relative_to('c:foO'), P('Bar')) + self.assertEqual(p.relative_to('c:foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:foO/baR')), P()) + self.assertEqual(p.relative_to('c:foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO/', walk_up=True), P('Bar')) + 
self.assertEqual(p.relative_to(P('c:foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to(P('C:Foo/Bar/Baz'), walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('C:Foo/Baz'), walk_up=True), P('../Bar')) + self.assertEqual(p.relative_to(P('C:Baz/Bar'), walk_up=True), P('../../Foo/Bar')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, P()) + self.assertRaises(ValueError, p.relative_to, '') + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) + self.assertRaises(ValueError, p.relative_to, P(), walk_up=True) + self.assertRaises(ValueError, p.relative_to, '', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) + p = P('C:/Foo/Bar') + self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) + self.assertEqual(p.relative_to('c:/foO'), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) + self.assertEqual(p.relative_to('c:/foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO'), 
walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('c:/foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:/foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to('C:/Baz', walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) + self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, 'c:') + self.assertRaises(ValueError, p.relative_to, P('c:')) + self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo')) + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('d:/')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) + self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo'), walk_up=True) + # UNC paths. 
+ p = P('//Server/Share/Foo/Bar') + self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare'), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo')), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo'), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar'), walk_up=True), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar', walk_up=True), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare/bar'), walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/bar', walk_up=True), P('../Foo/Bar')) + # Unrelated paths. 
+ self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) + + def test_is_relative_to_common(self): + P = self.cls + p = P('a/b') + self.assertRaises(TypeError, p.is_relative_to) + self.assertRaises(TypeError, p.is_relative_to, b'a') + self.assertTrue(p.is_relative_to(P(''))) + self.assertTrue(p.is_relative_to('')) + self.assertTrue(p.is_relative_to(P('a'))) + self.assertTrue(p.is_relative_to('a/')) + self.assertTrue(p.is_relative_to(P('a/b'))) + self.assertTrue(p.is_relative_to('a/b')) + # Unrelated paths. + self.assertFalse(p.is_relative_to(P('c'))) + self.assertFalse(p.is_relative_to(P('a/b/c'))) + self.assertFalse(p.is_relative_to(P('a/c'))) + self.assertFalse(p.is_relative_to(P('/a'))) + p = P('/a/b') + self.assertTrue(p.is_relative_to(P('/'))) + self.assertTrue(p.is_relative_to('/')) + self.assertTrue(p.is_relative_to(P('/a'))) + self.assertTrue(p.is_relative_to('/a')) + self.assertTrue(p.is_relative_to('/a/')) + self.assertTrue(p.is_relative_to(P('/a/b'))) + self.assertTrue(p.is_relative_to('/a/b')) + # Unrelated paths. 
+ self.assertFalse(p.is_relative_to(P('/c'))) + self.assertFalse(p.is_relative_to(P('/a/b/c'))) + self.assertFalse(p.is_relative_to(P('/a/c'))) + self.assertFalse(p.is_relative_to(P(''))) + self.assertFalse(p.is_relative_to('')) + self.assertFalse(p.is_relative_to(P('a'))) + + @needs_windows + def test_is_relative_to_windows(self): + P = self.cls + p = P('C:Foo/Bar') + self.assertTrue(p.is_relative_to(P('c:'))) + self.assertTrue(p.is_relative_to('c:')) + self.assertTrue(p.is_relative_to(P('c:foO'))) + self.assertTrue(p.is_relative_to('c:foO')) + self.assertTrue(p.is_relative_to('c:foO/')) + self.assertTrue(p.is_relative_to(P('c:foO/baR'))) + self.assertTrue(p.is_relative_to('c:foO/baR')) + # Unrelated paths. + self.assertFalse(p.is_relative_to(P())) + self.assertFalse(p.is_relative_to('')) + self.assertFalse(p.is_relative_to(P('d:'))) + self.assertFalse(p.is_relative_to(P('/'))) + self.assertFalse(p.is_relative_to(P('Foo'))) + self.assertFalse(p.is_relative_to(P('/Foo'))) + self.assertFalse(p.is_relative_to(P('C:/Foo'))) + self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) + self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) + p = P('C:/Foo/Bar') + self.assertTrue(p.is_relative_to(P('c:/'))) + self.assertTrue(p.is_relative_to(P('c:/foO'))) + self.assertTrue(p.is_relative_to('c:/foO/')) + self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) + self.assertTrue(p.is_relative_to('c:/foO/baR')) + # Unrelated paths. + self.assertFalse(p.is_relative_to('c:')) + self.assertFalse(p.is_relative_to(P('C:/Baz'))) + self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) + self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) + self.assertFalse(p.is_relative_to(P('C:Foo'))) + self.assertFalse(p.is_relative_to(P('d:'))) + self.assertFalse(p.is_relative_to(P('d:/'))) + self.assertFalse(p.is_relative_to(P('/'))) + self.assertFalse(p.is_relative_to(P('/Foo'))) + self.assertFalse(p.is_relative_to(P('//C/Foo'))) + # UNC paths. 
+ p = P('//Server/Share/Foo/Bar') + self.assertTrue(p.is_relative_to(P('//sErver/sHare'))) + self.assertTrue(p.is_relative_to('//sErver/sHare')) + self.assertTrue(p.is_relative_to('//sErver/sHare/')) + self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo'))) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo')) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/')) + self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo/Bar'))) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/Bar')) + # Unrelated paths. + self.assertFalse(p.is_relative_to(P('/Server/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('c:/Server/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) + class PurePosixPathTest(PurePathTest): cls = pathlib.PurePosixPath @@ -531,37 +1133,79 @@ class cls(pathlib.PurePath): # Tests for the concrete classes. # -class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest): +class PathTest(PurePathTest): """Tests for the FS-accessing functionalities of the Path classes.""" cls = pathlib.Path can_symlink = os_helper.can_symlink() def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') super().setUp() - os.chmod(self.parser.join(self.base, 'dirE'), 0) + os.mkdir(self.base) + os.mkdir(os.path.join(self.base, 'dirA')) + os.mkdir(os.path.join(self.base, 'dirB')) + os.mkdir(os.path.join(self.base, 'dirC')) + os.mkdir(os.path.join(self.base, 'dirC', 'dirD')) + os.mkdir(os.path.join(self.base, 'dirE')) + with open(os.path.join(self.base, 'fileA'), 'wb') as f: + f.write(b"this is file A\n") + with open(os.path.join(self.base, 'dirB', 'fileB'), 'wb') as f: + f.write(b"this is file B\n") + with open(os.path.join(self.base, 'dirC', 'fileC'), 'wb') as f: + f.write(b"this is file C\n") + with open(os.path.join(self.base, 'dirC', 'novel.txt'), 'wb') as f: + f.write(b"this is a novel\n") + 
with open(os.path.join(self.base, 'dirC', 'dirD', 'fileD'), 'wb') as f: + f.write(b"this is file D\n") + os.chmod(os.path.join(self.base, 'dirE'), 0) + if self.can_symlink: + # Relative symlinks. + os.symlink('fileA', os.path.join(self.base, 'linkA')) + os.symlink('non-existing', os.path.join(self.base, 'brokenLink')) + os.symlink('dirB', + os.path.join(self.base, 'linkB'), + target_is_directory=True) + os.symlink(os.path.join('..', 'dirB'), + os.path.join(self.base, 'dirA', 'linkC'), + target_is_directory=True) + # This one goes upwards, creating a loop. + os.symlink(os.path.join('..', 'dirB'), + os.path.join(self.base, 'dirB', 'linkD'), + target_is_directory=True) + # Broken symlink (pointing to itself). + os.symlink('brokenLinkLoop', os.path.join(self.base, 'brokenLinkLoop')) def tearDown(self): - os.chmod(self.parser.join(self.base, 'dirE'), 0o777) + os.chmod(os.path.join(self.base, 'dirE'), 0o777) os_helper.rmtree(self.base) + def assertFileNotFound(self, func, *args, **kwargs): + with self.assertRaises(FileNotFoundError) as cm: + func(*args, **kwargs) + self.assertEqual(cm.exception.errno, errno.ENOENT) + + def assertEqualNormCase(self, path_a, path_b): + normcase = self.parser.normcase + self.assertEqual(normcase(path_a), normcase(path_b)) + def tempdir(self): d = os_helper._longpath(tempfile.mkdtemp(suffix='-dirD', dir=os.getcwd())) self.addCleanup(os_helper.rmtree, d) return d - def test_matches_pathbase_api(self): - our_names = {name for name in dir(self.cls) if name[0] != '_'} - our_names.remove('is_reserved') # only present in PurePath - path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'} - self.assertEqual(our_names, path_names) - for attr_name in our_names: + + def test_matches_writablepath_docstrings(self): + path_names = {name for name in dir(pathlib.types._WritablePath) if name[0] != '_'} + for attr_name in path_names: if attr_name == 'parser': - # On Windows, Path.parser is ntpath, but PathBase.parser is + # On Windows, 
Path.parser is ntpath, but WritablePath.parser is # posixpath, and so their docstrings differ. continue our_attr = getattr(self.cls, attr_name) - path_attr = getattr(pathlib._abc.PathBase, attr_name) + path_attr = getattr(pathlib.types._WritablePath, attr_name) self.assertEqual(our_attr.__doc__, path_attr.__doc__) def test_concrete_class(self): @@ -675,12 +1319,648 @@ def with_segments(self, *pathsegments): for dirpath, dirnames, filenames in p.walk(): self.assertEqual(42, dirpath.session_id) + def test_open_common(self): + p = self.cls(self.base) + with (p / 'fileA').open('r') as f: + self.assertIsInstance(f, io.TextIOBase) + self.assertEqual(f.read(), "this is file A\n") + with (p / 'fileA').open('rb') as f: + self.assertIsInstance(f, io.BufferedIOBase) + self.assertEqual(f.read().strip(), b"this is file A") + def test_open_unbuffered(self): p = self.cls(self.base) with (p / 'fileA').open('rb', buffering=0) as f: self.assertIsInstance(f, io.RawIOBase) self.assertEqual(f.read().strip(), b"this is file A") + def test_copy_file_preserve_metadata(self): + base = self.cls(self.base) + source = base / 'fileA' + if hasattr(os, 'chmod'): + os.chmod(source, stat.S_IRWXU | stat.S_IRWXO) + if hasattr(os, 'chflags') and hasattr(stat, 'UF_NODUMP'): + os.chflags(source, stat.UF_NODUMP) + source_st = source.stat() + target = base / 'copyA' + source.copy(target, preserve_metadata=True) + self.assertTrue(target.exists()) + self.assertEqual(source.read_text(), target.read_text()) + target_st = target.stat() + self.assertLessEqual(source_st.st_atime, target_st.st_atime) + self.assertLessEqual(source_st.st_mtime, target_st.st_mtime) + self.assertEqual(source_st.st_mode, target_st.st_mode) + if hasattr(source_st, 'st_flags'): + self.assertEqual(source_st.st_flags, target_st.st_flags) + + @needs_symlinks + def test_copy_file_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = 
source.copy(target) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + + @needs_symlinks + def test_copy_file_to_existing_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + + @os_helper.skip_unless_xattr + def test_copy_file_preserve_metadata_xattrs(self): + base = self.cls(self.base) + source = base / 'fileA' + os.setxattr(source, b'user.foo', b'42') + target = base / 'copyA' + source.copy(target, preserve_metadata=True) + self.assertEqual(os.getxattr(target, b'user.foo'), b'42') + + @needs_symlinks + def test_copy_symlink_follow_symlinks_true(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertFalse(target.is_symlink()) + self.assertEqual(source.read_text(), target.read_text()) + + @needs_symlinks + def test_copy_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, 
source.copy, source) + + @needs_symlinks + def test_copy_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_directory_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, source, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_into_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'linkB' / 'copyB' + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + self.assertFalse(target.exists()) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC') + target.symlink_to(base / 'fileA') + self.assertRaises(FileExistsError, source.copy, target) 
+ self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC' / 'dirD') + target.symlink_to(base / 'dirC') + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'source' + target = base / 'target' + + source.mkdir() + source.joinpath('link').symlink_to('nonexistent') + + self.assertRaises(FileNotFoundError, source.copy, target) + + target2 = base / 'target2' + result = source.copy(target2, follow_symlinks=False) + self.assertEqual(result, target2) + self.assertTrue(target2.joinpath('link').is_symlink()) + self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) + + @needs_symlinks + def test_copy_link_preserve_metadata(self): + base = self.cls(self.base) + source = base / 'linkA' + if hasattr(os, 'lchmod'): + os.lchmod(source, stat.S_IRWXU | stat.S_IRWXO) + if hasattr(os, 'lchflags') and hasattr(stat, 'UF_NODUMP'): + os.lchflags(source, stat.UF_NODUMP) + source_st = source.lstat() + target = base / 'copyA' + source.copy(target, follow_symlinks=False, preserve_metadata=True) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + target_st = target.lstat() + self.assertLessEqual(source_st.st_atime, target_st.st_atime) + self.assertLessEqual(source_st.st_mtime, target_st.st_mtime) + self.assertEqual(source_st.st_mode, target_st.st_mode) + if hasattr(source_st, 'st_flags'): + self.assertEqual(source_st.st_flags, target_st.st_flags) + + def test_copy_error_handling(self): + def make_raiser(err): + def raiser(*args, **kwargs): + raise OSError(err, 
os.strerror(err)) + return raiser + + base = self.cls(self.base) + source = base / 'fileA' + target = base / 'copyA' + + # Raise non-fatal OSError from all available fast copy functions. + with contextlib.ExitStack() as ctx: + if fcntl and hasattr(fcntl, 'FICLONE'): + ctx.enter_context(mock.patch('fcntl.ioctl', make_raiser(errno.EXDEV))) + if posix and hasattr(posix, '_fcopyfile'): + ctx.enter_context(mock.patch('posix._fcopyfile', make_raiser(errno.ENOTSUP))) + if hasattr(os, 'copy_file_range'): + ctx.enter_context(mock.patch('os.copy_file_range', make_raiser(errno.EXDEV))) + if hasattr(os, 'sendfile'): + ctx.enter_context(mock.patch('os.sendfile', make_raiser(errno.ENOTSOCK))) + + source.copy(target) + self.assertTrue(target.exists()) + self.assertEqual(source.read_text(), target.read_text()) + + # Raise fatal OSError from first available fast copy function. + if fcntl and hasattr(fcntl, 'FICLONE'): + patchpoint = 'fcntl.ioctl' + elif posix and hasattr(posix, '_fcopyfile'): + patchpoint = 'posix._fcopyfile' + elif hasattr(os, 'copy_file_range'): + patchpoint = 'os.copy_file_range' + elif hasattr(os, 'sendfile'): + patchpoint = 'os.sendfile' + else: + return + with mock.patch(patchpoint, make_raiser(errno.ENOENT)): + self.assertRaises(FileNotFoundError, source.copy, target) + + @unittest.skipIf(sys.platform == "win32" or sys.platform == "wasi", "directories are always readable on Windows and WASI") + @unittest.skipIf(root_in_posix, "test fails with root privilege") + def test_copy_dir_no_read_permission(self): + base = self.cls(self.base) + source = base / 'dirE' + target = base / 'copyE' + self.assertRaises(PermissionError, source.copy, target) + self.assertFalse(target.exists()) + + def test_copy_dir_preserve_metadata(self): + base = self.cls(self.base) + source = base / 'dirC' + if hasattr(os, 'chmod'): + os.chmod(source / 'dirD', stat.S_IRWXU | stat.S_IRWXO) + if hasattr(os, 'chflags') and hasattr(stat, 'UF_NODUMP'): + os.chflags(source / 'fileC', 
stat.UF_NODUMP) + target = base / 'copyA' + + subpaths = ['.', 'fileC', 'dirD', 'dirD/fileD'] + source_sts = [source.joinpath(subpath).stat() for subpath in subpaths] + source.copy(target, preserve_metadata=True) + target_sts = [target.joinpath(subpath).stat() for subpath in subpaths] + + for source_st, target_st in zip(source_sts, target_sts): + self.assertLessEqual(source_st.st_atime, target_st.st_atime) + self.assertLessEqual(source_st.st_mtime, target_st.st_mtime) + self.assertEqual(source_st.st_mode, target_st.st_mode) + if hasattr(source_st, 'st_flags'): + self.assertEqual(source_st.st_flags, target_st.st_flags) + + @os_helper.skip_unless_xattr + def test_copy_dir_preserve_metadata_xattrs(self): + base = self.cls(self.base) + source = base / 'dirC' + source_file = source.joinpath('dirD', 'fileD') + os.setxattr(source_file, b'user.foo', b'42') + target = base / 'copyA' + source.copy(target, preserve_metadata=True) + target_file = target.joinpath('dirD', 'fileD') + self.assertEqual(os.getxattr(target_file, b'user.foo'), b'42') + + @needs_symlinks + def test_move_file_symlink(self): + base = self.cls(self.base) + source = base / 'linkA' + source_readlink = source.readlink() + target = base / 'linkA_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def test_move_file_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dir_symlink(self): + base = self.cls(self.base) + source = base / 'linkB' + source_readlink = source.readlink() + target = base / 'linkB_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def 
test_move_dir_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'brokenLink' + source_readlink = source.readlink() + target = base / 'brokenLink_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + def test_move_file(self): + base = self.cls(self.base) + source = base / 'fileA' + source_text = source.read_text() + target = base / 'fileA_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.exists()) + self.assertEqual(source_text, target.read_text()) + + @patch_replace + def test_move_file_other_fs(self): + self.test_move_file() + + def test_move_file_to_file(self): + base = self.cls(self.base) + source = base / 'fileA' + source_text = source.read_text() + target = base / 'dirB' / 'fileB' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.exists()) + self.assertEqual(source_text, target.read_text()) + + @patch_replace + def test_move_file_to_file_other_fs(self): + self.test_move_file_to_file() + + def test_move_file_to_dir(self): + base = self.cls(self.base) + source = base / 'fileA' + target = base / 'dirB' + self.assertRaises(OSError, source.move, target) + + @patch_replace + def test_move_file_to_dir_other_fs(self): + self.test_move_file_to_dir() + + def test_move_file_to_itself(self): + base = self.cls(self.base) + source = base / 'fileA' + self.assertRaises(OSError, source.move, source) + + def test_move_dir(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'dirC_moved' + result = source.move(target) + self.assertEqual(result, target) + 
self.assertFalse(source.exists()) + self.assertTrue(target.is_dir()) + self.assertTrue(target.joinpath('dirD').is_dir()) + self.assertTrue(target.joinpath('dirD', 'fileD').is_file()) + self.assertEqual(target.joinpath('dirD', 'fileD').read_text(), + "this is file D\n") + self.assertTrue(target.joinpath('fileC').is_file()) + self.assertTrue(target.joinpath('fileC').read_text(), + "this is file C\n") + + @patch_replace + def test_move_dir_other_fs(self): + self.test_move_dir() + + def test_move_dir_to_dir(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'dirB' + self.assertRaises(OSError, source.move, target) + self.assertTrue(source.exists()) + self.assertTrue(target.exists()) + + @patch_replace + def test_move_dir_to_dir_other_fs(self): + self.test_move_dir_to_dir() + + def test_move_dir_to_itself(self): + base = self.cls(self.base) + source = base / 'dirC' + self.assertRaises(OSError, source.move, source) + self.assertTrue(source.exists()) + + def test_move_dir_into_itself(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'dirC' / 'bar' + self.assertRaises(OSError, source.move, target) + self.assertTrue(source.exists()) + self.assertFalse(target.exists()) + + @patch_replace + def test_move_dir_into_itself_other_fs(self): + self.test_move_dir_into_itself() + + @patch_replace + @needs_symlinks + def test_move_file_symlink_other_fs(self): + self.test_move_file_symlink() + + @patch_replace + @needs_symlinks + def test_move_file_symlink_to_itself_other_fs(self): + self.test_move_file_symlink_to_itself() + + @patch_replace + @needs_symlinks + def test_move_dir_symlink_other_fs(self): + self.test_move_dir_symlink() + + @patch_replace + @needs_symlinks + def test_move_dir_symlink_to_itself_other_fs(self): + self.test_move_dir_symlink_to_itself() + + @patch_replace + @needs_symlinks + def test_move_dangling_symlink_other_fs(self): + self.test_move_dangling_symlink() + + def test_move_into(self): + base = 
self.cls(self.base) + source = base / 'fileA' + source_text = source.read_text() + target_dir = base / 'dirA' + result = source.move_into(target_dir) + self.assertEqual(result, target_dir / 'fileA') + self.assertFalse(source.exists()) + self.assertTrue(result.exists()) + self.assertEqual(source_text, result.read_text()) + + @patch_replace + def test_move_into_other_os(self): + self.test_move_into() + + def test_move_into_empty_name(self): + source = self.cls('') + target_dir = self.base + self.assertRaises(ValueError, source.move_into, target_dir) + + @patch_replace + def test_move_into_empty_name_other_os(self): + self.test_move_into_empty_name() + + @needs_symlinks + def test_complex_symlinks_absolute(self): + self._check_complex_symlinks(self.base) + + @needs_symlinks + def test_complex_symlinks_relative(self): + self._check_complex_symlinks('.') + + @needs_symlinks + def test_complex_symlinks_relative_dot_dot(self): + self._check_complex_symlinks(self.parser.join('dirA', '..')) + + def _check_complex_symlinks(self, link0_target): + # Test solving a non-looping chain of symlinks (issue #19887). + parser = self.parser + P = self.cls(self.base) + P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) + P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) + P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) + P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) + + # Resolve absolute paths. + p = (P / 'link0').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link1').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link2').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link3').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + + # Resolve relative paths. 
+ old_path = os.getcwd() + os.chdir(self.base) + try: + p = self.cls('link0').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = self.cls('link1').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = self.cls('link2').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = self.cls('link3').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + finally: + os.chdir(old_path) + + def _check_resolve(self, p, expected, strict=True): + q = p.resolve(strict) + self.assertEqual(q, expected) + + # This can be used to check both relative and absolute resolutions. + _check_resolve_relative = _check_resolve_absolute = _check_resolve + + @needs_symlinks + def test_resolve_common(self): + P = self.cls + p = P(self.base, 'foo') + with self.assertRaises(OSError) as cm: + p.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOENT) + # Non-strict + parser = self.parser + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo')) + p = P(self.base, 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo', 'in', 'spam')) + p = P(self.base, '..', 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) + # These are all relative symlinks. 
+ p = P(self.base, 'dirB', 'fileB') + self._check_resolve_relative(p, p) + p = P(self.base, 'linkA') + self._check_resolve_relative(p, P(self.base, 'fileA')) + p = P(self.base, 'dirA', 'linkC', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + p = P(self.base, 'dirB', 'linkD', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', + 'spam'), False) + p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', + 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + # Now create absolute symlinks. + d = self.tempdir() + P(self.base, 'dirA', 'linkX').symlink_to(d) + P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) + p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') + self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), + False) + p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. 
+ self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + + @needs_symlinks + def test_resolve_dot(self): + # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ + parser = self.parser + p = self.cls(self.base) + p.joinpath('0').symlink_to('.', target_is_directory=True) + p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) + p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) + q = p / '2' + self.assertEqual(q.resolve(strict=True), p) + r = q / '3' / '4' + self.assertRaises(FileNotFoundError, r.resolve, strict=True) + # Non-strict + self.assertEqual(r.resolve(strict=False), p / '3' / '4') + + def _check_symlink_loop(self, *args): + path = self.cls(*args) + with self.assertRaises(OSError) as cm: + path.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ELOOP) + + @needs_posix + @needs_symlinks + def test_resolve_loop(self): + # Loops with relative symlinks. + self.cls(self.base, 'linkX').symlink_to('linkX/inside') + self._check_symlink_loop(self.base, 'linkX') + self.cls(self.base, 'linkY').symlink_to('linkY') + self._check_symlink_loop(self.base, 'linkY') + self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') + self._check_symlink_loop(self.base, 'linkZ') + # Non-strict + p = self.cls(self.base, 'linkZ', 'foo') + self.assertEqual(p.resolve(strict=False), p) + # Loops with absolute symlinks. 
+ self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) + self._check_symlink_loop(self.base, 'linkU') + self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) + self._check_symlink_loop(self.base, 'linkV') + self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) + self._check_symlink_loop(self.base, 'linkW') + # Non-strict + q = self.cls(self.base, 'linkW', 'foo') + self.assertEqual(q.resolve(strict=False), q) + def test_resolve_nonexist_relative_issue38671(self): p = self.cls('non', 'exist') @@ -691,6 +1971,24 @@ def test_resolve_nonexist_relative_issue38671(self): finally: os.chdir(old_cwd) + @needs_symlinks + def test_readlink(self): + P = self.cls(self.base) + self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) + self.assertEqual((P / 'brokenLink').readlink(), + self.cls('non-existing')) + self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) + self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) + with self.assertRaises(OSError): + (P / 'fileA').readlink() + + @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") + def test_readlink_unsupported(self): + P = self.cls(self.base) + p = P / 'fileA' + with self.assertRaises(pathlib.UnsupportedOperation): + q.readlink(p) + @os_helper.skip_unless_working_chmod def test_chmod(self): p = self.cls(self.base) / 'fileA' @@ -811,6 +2109,174 @@ def test_rmdir(self): self.assertFileNotFound(p.stat) self.assertFileNotFound(p.unlink) + def test_delete_file(self): + p = self.cls(self.base) / 'fileA' + p._delete() + self.assertFalse(p.exists()) + self.assertFileNotFound(p._delete) + + def test_delete_dir(self): + base = self.cls(self.base) + base.joinpath('dirA')._delete() + self.assertFalse(base.joinpath('dirA').exists()) + self.assertFalse(base.joinpath('dirA', 'linkC').exists( + follow_symlinks=False)) + base.joinpath('dirB')._delete() + self.assertFalse(base.joinpath('dirB').exists()) + 
self.assertFalse(base.joinpath('dirB', 'fileB').exists()) + self.assertFalse(base.joinpath('dirB', 'linkD').exists( + follow_symlinks=False)) + base.joinpath('dirC')._delete() + self.assertFalse(base.joinpath('dirC').exists()) + self.assertFalse(base.joinpath('dirC', 'dirD').exists()) + self.assertFalse(base.joinpath('dirC', 'dirD', 'fileD').exists()) + self.assertFalse(base.joinpath('dirC', 'fileC').exists()) + self.assertFalse(base.joinpath('dirC', 'novel.txt').exists()) + + def test_delete_missing(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + # filename is guaranteed not to exist + filename = tmp / 'foo' + self.assertRaises(FileNotFoundError, filename._delete) + + @needs_symlinks + def test_delete_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir_ = tmp / 'dir' + dir_.mkdir() + link = tmp / 'link' + link.symlink_to(dir_) + link._delete() + self.assertTrue(dir_.exists()) + self.assertFalse(link.exists(follow_symlinks=False)) + + @needs_symlinks + def test_delete_inner_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir2 = dir1 / 'dir2' + dir3 = tmp / 'dir3' + for d in dir1, dir2, dir3: + d.mkdir() + file1 = tmp / 'file1' + file1.write_text('foo') + link1 = dir1 / 'link1' + link1.symlink_to(dir2) + link2 = dir1 / 'link2' + link2.symlink_to(dir3) + link3 = dir1 / 'link3' + link3.symlink_to(file1) + # make sure symlinks are removed but not followed + dir1._delete() + self.assertFalse(dir1.exists()) + self.assertTrue(dir3.exists()) + self.assertTrue(file1.exists()) + + @unittest.skipIf(sys.platform[:6] == 'cygwin', + "This test can't be run on Cygwin (issue #1071513).") + @os_helper.skip_if_dac_override + @os_helper.skip_unless_working_chmod + def test_delete_unwritable(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + child_file_path = tmp / 'a' + child_dir_path = tmp / 'b' + child_file_path.write_text("") + child_dir_path.mkdir() + old_dir_mode = tmp.stat().st_mode + 
old_child_file_mode = child_file_path.stat().st_mode + old_child_dir_mode = child_dir_path.stat().st_mode + # Make unwritable. + new_mode = stat.S_IREAD | stat.S_IEXEC + try: + child_file_path.chmod(new_mode) + child_dir_path.chmod(new_mode) + tmp.chmod(new_mode) + + self.assertRaises(PermissionError, tmp._delete) + finally: + tmp.chmod(old_dir_mode) + child_file_path.chmod(old_child_file_mode) + child_dir_path.chmod(old_child_dir_mode) + + @needs_windows + def test_delete_inner_junction(self): + import _winapi + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir2 = dir1 / 'dir2' + dir3 = tmp / 'dir3' + for d in dir1, dir2, dir3: + d.mkdir() + file1 = tmp / 'file1' + file1.write_text('foo') + link1 = dir1 / 'link1' + _winapi.CreateJunction(str(dir2), str(link1)) + link2 = dir1 / 'link2' + _winapi.CreateJunction(str(dir3), str(link2)) + link3 = dir1 / 'link3' + _winapi.CreateJunction(str(file1), str(link3)) + # make sure junctions are removed but not followed + dir1._delete() + self.assertFalse(dir1.exists()) + self.assertTrue(dir3.exists()) + self.assertTrue(file1.exists()) + + @needs_windows + def test_delete_outer_junction(self): + import _winapi + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + src = tmp / 'cheese' + dst = tmp / 'shop' + src.mkdir() + spam = src / 'spam' + spam.write_text('') + _winapi.CreateJunction(str(src), str(dst)) + dst._delete() + self.assertFalse(dst.exists()) + self.assertTrue(spam.exists()) + self.assertTrue(src.exists()) + + @unittest.skipUnless(hasattr(os, "mkfifo"), 'requires os.mkfifo()') + @unittest.skipIf(sys.platform == "vxworks", + "fifo requires special path on VxWorks") + def test_delete_on_named_pipe(self): + p = self.cls(self.base, 'pipe') + os.mkfifo(p) + p._delete() + self.assertFalse(p.exists()) + + p = self.cls(self.base, 'dir') + p.mkdir() + os.mkfifo(p / 'mypipe') + p._delete() + self.assertFalse(p.exists()) + + def test_delete_does_not_choke_on_failing_lstat(self): + try: + orig_lstat = 
os.lstat + tmp = self.cls(self.base, 'delete') + + def raiser(fn, *args, **kwargs): + if fn != tmp: + raise OSError() + else: + return orig_lstat(fn) + + os.lstat = raiser + + tmp.mkdir() + foo = tmp / 'foo' + foo.write_text('') + tmp._delete() + finally: + os.lstat = orig_lstat + @os_helper.skip_unless_hardlink def test_hardlink_to(self): P = self.cls(self.base) @@ -968,7 +2434,6 @@ def test_mkdir_exist_ok_with_parent(self): self.assertTrue(p.exists()) self.assertEqual(p.stat().st_ctime, st_ctime_first) - @unittest.skipIf(is_emscripten, "FS root cannot be modified on Emscripten.") def test_mkdir_exist_ok_root(self): # Issue #25803: A drive root could raise PermissionError on Windows. self.cls('/').resolve().mkdir(exist_ok=True) @@ -1072,13 +2537,200 @@ def test_symlink_to_unsupported(self): with self.assertRaises(pathlib.UnsupportedOperation): q.symlink_to(p) - def test_is_junction(self): + def test_info_exists_caching(self): + p = self.cls(self.base) + q = p / 'myfile' + self.assertFalse(q.info.exists()) + self.assertFalse(q.info.exists(follow_symlinks=False)) + q.write_text('hullo') + self.assertFalse(q.info.exists()) + self.assertFalse(q.info.exists(follow_symlinks=False)) + + def test_info_is_dir_caching(self): + p = self.cls(self.base) + q = p / 'mydir' + self.assertFalse(q.info.is_dir()) + self.assertFalse(q.info.is_dir(follow_symlinks=False)) + q.mkdir() + self.assertFalse(q.info.is_dir()) + self.assertFalse(q.info.is_dir(follow_symlinks=False)) + + def test_info_is_file_caching(self): + p = self.cls(self.base) + q = p / 'myfile' + self.assertFalse(q.info.is_file()) + self.assertFalse(q.info.is_file(follow_symlinks=False)) + q.write_text('hullo') + self.assertFalse(q.info.is_file()) + self.assertFalse(q.info.is_file(follow_symlinks=False)) + + @needs_symlinks + def test_info_is_symlink_caching(self): + p = self.cls(self.base) + q = p / 'mylink' + self.assertFalse(q.info.is_symlink()) + q.symlink_to('blah') + self.assertFalse(q.info.is_symlink()) + + q = p / 
'mylink' # same path, new instance. + self.assertTrue(q.info.is_symlink()) + q.unlink() + self.assertTrue(q.info.is_symlink()) + + def test_stat(self): + statA = self.cls(self.base).joinpath('fileA').stat() + statB = self.cls(self.base).joinpath('dirB', 'fileB').stat() + statC = self.cls(self.base).joinpath('dirC').stat() + # st_mode: files are the same, directory differs. + self.assertIsInstance(statA.st_mode, int) + self.assertEqual(statA.st_mode, statB.st_mode) + self.assertNotEqual(statA.st_mode, statC.st_mode) + self.assertNotEqual(statB.st_mode, statC.st_mode) + # st_ino: all different, + self.assertIsInstance(statA.st_ino, int) + self.assertNotEqual(statA.st_ino, statB.st_ino) + self.assertNotEqual(statA.st_ino, statC.st_ino) + self.assertNotEqual(statB.st_ino, statC.st_ino) + # st_dev: all the same. + self.assertIsInstance(statA.st_dev, int) + self.assertEqual(statA.st_dev, statB.st_dev) + self.assertEqual(statA.st_dev, statC.st_dev) + # other attributes not used by pathlib. + + def test_stat_no_follow_symlinks_nosymlink(self): + p = self.cls(self.base) / 'fileA' + st = p.stat() + self.assertEqual(st, p.stat(follow_symlinks=False)) + + @needs_symlinks + def test_stat_no_follow_symlinks(self): + p = self.cls(self.base) / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.stat(follow_symlinks=False)) + + @needs_symlinks + def test_lstat(self): + p = self.cls(self.base)/ 'linkA' + st = p.stat() + self.assertNotEqual(st, p.lstat()) + + def test_lstat_nosymlink(self): + p = self.cls(self.base) / 'fileA' + st = p.stat() + self.assertEqual(st, p.lstat()) + + def test_exists(self): + P = self.cls + p = P(self.base) + self.assertIs(True, p.exists()) + self.assertIs(True, (p / 'dirA').exists()) + self.assertIs(True, (p / 'fileA').exists()) + self.assertIs(False, (p / 'fileA' / 'bah').exists()) + if self.can_symlink: + self.assertIs(True, (p / 'linkA').exists()) + self.assertIs(True, (p / 'linkB').exists()) + self.assertIs(True, (p / 'linkB' / 'fileB').exists()) + 
self.assertIs(False, (p / 'linkA' / 'bah').exists()) + self.assertIs(False, (p / 'brokenLink').exists()) + self.assertIs(True, (p / 'brokenLink').exists(follow_symlinks=False)) + self.assertIs(False, (p / 'foo').exists()) + self.assertIs(False, P('/xyzzy').exists()) + self.assertIs(False, P(self.base + '\udfff').exists()) + self.assertIs(False, P(self.base + '\x00').exists()) + + def test_is_dir(self): + P = self.cls(self.base) + self.assertTrue((P / 'dirA').is_dir()) + self.assertFalse((P / 'fileA').is_dir()) + self.assertFalse((P / 'non-existing').is_dir()) + self.assertFalse((P / 'fileA' / 'bah').is_dir()) + if self.can_symlink: + self.assertFalse((P / 'linkA').is_dir()) + self.assertTrue((P / 'linkB').is_dir()) + self.assertFalse((P/ 'brokenLink').is_dir()) + self.assertFalse((P / 'dirA\udfff').is_dir()) + self.assertFalse((P / 'dirA\x00').is_dir()) + + def test_is_dir_no_follow_symlinks(self): + P = self.cls(self.base) + self.assertTrue((P / 'dirA').is_dir(follow_symlinks=False)) + self.assertFalse((P / 'fileA').is_dir(follow_symlinks=False)) + self.assertFalse((P / 'non-existing').is_dir(follow_symlinks=False)) + self.assertFalse((P / 'fileA' / 'bah').is_dir(follow_symlinks=False)) + if self.can_symlink: + self.assertFalse((P / 'linkA').is_dir(follow_symlinks=False)) + self.assertFalse((P / 'linkB').is_dir(follow_symlinks=False)) + self.assertFalse((P/ 'brokenLink').is_dir(follow_symlinks=False)) + self.assertFalse((P / 'dirA\udfff').is_dir(follow_symlinks=False)) + self.assertFalse((P / 'dirA\x00').is_dir(follow_symlinks=False)) + + def test_is_file(self): + P = self.cls(self.base) + self.assertTrue((P / 'fileA').is_file()) + self.assertFalse((P / 'dirA').is_file()) + self.assertFalse((P / 'non-existing').is_file()) + self.assertFalse((P / 'fileA' / 'bah').is_file()) + if self.can_symlink: + self.assertTrue((P / 'linkA').is_file()) + self.assertFalse((P / 'linkB').is_file()) + self.assertFalse((P/ 'brokenLink').is_file()) + self.assertFalse((P / 
'fileA\udfff').is_file()) + self.assertFalse((P / 'fileA\x00').is_file()) + + def test_is_file_no_follow_symlinks(self): + P = self.cls(self.base) + self.assertTrue((P / 'fileA').is_file(follow_symlinks=False)) + self.assertFalse((P / 'dirA').is_file(follow_symlinks=False)) + self.assertFalse((P / 'non-existing').is_file(follow_symlinks=False)) + self.assertFalse((P / 'fileA' / 'bah').is_file(follow_symlinks=False)) + if self.can_symlink: + self.assertFalse((P / 'linkA').is_file(follow_symlinks=False)) + self.assertFalse((P / 'linkB').is_file(follow_symlinks=False)) + self.assertFalse((P/ 'brokenLink').is_file(follow_symlinks=False)) + self.assertFalse((P / 'fileA\udfff').is_file(follow_symlinks=False)) + self.assertFalse((P / 'fileA\x00').is_file(follow_symlinks=False)) + + def test_is_symlink(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_symlink()) + self.assertFalse((P / 'dirA').is_symlink()) + self.assertFalse((P / 'non-existing').is_symlink()) + self.assertFalse((P / 'fileA' / 'bah').is_symlink()) + if self.can_symlink: + self.assertTrue((P / 'linkA').is_symlink()) + self.assertTrue((P / 'linkB').is_symlink()) + self.assertTrue((P/ 'brokenLink').is_symlink()) + self.assertIs((P / 'fileA\udfff').is_file(), False) + self.assertIs((P / 'fileA\x00').is_file(), False) + if self.can_symlink: + self.assertIs((P / 'linkA\udfff').is_file(), False) + self.assertIs((P / 'linkA\x00').is_file(), False) + + def test_is_junction_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_junction()) + self.assertFalse((P / 'dirA').is_junction()) + self.assertFalse((P / 'non-existing').is_junction()) + self.assertFalse((P / 'fileA' / 'bah').is_junction()) + self.assertFalse((P / 'fileA\udfff').is_junction()) + self.assertFalse((P / 'fileA\x00').is_junction()) + + def test_is_junction_true(self): P = self.cls(self.base) with mock.patch.object(P.parser, 'isjunction'): self.assertEqual(P.is_junction(), P.parser.isjunction.return_value) 
P.parser.isjunction.assert_called_once_with(P) + def test_is_fifo_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_fifo()) + self.assertFalse((P / 'dirA').is_fifo()) + self.assertFalse((P / 'non-existing').is_fifo()) + self.assertFalse((P / 'fileA' / 'bah').is_fifo()) + self.assertIs((P / 'fileA\udfff').is_fifo(), False) + self.assertIs((P / 'fileA\x00').is_fifo(), False) + @unittest.skipUnless(hasattr(os, "mkfifo"), "os.mkfifo() required") @unittest.skipIf(sys.platform == "vxworks", "fifo requires special path on VxWorks") @@ -1094,6 +2746,15 @@ def test_is_fifo_true(self): self.assertIs(self.cls(self.base, 'myfifo\udfff').is_fifo(), False) self.assertIs(self.cls(self.base, 'myfifo\x00').is_fifo(), False) + def test_is_socket_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_socket()) + self.assertFalse((P / 'dirA').is_socket()) + self.assertFalse((P / 'non-existing').is_socket()) + self.assertFalse((P / 'fileA' / 'bah').is_socket()) + self.assertIs((P / 'fileA\udfff').is_socket(), False) + self.assertIs((P / 'fileA\x00').is_socket(), False) + @unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required") @unittest.skipIf( is_emscripten, "Unix sockets are not implemented on Emscripten." 
@@ -1117,6 +2778,24 @@ def test_is_socket_true(self): self.assertIs(self.cls(self.base, 'mysock\udfff').is_socket(), False) self.assertIs(self.cls(self.base, 'mysock\x00').is_socket(), False) + def test_is_block_device_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_block_device()) + self.assertFalse((P / 'dirA').is_block_device()) + self.assertFalse((P / 'non-existing').is_block_device()) + self.assertFalse((P / 'fileA' / 'bah').is_block_device()) + self.assertIs((P / 'fileA\udfff').is_block_device(), False) + self.assertIs((P / 'fileA\x00').is_block_device(), False) + + def test_is_char_device_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_char_device()) + self.assertFalse((P / 'dirA').is_char_device()) + self.assertFalse((P / 'non-existing').is_char_device()) + self.assertFalse((P / 'fileA' / 'bah').is_char_device()) + self.assertIs((P / 'fileA\udfff').is_char_device(), False) + self.assertIs((P / 'fileA\x00').is_char_device(), False) + def test_is_char_device_true(self): # os.devnull should generally be a char device. 
P = self.cls(os.devnull) @@ -1128,7 +2807,14 @@ def test_is_char_device_true(self): self.assertIs(self.cls(f'{os.devnull}\udfff').is_char_device(), False) self.assertIs(self.cls(f'{os.devnull}\x00').is_char_device(), False) - def test_is_mount_root(self): + def test_is_mount(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_mount()) + self.assertFalse((P / 'dirA').is_mount()) + self.assertFalse((P / 'non-existing').is_mount()) + self.assertFalse((P / 'fileA' / 'bah').is_mount()) + if self.can_symlink: + self.assertFalse((P / 'linkA').is_mount()) if os.name == 'nt': R = self.cls('c:\\') else: @@ -1136,87 +2822,57 @@ def test_is_mount_root(self): self.assertTrue(R.is_mount()) self.assertFalse((R / '\udfff').is_mount()) - def test_passing_kwargs_deprecated(self): - with self.assertWarns(DeprecationWarning): + def test_samefile(self): + parser = self.parser + fileA_path = parser.join(self.base, 'fileA') + fileB_path = parser.join(self.base, 'dirB', 'fileB') + p = self.cls(fileA_path) + pp = self.cls(fileA_path) + q = self.cls(fileB_path) + self.assertTrue(p.samefile(fileA_path)) + self.assertTrue(p.samefile(pp)) + self.assertFalse(p.samefile(fileB_path)) + self.assertFalse(p.samefile(q)) + # Test the non-existent file case + non_existent = parser.join(self.base, 'foo') + r = self.cls(non_existent) + self.assertRaises(FileNotFoundError, p.samefile, r) + self.assertRaises(FileNotFoundError, p.samefile, non_existent) + self.assertRaises(FileNotFoundError, r.samefile, p) + self.assertRaises(FileNotFoundError, r.samefile, non_existent) + self.assertRaises(FileNotFoundError, r.samefile, r) + self.assertRaises(FileNotFoundError, r.samefile, non_existent) + + def test_passing_kwargs_errors(self): + with self.assertRaises(TypeError): self.cls(foo="bar") - def setUpWalk(self): - super().setUpWalk() - sub21_path= self.sub2_path / "SUB21" - tmp5_path = sub21_path / "tmp3" - broken_link3_path = self.sub2_path / "broken_link3" - - os.makedirs(sub21_path) - 
tmp5_path.write_text("I am tmp5, blame test_pathlib.") - if self.can_symlink: - os.symlink(tmp5_path, broken_link3_path) - self.sub2_tree[2].append('broken_link3') - self.sub2_tree[2].sort() - if not is_emscripten: - # Emscripten fails with inaccessible directories. - os.chmod(sub21_path, 0) - try: - os.listdir(sub21_path) - except PermissionError: - self.sub2_tree[1].append('SUB21') - else: - os.chmod(sub21_path, stat.S_IRWXU) - os.unlink(tmp5_path) - os.rmdir(sub21_path) - - def test_walk_bad_dir(self): - self.setUpWalk() - errors = [] - walk_it = self.walk_path.walk(on_error=errors.append) - root, dirs, files = next(walk_it) - self.assertEqual(errors, []) - dir1 = 'SUB1' - path1 = root / dir1 - path1new = (root / dir1).with_suffix(".new") - path1.rename(path1new) - try: - roots = [r for r, _, _ in walk_it] - self.assertTrue(errors) - self.assertNotIn(path1, roots) - self.assertNotIn(path1new, roots) - for dir2 in dirs: - if dir2 != dir1: - self.assertIn(root / dir2, roots) - finally: - path1new.rename(path1) - - def test_walk_many_open_files(self): - depth = 30 - base = self.cls(self.base, 'deep') - path = self.cls(base, *(['d']*depth)) - path.mkdir(parents=True) - - iters = [base.walk(top_down=False) for _ in range(100)] - for i in range(depth + 1): - expected = (path, ['d'] if i else [], []) - for it in iters: - self.assertEqual(next(it), expected) - path = path.parent - - iters = [base.walk(top_down=True) for _ in range(100)] - path = base - for i in range(depth + 1): - expected = (path, ['d'] if i < depth else [], []) - for it in iters: - self.assertEqual(next(it), expected) - path = path / 'd' + @needs_symlinks + def test_iterdir_symlink(self): + # __iter__ on a symlink to a directory. 
+ P = self.cls + p = P(self.base, 'linkB') + paths = set(p.iterdir()) + expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } + self.assertEqual(paths, expected) - def test_walk_above_recursion_limit(self): - recursion_limit = 40 - # directory_depth > recursion_limit - directory_depth = recursion_limit + 10 - base = self.cls(self.base, 'deep') - path = base.joinpath(*(['d'] * directory_depth)) - path.mkdir(parents=True) + @needs_posix + def test_glob_posix(self): + P = self.cls + p = P(self.base) + q = p / "FILEa" + given = set(p.glob("FILEa")) + expect = {q} if q.info.exists() else set() + self.assertEqual(given, expect) + self.assertEqual(set(p.glob("FILEa*")), set()) - with infinite_recursion(recursion_limit): - list(base.walk()) - list(base.walk(top_down=False)) + @needs_windows + def test_glob_windows(self): + P = self.cls + p = P(self.base) + self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) + self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") }) + self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") }) def test_glob_empty_pattern(self): p = self.cls('') @@ -1265,6 +2921,18 @@ def test_glob_pathlike(self): self.assertEqual(expect, set(p.glob(P(pattern)))) self.assertEqual(expect, set(p.glob(FakePath(pattern)))) + def test_glob_case_sensitive(self): + P = self.cls + def _check(path, pattern, case_sensitive, expected): + actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)} + expected = {str(P(self.base, q)) for q in expected} + self.assertEqual(actual, expected) + path = P(self.base) + _check(path, "DIRB/FILE*", True, []) + _check(path, "DIRB/FILE*", False, ["dirB/fileB"]) + _check(path, "dirb/file*", True, []) + _check(path, "dirb/file*", False, ["dirB/fileB"]) + @needs_symlinks def test_glob_dot(self): P = self.cls @@ -1278,6 +2946,31 @@ def test_glob_dot(self): self.assertEqual( set(P('.').glob('**/*/*')), {P("dirD/fileD")}) + # See 
https://github.com/WebAssembly/wasi-filesystem/issues/26 + @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") + def test_glob_dotdot(self): + # ".." is not special in globs. + P = self.cls + p = P(self.base) + self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) + self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) + self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) + self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) + self.assertEqual(set(p.glob("dirA/../file*/..")), set()) + self.assertEqual(set(p.glob("../xyzzy")), set()) + if self.cls.parser is posixpath: + self.assertEqual(set(p.glob("xyzzy/..")), set()) + else: + # ".." segments are normalized first on Windows, so this path is stat()able. + self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) + if sys.platform == "emscripten": + # Emscripten will return ELOOP if there are 49 or more ..'s. + # Can remove when https://github.com/emscripten-core/emscripten/pull/24591 is merged. + NDOTDOTS = 48 + else: + NDOTDOTS = 50 + self.assertEqual(set(p.glob("/".join([".."] * NDOTDOTS))), { P(self.base, *[".."] * NDOTDOTS)}) + def test_glob_inaccessible(self): P = self.cls p = P(self.base, "mydir1", "mydir2") @@ -1293,6 +2986,180 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(P(pattern)))) self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) + @needs_symlinks + def test_glob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. 
+ self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) + _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) + _check(p, "dir*/**", [ + "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", + "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", + "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE/"]) + _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/"]) + _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", + "dirB/linkD/..", "dirA/linkC/linkD/..", + "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", [ + "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", + "dirB/linkD/", "dirB/linkD/fileB", + "dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) + _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", + "dirB/linkD/..", "dirC/dirD/.."]) + _check(p, "dir*/**/fileC", ["dirC/fileC"]) + _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "*/dirD/**/", ["dirC/dirD/"]) + + @needs_symlinks + def test_rglob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. 
+ self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", + "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) + _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) + + p = P(self.base, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD/"]) + _check(p, "", ["dirC/", "dirC/dirD/"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + def test_rglob_recurse_symlinks_false(self): + def _check(path, glob, expected): + actual = set(path.rglob(glob, recurse_symlinks=False)) + self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + it = p.rglob("fileA") + self.assertIsInstance(it, collections.abc.Iterator) + _check(p, "fileA", ["fileA"]) + _check(p, "fileB", ["dirB/fileB"]) + _check(p, "**/fileB", ["dirB/fileB"]) + _check(p, "*/fileA", []) + + if self.can_symlink: + _check(p, "*/fileB", ["dirB/fileB", "dirB/linkD/fileB", + "linkB/fileB", "dirA/linkC/fileB"]) + _check(p, "*/", [ + "dirA/", "dirA/linkC/", "dirB/", "dirB/linkD/", "dirC/", + "dirC/dirD/", "dirE/", "linkB/"]) + else: + 
_check(p, "*/fileB", ["dirB/fileB"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) + + _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "", ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) + p = P(self.base, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "**/file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "dir*/**", ["dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "dir*/**/", ["dirC/dirD/"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD/"]) + _check(p, "", ["dirC/", "dirC/dirD/"]) + _check(p, "**", ["dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt"]) + _check(p, "**/", ["dirC/", "dirC/dirD/"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + @needs_posix + def test_rglob_posix(self): + P = self.cls + p = P(self.base, "dirC") + q = p / "dirD" / "FILEd" + given = set(p.rglob("FILEd")) + expect = {q} if q.exists() else set() + self.assertEqual(given, expect) + self.assertEqual(set(p.rglob("FILEd*")), set()) + + @needs_windows + def test_rglob_windows(self): + P = self.cls + p = P(self.base, "dirC") + self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) + self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) + + @needs_symlinks + def test_rglob_symlink_loop(self): + # Don't get fooled by symlink loops (Issue #26012). 
+ P = self.cls + p = P(self.base) + given = set(p.rglob('*', recurse_symlinks=False)) + expect = {'brokenLink', + 'dirA', 'dirA/linkC', + 'dirB', 'dirB/fileB', 'dirB/linkD', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', + 'dirE', + 'fileA', + 'linkA', + 'linkB', + 'brokenLinkLoop', + } + self.assertEqual(given, {p / x for x in expect}) + + @needs_symlinks + def test_glob_permissions(self): + # See bpo-38894 + P = self.cls + base = P(self.base) / 'permissions' + base.mkdir() + + for i in range(100): + link = base / f"link{i}" + if i % 2: + link.symlink_to(P(self.base, "dirE", "nonexistent")) + else: + link.symlink_to(P(self.base, "dirC"), target_is_directory=True) + + self.assertEqual(len(set(base.glob("*"))), 100) + self.assertEqual(len(set(base.glob("*/"))), 50) + self.assertEqual(len(set(base.glob("*/fileC"))), 50) + self.assertEqual(len(set(base.glob("*/file*"))), 50) + + @needs_symlinks + def test_glob_long_symlink(self): + # See gh-87695 + base = self.cls(self.base) / 'long_symlink' + base.mkdir() + bad_link = base / 'bad_link' + bad_link.symlink_to("bad" * 200) + self.assertEqual(sorted(base.glob('**/*')), [bad_link]) + @needs_posix def test_absolute_posix(self): P = self.cls @@ -1306,7 +3173,7 @@ def test_absolute_posix(self): self.assertEqual(str(P('//a/b').absolute()), '//a/b') @unittest.skipIf( - is_emscripten or is_wasi, + is_wasm32, "umask is not implemented on Emscripten/WASI." ) @needs_posix @@ -1337,7 +3204,7 @@ def test_resolve_root(self): os.chdir(current_directory) @unittest.skipIf( - is_emscripten or is_wasi, + is_wasm32, "umask is not implemented on Emscripten/WASI." 
) @needs_posix @@ -1444,10 +3311,13 @@ def test_handling_bad_descriptor(self): def test_from_uri_posix(self): P = self.cls self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar')) - self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar')) + self.assertRaises(ValueError, P.from_uri, 'file://foo/bar') self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar')) self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar')) self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar')) + if not is_wasi: + self.assertEqual(P.from_uri(f'file://{socket.gethostname()}/foo/bar'), + P('/foo/bar')) self.assertRaises(ValueError, P.from_uri, 'foo/bar') self.assertRaises(ValueError, P.from_uri, '/foo/bar') self.assertRaises(ValueError, P.from_uri, '//foo/bar') @@ -1457,8 +3327,8 @@ def test_from_uri_posix(self): @needs_posix def test_from_uri_pathname2url_posix(self): P = self.cls - self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar')) - self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar')) + self.assertEqual(P.from_uri(pathname2url('/foo/bar', add_scheme=True)), P('/foo/bar')) + self.assertEqual(P.from_uri(pathname2url('//foo/bar', add_scheme=True)), P('//foo/bar')) @needs_windows def test_absolute_windows(self): @@ -1597,6 +3467,166 @@ def test_group_windows(self): P('c:/').group() +class PathWalkTest(unittest.TestCase): + cls = pathlib.Path + base = PathTest.base + can_symlink = PathTest.can_symlink + + def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') + self.walk_path = self.cls(self.base, "TEST1") + self.sub1_path = self.walk_path / "SUB1" + self.sub11_path = self.sub1_path / "SUB11" + self.sub2_path = self.walk_path / "SUB2" + self.link_path = self.sub2_path / "link" + self.sub2_tree = (self.sub2_path, [], ["tmp3"]) + + # Build: + # TESTFN/ + # TEST1/ a file kid and two directory kids + # tmp1 + # 
SUB1/ a file kid and a directory kid + # tmp2 + # SUB11/ no kids + # SUB2/ a file kid and a dirsymlink kid + # tmp3 + # link/ a symlink to TEST2 + # broken_link + # broken_link2 + # TEST2/ + # tmp4 a lone file + t2_path = self.cls(self.base, "TEST2") + os.makedirs(self.sub11_path) + os.makedirs(self.sub2_path) + os.makedirs(t2_path) + + tmp1_path = self.walk_path / "tmp1" + tmp2_path = self.sub1_path / "tmp2" + tmp3_path = self.sub2_path / "tmp3" + tmp4_path = self.cls(self.base, "TEST2", "tmp4") + for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path: + with open(path, "w", encoding='utf-8') as f: + f.write(f"I'm {path} and proud of it. Blame test_pathlib.\n") + + if self.can_symlink: + broken_link_path = self.sub2_path / "broken_link" + broken_link2_path = self.sub2_path / "broken_link2" + os.symlink(t2_path, self.link_path, target_is_directory=True) + os.symlink('broken', broken_link_path) + os.symlink(os.path.join('tmp3', 'broken'), broken_link2_path) + self.sub2_tree = (self.sub2_path, [], ["broken_link", "broken_link2", "link", "tmp3"]) + sub21_path= self.sub2_path / "SUB21" + tmp5_path = sub21_path / "tmp3" + broken_link3_path = self.sub2_path / "broken_link3" + + os.makedirs(sub21_path) + tmp5_path.write_text("I am tmp5, blame test_pathlib.") + if self.can_symlink: + os.symlink(tmp5_path, broken_link3_path) + self.sub2_tree[2].append('broken_link3') + self.sub2_tree[2].sort() + os.chmod(sub21_path, 0) + try: + os.listdir(sub21_path) + except PermissionError: + self.sub2_tree[1].append('SUB21') + else: + os.chmod(sub21_path, stat.S_IRWXU) + os.unlink(tmp5_path) + os.rmdir(sub21_path) + + def tearDown(self): + if 'SUB21' in self.sub2_tree[1]: + os.chmod(self.sub2_path / "SUB21", stat.S_IRWXU) + os_helper.rmtree(self.base) + + def test_walk_bad_dir(self): + errors = [] + walk_it = self.walk_path.walk(on_error=errors.append) + root, dirs, files = next(walk_it) + self.assertEqual(errors, []) + dir1 = 'SUB1' + path1 = root / dir1 + path1new = (root / 
dir1).with_suffix(".new") + path1.rename(path1new) + try: + roots = [r for r, _, _ in walk_it] + self.assertTrue(errors) + self.assertNotIn(path1, roots) + self.assertNotIn(path1new, roots) + for dir2 in dirs: + if dir2 != dir1: + self.assertIn(root / dir2, roots) + finally: + path1new.rename(path1) + + def test_walk_many_open_files(self): + depth = 30 + base = self.cls(self.base, 'deep') + path = self.cls(base, *(['d']*depth)) + path.mkdir(parents=True) + + iters = [base.walk(top_down=False) for _ in range(100)] + for i in range(depth + 1): + expected = (path, ['d'] if i else [], []) + for it in iters: + self.assertEqual(next(it), expected) + path = path.parent + + iters = [base.walk(top_down=True) for _ in range(100)] + path = base + for i in range(depth + 1): + expected = (path, ['d'] if i < depth else [], []) + for it in iters: + self.assertEqual(next(it), expected) + path = path / 'd' + + def test_walk_above_recursion_limit(self): + recursion_limit = 40 + # directory_depth > recursion_limit + directory_depth = recursion_limit + 10 + base = self.cls(self.base, 'deep') + path = base.joinpath(*(['d'] * directory_depth)) + path.mkdir(parents=True) + + with infinite_recursion(recursion_limit): + list(base.walk()) + list(base.walk(top_down=False)) + + @needs_symlinks + def test_walk_follow_symlinks(self): + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with follow_symlinks=True") + + @needs_symlinks + def test_walk_symlink_location(self): + # Tests whether symlinks end up in filenames or dirnames depending + # on the `follow_symlinks` argument. 
+ walk_it = self.walk_path.walk(follow_symlinks=False) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", files) + break + else: + self.fail("symlink not found") + + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", dirs) + break + else: + self.fail("symlink not found") + + @unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') class PosixPathTest(PathTest, PurePosixPathTest): cls = pathlib.PosixPath diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py deleted file mode 100644 index 2e050362158..00000000000 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ /dev/null @@ -1,2499 +0,0 @@ -# TODO: RUSTPYTHON -# Has not been tested with Windows style paths -import collections -import io -import os -import errno -import stat -import unittest - -from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase -import posixpath - -from test.support import is_wasi -from test.support.os_helper import TESTFN - - -_tests_needing_posix = set() -_tests_needing_windows = set() -_tests_needing_symlinks = set() - - -def needs_posix(fn): - """Decorator that marks a test as requiring a POSIX-flavoured path class.""" - _tests_needing_posix.add(fn.__name__) - return fn - -def needs_windows(fn): - """Decorator that marks a test as requiring a Windows-flavoured path class.""" - _tests_needing_windows.add(fn.__name__) - return fn - -def needs_symlinks(fn): - """Decorator that marks a test as requiring a path class that supports symlinks.""" - _tests_needing_symlinks.add(fn.__name__) - return fn - - -class UnsupportedOperationTest(unittest.TestCase): - def test_is_notimplemented(self): - self.assertTrue(issubclass(UnsupportedOperation, NotImplementedError)) - self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError)) - - -class ParserBaseTest(unittest.TestCase): - 
cls = ParserBase - - def test_unsupported_operation(self): - m = self.cls() - e = UnsupportedOperation - with self.assertRaises(e): - m.sep - self.assertRaises(e, m.join, 'foo') - self.assertRaises(e, m.split, 'foo') - self.assertRaises(e, m.splitdrive, 'foo') - self.assertRaises(e, m.normcase, 'foo') - self.assertRaises(e, m.isabs, 'foo') - -# -# Tests for the pure classes. -# - - -class PurePathBaseTest(unittest.TestCase): - cls = PurePathBase - - def test_unsupported_operation_pure(self): - p = self.cls('foo') - e = UnsupportedOperation - with self.assertRaises(e): - p.drive - with self.assertRaises(e): - p.root - with self.assertRaises(e): - p.anchor - with self.assertRaises(e): - p.parts - with self.assertRaises(e): - p.parent - with self.assertRaises(e): - p.parents - with self.assertRaises(e): - p.name - with self.assertRaises(e): - p.stem - with self.assertRaises(e): - p.suffix - with self.assertRaises(e): - p.suffixes - with self.assertRaises(e): - p / 'bar' - with self.assertRaises(e): - 'bar' / p - self.assertRaises(e, p.joinpath, 'bar') - self.assertRaises(e, p.with_name, 'bar') - self.assertRaises(e, p.with_stem, 'bar') - self.assertRaises(e, p.with_suffix, '.txt') - self.assertRaises(e, p.relative_to, '') - self.assertRaises(e, p.is_relative_to, '') - self.assertRaises(e, p.is_absolute) - self.assertRaises(e, p.match, '*') - - def test_magic_methods(self): - P = self.cls - self.assertFalse(hasattr(P, '__fspath__')) - self.assertFalse(hasattr(P, '__bytes__')) - self.assertIs(P.__reduce__, object.__reduce__) - self.assertIs(P.__repr__, object.__repr__) - self.assertIs(P.__hash__, object.__hash__) - self.assertIs(P.__eq__, object.__eq__) - self.assertIs(P.__lt__, object.__lt__) - self.assertIs(P.__le__, object.__le__) - self.assertIs(P.__gt__, object.__gt__) - self.assertIs(P.__ge__, object.__ge__) - - def test_parser(self): - self.assertIsInstance(self.cls.parser, ParserBase) - - -class DummyPurePath(PurePathBase): - __slots__ = () - parser = posixpath 
- - def __eq__(self, other): - if not isinstance(other, DummyPurePath): - return NotImplemented - return str(self) == str(other) - - def __hash__(self): - return hash(str(self)) - - def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, self.as_posix()) - - -class DummyPurePathTest(unittest.TestCase): - cls = DummyPurePath - - # Use a base path that's unrelated to any real filesystem path. - base = f'/this/path/kills/fascists/{TESTFN}' - - def setUp(self): - name = self.id().split('.')[-1] - if name in _tests_needing_posix and self.cls.parser is not posixpath: - self.skipTest('requires POSIX-flavoured path class') - if name in _tests_needing_windows and self.cls.parser is posixpath: - self.skipTest('requires Windows-flavoured path class') - p = self.cls('a') - self.parser = p.parser - self.sep = self.parser.sep - self.altsep = self.parser.altsep - - def test_constructor_common(self): - P = self.cls - p = P('a') - self.assertIsInstance(p, P) - P('a', 'b', 'c') - P('/a', 'b', 'c') - P('a/b/c') - P('/a/b/c') - - def test_bytes(self): - P = self.cls - with self.assertRaises(TypeError): - P(b'a') - with self.assertRaises(TypeError): - P(b'a', 'b') - with self.assertRaises(TypeError): - P('a', b'b') - with self.assertRaises(TypeError): - P('a').joinpath(b'b') - with self.assertRaises(TypeError): - P('a') / b'b' - with self.assertRaises(TypeError): - b'a' / P('b') - with self.assertRaises(TypeError): - P('a').match(b'b') - with self.assertRaises(TypeError): - P('a').relative_to(b'b') - with self.assertRaises(TypeError): - P('a').with_name(b'b') - with self.assertRaises(TypeError): - P('a').with_stem(b'b') - with self.assertRaises(TypeError): - P('a').with_suffix(b'b') - - def _check_str_subclass(self, *args): - # Issue #21127: it should be possible to construct a PurePath object - # from a str subclass instance, and it then gets converted to - # a pure str object. 
- class StrSubclass(str): - pass - P = self.cls - p = P(*(StrSubclass(x) for x in args)) - self.assertEqual(p, P(*args)) - for part in p.parts: - self.assertIs(type(part), str) - - def test_str_subclass_common(self): - self._check_str_subclass('') - self._check_str_subclass('.') - self._check_str_subclass('a') - self._check_str_subclass('a/b.txt') - self._check_str_subclass('/a/b.txt') - - @needs_windows - def test_str_subclass_windows(self): - self._check_str_subclass('.\\a:b') - self._check_str_subclass('c:') - self._check_str_subclass('c:a') - self._check_str_subclass('c:a\\b.txt') - self._check_str_subclass('c:\\') - self._check_str_subclass('c:\\a') - self._check_str_subclass('c:\\a\\b.txt') - self._check_str_subclass('\\\\some\\share') - self._check_str_subclass('\\\\some\\share\\a') - self._check_str_subclass('\\\\some\\share\\a\\b.txt') - - def test_with_segments_common(self): - class P(self.cls): - def __init__(self, *pathsegments, session_id): - super().__init__(*pathsegments) - self.session_id = session_id - - def with_segments(self, *pathsegments): - return type(self)(*pathsegments, session_id=self.session_id) - p = P('foo', 'bar', session_id=42) - self.assertEqual(42, (p / 'foo').session_id) - self.assertEqual(42, ('foo' / p).session_id) - self.assertEqual(42, p.joinpath('foo').session_id) - self.assertEqual(42, p.with_name('foo').session_id) - self.assertEqual(42, p.with_stem('foo').session_id) - self.assertEqual(42, p.with_suffix('.foo').session_id) - self.assertEqual(42, p.with_segments('foo').session_id) - self.assertEqual(42, p.relative_to('foo').session_id) - self.assertEqual(42, p.parent.session_id) - for parent in p.parents: - self.assertEqual(42, parent.session_id) - - def test_join_common(self): - P = self.cls - p = P('a/b') - pp = p.joinpath('c') - self.assertEqual(pp, P('a/b/c')) - self.assertIs(type(pp), type(p)) - pp = p.joinpath('c', 'd') - self.assertEqual(pp, P('a/b/c/d')) - pp = p.joinpath('/c') - self.assertEqual(pp, P('/c')) - - 
@needs_posix - def test_join_posix(self): - P = self.cls - p = P('//a') - pp = p.joinpath('b') - self.assertEqual(pp, P('//a/b')) - pp = P('/a').joinpath('//c') - self.assertEqual(pp, P('//c')) - pp = P('//a').joinpath('/c') - self.assertEqual(pp, P('/c')) - - @needs_windows - def test_join_windows(self): - P = self.cls - p = P('C:/a/b') - pp = p.joinpath('x/y') - self.assertEqual(pp, P('C:/a/b/x/y')) - pp = p.joinpath('/x/y') - self.assertEqual(pp, P('C:/x/y')) - # Joining with a different drive => the first path is ignored, even - # if the second path is relative. - pp = p.joinpath('D:x/y') - self.assertEqual(pp, P('D:x/y')) - pp = p.joinpath('D:/x/y') - self.assertEqual(pp, P('D:/x/y')) - pp = p.joinpath('//host/share/x/y') - self.assertEqual(pp, P('//host/share/x/y')) - # Joining with the same drive => the first path is appended to if - # the second path is relative. - pp = p.joinpath('c:x/y') - self.assertEqual(pp, P('C:/a/b/x/y')) - pp = p.joinpath('c:/x/y') - self.assertEqual(pp, P('C:/x/y')) - # Joining with files with NTFS data streams => the filename should - # not be parsed as a drive letter - pp = p.joinpath(P('./d:s')) - self.assertEqual(pp, P('C:/a/b/d:s')) - pp = p.joinpath(P('./dd:s')) - self.assertEqual(pp, P('C:/a/b/dd:s')) - pp = p.joinpath(P('E:d:s')) - self.assertEqual(pp, P('E:d:s')) - # Joining onto a UNC path with no root - pp = P('//').joinpath('server') - self.assertEqual(pp, P('//server')) - pp = P('//server').joinpath('share') - self.assertEqual(pp, P('//server/share')) - pp = P('//./BootPartition').joinpath('Windows') - self.assertEqual(pp, P('//./BootPartition/Windows')) - - def test_div_common(self): - # Basically the same as joinpath(). 
- P = self.cls - p = P('a/b') - pp = p / 'c' - self.assertEqual(pp, P('a/b/c')) - self.assertIs(type(pp), type(p)) - pp = p / 'c/d' - self.assertEqual(pp, P('a/b/c/d')) - pp = p / 'c' / 'd' - self.assertEqual(pp, P('a/b/c/d')) - pp = 'c' / p / 'd' - self.assertEqual(pp, P('c/a/b/d')) - pp = p/ '/c' - self.assertEqual(pp, P('/c')) - - @needs_posix - def test_div_posix(self): - # Basically the same as joinpath(). - P = self.cls - p = P('//a') - pp = p / 'b' - self.assertEqual(pp, P('//a/b')) - pp = P('/a') / '//c' - self.assertEqual(pp, P('//c')) - pp = P('//a') / '/c' - self.assertEqual(pp, P('/c')) - - @needs_windows - def test_div_windows(self): - # Basically the same as joinpath(). - P = self.cls - p = P('C:/a/b') - self.assertEqual(p / 'x/y', P('C:/a/b/x/y')) - self.assertEqual(p / 'x' / 'y', P('C:/a/b/x/y')) - self.assertEqual(p / '/x/y', P('C:/x/y')) - self.assertEqual(p / '/x' / 'y', P('C:/x/y')) - # Joining with a different drive => the first path is ignored, even - # if the second path is relative. - self.assertEqual(p / 'D:x/y', P('D:x/y')) - self.assertEqual(p / 'D:' / 'x/y', P('D:x/y')) - self.assertEqual(p / 'D:/x/y', P('D:/x/y')) - self.assertEqual(p / 'D:' / '/x/y', P('D:/x/y')) - self.assertEqual(p / '//host/share/x/y', P('//host/share/x/y')) - # Joining with the same drive => the first path is appended to if - # the second path is relative. - self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) - self.assertEqual(p / 'c:/x/y', P('C:/x/y')) - # Joining with files with NTFS data streams => the filename should - # not be parsed as a drive letter - self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s')) - self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) - self.assertEqual(p / P('E:d:s'), P('E:d:s')) - - def _check_str(self, expected, args): - p = self.cls(*args) - self.assertEqual(str(p), expected.replace('/', self.sep)) - - def test_str_common(self): - # Canonicalized paths roundtrip. 
- for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): - self._check_str(pathstr, (pathstr,)) - # Other tests for str() are in test_equivalences(). - - @needs_windows - def test_str_windows(self): - p = self.cls('a/b/c') - self.assertEqual(str(p), 'a\\b\\c') - p = self.cls('c:/a/b/c') - self.assertEqual(str(p), 'c:\\a\\b\\c') - p = self.cls('//a/b') - self.assertEqual(str(p), '\\\\a\\b\\') - p = self.cls('//a/b/c') - self.assertEqual(str(p), '\\\\a\\b\\c') - p = self.cls('//a/b/c/d') - self.assertEqual(str(p), '\\\\a\\b\\c\\d') - - def test_as_posix_common(self): - P = self.cls - for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): - self.assertEqual(P(pathstr).as_posix(), pathstr) - # Other tests for as_posix() are in test_equivalences(). - - def test_match_empty(self): - P = self.cls - self.assertRaises(ValueError, P('a').match, '') - - def test_match_common(self): - P = self.cls - # Simple relative pattern. - self.assertTrue(P('b.py').match('b.py')) - self.assertTrue(P('a/b.py').match('b.py')) - self.assertTrue(P('/a/b.py').match('b.py')) - self.assertFalse(P('a.py').match('b.py')) - self.assertFalse(P('b/py').match('b.py')) - self.assertFalse(P('/a.py').match('b.py')) - self.assertFalse(P('b.py/c').match('b.py')) - # Wildcard relative pattern. - self.assertTrue(P('b.py').match('*.py')) - self.assertTrue(P('a/b.py').match('*.py')) - self.assertTrue(P('/a/b.py').match('*.py')) - self.assertFalse(P('b.pyc').match('*.py')) - self.assertFalse(P('b./py').match('*.py')) - self.assertFalse(P('b.py/c').match('*.py')) - # Multi-part relative pattern. - self.assertTrue(P('ab/c.py').match('a*/*.py')) - self.assertTrue(P('/d/ab/c.py').match('a*/*.py')) - self.assertFalse(P('a.py').match('a*/*.py')) - self.assertFalse(P('/dab/c.py').match('a*/*.py')) - self.assertFalse(P('ab/c.py/d').match('a*/*.py')) - # Absolute pattern. 
- self.assertTrue(P('/b.py').match('/*.py')) - self.assertFalse(P('b.py').match('/*.py')) - self.assertFalse(P('a/b.py').match('/*.py')) - self.assertFalse(P('/a/b.py').match('/*.py')) - # Multi-part absolute pattern. - self.assertTrue(P('/a/b.py').match('/a/*.py')) - self.assertFalse(P('/ab.py').match('/a/*.py')) - self.assertFalse(P('/a/b/c.py').match('/a/*.py')) - # Multi-part glob-style pattern. - self.assertFalse(P('/a/b/c.py').match('/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) - # Case-sensitive flag - self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) - self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) - self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) - self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) - # Matching against empty path - self.assertFalse(P('').match('*')) - self.assertFalse(P('').match('**')) - self.assertFalse(P('').match('**/*')) - - @needs_posix - def test_match_posix(self): - P = self.cls - self.assertFalse(P('A.py').match('a.PY')) - - @needs_windows - def test_match_windows(self): - P = self.cls - # Absolute patterns. - self.assertTrue(P('c:/b.py').match('*:/*.py')) - self.assertTrue(P('c:/b.py').match('c:/*.py')) - self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive - self.assertFalse(P('b.py').match('/*.py')) - self.assertFalse(P('b.py').match('c:*.py')) - self.assertFalse(P('b.py').match('c:/*.py')) - self.assertFalse(P('c:b.py').match('/*.py')) - self.assertFalse(P('c:b.py').match('c:/*.py')) - self.assertFalse(P('/b.py').match('c:*.py')) - self.assertFalse(P('/b.py').match('c:/*.py')) - # UNC patterns. - self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) - self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) - # Case-insensitivity. 
- self.assertTrue(P('B.py').match('b.PY')) - self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) - self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) - # Path anchor doesn't match pattern anchor - self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' - self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' - self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' - - def test_full_match_common(self): - P = self.cls - # Simple relative pattern. - self.assertTrue(P('b.py').full_match('b.py')) - self.assertFalse(P('a/b.py').full_match('b.py')) - self.assertFalse(P('/a/b.py').full_match('b.py')) - self.assertFalse(P('a.py').full_match('b.py')) - self.assertFalse(P('b/py').full_match('b.py')) - self.assertFalse(P('/a.py').full_match('b.py')) - self.assertFalse(P('b.py/c').full_match('b.py')) - # Wildcard relative pattern. - self.assertTrue(P('b.py').full_match('*.py')) - self.assertFalse(P('a/b.py').full_match('*.py')) - self.assertFalse(P('/a/b.py').full_match('*.py')) - self.assertFalse(P('b.pyc').full_match('*.py')) - self.assertFalse(P('b./py').full_match('*.py')) - self.assertFalse(P('b.py/c').full_match('*.py')) - # Multi-part relative pattern. - self.assertTrue(P('ab/c.py').full_match('a*/*.py')) - self.assertFalse(P('/d/ab/c.py').full_match('a*/*.py')) - self.assertFalse(P('a.py').full_match('a*/*.py')) - self.assertFalse(P('/dab/c.py').full_match('a*/*.py')) - self.assertFalse(P('ab/c.py/d').full_match('a*/*.py')) - # Absolute pattern. - self.assertTrue(P('/b.py').full_match('/*.py')) - self.assertFalse(P('b.py').full_match('/*.py')) - self.assertFalse(P('a/b.py').full_match('/*.py')) - self.assertFalse(P('/a/b.py').full_match('/*.py')) - # Multi-part absolute pattern. - self.assertTrue(P('/a/b.py').full_match('/a/*.py')) - self.assertFalse(P('/ab.py').full_match('/a/*.py')) - self.assertFalse(P('/a/b/c.py').full_match('/a/*.py')) - # Multi-part glob-style pattern. 
- self.assertTrue(P('a').full_match('**')) - self.assertTrue(P('c.py').full_match('**')) - self.assertTrue(P('a/b/c.py').full_match('**')) - self.assertTrue(P('/a/b/c.py').full_match('**')) - self.assertTrue(P('/a/b/c.py').full_match('/**')) - self.assertTrue(P('/a/b/c.py').full_match('/a/**')) - self.assertTrue(P('/a/b/c.py').full_match('**/*.py')) - self.assertTrue(P('/a/b/c.py').full_match('/**/*.py')) - self.assertTrue(P('/a/b/c.py').full_match('/a/**/*.py')) - self.assertTrue(P('/a/b/c.py').full_match('/a/b/**/*.py')) - self.assertTrue(P('/a/b/c.py').full_match('/**/**/**/**/*.py')) - self.assertFalse(P('c.py').full_match('**/a.py')) - self.assertFalse(P('c.py').full_match('c/**')) - self.assertFalse(P('a/b/c.py').full_match('**/a')) - self.assertFalse(P('a/b/c.py').full_match('**/a/b')) - self.assertFalse(P('a/b/c.py').full_match('**/a/b/c')) - self.assertFalse(P('a/b/c.py').full_match('**/a/b/c.')) - self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').full_match('/a/b/c.py/**')) - self.assertFalse(P('a/b/c.py').full_match('/**/a/b/c.py')) - # Case-sensitive flag - self.assertFalse(P('A.py').full_match('a.PY', case_sensitive=True)) - self.assertTrue(P('A.py').full_match('a.PY', case_sensitive=False)) - self.assertFalse(P('c:/a/B.Py').full_match('C:/A/*.pY', case_sensitive=True)) - self.assertTrue(P('/a/b/c.py').full_match('/A/*/*.Py', case_sensitive=False)) - # Matching against empty path - self.assertFalse(P('').full_match('*')) - self.assertTrue(P('').full_match('**')) - self.assertFalse(P('').full_match('**/*')) - # Matching with empty pattern - self.assertTrue(P('').full_match('')) - self.assertTrue(P('.').full_match('.')) - self.assertFalse(P('/').full_match('')) - self.assertFalse(P('/').full_match('.')) - self.assertFalse(P('foo').full_match('')) - self.assertFalse(P('foo').full_match('.')) - - def test_parts_common(self): - # `parts` returns a tuple. 
- sep = self.sep - P = self.cls - p = P('a/b') - parts = p.parts - self.assertEqual(parts, ('a', 'b')) - # When the path is absolute, the anchor is a separate part. - p = P('/a/b') - parts = p.parts - self.assertEqual(parts, (sep, 'a', 'b')) - - @needs_windows - def test_parts_windows(self): - P = self.cls - p = P('c:a/b') - parts = p.parts - self.assertEqual(parts, ('c:', 'a', 'b')) - p = P('c:/a/b') - parts = p.parts - self.assertEqual(parts, ('c:\\', 'a', 'b')) - p = P('//a/b/c/d') - parts = p.parts - self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) - - def test_parent_common(self): - # Relative - P = self.cls - p = P('a/b/c') - self.assertEqual(p.parent, P('a/b')) - self.assertEqual(p.parent.parent, P('a')) - self.assertEqual(p.parent.parent.parent, P('')) - self.assertEqual(p.parent.parent.parent.parent, P('')) - # Anchored - p = P('/a/b/c') - self.assertEqual(p.parent, P('/a/b')) - self.assertEqual(p.parent.parent, P('/a')) - self.assertEqual(p.parent.parent.parent, P('/')) - self.assertEqual(p.parent.parent.parent.parent, P('/')) - - @needs_windows - def test_parent_windows(self): - # Anchored - P = self.cls - p = P('z:a/b/c') - self.assertEqual(p.parent, P('z:a/b')) - self.assertEqual(p.parent.parent, P('z:a')) - self.assertEqual(p.parent.parent.parent, P('z:')) - self.assertEqual(p.parent.parent.parent.parent, P('z:')) - p = P('z:/a/b/c') - self.assertEqual(p.parent, P('z:/a/b')) - self.assertEqual(p.parent.parent, P('z:/a')) - self.assertEqual(p.parent.parent.parent, P('z:/')) - self.assertEqual(p.parent.parent.parent.parent, P('z:/')) - p = P('//a/b/c/d') - self.assertEqual(p.parent, P('//a/b/c')) - self.assertEqual(p.parent.parent, P('//a/b')) - self.assertEqual(p.parent.parent.parent, P('//a/b')) - - def test_parents_common(self): - # Relative - P = self.cls - p = P('a/b/c') - par = p.parents - self.assertEqual(len(par), 3) - self.assertEqual(par[0], P('a/b')) - self.assertEqual(par[1], P('a')) - self.assertEqual(par[2], P('')) - 
self.assertEqual(par[-1], P('')) - self.assertEqual(par[-2], P('a')) - self.assertEqual(par[-3], P('a/b')) - self.assertEqual(par[0:1], (P('a/b'),)) - self.assertEqual(par[:2], (P('a/b'), P('a'))) - self.assertEqual(par[:-1], (P('a/b'), P('a'))) - self.assertEqual(par[1:], (P('a'), P(''))) - self.assertEqual(par[::2], (P('a/b'), P(''))) - self.assertEqual(par[::-1], (P(''), P('a'), P('a/b'))) - self.assertEqual(list(par), [P('a/b'), P('a'), P('')]) - with self.assertRaises(IndexError): - par[-4] - with self.assertRaises(IndexError): - par[3] - with self.assertRaises(TypeError): - par[0] = p - # Anchored - p = P('/a/b/c') - par = p.parents - self.assertEqual(len(par), 3) - self.assertEqual(par[0], P('/a/b')) - self.assertEqual(par[1], P('/a')) - self.assertEqual(par[2], P('/')) - self.assertEqual(par[-1], P('/')) - self.assertEqual(par[-2], P('/a')) - self.assertEqual(par[-3], P('/a/b')) - self.assertEqual(par[0:1], (P('/a/b'),)) - self.assertEqual(par[:2], (P('/a/b'), P('/a'))) - self.assertEqual(par[:-1], (P('/a/b'), P('/a'))) - self.assertEqual(par[1:], (P('/a'), P('/'))) - self.assertEqual(par[::2], (P('/a/b'), P('/'))) - self.assertEqual(par[::-1], (P('/'), P('/a'), P('/a/b'))) - self.assertEqual(list(par), [P('/a/b'), P('/a'), P('/')]) - with self.assertRaises(IndexError): - par[-4] - with self.assertRaises(IndexError): - par[3] - - @needs_windows - def test_parents_windows(self): - # Anchored - P = self.cls - p = P('z:a/b/') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('z:a')) - self.assertEqual(par[1], P('z:')) - self.assertEqual(par[0:1], (P('z:a'),)) - self.assertEqual(par[:-1], (P('z:a'),)) - self.assertEqual(par[:2], (P('z:a'), P('z:'))) - self.assertEqual(par[1:], (P('z:'),)) - self.assertEqual(par[::2], (P('z:a'),)) - self.assertEqual(par[::-1], (P('z:'), P('z:a'))) - self.assertEqual(list(par), [P('z:a'), P('z:')]) - with self.assertRaises(IndexError): - par[2] - p = P('z:/a/b/') - par = p.parents - 
self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('z:/a')) - self.assertEqual(par[1], P('z:/')) - self.assertEqual(par[0:1], (P('z:/a'),)) - self.assertEqual(par[0:-1], (P('z:/a'),)) - self.assertEqual(par[:2], (P('z:/a'), P('z:/'))) - self.assertEqual(par[1:], (P('z:/'),)) - self.assertEqual(par[::2], (P('z:/a'),)) - self.assertEqual(par[::-1], (P('z:/'), P('z:/a'),)) - self.assertEqual(list(par), [P('z:/a'), P('z:/')]) - with self.assertRaises(IndexError): - par[2] - p = P('//a/b/c/d') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('//a/b/c')) - self.assertEqual(par[1], P('//a/b')) - self.assertEqual(par[0:1], (P('//a/b/c'),)) - self.assertEqual(par[0:-1], (P('//a/b/c'),)) - self.assertEqual(par[:2], (P('//a/b/c'), P('//a/b'))) - self.assertEqual(par[1:], (P('//a/b'),)) - self.assertEqual(par[::2], (P('//a/b/c'),)) - self.assertEqual(par[::-1], (P('//a/b'), P('//a/b/c'))) - self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) - with self.assertRaises(IndexError): - par[2] - - def test_drive_common(self): - P = self.cls - self.assertEqual(P('a/b').drive, '') - self.assertEqual(P('/a/b').drive, '') - self.assertEqual(P('').drive, '') - - @needs_windows - def test_drive_windows(self): - P = self.cls - self.assertEqual(P('c:').drive, 'c:') - self.assertEqual(P('c:a/b').drive, 'c:') - self.assertEqual(P('c:/').drive, 'c:') - self.assertEqual(P('c:/a/b/').drive, 'c:') - self.assertEqual(P('//a/b').drive, '\\\\a\\b') - self.assertEqual(P('//a/b/').drive, '\\\\a\\b') - self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') - self.assertEqual(P('./c:a').drive, '') - - def test_root_common(self): - P = self.cls - sep = self.sep - self.assertEqual(P('').root, '') - self.assertEqual(P('a/b').root, '') - self.assertEqual(P('/').root, sep) - self.assertEqual(P('/a/b').root, sep) - - @needs_posix - def test_root_posix(self): - P = self.cls - self.assertEqual(P('/a/b').root, '/') - # POSIX special case for two leading slashes. 
- self.assertEqual(P('//a/b').root, '//') - - @needs_windows - def test_root_windows(self): - P = self.cls - self.assertEqual(P('c:').root, '') - self.assertEqual(P('c:a/b').root, '') - self.assertEqual(P('c:/').root, '\\') - self.assertEqual(P('c:/a/b/').root, '\\') - self.assertEqual(P('//a/b').root, '\\') - self.assertEqual(P('//a/b/').root, '\\') - self.assertEqual(P('//a/b/c/d').root, '\\') - - def test_anchor_common(self): - P = self.cls - sep = self.sep - self.assertEqual(P('').anchor, '') - self.assertEqual(P('a/b').anchor, '') - self.assertEqual(P('/').anchor, sep) - self.assertEqual(P('/a/b').anchor, sep) - - @needs_windows - def test_anchor_windows(self): - P = self.cls - self.assertEqual(P('c:').anchor, 'c:') - self.assertEqual(P('c:a/b').anchor, 'c:') - self.assertEqual(P('c:/').anchor, 'c:\\') - self.assertEqual(P('c:/a/b/').anchor, 'c:\\') - self.assertEqual(P('//a/b').anchor, '\\\\a\\b\\') - self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') - self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') - - def test_name_empty(self): - P = self.cls - self.assertEqual(P('').name, '') - self.assertEqual(P('.').name, '.') - self.assertEqual(P('/a/b/.').name, '.') - - def test_name_common(self): - P = self.cls - self.assertEqual(P('/').name, '') - self.assertEqual(P('a/b').name, 'b') - self.assertEqual(P('/a/b').name, 'b') - self.assertEqual(P('a/b.py').name, 'b.py') - self.assertEqual(P('/a/b.py').name, 'b.py') - - @needs_windows - def test_name_windows(self): - P = self.cls - self.assertEqual(P('c:').name, '') - self.assertEqual(P('c:/').name, '') - self.assertEqual(P('c:a/b').name, 'b') - self.assertEqual(P('c:/a/b').name, 'b') - self.assertEqual(P('c:a/b.py').name, 'b.py') - self.assertEqual(P('c:/a/b.py').name, 'b.py') - self.assertEqual(P('//My.py/Share.php').name, '') - self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') - - def test_suffix_common(self): - P = self.cls - self.assertEqual(P('').suffix, '') - self.assertEqual(P('.').suffix, '') - 
self.assertEqual(P('..').suffix, '') - self.assertEqual(P('/').suffix, '') - self.assertEqual(P('a/b').suffix, '') - self.assertEqual(P('/a/b').suffix, '') - self.assertEqual(P('/a/b/.').suffix, '') - self.assertEqual(P('a/b.py').suffix, '.py') - self.assertEqual(P('/a/b.py').suffix, '.py') - self.assertEqual(P('a/.hgrc').suffix, '') - self.assertEqual(P('/a/.hgrc').suffix, '') - self.assertEqual(P('a/.hg.rc').suffix, '.rc') - self.assertEqual(P('/a/.hg.rc').suffix, '.rc') - self.assertEqual(P('a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') - - @needs_windows - def test_suffix_windows(self): - P = self.cls - self.assertEqual(P('c:').suffix, '') - self.assertEqual(P('c:/').suffix, '') - self.assertEqual(P('c:a/b').suffix, '') - self.assertEqual(P('c:/a/b').suffix, '') - self.assertEqual(P('c:a/b.py').suffix, '.py') - self.assertEqual(P('c:/a/b.py').suffix, '.py') - self.assertEqual(P('c:a/.hgrc').suffix, '') - self.assertEqual(P('c:/a/.hgrc').suffix, '') - self.assertEqual(P('c:a/.hg.rc').suffix, '.rc') - self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') - self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('c:/a/Some name. 
Ending with a dot.').suffix, '') - self.assertEqual(P('//My.py/Share.php').suffix, '') - self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') - - def test_suffixes_common(self): - P = self.cls - self.assertEqual(P('').suffixes, []) - self.assertEqual(P('.').suffixes, []) - self.assertEqual(P('/').suffixes, []) - self.assertEqual(P('a/b').suffixes, []) - self.assertEqual(P('/a/b').suffixes, []) - self.assertEqual(P('/a/b/.').suffixes, []) - self.assertEqual(P('a/b.py').suffixes, ['.py']) - self.assertEqual(P('/a/b.py').suffixes, ['.py']) - self.assertEqual(P('a/.hgrc').suffixes, []) - self.assertEqual(P('/a/.hgrc').suffixes, []) - self.assertEqual(P('a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) - - @needs_windows - def test_suffixes_windows(self): - P = self.cls - self.assertEqual(P('c:').suffixes, []) - self.assertEqual(P('c:/').suffixes, []) - self.assertEqual(P('c:a/b').suffixes, []) - self.assertEqual(P('c:/a/b').suffixes, []) - self.assertEqual(P('c:a/b.py').suffixes, ['.py']) - self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) - self.assertEqual(P('c:a/.hgrc').suffixes, []) - self.assertEqual(P('c:/a/.hgrc').suffixes, []) - self.assertEqual(P('c:a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('c:/a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('c:a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('//My.py/Share.php').suffixes, []) - self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('c:/a/Some name. 
Ending with a dot.').suffixes, []) - - def test_stem_empty(self): - P = self.cls - self.assertEqual(P('').stem, '') - self.assertEqual(P('.').stem, '.') - - def test_stem_common(self): - P = self.cls - self.assertEqual(P('..').stem, '..') - self.assertEqual(P('/').stem, '') - self.assertEqual(P('a/b').stem, 'b') - self.assertEqual(P('a/b.py').stem, 'b') - self.assertEqual(P('a/.hgrc').stem, '.hgrc') - self.assertEqual(P('a/.hg.rc').stem, '.hg') - self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('a/Some name. Ending with a dot.').stem, - 'Some name. Ending with a dot.') - - @needs_windows - def test_stem_windows(self): - P = self.cls - self.assertEqual(P('c:').stem, '') - self.assertEqual(P('c:.').stem, '') - self.assertEqual(P('c:..').stem, '..') - self.assertEqual(P('c:/').stem, '') - self.assertEqual(P('c:a/b').stem, 'b') - self.assertEqual(P('c:a/b.py').stem, 'b') - self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') - self.assertEqual(P('c:a/.hg.rc').stem, '.hg') - self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, - 'Some name. 
Ending with a dot.') - def test_with_name_common(self): - P = self.cls - self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) - self.assertEqual(P('/a/b').with_name('d.xml'), P('/a/d.xml')) - self.assertEqual(P('a/b.py').with_name('d.xml'), P('a/d.xml')) - self.assertEqual(P('/a/b.py').with_name('d.xml'), P('/a/d.xml')) - self.assertEqual(P('a/Dot ending.').with_name('d.xml'), P('a/d.xml')) - self.assertEqual(P('/a/Dot ending.').with_name('d.xml'), P('/a/d.xml')) - - @needs_windows - def test_with_name_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) - self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) - self.assertEqual(P('c:a/Dot ending.').with_name('d.xml'), P('c:a/d.xml')) - self.assertEqual(P('c:/a/Dot ending.').with_name('d.xml'), P('c:/a/d.xml')) - self.assertRaises(ValueError, P('c:').with_name, 'd.xml') - self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') - self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') - self.assertEqual(str(P('a').with_name('d:')), '.\\d:') - self.assertEqual(str(P('a').with_name('d:e')), '.\\d:e') - self.assertEqual(P('c:a/b').with_name('d:'), P('c:a/d:')) - self.assertEqual(P('c:a/b').with_name('d:e'), P('c:a/d:e')) - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') - self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') - - def test_with_name_empty(self): - P = self.cls - self.assertEqual(P('').with_name('d.xml'), P('d.xml')) - self.assertEqual(P('.').with_name('d.xml'), P('d.xml')) - self.assertEqual(P('/').with_name('d.xml'), P('/d.xml')) - self.assertEqual(P('a/b').with_name(''), P('a/')) - self.assertEqual(P('a/b').with_name('.'), P('a/.')) - - def test_with_name_seps(self): - P = self.cls - self.assertRaises(ValueError, P('a/b').with_name, '/c') - self.assertRaises(ValueError, P('a/b').with_name, 'c/') - self.assertRaises(ValueError, P('a/b').with_name, 'c/d') - - def test_with_stem_common(self): - P = self.cls - 
self.assertEqual(P('a/b').with_stem('d'), P('a/d')) - self.assertEqual(P('/a/b').with_stem('d'), P('/a/d')) - self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py')) - self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py')) - self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) - self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) - self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) - - @needs_windows - def test_with_stem_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) - self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) - self.assertRaises(ValueError, P('c:').with_stem, 'd') - self.assertRaises(ValueError, P('c:/').with_stem, 'd') - self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') - self.assertEqual(str(P('a').with_stem('d:')), '.\\d:') - self.assertEqual(str(P('a').with_stem('d:e')), '.\\d:e') - self.assertEqual(P('c:a/b').with_stem('d:'), P('c:a/d:')) - self.assertEqual(P('c:a/b').with_stem('d:e'), P('c:a/d:e')) - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') - self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') - - def test_with_stem_empty(self): - P = self.cls - self.assertEqual(P('').with_stem('d'), P('d')) - self.assertEqual(P('.').with_stem('d'), P('d')) - self.assertEqual(P('/').with_stem('d'), P('/d')) - self.assertEqual(P('a/b').with_stem(''), P('a/')) - self.assertEqual(P('a/b').with_stem('.'), P('a/.')) - self.assertRaises(ValueError, P('foo.gz').with_stem, '') - self.assertRaises(ValueError, P('/a/b/foo.gz').with_stem, '') - - def test_with_stem_seps(self): - P = self.cls - self.assertRaises(ValueError, P('a/b').with_stem, '/c') - self.assertRaises(ValueError, P('a/b').with_stem, 'c/') - self.assertRaises(ValueError, P('a/b').with_stem, 'c/d') - - def test_with_suffix_common(self): - P = 
self.cls - self.assertEqual(P('a/b').with_suffix('.gz'), P('a/b.gz')) - self.assertEqual(P('/a/b').with_suffix('.gz'), P('/a/b.gz')) - self.assertEqual(P('a/b.py').with_suffix('.gz'), P('a/b.gz')) - self.assertEqual(P('/a/b.py').with_suffix('.gz'), P('/a/b.gz')) - # Stripping suffix. - self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) - self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) - - @needs_windows - def test_with_suffix_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) - self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) - self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) - self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) - # Path doesn't have a "filename" component. - self.assertRaises(ValueError, P('').with_suffix, '.gz') - self.assertRaises(ValueError, P('.').with_suffix, '.gz') - self.assertRaises(ValueError, P('/').with_suffix, '.gz') - self.assertRaises(ValueError, P('//My/Share').with_suffix, '.gz') - # Invalid suffix. - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '/') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '/.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c/d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c\\d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') - self.assertRaises(TypeError, P('c:a/b').with_suffix, None) - - def test_with_suffix_empty(self): - P = self.cls - # Path doesn't have a "filename" component. 
- self.assertRaises(ValueError, P('').with_suffix, '.gz') - self.assertRaises(ValueError, P('/').with_suffix, '.gz') - - def test_with_suffix_invalid(self): - P = self.cls - # Invalid suffix. - self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') - self.assertRaises(ValueError, P('a/b').with_suffix, '/') - self.assertRaises(ValueError, P('a/b').with_suffix, '.') - self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') - self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') - self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') - self.assertRaises(ValueError, P('a/b').with_suffix, './.d') - self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') - self.assertRaises(TypeError, P('a/b').with_suffix, None) - - def test_relative_to_common(self): - P = self.cls - p = P('a/b') - self.assertRaises(TypeError, p.relative_to) - self.assertRaises(TypeError, p.relative_to, b'a') - self.assertEqual(p.relative_to(P('')), P('a/b')) - self.assertEqual(p.relative_to(''), P('a/b')) - self.assertEqual(p.relative_to(P('a')), P('b')) - self.assertEqual(p.relative_to('a'), P('b')) - self.assertEqual(p.relative_to('a/'), P('b')) - self.assertEqual(p.relative_to(P('a/b')), P('')) - self.assertEqual(p.relative_to('a/b'), P('')) - self.assertEqual(p.relative_to(P(''), walk_up=True), P('a/b')) - self.assertEqual(p.relative_to('', walk_up=True), P('a/b')) - self.assertEqual(p.relative_to(P('a'), walk_up=True), P('b')) - self.assertEqual(p.relative_to('a', walk_up=True), P('b')) - self.assertEqual(p.relative_to('a/', walk_up=True), P('b')) - self.assertEqual(p.relative_to(P('a/b'), walk_up=True), P('')) - self.assertEqual(p.relative_to('a/b', walk_up=True), P('')) - self.assertEqual(p.relative_to(P('a/c'), walk_up=True), P('../b')) - self.assertEqual(p.relative_to('a/c', walk_up=True), P('../b')) - self.assertEqual(p.relative_to(P('a/b/c'), walk_up=True), P('..')) - self.assertEqual(p.relative_to('a/b/c', walk_up=True), P('..')) - 
self.assertEqual(p.relative_to(P('c'), walk_up=True), P('../a/b')) - self.assertEqual(p.relative_to('c', walk_up=True), P('../a/b')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, P('c')) - self.assertRaises(ValueError, p.relative_to, P('a/b/c')) - self.assertRaises(ValueError, p.relative_to, P('a/c')) - self.assertRaises(ValueError, p.relative_to, P('/a')) - self.assertRaises(ValueError, p.relative_to, P("../a")) - self.assertRaises(ValueError, p.relative_to, P("a/..")) - self.assertRaises(ValueError, p.relative_to, P("/a/..")) - self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/a'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) - p = P('/a/b') - self.assertEqual(p.relative_to(P('/')), P('a/b')) - self.assertEqual(p.relative_to('/'), P('a/b')) - self.assertEqual(p.relative_to(P('/a')), P('b')) - self.assertEqual(p.relative_to('/a'), P('b')) - self.assertEqual(p.relative_to('/a/'), P('b')) - self.assertEqual(p.relative_to(P('/a/b')), P('')) - self.assertEqual(p.relative_to('/a/b'), P('')) - self.assertEqual(p.relative_to(P('/'), walk_up=True), P('a/b')) - self.assertEqual(p.relative_to('/', walk_up=True), P('a/b')) - self.assertEqual(p.relative_to(P('/a'), walk_up=True), P('b')) - self.assertEqual(p.relative_to('/a', walk_up=True), P('b')) - self.assertEqual(p.relative_to('/a/', walk_up=True), P('b')) - self.assertEqual(p.relative_to(P('/a/b'), walk_up=True), P('')) - self.assertEqual(p.relative_to('/a/b', walk_up=True), P('')) - self.assertEqual(p.relative_to(P('/a/c'), walk_up=True), P('../b')) - self.assertEqual(p.relative_to('/a/c', walk_up=True), P('../b')) - self.assertEqual(p.relative_to(P('/a/b/c'), walk_up=True), P('..')) - self.assertEqual(p.relative_to('/a/b/c', walk_up=True), P('..')) 
- self.assertEqual(p.relative_to(P('/c'), walk_up=True), P('../a/b')) - self.assertEqual(p.relative_to('/c', walk_up=True), P('../a/b')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, P('/c')) - self.assertRaises(ValueError, p.relative_to, P('/a/b/c')) - self.assertRaises(ValueError, p.relative_to, P('/a/c')) - self.assertRaises(ValueError, p.relative_to, P('')) - self.assertRaises(ValueError, p.relative_to, '') - self.assertRaises(ValueError, p.relative_to, P('a')) - self.assertRaises(ValueError, p.relative_to, P("../a")) - self.assertRaises(ValueError, p.relative_to, P("a/..")) - self.assertRaises(ValueError, p.relative_to, P("/a/..")) - self.assertRaises(ValueError, p.relative_to, P(''), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('a'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) - - @needs_windows - def test_relative_to_windows(self): - P = self.cls - p = P('C:Foo/Bar') - self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:foO')), P('Bar')) - self.assertEqual(p.relative_to('c:foO'), P('Bar')) - self.assertEqual(p.relative_to('c:foO/'), P('Bar')) - self.assertEqual(p.relative_to(P('c:foO/baR')), P()) - self.assertEqual(p.relative_to('c:foO/baR'), P()) - self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:foO'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:foO', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:foO/', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('c:foO/baR'), walk_up=True), P()) - self.assertEqual(p.relative_to('c:foO/baR', walk_up=True), P()) - 
self.assertEqual(p.relative_to(P('C:Foo/Bar/Baz'), walk_up=True), P('..')) - self.assertEqual(p.relative_to(P('C:Foo/Baz'), walk_up=True), P('../Bar')) - self.assertEqual(p.relative_to(P('C:Baz/Bar'), walk_up=True), P('../../Foo/Bar')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, P()) - self.assertRaises(ValueError, p.relative_to, '') - self.assertRaises(ValueError, p.relative_to, P('d:')) - self.assertRaises(ValueError, p.relative_to, P('/')) - self.assertRaises(ValueError, p.relative_to, P('Foo')) - self.assertRaises(ValueError, p.relative_to, P('/Foo')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) - self.assertRaises(ValueError, p.relative_to, P(), walk_up=True) - self.assertRaises(ValueError, p.relative_to, '', walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) - p = P('C:/Foo/Bar') - self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) - self.assertEqual(p.relative_to('c:/foO'), P('Bar')) - self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) - self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) - self.assertEqual(p.relative_to('c:/foO/baR'), P()) - self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:/foO'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:/foO', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:/foO/', 
walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('c:/foO/baR'), walk_up=True), P()) - self.assertEqual(p.relative_to('c:/foO/baR', walk_up=True), P()) - self.assertEqual(p.relative_to('C:/Baz', walk_up=True), P('../Foo/Bar')) - self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) - self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, 'c:') - self.assertRaises(ValueError, p.relative_to, P('c:')) - self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo')) - self.assertRaises(ValueError, p.relative_to, P('d:')) - self.assertRaises(ValueError, p.relative_to, P('d:/')) - self.assertRaises(ValueError, p.relative_to, P('/')) - self.assertRaises(ValueError, p.relative_to, P('/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) - self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//C/Foo'), walk_up=True) - # UNC paths. 
- p = P('//Server/Share/Foo/Bar') - self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare'), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo')), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo'), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) - self.assertEqual(p.relative_to(P('//sErver/sHare'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar'), walk_up=True), P()) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar', walk_up=True), P()) - self.assertEqual(p.relative_to(P('//sErver/sHare/bar'), walk_up=True), P('../Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/bar', walk_up=True), P('../Foo/Bar')) - # Unrelated paths. 
- self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) - self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) - - def test_is_relative_to_common(self): - P = self.cls - p = P('a/b') - self.assertRaises(TypeError, p.is_relative_to) - self.assertRaises(TypeError, p.is_relative_to, b'a') - self.assertTrue(p.is_relative_to(P(''))) - self.assertTrue(p.is_relative_to('')) - self.assertTrue(p.is_relative_to(P('a'))) - self.assertTrue(p.is_relative_to('a/')) - self.assertTrue(p.is_relative_to(P('a/b'))) - self.assertTrue(p.is_relative_to('a/b')) - # Unrelated paths. - self.assertFalse(p.is_relative_to(P('c'))) - self.assertFalse(p.is_relative_to(P('a/b/c'))) - self.assertFalse(p.is_relative_to(P('a/c'))) - self.assertFalse(p.is_relative_to(P('/a'))) - p = P('/a/b') - self.assertTrue(p.is_relative_to(P('/'))) - self.assertTrue(p.is_relative_to('/')) - self.assertTrue(p.is_relative_to(P('/a'))) - self.assertTrue(p.is_relative_to('/a')) - self.assertTrue(p.is_relative_to('/a/')) - self.assertTrue(p.is_relative_to(P('/a/b'))) - self.assertTrue(p.is_relative_to('/a/b')) - # Unrelated paths. 
- self.assertFalse(p.is_relative_to(P('/c'))) - self.assertFalse(p.is_relative_to(P('/a/b/c'))) - self.assertFalse(p.is_relative_to(P('/a/c'))) - self.assertFalse(p.is_relative_to(P(''))) - self.assertFalse(p.is_relative_to('')) - self.assertFalse(p.is_relative_to(P('a'))) - - @needs_windows - def test_is_relative_to_windows(self): - P = self.cls - p = P('C:Foo/Bar') - self.assertTrue(p.is_relative_to(P('c:'))) - self.assertTrue(p.is_relative_to('c:')) - self.assertTrue(p.is_relative_to(P('c:foO'))) - self.assertTrue(p.is_relative_to('c:foO')) - self.assertTrue(p.is_relative_to('c:foO/')) - self.assertTrue(p.is_relative_to(P('c:foO/baR'))) - self.assertTrue(p.is_relative_to('c:foO/baR')) - # Unrelated paths. - self.assertFalse(p.is_relative_to(P())) - self.assertFalse(p.is_relative_to('')) - self.assertFalse(p.is_relative_to(P('d:'))) - self.assertFalse(p.is_relative_to(P('/'))) - self.assertFalse(p.is_relative_to(P('Foo'))) - self.assertFalse(p.is_relative_to(P('/Foo'))) - self.assertFalse(p.is_relative_to(P('C:/Foo'))) - self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) - self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) - p = P('C:/Foo/Bar') - self.assertTrue(p.is_relative_to(P('c:/'))) - self.assertTrue(p.is_relative_to(P('c:/foO'))) - self.assertTrue(p.is_relative_to('c:/foO/')) - self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) - self.assertTrue(p.is_relative_to('c:/foO/baR')) - # Unrelated paths. - self.assertFalse(p.is_relative_to('c:')) - self.assertFalse(p.is_relative_to(P('C:/Baz'))) - self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) - self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) - self.assertFalse(p.is_relative_to(P('C:Foo'))) - self.assertFalse(p.is_relative_to(P('d:'))) - self.assertFalse(p.is_relative_to(P('d:/'))) - self.assertFalse(p.is_relative_to(P('/'))) - self.assertFalse(p.is_relative_to(P('/Foo'))) - self.assertFalse(p.is_relative_to(P('//C/Foo'))) - # UNC paths. 
- p = P('//Server/Share/Foo/Bar') - self.assertTrue(p.is_relative_to(P('//sErver/sHare'))) - self.assertTrue(p.is_relative_to('//sErver/sHare')) - self.assertTrue(p.is_relative_to('//sErver/sHare/')) - self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo'))) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo')) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/')) - self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo/Bar'))) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/Bar')) - # Unrelated paths. - self.assertFalse(p.is_relative_to(P('/Server/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('c:/Server/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) - - @needs_posix - def test_is_absolute_posix(self): - P = self.cls - self.assertFalse(P('').is_absolute()) - self.assertFalse(P('a').is_absolute()) - self.assertFalse(P('a/b/').is_absolute()) - self.assertTrue(P('/').is_absolute()) - self.assertTrue(P('/a').is_absolute()) - self.assertTrue(P('/a/b/').is_absolute()) - self.assertTrue(P('//a').is_absolute()) - self.assertTrue(P('//a/b').is_absolute()) - - @needs_windows - def test_is_absolute_windows(self): - P = self.cls - # Under NT, only paths with both a drive and a root are absolute. - self.assertFalse(P().is_absolute()) - self.assertFalse(P('a').is_absolute()) - self.assertFalse(P('a/b/').is_absolute()) - self.assertFalse(P('/').is_absolute()) - self.assertFalse(P('/a').is_absolute()) - self.assertFalse(P('/a/b/').is_absolute()) - self.assertFalse(P('c:').is_absolute()) - self.assertFalse(P('c:a').is_absolute()) - self.assertFalse(P('c:a/b/').is_absolute()) - self.assertTrue(P('c:/').is_absolute()) - self.assertTrue(P('c:/a').is_absolute()) - self.assertTrue(P('c:/a/b/').is_absolute()) - # UNC paths are absolute by definition. 
- self.assertTrue(P('//').is_absolute()) - self.assertTrue(P('//a').is_absolute()) - self.assertTrue(P('//a/b').is_absolute()) - self.assertTrue(P('//a/b/').is_absolute()) - self.assertTrue(P('//a/b/c').is_absolute()) - self.assertTrue(P('//a/b/c/d').is_absolute()) - self.assertTrue(P('//?/UNC/').is_absolute()) - self.assertTrue(P('//?/UNC/spam').is_absolute()) - - -# -# Tests for the virtual classes. -# - -class PathBaseTest(PurePathBaseTest): - cls = PathBase - - def test_unsupported_operation(self): - P = self.cls - p = self.cls('') - e = UnsupportedOperation - self.assertRaises(e, p.stat) - self.assertRaises(e, p.lstat) - self.assertRaises(e, p.exists) - self.assertRaises(e, p.samefile, 'foo') - self.assertRaises(e, p.is_dir) - self.assertRaises(e, p.is_file) - self.assertRaises(e, p.is_mount) - self.assertRaises(e, p.is_symlink) - self.assertRaises(e, p.is_block_device) - self.assertRaises(e, p.is_char_device) - self.assertRaises(e, p.is_fifo) - self.assertRaises(e, p.is_socket) - self.assertRaises(e, p.open) - self.assertRaises(e, p.read_bytes) - self.assertRaises(e, p.read_text) - self.assertRaises(e, p.write_bytes, b'foo') - self.assertRaises(e, p.write_text, 'foo') - self.assertRaises(e, p.iterdir) - self.assertRaises(e, p.glob, '*') - self.assertRaises(e, p.rglob, '*') - self.assertRaises(e, lambda: list(p.walk())) - self.assertRaises(e, p.absolute) - self.assertRaises(e, P.cwd) - self.assertRaises(e, p.expanduser) - self.assertRaises(e, p.home) - self.assertRaises(e, p.readlink) - self.assertRaises(e, p.symlink_to, 'foo') - self.assertRaises(e, p.hardlink_to, 'foo') - self.assertRaises(e, p.mkdir) - self.assertRaises(e, p.touch) - self.assertRaises(e, p.rename, 'foo') - self.assertRaises(e, p.replace, 'foo') - self.assertRaises(e, p.chmod, 0o755) - self.assertRaises(e, p.lchmod, 0o755) - self.assertRaises(e, p.unlink) - self.assertRaises(e, p.rmdir) - self.assertRaises(e, p.owner) - self.assertRaises(e, p.group) - self.assertRaises(e, p.as_uri) - - def 
test_as_uri_common(self): - e = UnsupportedOperation - self.assertRaises(e, self.cls('').as_uri) - - def test_fspath_common(self): - self.assertRaises(TypeError, os.fspath, self.cls('')) - - def test_as_bytes_common(self): - self.assertRaises(TypeError, bytes, self.cls('')) - - -class DummyPathIO(io.BytesIO): - """ - Used by DummyPath to implement `open('w')` - """ - - def __init__(self, files, path): - super().__init__() - self.files = files - self.path = path - - def close(self): - self.files[self.path] = self.getvalue() - super().close() - - -DummyPathStatResult = collections.namedtuple( - 'DummyPathStatResult', - 'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime') - - -class DummyPath(PathBase): - """ - Simple implementation of PathBase that keeps files and directories in - memory. - """ - __slots__ = () - parser = posixpath - - _files = {} - _directories = {} - _symlinks = {} - - def __eq__(self, other): - if not isinstance(other, DummyPath): - return NotImplemented - return str(self) == str(other) - - def __hash__(self): - return hash(str(self)) - - def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, self.as_posix()) - - def stat(self, *, follow_symlinks=True): - if follow_symlinks or self.name in ('', '.', '..'): - path = str(self.resolve(strict=True)) - else: - path = str(self.parent.resolve(strict=True) / self.name) - if path in self._files: - st_mode = stat.S_IFREG - elif path in self._directories: - st_mode = stat.S_IFDIR - elif path in self._symlinks: - st_mode = stat.S_IFLNK - else: - raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) - return DummyPathStatResult(st_mode, hash(str(self)), 0, 0, 0, 0, 0, 0, 0, 0) - - def open(self, mode='r', buffering=-1, encoding=None, - errors=None, newline=None): - if buffering != -1: - raise NotImplementedError - path_obj = self.resolve() - path = str(path_obj) - name = path_obj.name - parent = str(path_obj.parent) - if path in self._directories: - raise 
IsADirectoryError(errno.EISDIR, "Is a directory", path) - - text = 'b' not in mode - mode = ''.join(c for c in mode if c not in 'btU') - if mode == 'r': - if path not in self._files: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - stream = io.BytesIO(self._files[path]) - elif mode == 'w': - if parent not in self._directories: - raise FileNotFoundError(errno.ENOENT, "File not found", parent) - stream = DummyPathIO(self._files, path) - self._files[path] = b'' - self._directories[parent].add(name) - else: - raise NotImplementedError - if text: - stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) - return stream - - def iterdir(self): - path = str(self.resolve()) - if path in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) - elif path in self._directories: - return (self / name for name in self._directories[path]) - else: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def mkdir(self, mode=0o777, parents=False, exist_ok=False): - path = str(self.resolve()) - if path in self._directories: - if exist_ok: - return - else: - raise FileExistsError(errno.EEXIST, "File exists", path) - try: - if self.name: - self._directories[str(self.parent)].add(self.name) - self._directories[path] = set() - except KeyError: - if not parents: - raise FileNotFoundError(errno.ENOENT, "File not found", str(self.parent)) from None - self.parent.mkdir(parents=True, exist_ok=True) - self.mkdir(mode, parents=False, exist_ok=exist_ok) - - -class DummyPathTest(DummyPurePathTest): - """Tests for PathBase methods that use stat(), open() and iterdir().""" - - cls = DummyPath - can_symlink = False - - # (self.base) - # | - # |-- brokenLink -> non-existing - # |-- dirA - # | `-- linkC -> ../dirB - # |-- dirB - # | |-- fileB - # | `-- linkD -> ../dirB - # |-- dirC - # | |-- dirD - # | | `-- fileD - # | `-- fileC - # | `-- novel.txt - # |-- dirE # No permissions - # |-- fileA - # |-- linkA -> fileA - # 
|-- linkB -> dirB - # `-- brokenLinkLoop -> brokenLinkLoop - # - - def setUp(self): - super().setUp() - name = self.id().split('.')[-1] - if name in _tests_needing_symlinks and not self.can_symlink: - self.skipTest('requires symlinks') - parser = self.cls.parser - p = self.cls(self.base) - p.mkdir(parents=True) - p.joinpath('dirA').mkdir() - p.joinpath('dirB').mkdir() - p.joinpath('dirC').mkdir() - p.joinpath('dirC', 'dirD').mkdir() - p.joinpath('dirE').mkdir() - with p.joinpath('fileA').open('wb') as f: - f.write(b"this is file A\n") - with p.joinpath('dirB', 'fileB').open('wb') as f: - f.write(b"this is file B\n") - with p.joinpath('dirC', 'fileC').open('wb') as f: - f.write(b"this is file C\n") - with p.joinpath('dirC', 'novel.txt').open('wb') as f: - f.write(b"this is a novel\n") - with p.joinpath('dirC', 'dirD', 'fileD').open('wb') as f: - f.write(b"this is file D\n") - if self.can_symlink: - p.joinpath('linkA').symlink_to('fileA') - p.joinpath('brokenLink').symlink_to('non-existing') - p.joinpath('linkB').symlink_to('dirB') - p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB')) - p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB')) - p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop') - - def tearDown(self): - cls = self.cls - cls._files.clear() - cls._directories.clear() - cls._symlinks.clear() - - def tempdir(self): - path = self.cls(self.base).with_name('tmp-dirD') - path.mkdir() - return path - - def assertFileNotFound(self, func, *args, **kwargs): - with self.assertRaises(FileNotFoundError) as cm: - func(*args, **kwargs) - self.assertEqual(cm.exception.errno, errno.ENOENT) - - def assertEqualNormCase(self, path_a, path_b): - normcase = self.parser.normcase - self.assertEqual(normcase(path_a), normcase(path_b)) - - def test_samefile(self): - parser = self.parser - fileA_path = parser.join(self.base, 'fileA') - fileB_path = parser.join(self.base, 'dirB', 'fileB') - p = self.cls(fileA_path) - pp = self.cls(fileA_path) - q = 
self.cls(fileB_path) - self.assertTrue(p.samefile(fileA_path)) - self.assertTrue(p.samefile(pp)) - self.assertFalse(p.samefile(fileB_path)) - self.assertFalse(p.samefile(q)) - # Test the non-existent file case - non_existent = parser.join(self.base, 'foo') - r = self.cls(non_existent) - self.assertRaises(FileNotFoundError, p.samefile, r) - self.assertRaises(FileNotFoundError, p.samefile, non_existent) - self.assertRaises(FileNotFoundError, r.samefile, p) - self.assertRaises(FileNotFoundError, r.samefile, non_existent) - self.assertRaises(FileNotFoundError, r.samefile, r) - self.assertRaises(FileNotFoundError, r.samefile, non_existent) - - def test_exists(self): - P = self.cls - p = P(self.base) - self.assertIs(True, p.exists()) - self.assertIs(True, (p / 'dirA').exists()) - self.assertIs(True, (p / 'fileA').exists()) - self.assertIs(False, (p / 'fileA' / 'bah').exists()) - if self.can_symlink: - self.assertIs(True, (p / 'linkA').exists()) - self.assertIs(True, (p / 'linkB').exists()) - self.assertIs(True, (p / 'linkB' / 'fileB').exists()) - self.assertIs(False, (p / 'linkA' / 'bah').exists()) - self.assertIs(False, (p / 'brokenLink').exists()) - self.assertIs(True, (p / 'brokenLink').exists(follow_symlinks=False)) - self.assertIs(False, (p / 'foo').exists()) - self.assertIs(False, P('/xyzzy').exists()) - self.assertIs(False, P(self.base + '\udfff').exists()) - self.assertIs(False, P(self.base + '\x00').exists()) - - def test_open_common(self): - p = self.cls(self.base) - with (p / 'fileA').open('r') as f: - self.assertIsInstance(f, io.TextIOBase) - self.assertEqual(f.read(), "this is file A\n") - with (p / 'fileA').open('rb') as f: - self.assertIsInstance(f, io.BufferedIOBase) - self.assertEqual(f.read().strip(), b"this is file A") - - def test_read_write_bytes(self): - p = self.cls(self.base) - (p / 'fileA').write_bytes(b'abcdefg') - self.assertEqual((p / 'fileA').read_bytes(), b'abcdefg') - # Check that trying to write str does not truncate the file. 
- self.assertRaises(TypeError, (p / 'fileA').write_bytes, 'somestr') - self.assertEqual((p / 'fileA').read_bytes(), b'abcdefg') - - def test_read_write_text(self): - p = self.cls(self.base) - (p / 'fileA').write_text('äbcdefg', encoding='latin-1') - self.assertEqual((p / 'fileA').read_text( - encoding='utf-8', errors='ignore'), 'bcdefg') - # Check that trying to write bytes does not truncate the file. - self.assertRaises(TypeError, (p / 'fileA').write_text, b'somebytes') - self.assertEqual((p / 'fileA').read_text(encoding='latin-1'), 'äbcdefg') - - def test_read_text_with_newlines(self): - p = self.cls(self.base) - # Check that `\n` character change nothing - (p / 'fileA').write_bytes(b'abcde\r\nfghlk\n\rmnopq') - self.assertEqual((p / 'fileA').read_text(newline='\n'), - 'abcde\r\nfghlk\n\rmnopq') - # Check that `\r` character replaces `\n` - (p / 'fileA').write_bytes(b'abcde\r\nfghlk\n\rmnopq') - self.assertEqual((p / 'fileA').read_text(newline='\r'), - 'abcde\r\nfghlk\n\rmnopq') - # Check that `\r\n` character replaces `\n` - (p / 'fileA').write_bytes(b'abcde\r\nfghlk\n\rmnopq') - self.assertEqual((p / 'fileA').read_text(newline='\r\n'), - 'abcde\r\nfghlk\n\rmnopq') - - def test_write_text_with_newlines(self): - p = self.cls(self.base) - # Check that `\n` character change nothing - (p / 'fileA').write_text('abcde\r\nfghlk\n\rmnopq', newline='\n') - self.assertEqual((p / 'fileA').read_bytes(), - b'abcde\r\nfghlk\n\rmnopq') - # Check that `\r` character replaces `\n` - (p / 'fileA').write_text('abcde\r\nfghlk\n\rmnopq', newline='\r') - self.assertEqual((p / 'fileA').read_bytes(), - b'abcde\r\rfghlk\r\rmnopq') - # Check that `\r\n` character replaces `\n` - (p / 'fileA').write_text('abcde\r\nfghlk\n\rmnopq', newline='\r\n') - self.assertEqual((p / 'fileA').read_bytes(), - b'abcde\r\r\nfghlk\r\n\rmnopq') - # Check that no argument passed will change `\n` to `os.linesep` - os_linesep_byte = bytes(os.linesep, encoding='ascii') - (p / 
'fileA').write_text('abcde\nfghlk\n\rmnopq') - self.assertEqual((p / 'fileA').read_bytes(), - b'abcde' + os_linesep_byte + b'fghlk' + os_linesep_byte + b'\rmnopq') - - def test_iterdir(self): - P = self.cls - p = P(self.base) - it = p.iterdir() - paths = set(it) - expected = ['dirA', 'dirB', 'dirC', 'dirE', 'fileA'] - if self.can_symlink: - expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] - self.assertEqual(paths, { P(self.base, q) for q in expected }) - - @needs_symlinks - def test_iterdir_symlink(self): - # __iter__ on a symlink to a directory. - P = self.cls - p = P(self.base, 'linkB') - paths = set(p.iterdir()) - expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } - self.assertEqual(paths, expected) - - def test_iterdir_nodir(self): - # __iter__ on something that is not a directory. - p = self.cls(self.base, 'fileA') - with self.assertRaises(OSError) as cm: - p.iterdir() - # ENOENT or EINVAL under Windows, ENOTDIR otherwise - # (see issue #12802). - self.assertIn(cm.exception.errno, (errno.ENOTDIR, - errno.ENOENT, errno.EINVAL)) - - def test_glob_common(self): - def _check(glob, expected): - self.assertEqual(set(glob), { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - it = p.glob("fileA") - self.assertIsInstance(it, collections.abc.Iterator) - _check(it, ["fileA"]) - _check(p.glob("fileB"), []) - _check(p.glob("dir*/file*"), ["dirB/fileB", "dirC/fileC"]) - if not self.can_symlink: - _check(p.glob("*A"), ['dirA', 'fileA']) - else: - _check(p.glob("*A"), ['dirA', 'fileA', 'linkA']) - if not self.can_symlink: - _check(p.glob("*B/*"), ['dirB/fileB']) - else: - _check(p.glob("*B/*"), ['dirB/fileB', 'dirB/linkD', - 'linkB/fileB', 'linkB/linkD']) - if not self.can_symlink: - _check(p.glob("*/fileB"), ['dirB/fileB']) - else: - _check(p.glob("*/fileB"), ['dirB/fileB', 'linkB/fileB']) - if self.can_symlink: - _check(p.glob("brokenLink"), ['brokenLink']) - - if not self.can_symlink: - _check(p.glob("*/"), ["dirA/", 
"dirB/", "dirC/", "dirE/"]) - else: - _check(p.glob("*/"), ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) - - @needs_posix - def test_glob_posix(self): - P = self.cls - p = P(self.base) - q = p / "FILEa" - given = set(p.glob("FILEa")) - expect = {q} if q.exists() else set() - self.assertEqual(given, expect) - self.assertEqual(set(p.glob("FILEa*")), set()) - - @needs_windows - def test_glob_windows(self): - P = self.cls - p = P(self.base) - self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) - self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") }) - self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") }) - - def test_glob_empty_pattern(self): - P = self.cls - p = P(self.base) - self.assertEqual(list(p.glob("")), [p]) - self.assertEqual(list(p.glob(".")), [p / "."]) - self.assertEqual(list(p.glob("./")), [p / "./"]) - - def test_glob_case_sensitive(self): - P = self.cls - def _check(path, pattern, case_sensitive, expected): - actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)} - expected = {str(P(self.base, q)) for q in expected} - self.assertEqual(actual, expected) - path = P(self.base) - _check(path, "DIRB/FILE*", True, []) - _check(path, "DIRB/FILE*", False, ["dirB/fileB"]) - _check(path, "dirb/file*", True, []) - _check(path, "dirb/file*", False, ["dirB/fileB"]) - - @needs_symlinks - def test_glob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.glob(glob, recurse_symlinks=True) - if path.parts.count("linkD") <= 1} # exclude symlink loop. 
- self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", []) - _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check(p, "*A", ["dirA", "fileA", "linkA"]) - _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) - _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) - _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) - _check(p, "dir*/**", [ - "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", - "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", - "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", - "dirE/"]) - _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", - "dirB/linkD/..", "dirA/linkC/linkD/..", - "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**", [ - "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", - "dirB/linkD/", "dirB/linkD/fileB", - "dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) - _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", - "dirB/linkD/..", "dirC/dirD/.."]) - _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) - _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "*/dirD/**/", ["dirC/dirD/"]) - - def test_rglob_recurse_symlinks_false(self): - def _check(path, glob, expected): - actual = set(path.rglob(glob, recurse_symlinks=False)) - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - it = p.rglob("fileA") - self.assertIsInstance(it, collections.abc.Iterator) - _check(p, "fileA", ["fileA"]) - _check(p, 
"fileB", ["dirB/fileB"]) - _check(p, "**/fileB", ["dirB/fileB"]) - _check(p, "*/fileA", []) - - if self.can_symlink: - _check(p, "*/fileB", ["dirB/fileB", "dirB/linkD/fileB", - "linkB/fileB", "dirA/linkC/fileB"]) - _check(p, "*/", [ - "dirA/", "dirA/linkC/", "dirB/", "dirB/linkD/", "dirC/", - "dirC/dirD/", "dirE/", "linkB/"]) - else: - _check(p, "*/fileB", ["dirB/fileB"]) - _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) - - _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "", ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) - p = P(self.base, "dirC") - _check(p, "*", ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "**/file*", ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "dir*/**", ["dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "dir*/**/", ["dirC/dirD/"]) - _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD/"]) - _check(p, "", ["dirC/", "dirC/dirD/"]) - _check(p, "**", ["dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt"]) - _check(p, "**/", ["dirC/", "dirC/dirD/"]) - # gh-91616, a re module regression - _check(p, "*.txt", ["dirC/novel.txt"]) - _check(p, "*.*", ["dirC/novel.txt"]) - - @needs_posix - def test_rglob_posix(self): - P = self.cls - p = P(self.base, "dirC") - q = p / "dirD" / "FILEd" - given = set(p.rglob("FILEd")) - expect = {q} if q.exists() else set() - self.assertEqual(given, expect) - self.assertEqual(set(p.rglob("FILEd*")), set()) - - @needs_windows - def test_rglob_windows(self): - P = self.cls - p = P(self.base, "dirC") - self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) - self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) - - @needs_symlinks - def test_rglob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.rglob(glob, recurse_symlinks=True) - if 
path.parts.count("linkD") <= 1} # exclude symlink loop. - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", - "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) - _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) - - p = P(self.base, "dirC") - _check(p, "*", ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD/"]) - _check(p, "", ["dirC/", "dirC/dirD/"]) - # gh-91616, a re module regression - _check(p, "*.txt", ["dirC/novel.txt"]) - _check(p, "*.*", ["dirC/novel.txt"]) - - @needs_symlinks - def test_rglob_symlink_loop(self): - # Don't get fooled by symlink loops (Issue #26012). - P = self.cls - p = P(self.base) - given = set(p.rglob('*', recurse_symlinks=False)) - expect = {'brokenLink', - 'dirA', 'dirA/linkC', - 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', - 'dirC/fileC', 'dirC/novel.txt', - 'dirE', - 'fileA', - 'linkA', - 'linkB', - 'brokenLinkLoop', - } - self.assertEqual(given, {p / x for x in expect}) - - # See https://github.com/WebAssembly/wasi-filesystem/issues/26 - @unittest.skipIf(is_wasi, "WASI resolution of '..' 
parts doesn't match POSIX") - def test_glob_dotdot(self): - # ".." is not special in globs. - P = self.cls - p = P(self.base) - self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) - self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) - self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) - self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) - self.assertEqual(set(p.glob("dirA/../file*/..")), set()) - self.assertEqual(set(p.glob("../xyzzy")), set()) - if self.cls.parser is posixpath: - self.assertEqual(set(p.glob("xyzzy/..")), set()) - else: - # ".." segments are normalized first on Windows, so this path is stat()able. - self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) - self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) - - @needs_symlinks - def test_glob_permissions(self): - # See bpo-38894 - P = self.cls - base = P(self.base) / 'permissions' - base.mkdir() - - for i in range(100): - link = base / f"link{i}" - if i % 2: - link.symlink_to(P(self.base, "dirE", "nonexistent")) - else: - link.symlink_to(P(self.base, "dirC")) - - self.assertEqual(len(set(base.glob("*"))), 100) - self.assertEqual(len(set(base.glob("*/"))), 50) - self.assertEqual(len(set(base.glob("*/fileC"))), 50) - self.assertEqual(len(set(base.glob("*/file*"))), 50) - - @needs_symlinks - def test_glob_long_symlink(self): - # See gh-87695 - base = self.cls(self.base) / 'long_symlink' - base.mkdir() - bad_link = base / 'bad_link' - bad_link.symlink_to("bad" * 200) - self.assertEqual(sorted(base.glob('**/*')), [bad_link]) - - @needs_symlinks - def test_readlink(self): - P = self.cls(self.base) - self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) - self.assertEqual((P / 'brokenLink').readlink(), - self.cls('non-existing')) - self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) - self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) - 
with self.assertRaises(OSError): - (P / 'fileA').readlink() - - @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") - def test_readlink_unsupported(self): - P = self.cls(self.base) - p = P / 'fileA' - with self.assertRaises(UnsupportedOperation): - q.readlink(p) - - def _check_resolve(self, p, expected, strict=True): - q = p.resolve(strict) - self.assertEqual(q, expected) - - # This can be used to check both relative and absolute resolutions. - _check_resolve_relative = _check_resolve_absolute = _check_resolve - - @needs_symlinks - def test_resolve_common(self): - P = self.cls - p = P(self.base, 'foo') - with self.assertRaises(OSError) as cm: - p.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOENT) - # Non-strict - parser = self.parser - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo')) - p = P(self.base, 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo', 'in', 'spam')) - p = P(self.base, '..', 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) - # These are all relative symlinks. - p = P(self.base, 'dirB', 'fileB') - self._check_resolve_relative(p, p) - p = P(self.base, 'linkA') - self._check_resolve_relative(p, P(self.base, 'fileA')) - p = P(self.base, 'dirA', 'linkC', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - p = P(self.base, 'dirB', 'linkD', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', - 'spam'), False) - p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. 
- self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', - 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. - self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - # Now create absolute symlinks. - d = self.tempdir() - P(self.base, 'dirA', 'linkX').symlink_to(d) - P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) - p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') - self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), - False) - p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. - self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. 
- self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - - @needs_symlinks - def test_resolve_dot(self): - # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ - parser = self.parser - p = self.cls(self.base) - p.joinpath('0').symlink_to('.', target_is_directory=True) - p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) - p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) - q = p / '2' - self.assertEqual(q.resolve(strict=True), p) - r = q / '3' / '4' - self.assertRaises(FileNotFoundError, r.resolve, strict=True) - # Non-strict - self.assertEqual(r.resolve(strict=False), p / '3' / '4') - - def _check_symlink_loop(self, *args): - path = self.cls(*args) - with self.assertRaises(OSError) as cm: - path.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ELOOP) - - @needs_posix - @needs_symlinks - def test_resolve_loop(self): - # Loops with relative symlinks. - self.cls(self.base, 'linkX').symlink_to('linkX/inside') - self._check_symlink_loop(self.base, 'linkX') - self.cls(self.base, 'linkY').symlink_to('linkY') - self._check_symlink_loop(self.base, 'linkY') - self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') - self._check_symlink_loop(self.base, 'linkZ') - # Non-strict - p = self.cls(self.base, 'linkZ', 'foo') - self.assertEqual(p.resolve(strict=False), p) - # Loops with absolute symlinks. 
- self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) - self._check_symlink_loop(self.base, 'linkU') - self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) - self._check_symlink_loop(self.base, 'linkV') - self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) - self._check_symlink_loop(self.base, 'linkW') - # Non-strict - q = self.cls(self.base, 'linkW', 'foo') - self.assertEqual(q.resolve(strict=False), q) - - def test_stat(self): - statA = self.cls(self.base).joinpath('fileA').stat() - statB = self.cls(self.base).joinpath('dirB', 'fileB').stat() - statC = self.cls(self.base).joinpath('dirC').stat() - # st_mode: files are the same, directory differs. - self.assertIsInstance(statA.st_mode, int) - self.assertEqual(statA.st_mode, statB.st_mode) - self.assertNotEqual(statA.st_mode, statC.st_mode) - self.assertNotEqual(statB.st_mode, statC.st_mode) - # st_ino: all different, - self.assertIsInstance(statA.st_ino, int) - self.assertNotEqual(statA.st_ino, statB.st_ino) - self.assertNotEqual(statA.st_ino, statC.st_ino) - self.assertNotEqual(statB.st_ino, statC.st_ino) - # st_dev: all the same. - self.assertIsInstance(statA.st_dev, int) - self.assertEqual(statA.st_dev, statB.st_dev) - self.assertEqual(statA.st_dev, statC.st_dev) - # other attributes not used by pathlib. 
- - @needs_symlinks - def test_stat_no_follow_symlinks(self): - p = self.cls(self.base) / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.stat(follow_symlinks=False)) - - def test_stat_no_follow_symlinks_nosymlink(self): - p = self.cls(self.base) / 'fileA' - st = p.stat() - self.assertEqual(st, p.stat(follow_symlinks=False)) - - @needs_symlinks - def test_lstat(self): - p = self.cls(self.base)/ 'linkA' - st = p.stat() - self.assertNotEqual(st, p.lstat()) - - def test_lstat_nosymlink(self): - p = self.cls(self.base) / 'fileA' - st = p.stat() - self.assertEqual(st, p.lstat()) - - def test_is_dir(self): - P = self.cls(self.base) - self.assertTrue((P / 'dirA').is_dir()) - self.assertFalse((P / 'fileA').is_dir()) - self.assertFalse((P / 'non-existing').is_dir()) - self.assertFalse((P / 'fileA' / 'bah').is_dir()) - if self.can_symlink: - self.assertFalse((P / 'linkA').is_dir()) - self.assertTrue((P / 'linkB').is_dir()) - self.assertFalse((P/ 'brokenLink').is_dir()) - self.assertFalse((P / 'dirA\udfff').is_dir()) - self.assertFalse((P / 'dirA\x00').is_dir()) - - def test_is_dir_no_follow_symlinks(self): - P = self.cls(self.base) - self.assertTrue((P / 'dirA').is_dir(follow_symlinks=False)) - self.assertFalse((P / 'fileA').is_dir(follow_symlinks=False)) - self.assertFalse((P / 'non-existing').is_dir(follow_symlinks=False)) - self.assertFalse((P / 'fileA' / 'bah').is_dir(follow_symlinks=False)) - if self.can_symlink: - self.assertFalse((P / 'linkA').is_dir(follow_symlinks=False)) - self.assertFalse((P / 'linkB').is_dir(follow_symlinks=False)) - self.assertFalse((P/ 'brokenLink').is_dir(follow_symlinks=False)) - self.assertFalse((P / 'dirA\udfff').is_dir(follow_symlinks=False)) - self.assertFalse((P / 'dirA\x00').is_dir(follow_symlinks=False)) - - def test_is_file(self): - P = self.cls(self.base) - self.assertTrue((P / 'fileA').is_file()) - self.assertFalse((P / 'dirA').is_file()) - self.assertFalse((P / 'non-existing').is_file()) - self.assertFalse((P / 'fileA' / 
'bah').is_file()) - if self.can_symlink: - self.assertTrue((P / 'linkA').is_file()) - self.assertFalse((P / 'linkB').is_file()) - self.assertFalse((P/ 'brokenLink').is_file()) - self.assertFalse((P / 'fileA\udfff').is_file()) - self.assertFalse((P / 'fileA\x00').is_file()) - - def test_is_file_no_follow_symlinks(self): - P = self.cls(self.base) - self.assertTrue((P / 'fileA').is_file(follow_symlinks=False)) - self.assertFalse((P / 'dirA').is_file(follow_symlinks=False)) - self.assertFalse((P / 'non-existing').is_file(follow_symlinks=False)) - self.assertFalse((P / 'fileA' / 'bah').is_file(follow_symlinks=False)) - if self.can_symlink: - self.assertFalse((P / 'linkA').is_file(follow_symlinks=False)) - self.assertFalse((P / 'linkB').is_file(follow_symlinks=False)) - self.assertFalse((P/ 'brokenLink').is_file(follow_symlinks=False)) - self.assertFalse((P / 'fileA\udfff').is_file(follow_symlinks=False)) - self.assertFalse((P / 'fileA\x00').is_file(follow_symlinks=False)) - - def test_is_mount(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_mount()) - self.assertFalse((P / 'dirA').is_mount()) - self.assertFalse((P / 'non-existing').is_mount()) - self.assertFalse((P / 'fileA' / 'bah').is_mount()) - if self.can_symlink: - self.assertFalse((P / 'linkA').is_mount()) - - def test_is_symlink(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_symlink()) - self.assertFalse((P / 'dirA').is_symlink()) - self.assertFalse((P / 'non-existing').is_symlink()) - self.assertFalse((P / 'fileA' / 'bah').is_symlink()) - if self.can_symlink: - self.assertTrue((P / 'linkA').is_symlink()) - self.assertTrue((P / 'linkB').is_symlink()) - self.assertTrue((P/ 'brokenLink').is_symlink()) - self.assertIs((P / 'fileA\udfff').is_file(), False) - self.assertIs((P / 'fileA\x00').is_file(), False) - if self.can_symlink: - self.assertIs((P / 'linkA\udfff').is_file(), False) - self.assertIs((P / 'linkA\x00').is_file(), False) - - def test_is_junction_false(self): - P 
= self.cls(self.base) - self.assertFalse((P / 'fileA').is_junction()) - self.assertFalse((P / 'dirA').is_junction()) - self.assertFalse((P / 'non-existing').is_junction()) - self.assertFalse((P / 'fileA' / 'bah').is_junction()) - self.assertFalse((P / 'fileA\udfff').is_junction()) - self.assertFalse((P / 'fileA\x00').is_junction()) - - def test_is_fifo_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_fifo()) - self.assertFalse((P / 'dirA').is_fifo()) - self.assertFalse((P / 'non-existing').is_fifo()) - self.assertFalse((P / 'fileA' / 'bah').is_fifo()) - self.assertIs((P / 'fileA\udfff').is_fifo(), False) - self.assertIs((P / 'fileA\x00').is_fifo(), False) - - def test_is_socket_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_socket()) - self.assertFalse((P / 'dirA').is_socket()) - self.assertFalse((P / 'non-existing').is_socket()) - self.assertFalse((P / 'fileA' / 'bah').is_socket()) - self.assertIs((P / 'fileA\udfff').is_socket(), False) - self.assertIs((P / 'fileA\x00').is_socket(), False) - - def test_is_block_device_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_block_device()) - self.assertFalse((P / 'dirA').is_block_device()) - self.assertFalse((P / 'non-existing').is_block_device()) - self.assertFalse((P / 'fileA' / 'bah').is_block_device()) - self.assertIs((P / 'fileA\udfff').is_block_device(), False) - self.assertIs((P / 'fileA\x00').is_block_device(), False) - - def test_is_char_device_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_char_device()) - self.assertFalse((P / 'dirA').is_char_device()) - self.assertFalse((P / 'non-existing').is_char_device()) - self.assertFalse((P / 'fileA' / 'bah').is_char_device()) - self.assertIs((P / 'fileA\udfff').is_char_device(), False) - self.assertIs((P / 'fileA\x00').is_char_device(), False) - - def _check_complex_symlinks(self, link0_target): - # Test solving a non-looping chain of symlinks (issue #19887). 
- parser = self.parser - P = self.cls(self.base) - P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) - P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) - P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) - P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) - - # Resolve absolute paths. - p = (P / 'link0').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link1').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link2').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link3').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - - # Resolve relative paths. - try: - self.cls('').absolute() - except UnsupportedOperation: - return - old_path = os.getcwd() - os.chdir(self.base) - try: - p = self.cls('link0').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = self.cls('link1').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = self.cls('link2').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = self.cls('link3').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - finally: - os.chdir(old_path) - - @needs_symlinks - def test_complex_symlinks_absolute(self): - self._check_complex_symlinks(self.base) - - @needs_symlinks - def test_complex_symlinks_relative(self): - self._check_complex_symlinks('.') - - @needs_symlinks - def test_complex_symlinks_relative_dot_dot(self): - self._check_complex_symlinks(self.parser.join('dirA', '..')) - - def setUpWalk(self): - # Build: - # TESTFN/ - # TEST1/ a file kid and two directory kids - # tmp1 - # SUB1/ a file kid and a directory kid - # tmp2 - # SUB11/ no kids - # SUB2/ a file kid and a dirsymlink kid - # tmp3 - 
# link/ a symlink to TEST2 - # broken_link - # broken_link2 - # TEST2/ - # tmp4 a lone file - self.walk_path = self.cls(self.base, "TEST1") - self.sub1_path = self.walk_path / "SUB1" - self.sub11_path = self.sub1_path / "SUB11" - self.sub2_path = self.walk_path / "SUB2" - tmp1_path = self.walk_path / "tmp1" - tmp2_path = self.sub1_path / "tmp2" - tmp3_path = self.sub2_path / "tmp3" - self.link_path = self.sub2_path / "link" - t2_path = self.cls(self.base, "TEST2") - tmp4_path = self.cls(self.base, "TEST2", "tmp4") - broken_link_path = self.sub2_path / "broken_link" - broken_link2_path = self.sub2_path / "broken_link2" - - self.sub11_path.mkdir(parents=True) - self.sub2_path.mkdir(parents=True) - t2_path.mkdir(parents=True) - - for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path: - with path.open("w", encoding='utf-8') as f: - f.write(f"I'm {path} and proud of it. Blame test_pathlib.\n") - - if self.can_symlink: - self.link_path.symlink_to(t2_path) - broken_link_path.symlink_to('broken') - broken_link2_path.symlink_to(self.cls('tmp3', 'broken')) - self.sub2_tree = (self.sub2_path, [], ["broken_link", "broken_link2", "link", "tmp3"]) - else: - self.sub2_tree = (self.sub2_path, [], ["tmp3"]) - - def test_walk_topdown(self): - self.setUpWalk() - walker = self.walk_path.walk() - entry = next(walker) - entry[1].sort() # Ensure we visit SUB1 before SUB2 - self.assertEqual(entry, (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) - entry = next(walker) - self.assertEqual(entry, (self.sub1_path, ["SUB11"], ["tmp2"])) - entry = next(walker) - self.assertEqual(entry, (self.sub11_path, [], [])) - entry = next(walker) - entry[1].sort() - entry[2].sort() - self.assertEqual(entry, self.sub2_tree) - with self.assertRaises(StopIteration): - next(walker) - - def test_walk_prune(self): - self.setUpWalk() - # Prune the search. 
- all = [] - for root, dirs, files in self.walk_path.walk(): - all.append((root, dirs, files)) - if 'SUB1' in dirs: - # Note that this also mutates the dirs we appended to all! - dirs.remove('SUB1') - - self.assertEqual(len(all), 2) - self.assertEqual(all[0], (self.walk_path, ["SUB2"], ["tmp1"])) - - all[1][-1].sort() - all[1][1].sort() - self.assertEqual(all[1], self.sub2_tree) - - def test_walk_bottom_up(self): - self.setUpWalk() - seen_testfn = seen_sub1 = seen_sub11 = seen_sub2 = False - for path, dirnames, filenames in self.walk_path.walk(top_down=False): - if path == self.walk_path: - self.assertFalse(seen_testfn) - self.assertTrue(seen_sub1) - self.assertTrue(seen_sub2) - self.assertEqual(sorted(dirnames), ["SUB1", "SUB2"]) - self.assertEqual(filenames, ["tmp1"]) - seen_testfn = True - elif path == self.sub1_path: - self.assertFalse(seen_testfn) - self.assertFalse(seen_sub1) - self.assertTrue(seen_sub11) - self.assertEqual(dirnames, ["SUB11"]) - self.assertEqual(filenames, ["tmp2"]) - seen_sub1 = True - elif path == self.sub11_path: - self.assertFalse(seen_sub1) - self.assertFalse(seen_sub11) - self.assertEqual(dirnames, []) - self.assertEqual(filenames, []) - seen_sub11 = True - elif path == self.sub2_path: - self.assertFalse(seen_testfn) - self.assertFalse(seen_sub2) - self.assertEqual(sorted(dirnames), sorted(self.sub2_tree[1])) - self.assertEqual(sorted(filenames), sorted(self.sub2_tree[2])) - seen_sub2 = True - else: - raise AssertionError(f"Unexpected path: {path}") - self.assertTrue(seen_testfn) - - @needs_symlinks - def test_walk_follow_symlinks(self): - self.setUpWalk() - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.link_path: - self.assertEqual(dirs, []) - self.assertEqual(files, ["tmp4"]) - break - else: - self.fail("Didn't follow symlink with follow_symlinks=True") - - @needs_symlinks - def test_walk_symlink_location(self): - self.setUpWalk() - # Tests whether symlinks end up in 
filenames or dirnames depending - # on the `follow_symlinks` argument. - walk_it = self.walk_path.walk(follow_symlinks=False) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", files) - break - else: - self.fail("symlink not found") - - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", dirs) - break - else: - self.fail("symlink not found") - - -class DummyPathWithSymlinks(DummyPath): - __slots__ = () - - # Reduce symlink traversal limit to make tests run faster. - _max_symlinks = 20 - - def readlink(self): - path = str(self.parent.resolve() / self.name) - if path in self._symlinks: - return self.with_segments(self._symlinks[path]) - elif path in self._files or path in self._directories: - raise OSError(errno.EINVAL, "Not a symlink", path) - else: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def symlink_to(self, target, target_is_directory=False): - self._directories[str(self.parent)].add(self.name) - self._symlinks[str(self)] = str(target) - - -class DummyPathWithSymlinksTest(DummyPathTest): - cls = DummyPathWithSymlinks - can_symlink = True - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_pathlib/test_read.py b/Lib/test/test_pathlib/test_read.py new file mode 100644 index 00000000000..482203c290a --- /dev/null +++ b/Lib/test/test_pathlib/test_read.py @@ -0,0 +1,343 @@ +""" +Tests for pathlib.types._ReadablePath +""" + +import collections.abc +import io +import sys +import unittest + +from .support import is_pypi +from .support.local_path import ReadableLocalPath, LocalPathGround +from .support.zip_path import ReadableZipPath, ZipPathGround + +if is_pypi: + from pathlib_abc import PathInfo, _ReadablePath + from pathlib_abc._os import magic_open +else: + from pathlib.types import PathInfo, _ReadablePath + from pathlib._os import magic_open + + +class ReadTestBase: + def setUp(self): + 
self.root = self.ground.setup() + self.ground.create_hierarchy(self.root) + + def tearDown(self): + self.ground.teardown(self.root) + + def test_is_readable(self): + self.assertIsInstance(self.root, _ReadablePath) + + def test_open_r(self): + p = self.root / 'fileA' + with magic_open(p, 'r', encoding='utf-8') as f: + self.assertIsInstance(f, io.TextIOBase) + self.assertEqual(f.read(), 'this is file A\n') + + @unittest.skipIf( + not getattr(sys.flags, 'warn_default_encoding', 0), + "Requires warn_default_encoding", + ) + def test_open_r_encoding_warning(self): + p = self.root / 'fileA' + with self.assertWarns(EncodingWarning) as wc: + with magic_open(p, 'r'): + pass + self.assertEqual(wc.filename, __file__) + + def test_open_rb(self): + p = self.root / 'fileA' + with magic_open(p, 'rb') as f: + self.assertEqual(f.read(), b'this is file A\n') + self.assertRaises(ValueError, magic_open, p, 'rb', encoding='utf8') + self.assertRaises(ValueError, magic_open, p, 'rb', errors='strict') + self.assertRaises(ValueError, magic_open, p, 'rb', newline='') + + def test_read_bytes(self): + p = self.root / 'fileA' + self.assertEqual(p.read_bytes(), b'this is file A\n') + + def test_read_text(self): + p = self.root / 'fileA' + self.assertEqual(p.read_text(encoding='utf-8'), 'this is file A\n') + q = self.root / 'abc' + self.ground.create_file(q, b'\xe4bcdefg') + self.assertEqual(q.read_text(encoding='latin-1'), 'äbcdefg') + self.assertEqual(q.read_text(encoding='utf-8', errors='ignore'), 'bcdefg') + + @unittest.skipIf( + not getattr(sys.flags, 'warn_default_encoding', 0), + "Requires warn_default_encoding", + ) + def test_read_text_encoding_warning(self): + p = self.root / 'fileA' + with self.assertWarns(EncodingWarning) as wc: + p.read_text() + self.assertEqual(wc.filename, __file__) + + def test_read_text_with_newlines(self): + p = self.root / 'abc' + self.ground.create_file(p, b'abcde\r\nfghlk\n\rmnopq') + # Check that `\n` character change nothing + 
self.assertEqual(p.read_text(encoding='utf-8', newline='\n'), 'abcde\r\nfghlk\n\rmnopq') + # Check that `\r` character replaces `\n` + self.assertEqual(p.read_text(encoding='utf-8', newline='\r'), 'abcde\r\nfghlk\n\rmnopq') + # Check that `\r\n` character replaces `\n` + self.assertEqual(p.read_text(encoding='utf-8', newline='\r\n'), 'abcde\r\nfghlk\n\rmnopq') + + def test_iterdir(self): + expected = ['dirA', 'dirB', 'dirC', 'fileA'] + if self.ground.can_symlink: + expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] + expected = {self.root.joinpath(name) for name in expected} + actual = set(self.root.iterdir()) + self.assertEqual(actual, expected) + + def test_iterdir_nodir(self): + p = self.root / 'fileA' + self.assertRaises(OSError, p.iterdir) + + def test_iterdir_info(self): + for child in self.root.iterdir(): + self.assertIsInstance(child.info, PathInfo) + self.assertTrue(child.info.exists(follow_symlinks=False)) + + def test_glob(self): + if not self.ground.can_symlink: + self.skipTest("requires symlinks") + + p = self.root + sep = self.root.parser.sep + altsep = self.root.parser.altsep + def check(pattern, expected): + if altsep: + expected = {name.replace(altsep, sep) for name in expected} + expected = {p.joinpath(name) for name in expected} + actual = set(p.glob(pattern, recurse_symlinks=True)) + self.assertEqual(actual, expected) + + it = p.glob("fileA") + self.assertIsInstance(it, collections.abc.Iterator) + self.assertEqual(list(it), [p.joinpath("fileA")]) + check("*A", ["dirA", "fileA", "linkA"]) + check("*A", ['dirA', 'fileA', 'linkA']) + check("*B/*", ["dirB/fileB", "linkB/fileB"]) + check("*B/*", ['dirB/fileB', 'linkB/fileB']) + check("brokenLink", ['brokenLink']) + check("brokenLinkLoop", ['brokenLinkLoop']) + check("**/", ["", "dirA/", "dirA/linkC/", "dirB/", "dirC/", "dirC/dirD/", "linkB/"]) + check("**/*/", ["dirA/", "dirA/linkC/", "dirB/", "dirC/", "dirC/dirD/", "linkB/"]) + check("*/", ["dirA/", "dirB/", "dirC/", "linkB/"]) + 
check("*/dirD/**/", ["dirC/dirD/"]) + check("*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) + check("dir*/**", ["dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirB/", "dirB/fileB", "dirC/", + "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt"]) + check("dir*/**/", ["dirA/", "dirA/linkC/", "dirB/", "dirC/", "dirC/dirD/"]) + check("dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", "dirC/..", "dirC/dirD/.."]) + check("dir*/*/**", ["dirA/linkC/", "dirA/linkC/fileB", "dirC/dirD/", "dirC/dirD/fileD"]) + check("dir*/*/**/", ["dirA/linkC/", "dirC/dirD/"]) + check("dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."]) + check("dir*/*/..", ["dirC/dirD/..", "dirA/linkC/.."]) + check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + check("dir*/**/fileC", ["dirC/fileC"]) + check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) + check("**/*/fileA", []) + check("fileB", []) + check("**/*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) + check("**/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) + check("*/fileB", ["dirB/fileB", "linkB/fileB"]) + check("*/fileB", ['dirB/fileB', 'linkB/fileB']) + check("**/file*", + ["fileA", "dirA/linkC/fileB", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", + "linkB/fileB"]) + with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): + list(p.glob('')) + + def test_walk_top_down(self): + it = self.root.walk() + + path, dirnames, filenames = next(it) + dirnames.sort() + filenames.sort() + self.assertEqual(path, self.root) + self.assertEqual(dirnames, ['dirA', 'dirB', 'dirC']) + self.assertEqual(filenames, ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA', 'linkB'] + if self.ground.can_symlink else ['fileA']) + + path, dirnames, filenames = next(it) + self.assertEqual(path, self.root / 'dirA') + self.assertEqual(dirnames, []) + self.assertEqual(filenames, ['linkC'] if self.ground.can_symlink else []) + + path, dirnames, filenames = next(it) + self.assertEqual(path, self.root / 'dirB') + self.assertEqual(dirnames, []) + 
self.assertEqual(filenames, ['fileB']) + + path, dirnames, filenames = next(it) + filenames.sort() + self.assertEqual(path, self.root / 'dirC') + self.assertEqual(dirnames, ['dirD']) + self.assertEqual(filenames, ['fileC', 'novel.txt']) + + path, dirnames, filenames = next(it) + self.assertEqual(path, self.root / 'dirC' / 'dirD') + self.assertEqual(dirnames, []) + self.assertEqual(filenames, ['fileD']) + + self.assertRaises(StopIteration, next, it) + + def test_walk_prune(self): + expected = {self.root, self.root / 'dirA', self.root / 'dirC', self.root / 'dirC' / 'dirD'} + actual = set() + for path, dirnames, filenames in self.root.walk(): + actual.add(path) + if path == self.root: + dirnames.remove('dirB') + self.assertEqual(actual, expected) + + def test_walk_bottom_up(self): + seen_root = seen_dira = seen_dirb = seen_dirc = seen_dird = False + for path, dirnames, filenames in self.root.walk(top_down=False): + if path == self.root: + self.assertFalse(seen_root) + self.assertTrue(seen_dira) + self.assertTrue(seen_dirb) + self.assertTrue(seen_dirc) + self.assertEqual(sorted(dirnames), ['dirA', 'dirB', 'dirC']) + self.assertEqual(sorted(filenames), + ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA', 'linkB'] + if self.ground.can_symlink else ['fileA']) + seen_root = True + elif path == self.root / 'dirA': + self.assertFalse(seen_root) + self.assertFalse(seen_dira) + self.assertEqual(dirnames, []) + self.assertEqual(filenames, ['linkC'] if self.ground.can_symlink else []) + seen_dira = True + elif path == self.root / 'dirB': + self.assertFalse(seen_root) + self.assertFalse(seen_dirb) + self.assertEqual(dirnames, []) + self.assertEqual(filenames, ['fileB']) + seen_dirb = True + elif path == self.root / 'dirC': + self.assertFalse(seen_root) + self.assertFalse(seen_dirc) + self.assertTrue(seen_dird) + self.assertEqual(dirnames, ['dirD']) + self.assertEqual(sorted(filenames), ['fileC', 'novel.txt']) + seen_dirc = True + elif path == self.root / 'dirC' / 'dirD': + 
self.assertFalse(seen_root) + self.assertFalse(seen_dirc) + self.assertFalse(seen_dird) + self.assertEqual(dirnames, []) + self.assertEqual(filenames, ['fileD']) + seen_dird = True + else: + raise AssertionError(f"Unexpected path: {path}") + self.assertTrue(seen_root) + + def test_info_exists(self): + p = self.root + self.assertTrue(p.info.exists()) + self.assertTrue((p / 'dirA').info.exists()) + self.assertTrue((p / 'dirA').info.exists(follow_symlinks=False)) + self.assertTrue((p / 'fileA').info.exists()) + self.assertTrue((p / 'fileA').info.exists(follow_symlinks=False)) + self.assertFalse((p / 'non-existing').info.exists()) + self.assertFalse((p / 'non-existing').info.exists(follow_symlinks=False)) + if self.ground.can_symlink: + self.assertTrue((p / 'linkA').info.exists()) + self.assertTrue((p / 'linkA').info.exists(follow_symlinks=False)) + self.assertTrue((p / 'linkB').info.exists()) + self.assertTrue((p / 'linkB').info.exists(follow_symlinks=True)) + self.assertFalse((p / 'brokenLink').info.exists()) + self.assertTrue((p / 'brokenLink').info.exists(follow_symlinks=False)) + self.assertFalse((p / 'brokenLinkLoop').info.exists()) + self.assertTrue((p / 'brokenLinkLoop').info.exists(follow_symlinks=False)) + self.assertFalse((p / 'fileA\udfff').info.exists()) + self.assertFalse((p / 'fileA\udfff').info.exists(follow_symlinks=False)) + self.assertFalse((p / 'fileA\x00').info.exists()) + self.assertFalse((p / 'fileA\x00').info.exists(follow_symlinks=False)) + + def test_info_is_dir(self): + p = self.root + self.assertTrue((p / 'dirA').info.is_dir()) + self.assertTrue((p / 'dirA').info.is_dir(follow_symlinks=False)) + self.assertFalse((p / 'fileA').info.is_dir()) + self.assertFalse((p / 'fileA').info.is_dir(follow_symlinks=False)) + self.assertFalse((p / 'non-existing').info.is_dir()) + self.assertFalse((p / 'non-existing').info.is_dir(follow_symlinks=False)) + if self.ground.can_symlink: + self.assertFalse((p / 'linkA').info.is_dir()) + self.assertFalse((p / 
'linkA').info.is_dir(follow_symlinks=False)) + self.assertTrue((p / 'linkB').info.is_dir()) + self.assertFalse((p / 'linkB').info.is_dir(follow_symlinks=False)) + self.assertFalse((p / 'brokenLink').info.is_dir()) + self.assertFalse((p / 'brokenLink').info.is_dir(follow_symlinks=False)) + self.assertFalse((p / 'brokenLinkLoop').info.is_dir()) + self.assertFalse((p / 'brokenLinkLoop').info.is_dir(follow_symlinks=False)) + self.assertFalse((p / 'dirA\udfff').info.is_dir()) + self.assertFalse((p / 'dirA\udfff').info.is_dir(follow_symlinks=False)) + self.assertFalse((p / 'dirA\x00').info.is_dir()) + self.assertFalse((p / 'dirA\x00').info.is_dir(follow_symlinks=False)) + + def test_info_is_file(self): + p = self.root + self.assertTrue((p / 'fileA').info.is_file()) + self.assertTrue((p / 'fileA').info.is_file(follow_symlinks=False)) + self.assertFalse((p / 'dirA').info.is_file()) + self.assertFalse((p / 'dirA').info.is_file(follow_symlinks=False)) + self.assertFalse((p / 'non-existing').info.is_file()) + self.assertFalse((p / 'non-existing').info.is_file(follow_symlinks=False)) + if self.ground.can_symlink: + self.assertTrue((p / 'linkA').info.is_file()) + self.assertFalse((p / 'linkA').info.is_file(follow_symlinks=False)) + self.assertFalse((p / 'linkB').info.is_file()) + self.assertFalse((p / 'linkB').info.is_file(follow_symlinks=False)) + self.assertFalse((p / 'brokenLink').info.is_file()) + self.assertFalse((p / 'brokenLink').info.is_file(follow_symlinks=False)) + self.assertFalse((p / 'brokenLinkLoop').info.is_file()) + self.assertFalse((p / 'brokenLinkLoop').info.is_file(follow_symlinks=False)) + self.assertFalse((p / 'fileA\udfff').info.is_file()) + self.assertFalse((p / 'fileA\udfff').info.is_file(follow_symlinks=False)) + self.assertFalse((p / 'fileA\x00').info.is_file()) + self.assertFalse((p / 'fileA\x00').info.is_file(follow_symlinks=False)) + + def test_info_is_symlink(self): + p = self.root + self.assertFalse((p / 'fileA').info.is_symlink()) + 
self.assertFalse((p / 'dirA').info.is_symlink()) + self.assertFalse((p / 'non-existing').info.is_symlink()) + if self.ground.can_symlink: + self.assertTrue((p / 'linkA').info.is_symlink()) + self.assertTrue((p / 'linkB').info.is_symlink()) + self.assertTrue((p / 'brokenLink').info.is_symlink()) + self.assertFalse((p / 'linkA\udfff').info.is_symlink()) + self.assertFalse((p / 'linkA\x00').info.is_symlink()) + self.assertTrue((p / 'brokenLinkLoop').info.is_symlink()) + self.assertFalse((p / 'fileA\udfff').info.is_symlink()) + self.assertFalse((p / 'fileA\x00').info.is_symlink()) + + +class ZipPathReadTest(ReadTestBase, unittest.TestCase): + ground = ZipPathGround(ReadableZipPath) + + +class LocalPathReadTest(ReadTestBase, unittest.TestCase): + ground = LocalPathGround(ReadableLocalPath) + + +if not is_pypi: + from pathlib import Path + + class PathReadTest(ReadTestBase, unittest.TestCase): + ground = LocalPathGround(Path) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_pathlib/test_write.py b/Lib/test/test_pathlib/test_write.py new file mode 100644 index 00000000000..b958490d0a8 --- /dev/null +++ b/Lib/test/test_pathlib/test_write.py @@ -0,0 +1,141 @@ +""" +Tests for pathlib.types._WritablePath +""" + +import io +import os +import sys +import unittest + +from .support import is_pypi +from .support.local_path import WritableLocalPath, LocalPathGround +from .support.zip_path import WritableZipPath, ZipPathGround + +if is_pypi: + from pathlib_abc import _WritablePath + from pathlib_abc._os import magic_open +else: + from pathlib.types import _WritablePath + from pathlib._os import magic_open + + +class WriteTestBase: + def setUp(self): + self.root = self.ground.setup() + + def tearDown(self): + self.ground.teardown(self.root) + + def test_is_writable(self): + self.assertIsInstance(self.root, _WritablePath) + + def test_open_w(self): + p = self.root / 'fileA' + with magic_open(p, 'w', encoding='utf-8') as f: + self.assertIsInstance(f, 
io.TextIOBase) + f.write('this is file A\n') + self.assertEqual(self.ground.readtext(p), 'this is file A\n') + + @unittest.skipIf( + not getattr(sys.flags, 'warn_default_encoding', 0), + "Requires warn_default_encoding", + ) + def test_open_w_encoding_warning(self): + p = self.root / 'fileA' + with self.assertWarns(EncodingWarning) as wc: + with magic_open(p, 'w'): + pass + self.assertEqual(wc.filename, __file__) + + def test_open_wb(self): + p = self.root / 'fileA' + with magic_open(p, 'wb') as f: + #self.assertIsInstance(f, io.BufferedWriter) + f.write(b'this is file A\n') + self.assertEqual(self.ground.readbytes(p), b'this is file A\n') + self.assertRaises(ValueError, magic_open, p, 'wb', encoding='utf8') + self.assertRaises(ValueError, magic_open, p, 'wb', errors='strict') + self.assertRaises(ValueError, magic_open, p, 'wb', newline='') + + def test_write_bytes(self): + p = self.root / 'fileA' + p.write_bytes(b'abcdefg') + self.assertEqual(self.ground.readbytes(p), b'abcdefg') + # Check that trying to write str does not truncate the file. + self.assertRaises(TypeError, p.write_bytes, 'somestr') + self.assertEqual(self.ground.readbytes(p), b'abcdefg') + + def test_write_text(self): + p = self.root / 'fileA' + p.write_text('äbcdefg', encoding='latin-1') + self.assertEqual(self.ground.readbytes(p), b'\xe4bcdefg') + # Check that trying to write bytes does not truncate the file. 
+ self.assertRaises(TypeError, p.write_text, b'somebytes', encoding='utf-8') + self.assertEqual(self.ground.readbytes(p), b'\xe4bcdefg') + + @unittest.skipIf( + not getattr(sys.flags, 'warn_default_encoding', 0), + "Requires warn_default_encoding", + ) + def test_write_text_encoding_warning(self): + p = self.root / 'fileA' + with self.assertWarns(EncodingWarning) as wc: + p.write_text('abcdefg') + self.assertEqual(wc.filename, __file__) + + def test_write_text_with_newlines(self): + # Check that `\n` character change nothing + p = self.root / 'fileA' + p.write_text('abcde\r\nfghlk\n\rmnopq', encoding='utf-8', newline='\n') + self.assertEqual(self.ground.readbytes(p), b'abcde\r\nfghlk\n\rmnopq') + + # Check that `\r` character replaces `\n` + p = self.root / 'fileB' + p.write_text('abcde\r\nfghlk\n\rmnopq', encoding='utf-8', newline='\r') + self.assertEqual(self.ground.readbytes(p), b'abcde\r\rfghlk\r\rmnopq') + + # Check that `\r\n` character replaces `\n` + p = self.root / 'fileC' + p.write_text('abcde\r\nfghlk\n\rmnopq', encoding='utf-8', newline='\r\n') + self.assertEqual(self.ground.readbytes(p), b'abcde\r\r\nfghlk\r\n\rmnopq') + + # Check that no argument passed will change `\n` to `os.linesep` + os_linesep_byte = bytes(os.linesep, encoding='ascii') + p = self.root / 'fileD' + p.write_text('abcde\nfghlk\n\rmnopq', encoding='utf-8') + self.assertEqual(self.ground.readbytes(p), + b'abcde' + os_linesep_byte + + b'fghlk' + os_linesep_byte + b'\rmnopq') + + def test_mkdir(self): + p = self.root / 'newdirA' + self.assertFalse(self.ground.isdir(p)) + p.mkdir() + self.assertTrue(self.ground.isdir(p)) + + def test_symlink_to(self): + if not self.ground.can_symlink: + self.skipTest('needs symlinks') + link = self.root.joinpath('linkA') + link.symlink_to('fileA') + self.assertTrue(self.ground.islink(link)) + self.assertEqual(self.ground.readlink(link), 'fileA') + + +class ZipPathWriteTest(WriteTestBase, unittest.TestCase): + ground = ZipPathGround(WritableZipPath) + + 
+class LocalPathWriteTest(WriteTestBase, unittest.TestCase): + ground = LocalPathGround(WritableLocalPath) + + +if not is_pypi: + from pathlib import Path + + class PathWriteTest(WriteTestBase, unittest.TestCase): + ground = LocalPathGround(Path) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 04c8ee71a99..8b935dc7f12 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,12 +1,11 @@ from test.support import (gc_collect, bigmemtest, _2G, cpython_only, captured_stdout, - check_disallow_instantiation, is_emscripten, is_wasi, + check_disallow_instantiation, linked_to_musl, warnings_helper, SHORT_TIMEOUT, Stopwatch, requires_resource) import locale import re import string import sys -import time import unittest import warnings from re import Scanner @@ -14,7 +13,7 @@ # some platforms lack working multiprocessing try: - import _multiprocessing + import _multiprocessing # noqa: F401 except ImportError: multiprocessing = None else: @@ -621,6 +620,7 @@ def test_re_fullmatch(self): self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4)) self.assertIsNone(re.fullmatch(r"a+", "ab")) self.assertIsNone(re.fullmatch(r"abc$", "abc\n")) + self.assertIsNone(re.fullmatch(r"abc\z", "abc\n")) self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n")) self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n")) self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4)) @@ -806,6 +806,8 @@ def test_special_escapes(self): self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.ASCII).group(1), "bx") self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") + self.assertEqual(re.search(r"^\Aabc\z$", "abc", re.M).group(0), "abc") + self.assertIsNone(re.search(r"^\Aabc\z$", "\nabc\n", re.M)) self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M)) self.assertEqual(re.search(br"\b(b.)\b", @@ -817,6 +819,8 @@ def 
test_special_escapes(self): self.assertEqual(re.search(br"\B(b.)\B", b"abc bcd bc abxd", re.LOCALE).group(1), b"bx") self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc") + self.assertEqual(re.search(br"^\Aabc\z$", b"abc", re.M).group(0), b"abc") + self.assertIsNone(re.search(br"^\Aabc\z$", b"\nabc\n", re.M)) self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc") self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M)) self.assertEqual(re.search(r"\d\D\w\W\s\S", @@ -840,7 +844,7 @@ def test_other_escapes(self): self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^') self.assertIsNone(re.match(r"[\^a]+", 'b')) re.purge() # for warnings - for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY': + for c in 'ceghijklmopqyCEFGHIJKLMNOPQRTVXY': with self.subTest(c): self.assertRaises(re.PatternError, re.compile, '\\%c' % c) for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ': @@ -888,6 +892,8 @@ def test_named_unicode_escapes(self): self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0) self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1) + # TODO: RUSTPYTHON; re.search(r"\B", "") now returns a match in CPython 3.14 + @unittest.expectedFailure def test_word_boundaries(self): # See http://bugs.python.org/issue10713 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), "abc") @@ -983,18 +989,15 @@ def test_word_boundaries(self): self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE)) self.assertIsNone(re.fullmatch(r".+\B", "ьюя")) self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII)) - # However, an empty string contains no word boundaries, and also no - # non-boundaries. + # However, an empty string contains no word boundaries. 
self.assertIsNone(re.search(r"\b", "")) self.assertIsNone(re.search(r"\b", "", re.ASCII)) self.assertIsNone(re.search(br"\b", b"")) self.assertIsNone(re.search(br"\b", b"", re.LOCALE)) - # This one is questionable and different from the perlre behaviour, - # but describes current behavior. - self.assertIsNone(re.search(r"\B", "")) - self.assertIsNone(re.search(r"\B", "", re.ASCII)) - self.assertIsNone(re.search(br"\B", b"")) - self.assertIsNone(re.search(br"\B", b"", re.LOCALE)) + self.assertTrue(re.search(r"\B", "")) + self.assertTrue(re.search(r"\B", "", re.ASCII)) + self.assertTrue(re.search(br"\B", b"")) + self.assertTrue(re.search(br"\B", b"", re.LOCALE)) # A single word-character string has two boundaries, but no # non-boundary gaps. self.assertEqual(len(re.findall(r"\b", "a")), 2) @@ -1423,7 +1426,7 @@ def test_pickling(self): newpat = pickle.loads(pickled) self.assertEqual(newpat, oldpat) # current pickle expects the _compile() reconstructor in re module - from re import _compile + from re import _compile # noqa: F401 @unittest.expectedFailure # TODO: RUSTPYTHON def test_copying(self): @@ -1755,7 +1758,7 @@ def test_bug_6561(self): for x in not_decimal_digits: self.assertIsNone(re.match(r'^\d$', x)) - @unittest.expectedFailure # TODO: RUSTPYTHON a = array.array(typecode)\n ValueError: bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d) + @unittest.expectedFailure # TODO: RUSTPYTHON; a = array.array(typecode)\n ValueError: bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d) @warnings_helper.ignore_warnings(category=DeprecationWarning) # gh-80480 array('u') def test_empty_array(self): # SF buf 1647541 @@ -2185,10 +2188,9 @@ def test_bug_20998(self): self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) @unittest.expectedFailure # TODO: RUSTPYTHON; self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))\n AssertionError: None is not true - @unittest.skipIf( - is_emscripten or is_wasi, - "musl libc issue on Emscripten/WASI, 
bpo-46390" - ) + @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390") + @unittest.skipIf(sys.platform.startswith("sunos"), + "test doesn't work on Solaris, gh-91214") def test_locale_caching(self): # Issue #22410 oldlocale = locale.setlocale(locale.LC_CTYPE) @@ -2225,10 +2227,9 @@ def check_en_US_utf8(self): self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) - @unittest.skipIf( - is_emscripten or is_wasi, - "musl libc issue on Emscripten/WASI, bpo-46390" - ) + @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390") + @unittest.skipIf(sys.platform.startswith("sunos"), + "test doesn't work on Solaris, gh-91214") def test_locale_compiled(self): oldlocale = locale.setlocale(locale.LC_CTYPE) self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) @@ -2632,8 +2633,8 @@ def test_findall_atomic_grouping(self): @unittest.expectedFailure # TODO: RUSTPYTHON def test_bug_gh91616(self): - self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer - self.assertTrue(re.fullmatch(r'(?s:(?=(?P.*?\.))(?P=g0).*)\Z', "a.txt")) + self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\z', "a.txt")) # reproducer + self.assertTrue(re.fullmatch(r'(?s:(?=(?P.*?\.))(?P=g0).*)\z', "a.txt")) def test_bug_gh100061(self): # gh-100061 @@ -2655,7 +2656,7 @@ def test_bug_gh100061(self): self.assertEqual(re.match("(?>(?:ab?c){1,3})", "aca").span(), (0, 2)) self.assertEqual(re.match("(?:ab?c){1,3}+", "aca").span(), (0, 2)) - @unittest.expectedFailure # TODO: RUSTPYTHON; self.assertEqual(re.match('((x)|y|z){3}+', 'xyz').groups(), ('z', 'x'))\n AssertionError: Tuples differ: ('x', 'x') != ('z', 'x') + @unittest.expectedFailure # TODO: RUSTPYTHON; self.assertEqual(re.match('((x)|y|z){3}+', 'xyz').groups(), ('z', 'x'))\n AssertionError: Tuples differ: ('x', 'x') != ('z', 'x') def test_bug_gh101955(self): # Possessive quantifier with nested alternative with capture groups self.assertEqual(re.match('((x)|y|z)*+', 
'xyz').groups(), ('z', 'x')) @@ -2893,11 +2894,11 @@ def test_long_pattern(self): pattern = 'Very %spattern' % ('long ' * 1000) r = repr(re.compile(pattern)) self.assertLess(len(r), 300) - self.assertEqual(r[:30], "re.compile('Very long long lon") + self.assertStartsWith(r, "re.compile('Very long long lon") r = repr(re.compile(pattern, re.I)) self.assertLess(len(r), 300) - self.assertEqual(r[:30], "re.compile('Very long long lon") - self.assertEqual(r[-16:], ", re.IGNORECASE)") + self.assertStartsWith(r, "re.compile('Very long long lon") + self.assertEndsWith(r, ", re.IGNORECASE)") def test_flags_repr(self): self.assertEqual(repr(re.I), "re.IGNORECASE") @@ -2977,7 +2978,7 @@ def test_deprecated_modules(self): self.assertEqual(mod.__name__, name) self.assertEqual(mod.__package__, '') for attr in deprecated[name]: - self.assertTrue(hasattr(mod, attr)) + self.assertHasAttr(mod, attr) del sys.modules[name] @cpython_only diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 89cabfe0083..b4a949fc747 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -259,7 +259,6 @@ class EmptyQueryStringTest(BaseRobotTest, unittest.TestCase): good = ['/some/path?'] bad = ['/another/path?'] - @unittest.expectedFailure # TODO: RUSTPYTHON; self.assertFalse(self.parser.can_fetch(agent, url))\nAssertionError: True is not false def test_bad_urls(self): super().test_bad_urls() diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 9798a4f59c3..71b54e286a3 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -3525,7 +3525,6 @@ def test_starttls(self): else: s.close() - @unittest.expectedFailure # TODO: RUSTPYTHON def test_socketserver(self): """Using socketserver to create and manage SSL connections.""" server = make_https_server(self, certfile=SIGNED_CERTFILE) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 7e3607842fd..ae524c5ffba 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ 
-7,13 +7,11 @@ import email.message import io import unittest -from unittest.mock import patch from test import support from test.support import os_helper from test.support import socket_helper -from test.support import warnings_helper -from test.support.testcase import ExtraAssertions import os +import socket try: import ssl except ImportError: @@ -21,7 +19,6 @@ import sys import tempfile -from base64 import b64encode import collections @@ -36,32 +33,6 @@ def hexescape(char): hex_repr = "0%s" % hex_repr return "%" + hex_repr -# Shortcut for testing FancyURLopener -_urlopener = None - - -def urlopen(url, data=None, proxies=None): - """urlopen(url [, data]) -> open file-like object""" - global _urlopener - if proxies is not None: - opener = urllib.request.FancyURLopener(proxies=proxies) - elif not _urlopener: - opener = FancyURLopener() - _urlopener = opener - else: - opener = _urlopener - if data is None: - return opener.open(url) - else: - return opener.open(url, data) - - -def FancyURLopener(): - with warnings_helper.check_warnings( - ('FancyURLopener style of invoking requests is deprecated.', - DeprecationWarning)): - return urllib.request.FancyURLopener() - def fakehttp(fakedata, mock_close=False): class FakeSocket(io.BytesIO): @@ -120,27 +91,7 @@ def unfakehttp(self): http.client.HTTPConnection = self._connection_class -class FakeFTPMixin(object): - def fakeftp(self): - class FakeFtpWrapper(object): - def __init__(self, user, passwd, host, port, dirs, timeout=None, - persistent=True): - pass - - def retrfile(self, file, type): - return io.BytesIO(), 0 - - def close(self): - pass - - self._ftpwrapper_class = urllib.request.ftpwrapper - urllib.request.ftpwrapper = FakeFtpWrapper - - def unfakeftp(self): - urllib.request.ftpwrapper = self._ftpwrapper_class - - -class urlopen_FileTests(unittest.TestCase, ExtraAssertions): +class urlopen_FileTests(unittest.TestCase): """Test urlopen() opening a temporary file. 
Try to test as much functionality as possible so as to cut down on reliance @@ -159,7 +110,7 @@ def setUp(self): f.close() self.pathname = os_helper.TESTFN self.quoted_pathname = urllib.parse.quote(os.fsencode(self.pathname)) - self.returned_obj = urlopen("file:%s" % self.quoted_pathname) + self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname) def tearDown(self): """Shut down the open object""" @@ -204,7 +155,7 @@ def test_headers(self): self.assertIsInstance(self.returned_obj.headers, email.message.Message) def test_url(self): - self.assertEqual(self.returned_obj.url, self.quoted_pathname) + self.assertEqual(self.returned_obj.url, "file:" + self.quoted_pathname) def test_status(self): self.assertIsNone(self.returned_obj.status) @@ -213,7 +164,7 @@ def test_info(self): self.assertIsInstance(self.returned_obj.info(), email.message.Message) def test_geturl(self): - self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname) + self.assertEqual(self.returned_obj.geturl(), "file:" + self.quoted_pathname) def test_getcode(self): self.assertIsNone(self.returned_obj.getcode()) @@ -230,6 +181,16 @@ def test_iter(self): def test_relativelocalfile(self): self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname) + def test_remote_authority(self): + # Test for GH-90812. 
+ url = 'file://pythontest.net/foo/bar' + with self.assertRaises(urllib.error.URLError) as e: + urllib.request.urlopen(url) + if os.name == 'nt': + self.assertEqual(e.exception.filename, r'\\pythontest.net\foo\bar') + else: + self.assertEqual(e.exception.reason, 'file:// scheme is supported only on localhost') + class ProxyTests(unittest.TestCase): @@ -338,13 +299,13 @@ def test_getproxies_environment_prefer_lowercase(self): self.assertEqual('http://somewhere:3128', proxies['http']) -class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): +class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): """Test urlopen() opening a fake http connection.""" def check_read(self, ver): self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!") try: - fp = urlopen("http://python.org/") + fp = urllib.request.urlopen("http://python.org/") self.assertEqual(fp.readline(), b"Hello!") self.assertEqual(fp.readline(), b"") self.assertEqual(fp.geturl(), 'http://python.org/') @@ -365,8 +326,8 @@ def test_url_fragment(self): def test_willclose(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") try: - resp = urlopen("http://www.python.org") - self.assertTrue(resp.fp.will_close) + resp = urllib.request.urlopen("http://www.python.org") + self.assertTrue(resp.will_close) finally: self.unfakehttp() @@ -391,9 +352,6 @@ def test_url_path_with_control_char_rejected(self): with self.assertRaisesRegex( InvalidURL, f"contain control.*{escaped_char_repr}"): urllib.request.urlopen(f"https:{schemeless_url}") - # This code path quotes the URL so there is no injection. - resp = urlopen(f"http:{schemeless_url}") - self.assertNotIn(char, resp.geturl()) finally: self.unfakehttp() @@ -415,11 +373,6 @@ def test_url_path_with_newline_header_injection_rejected(self): urllib.request.urlopen(f"http:{schemeless_url}") with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): urllib.request.urlopen(f"https:{schemeless_url}") - # This code path quotes the URL so there is no injection. 
- resp = urlopen(f"http:{schemeless_url}") - self.assertNotIn(' ', resp.geturl()) - self.assertNotIn('\r', resp.geturl()) - self.assertNotIn('\n', resp.geturl()) finally: self.unfakehttp() @@ -434,9 +387,9 @@ def test_url_host_with_control_char_rejected(self): InvalidURL = http.client.InvalidURL with self.assertRaisesRegex( InvalidURL, f"contain control.*{escaped_char_repr}"): - urlopen(f"http:{schemeless_url}") + urllib.request.urlopen(f"http:{schemeless_url}") with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"): - urlopen(f"https:{schemeless_url}") + urllib.request.urlopen(f"https:{schemeless_url}") finally: self.unfakehttp() @@ -449,9 +402,9 @@ def test_url_host_with_newline_header_injection_rejected(self): InvalidURL = http.client.InvalidURL with self.assertRaisesRegex( InvalidURL, r"contain control.*\\r"): - urlopen(f"http:{schemeless_url}") + urllib.request.urlopen(f"http:{schemeless_url}") with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): - urlopen(f"https:{schemeless_url}") + urllib.request.urlopen(f"https:{schemeless_url}") finally: self.unfakehttp() @@ -501,7 +454,7 @@ def test_invalid_redirect(self): def test_redirect_limit_independent(self): # Ticket #12923: make sure independent requests each use their # own retry limit. - for i in range(FancyURLopener().maxtries): + for i in range(urllib.request.HTTPRedirectHandler.max_redirections): self.fakehttp(b'''HTTP/1.1 302 Found Location: file://guidocomputer.athome.com:/python/license Connection: close @@ -518,89 +471,49 @@ def test_empty_socket(self): # data. 
(#1680230) self.fakehttp(b'') try: - self.assertRaises(OSError, urlopen, "http://something") + self.assertRaises(OSError, urllib.request.urlopen, "http://something") finally: self.unfakehttp() def test_missing_localfile(self): # Test for #10836 with self.assertRaises(urllib.error.URLError) as e: - urlopen('file://localhost/a/file/which/doesnot/exists.py') + urllib.request.urlopen('file://localhost/a/file/which/doesnot/exists.py') self.assertTrue(e.exception.filename) self.assertTrue(e.exception.reason) def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() - tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + tmp_file_canon_url = urllib.request.pathname2url(tmp_file, add_scheme=True) + parsed = urllib.parse.urlsplit(tmp_file_canon_url) + tmp_fileurl = parsed._replace(netloc='localhost').geturl() try: self.assertTrue(os.path.exists(tmp_file)) - with urlopen(tmp_fileurl) as fobj: + with urllib.request.urlopen(tmp_fileurl) as fobj: self.assertTrue(fobj) + self.assertEqual(fobj.url, tmp_file_canon_url) finally: os.close(fd) os.unlink(tmp_file) self.assertFalse(os.path.exists(tmp_file)) with self.assertRaises(urllib.error.URLError): - urlopen(tmp_fileurl) + urllib.request.urlopen(tmp_fileurl) def test_ftp_nohost(self): test_ftp_url = 'ftp:///path' with self.assertRaises(urllib.error.URLError) as e: - urlopen(test_ftp_url) + urllib.request.urlopen(test_ftp_url) self.assertFalse(e.exception.filename) self.assertTrue(e.exception.reason) def test_ftp_nonexisting(self): with self.assertRaises(urllib.error.URLError) as e: - urlopen('ftp://localhost/a/file/which/doesnot/exists.py') + urllib.request.urlopen('ftp://localhost/a/file/which/doesnot/exists.py') self.assertFalse(e.exception.filename) self.assertTrue(e.exception.reason) - @patch.object(urllib.request, 'MAXFTPCACHE', 0) - def test_ftp_cache_pruning(self): - self.fakeftp() - try: - urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, []) - 
urlopen('ftp://localhost') - finally: - self.unfakeftp() - def test_userpass_inurl(self): - self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") - try: - fp = urlopen("http://user:pass@python.org/") - self.assertEqual(fp.readline(), b"Hello!") - self.assertEqual(fp.readline(), b"") - self.assertEqual(fp.geturl(), 'http://user:pass@python.org/') - self.assertEqual(fp.getcode(), 200) - finally: - self.unfakehttp() - - def test_userpass_inurl_w_spaces(self): - self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") - try: - userpass = "a b:c d" - url = "http://{}@python.org/".format(userpass) - fakehttp_wrapper = http.client.HTTPConnection - authorization = ("Authorization: Basic %s\r\n" % - b64encode(userpass.encode("ASCII")).decode("ASCII")) - fp = urlopen(url) - # The authorization header must be in place - self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8")) - self.assertEqual(fp.readline(), b"Hello!") - self.assertEqual(fp.readline(), b"") - # the spaces are quoted in URL so no match - self.assertNotEqual(fp.geturl(), url) - self.assertEqual(fp.getcode(), 200) - finally: - self.unfakehttp() - - def test_URLopener_deprecation(self): - with warnings_helper.check_warnings(('',DeprecationWarning)): - urllib.request.URLopener() - - -class urlopen_DataTests(unittest.TestCase, ExtraAssertions): +class urlopen_DataTests(unittest.TestCase): """Test urlopen() opening a data URL.""" def setUp(self): @@ -713,7 +626,7 @@ def tearDown(self): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) - return "file://%s" % urllib.request.pathname2url(filePath) + return urllib.request.pathname2url(filePath, add_scheme=True) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, @@ -1518,6 +1431,32 @@ def test_quoting(self): "url2pathname() failed; %s != %s" % (expect, result)) + def test_pathname2url(self): + # Test cases common to Windows and POSIX. 
+ fn = urllib.request.pathname2url + sep = os.path.sep + self.assertEqual(fn(''), '') + self.assertEqual(fn(sep), '///') + self.assertEqual(fn('a'), 'a') + self.assertEqual(fn(f'a{sep}b.c'), 'a/b.c') + self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c') + self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c') + + def test_pathname2url_add_scheme(self): + sep = os.path.sep + subtests = [ + ('', 'file:'), + (sep, 'file:///'), + ('a', 'file:a'), + (f'a{sep}b.c', 'file:a/b.c'), + (f'{sep}a{sep}b.c', 'file:///a/b.c'), + (f'{sep}a{sep}b%#c', 'file:///a/b%25%23c'), + ] + for path, expected_url in subtests: + with self.subTest(path=path): + self.assertEqual( + urllib.request.pathname2url(path, add_scheme=True), expected_url) + @unittest.skipUnless(sys.platform == 'win32', 'test specific to Windows pathnames.') def test_pathname2url_win(self): @@ -1527,16 +1466,18 @@ def test_pathname2url_win(self): self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '//server/share/dir') self.assertEqual(fn("C:"), '///C:') self.assertEqual(fn("C:\\"), '///C:/') + self.assertEqual(fn('c:\\a\\b.c'), '///c:/a/b.c') self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c') self.assertEqual(fn('C:\\a\\b.c\\'), '///C:/a/b.c/') self.assertEqual(fn('C:\\a\\\\b.c'), '///C:/a//b.c') self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c') self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9') self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo") - # Long drive letter - self.assertRaises(IOError, fn, "XX:\\") + # NTFS alternate data streams + self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar') + self.assertEqual(fn('foo:bar'), 'foo%3Abar') # No drive letter - self.assertEqual(fn("\\folder\\test\\"), '/folder/test/') + self.assertEqual(fn("\\folder\\test\\"), '///folder/test/') self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/') self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/') self.assertEqual(fn('\\\\some\\share\\'), '//some/share/') @@ -1549,7 +1490,7 @@ 
def test_pathname2url_win(self): self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir') # Round-tripping urls = ['///C:', - '/folder/test/', + '///folder/test/', '///C:/foo/bar/spam.foo'] for url in urls: self.assertEqual(fn(urllib.request.url2pathname(url)), url) @@ -1558,12 +1499,9 @@ def test_pathname2url_win(self): 'test specific to POSIX pathnames') def test_pathname2url_posix(self): fn = urllib.request.pathname2url - self.assertEqual(fn('/'), '/') - self.assertEqual(fn('/a/b.c'), '/a/b.c') self.assertEqual(fn('//a/b.c'), '////a/b.c') self.assertEqual(fn('///a/b.c'), '/////a/b.c') self.assertEqual(fn('////a/b.c'), '//////a/b.c') - self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_pathname2url_nonascii(self): @@ -1572,11 +1510,90 @@ def test_pathname2url_nonascii(self): url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors) self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url) + def test_url2pathname(self): + # Test cases common to Windows and POSIX. 
+ fn = urllib.request.url2pathname + sep = os.path.sep + self.assertEqual(fn(''), '') + self.assertEqual(fn('/'), f'{sep}') + self.assertEqual(fn('///'), f'{sep}') + self.assertEqual(fn('////'), f'{sep}{sep}') + self.assertEqual(fn('foo'), 'foo') + self.assertEqual(fn('foo/bar'), f'foo{sep}bar') + self.assertEqual(fn('/foo/bar'), f'{sep}foo{sep}bar') + self.assertEqual(fn('//localhost/foo/bar'), f'{sep}foo{sep}bar') + self.assertEqual(fn('///foo/bar'), f'{sep}foo{sep}bar') + self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar') + self.assertEqual(fn('data:blah'), 'data:blah') + self.assertEqual(fn('data://blah'), f'data:{sep}{sep}blah') + self.assertEqual(fn('foo?bar'), 'foo') + self.assertEqual(fn('foo#bar'), 'foo') + self.assertEqual(fn('foo?bar=baz'), 'foo') + self.assertEqual(fn('foo?bar#baz'), 'foo') + self.assertEqual(fn('foo%3Fbar'), 'foo?bar') + self.assertEqual(fn('foo%23bar'), 'foo#bar') + self.assertEqual(fn('foo%3Fbar%3Dbaz'), 'foo?bar=baz') + self.assertEqual(fn('foo%3Fbar%23baz'), 'foo?bar#baz') + + def test_url2pathname_require_scheme(self): + sep = os.path.sep + subtests = [ + ('file:', ''), + ('FILE:', ''), + ('FiLe:', ''), + ('file:/', f'{sep}'), + ('file:///', f'{sep}'), + ('file:////', f'{sep}{sep}'), + ('file:foo', 'foo'), + ('file:foo/bar', f'foo{sep}bar'), + ('file:/foo/bar', f'{sep}foo{sep}bar'), + ('file://localhost/foo/bar', f'{sep}foo{sep}bar'), + ('file:///foo/bar', f'{sep}foo{sep}bar'), + ('file:////foo/bar', f'{sep}{sep}foo{sep}bar'), + ('file:data:blah', 'data:blah'), + ('file:data://blah', f'data:{sep}{sep}blah'), + ] + for url, expected_path in subtests: + with self.subTest(url=url): + self.assertEqual( + urllib.request.url2pathname(url, require_scheme=True), + expected_path) + + def test_url2pathname_require_scheme_errors(self): + subtests = [ + '', + ':', + 'foo', + 'http:foo', + 'localfile:foo', + 'data:foo', + 'data:file:foo', + 'data:file://foo', + ] + for url in subtests: + with self.subTest(url=url): + 
self.assertRaises( + urllib.error.URLError, + urllib.request.url2pathname, + url, require_scheme=True) + + @unittest.skipIf(support.is_emscripten, "Fixed by https://github.com/emscripten-core/emscripten/pull/24593") + def test_url2pathname_resolve_host(self): + fn = urllib.request.url2pathname + sep = os.path.sep + self.assertEqual(fn('//127.0.0.1/foo/bar', resolve_host=True), f'{sep}foo{sep}bar') + self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), f'{sep}foo{sep}bar') + self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar', resolve_host=True), f'{sep}foo{sep}bar') + @unittest.skipUnless(sys.platform == 'win32', 'test specific to Windows pathnames.') def test_url2pathname_win(self): fn = urllib.request.url2pathname self.assertEqual(fn('/C:/'), 'C:\\') + self.assertEqual(fn('//C:'), 'C:') + self.assertEqual(fn('//C:/'), 'C:\\') + self.assertEqual(fn('//C:\\'), 'C:\\') + self.assertEqual(fn('//C:80/'), 'C:80\\') self.assertEqual(fn("///C|"), 'C:') self.assertEqual(fn("///C:"), 'C:') self.assertEqual(fn('///C:/'), 'C:\\') @@ -1586,6 +1603,7 @@ def test_url2pathname_win(self): self.assertEqual(fn("///C/test/"), '\\C\\test\\') self.assertEqual(fn("////C/test/"), '\\\\C\\test\\') # DOS drive paths + self.assertEqual(fn('c:/path/to/file'), 'c:\\path\\to\\file') self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('C:/path/to/file/'), 'C:\\path\\to\\file\\') self.assertEqual(fn('C:/path/to//file'), 'C:\\path\\to\\\\file') @@ -1593,12 +1611,15 @@ def test_url2pathname_win(self): self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo') - # Non-ASCII drive letter - self.assertRaises(IOError, fn, "///\u00e8|/") + # Colons in URI + self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\') + self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs') + 
self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs') # UNC paths self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file') + self.assertEqual(fn('//127.0.0.1/path/to/file'), '\\\\127.0.0.1\\path\\to\\file') # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') @@ -1618,11 +1639,12 @@ def test_url2pathname_win(self): 'test specific to POSIX pathnames') def test_url2pathname_posix(self): fn = urllib.request.url2pathname - self.assertEqual(fn('/foo/bar'), '/foo/bar') - self.assertEqual(fn('//foo/bar'), '//foo/bar') - self.assertEqual(fn('///foo/bar'), '/foo/bar') - self.assertEqual(fn('////foo/bar'), '//foo/bar') - self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//localhost:/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//:80/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//:/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//c:80/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//127.0.0.1/foo/bar') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_url2pathname_nonascii(self): @@ -1641,56 +1663,6 @@ def test_thishost(self): self.assertIsInstance(urllib.request.thishost(), tuple) -class URLopener_Tests(FakeHTTPMixin, unittest.TestCase): - """Testcase to test the open method of URLopener class.""" - - def test_quoted_open(self): - class DummyURLopener(urllib.request.URLopener): - def open_spam(self, url): - return url - with warnings_helper.check_warnings( - ('DummyURLopener style of invoking requests is deprecated.', - DeprecationWarning)): - self.assertEqual(DummyURLopener().open( - 
'spam://example/ /'),'//example/%20/') - - # test the safe characters are not quoted by urlopen - self.assertEqual(DummyURLopener().open( - "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), - "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") - - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_urlopener_retrieve_file(self): - with os_helper.temp_dir() as tmpdir: - fd, tmpfile = tempfile.mkstemp(dir=tmpdir) - os.close(fd) - fileurl = "file:" + urllib.request.pathname2url(tmpfile) - filename, _ = urllib.request.URLopener().retrieve(fileurl) - # Some buildbots have TEMP folder that uses a lowercase drive letter. - self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile)) - - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_urlopener_retrieve_remote(self): - url = "http://www.python.org/file.txt" - self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") - self.addCleanup(self.unfakehttp) - filename, _ = urllib.request.URLopener().retrieve(url) - self.assertEqual(os.path.splitext(filename)[1], ".txt") - - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_local_file_open(self): - # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme - class DummyURLopener(urllib.request.URLopener): - def open_local_file(self, url): - return url - for url in ('local_file://example', 'local-file://example'): - self.assertRaises(OSError, urllib.request.urlopen, url) - self.assertRaises(OSError, urllib.request.URLopener().open, url) - self.assertRaises(OSError, urllib.request.URLopener().retrieve, url) - self.assertRaises(OSError, DummyURLopener().open, url) - self.assertRaises(OSError, DummyURLopener().retrieve, url) - - class RequestTests(unittest.TestCase): """Unit tests for urllib.request.Request.""" diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 263472499d6..7d7f2fa00d3 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -3,12 +3,12 @@ from test.support import 
os_helper from test.support import requires_subprocess from test.support import warnings_helper -from test.support.testcase import ExtraAssertions from test import test_urllib from unittest import mock import os import io +import ftplib import socket import array import sys @@ -23,10 +23,11 @@ _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) -from urllib.parse import urlparse +from urllib.parse import urlsplit import urllib.error import http.client + support.requires_working_socket(module=True) # XXX @@ -43,10 +44,6 @@ def test___all__(self): context = {} exec('from urllib.%s import *' % module, context) del context['__builtins__'] - if module == 'request' and os.name == 'nt': - u, p = context.pop('url2pathname'), context.pop('pathname2url') - self.assertEqual(u.__module__, 'nturl2path') - self.assertEqual(p.__module__, 'nturl2path') for k, v in context.items(): self.assertEqual(v.__module__, 'urllib.%s' % module, "%r is exposed in 'urllib.%s' but defined in %r" % @@ -717,17 +714,8 @@ def test_processors(self): self.assertIsInstance(args[1], MockResponse) -def sanepathname2url(path): - urlpath = urllib.request.pathname2url(path) - if os.name == "nt" and urlpath.startswith("///"): - urlpath = urlpath[2:] - # XXX don't ask me about the mac... 
- return urlpath - +class HandlerTests(unittest.TestCase): -class HandlerTests(unittest.TestCase, ExtraAssertions): - - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: None != 'image/gif' def test_ftp(self): class MockFTPWrapper: def __init__(self, data): @@ -752,7 +740,6 @@ def connect_ftp(self, user, passwd, host, port, dirs, self.ftpwrapper = MockFTPWrapper(self.data) return self.ftpwrapper - import ftplib data = "rheum rhaponicum" h = NullFTPHandler(data) h.parent = MockOpener() @@ -793,25 +780,50 @@ def connect_ftp(self, user, passwd, host, port, dirs, self.assertEqual(int(headers["Content-length"]), len(data)) r.close() + @support.requires_resource("network") + def test_ftp_error(self): + class ErrorFTPHandler(urllib.request.FTPHandler): + def __init__(self, exception): + self._exception = exception + + def connect_ftp(self, user, passwd, host, port, dirs, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + raise self._exception + + exception = ftplib.error_perm( + "500 OOPS: cannot change directory:/nonexistent") + h = ErrorFTPHandler(exception) + urlopen = urllib.request.build_opener(h).open + try: + urlopen("ftp://www.pythontest.net/") + except urllib.error.URLError as raised: + self.assertEqual(raised.reason, + f"ftp error: {exception.args[0]}") + else: + self.fail("Did not raise ftplib exception") + def test_file(self): import email.utils h = urllib.request.FileHandler() o = h.parent = MockOpener() TESTFN = os_helper.TESTFN - urlpath = sanepathname2url(os.path.abspath(TESTFN)) towrite = b"hello, world\n" + canonurl = urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True) + parsed = urlsplit(canonurl) + if parsed.netloc: + raise unittest.SkipTest("non-local working directory") urls = [ - "file://localhost%s" % urlpath, - "file://%s" % urlpath, - "file://%s%s" % (socket.gethostbyname('localhost'), urlpath), + canonurl, + parsed._replace(netloc='localhost').geturl(), + parsed._replace(netloc=socket.gethostbyname('localhost')).geturl(), 
] try: localaddr = socket.gethostbyname(socket.gethostname()) except socket.gaierror: localaddr = '' if localaddr: - urls.append("file://%s%s" % (localaddr, urlpath)) + urls.append(parsed._replace(netloc=localaddr).geturl()) for url in urls: f = open(TESTFN, "wb") @@ -836,10 +848,10 @@ def test_file(self): self.assertEqual(headers["Content-type"], "text/plain") self.assertEqual(headers["Content-length"], "13") self.assertEqual(headers["Last-modified"], modified) - self.assertEqual(respurl, url) + self.assertEqual(respurl, canonurl) for url in [ - "file://localhost:80%s" % urlpath, + parsed._replace(netloc='localhost:80').geturl(), "file:///file_does_not_exist.txt", "file://not-a-local-host.com//dir/file.txt", "file://%s:80%s/%s" % (socket.gethostbyname('localhost'), @@ -1137,13 +1149,13 @@ def test_full_url_setter(self): r = Request('http://example.com') for url in urls: r.full_url = url - parsed = urlparse(url) + parsed = urlsplit(url) self.assertEqual(r.get_full_url(), url) # full_url setter uses splittag to split into components. 
# splittag sets the fragment as None while urlparse sets it to '' self.assertEqual(r.fragment or '', parsed.fragment) - self.assertEqual(urlparse(r.get_full_url()).query, parsed.query) + self.assertEqual(urlsplit(r.get_full_url()).query, parsed.query) def test_full_url_deleter(self): r = Request('http://www.example.com') @@ -1835,7 +1847,7 @@ def test_invalid_closed(self): self.assertTrue(conn.fakesock.closed, "Connection not closed") -class MiscTests(unittest.TestCase, ExtraAssertions): +class MiscTests(unittest.TestCase): def opener_has_handler(self, opener, handler_class): self.assertTrue(any(h.__class__ == handler_class @@ -1955,10 +1967,38 @@ def test_parse_proxy(self): self.assertRaises(ValueError, _parse_proxy, 'file:/ftp.example.com'), - def test_unsupported_algorithm(self): - handler = AbstractDigestAuthHandler() + +skip_libssl_fips_mode = unittest.skipIf( + support.is_libssl_fips_mode(), + "conservative skip due to OpenSSL FIPS mode possible algorithm nerfing", +) + + +class TestDigestAuthAlgorithms(unittest.TestCase): + def setUp(self): + self.handler = AbstractDigestAuthHandler() + + @skip_libssl_fips_mode + def test_md5_algorithm(self): + H, KD = self.handler.get_algorithm_impls('MD5') + self.assertEqual(H("foo"), "acbd18db4cc2f85cedef654fccc4a4d8") + self.assertEqual(KD("foo", "bar"), "4e99e8c12de7e01535248d2bac85e732") + + @skip_libssl_fips_mode + def test_sha_algorithm(self): + H, KD = self.handler.get_algorithm_impls('SHA') + self.assertEqual(H("foo"), "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") + self.assertEqual(KD("foo", "bar"), "54dcbe67d21d5eb39493d46d89ae1f412d3bd6de") + + @skip_libssl_fips_mode + def test_sha256_algorithm(self): + H, KD = self.handler.get_algorithm_impls('SHA-256') + self.assertEqual(H("foo"), "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae") + self.assertEqual(KD("foo", "bar"), "a765a8beaa9d561d4c5cbed29d8f4e30870297fdfa9cb7d6e9848a95fec9f937") + + def test_invalid_algorithm(self): with 
self.assertRaises(ValueError) as exc: - handler.get_algorithm_impls('invalid') + self.handler.get_algorithm_impls('invalid') self.assertEqual( str(exc.exception), "Unsupported digest authentication algorithm 'invalid'" diff --git a/Lib/test/test_urllib2_localnet.py b/Lib/test/test_urllib2_localnet.py index 9a899785116..982a0a187ef 100644 --- a/Lib/test/test_urllib2_localnet.py +++ b/Lib/test/test_urllib2_localnet.py @@ -11,7 +11,6 @@ from test import support from test.support import hashlib_helper from test.support import threading_helper -from test.support.testcase import ExtraAssertions try: import ssl @@ -447,7 +446,7 @@ def log_message(self, *args): return FakeHTTPRequestHandler -class TestUrlopen(unittest.TestCase, ExtraAssertions): +class TestUrlopen(unittest.TestCase): """Tests urllib.request.urlopen using the network. These tests are not exhaustive. Assuming that testing using files does a @@ -686,7 +685,6 @@ def test_issue16464(self): self.assertEqual(b"1234567890", request.data) self.assertEqual("10", request.get_header("Content-length")) - def setUpModule(): thread_info = threading_helper.threading_setup() unittest.addModuleCleanup(threading_helper.threading_cleanup, *thread_info) diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index 41f170a6ad5..d015267cefd 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -7,7 +7,6 @@ from test.support import os_helper from test.support import socket_helper from test.support import ResourceDenied -from test.test_urllib2 import sanepathname2url from test.support.warnings_helper import check_no_resource_warning import os @@ -192,7 +191,7 @@ def test_file(self): f.write('hi there\n') f.close() urls = [ - 'file:' + sanepathname2url(os.path.abspath(TESTFN)), + urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True), ('file:///nonsensename/etc/passwd', None, urllib.error.URLError), ] diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py index 
6733fe9c6ea..1a42c35dc49 100644 --- a/Lib/test/test_urllibnet.py +++ b/Lib/test/test_urllibnet.py @@ -2,10 +2,10 @@ from test import support from test.support import os_helper from test.support import socket_helper -from test.support.testcase import ExtraAssertions import contextlib import socket +import urllib.error import urllib.parse import urllib.request import os @@ -35,7 +35,7 @@ def testURLread(self): f.read() -class urlopenNetworkTests(unittest.TestCase, ExtraAssertions): +class urlopenNetworkTests(unittest.TestCase): """Tests urllib.request.urlopen using the network. These tests are not exhaustive. Assuming that testing using files does a @@ -101,13 +101,11 @@ def test_getcode(self): # test getcode() with the fancy opener to get 404 error codes URL = self.url + "XXXinvalidXXX" with socket_helper.transient_internet(URL): - with self.assertWarns(DeprecationWarning): - open_url = urllib.request.FancyURLopener().open(URL) - try: - code = open_url.getcode() - finally: - open_url.close() - self.assertEqual(code, 404) + with self.assertRaises(urllib.error.URLError) as e: + with urllib.request.urlopen(URL): + pass + self.assertEqual(e.exception.code, 404) + e.exception.close() @support.requires_resource('walltime') def test_bad_address(self): diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index af6fe99fb51..06423ad2682 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1043,6 +1043,8 @@ def test_telurl_params(self): self.assertEqual(p1.path, '863-1234') self.assertEqual(p1.params, 'phone-context=+1-914-555') + # TODO: RUSTPYTHON; urllib.parse.Quoter was removed in Python 3.14.
+ @unittest.expectedFailure def test_Quoter_repr(self): quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE) self.assertIn('Quoter', repr(quoter)) diff --git a/Lib/test/test_warnings/__init__.py b/Lib/test/test_warnings/__init__.py index abdf7b32df2..87632821a8e 100644 --- a/Lib/test/test_warnings/__init__.py +++ b/Lib/test/test_warnings/__init__.py @@ -241,7 +241,6 @@ def test_once(self): 42) self.assertEqual(len(w), 0) - @unittest.expectedFailure # TODO: RUSTPYTHON re.PatternError: bad escape \z at position 15 def test_filter_module(self): MS_WINDOWS = (sys.platform == 'win32') with self.module.catch_warnings(record=True) as w: diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index c72138a33ca..67d9bbea0d3 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -247,11 +247,11 @@ def _hostinfo(self): return hostname, port -_DefragResultBase = namedtuple('DefragResult', 'url fragment') +_DefragResultBase = namedtuple('_DefragResultBase', 'url fragment') _SplitResultBase = namedtuple( - 'SplitResult', 'scheme netloc path query fragment') + '_SplitResultBase', 'scheme netloc path query fragment') _ParseResultBase = namedtuple( - 'ParseResult', 'scheme netloc path params query fragment') + '_ParseResultBase', 'scheme netloc path params query fragment') _DefragResultBase.__doc__ = """ DefragResult(url, fragment) @@ -392,20 +392,23 @@ def urlparse(url, scheme='', allow_fragments=True): Note that % escapes are not expanded. 
""" url, scheme, _coerce_result = _coerce_args(url, scheme) - splitresult = urlsplit(url, scheme, allow_fragments) - scheme, netloc, url, query, fragment = splitresult - if scheme in uses_params and ';' in url: - url, params = _splitparams(url) - else: - params = '' - result = ParseResult(scheme, netloc, url, params, query, fragment) + scheme, netloc, url, params, query, fragment = _urlparse(url, scheme, allow_fragments) + result = ParseResult(scheme or '', netloc or '', url, params or '', query or '', fragment or '') return _coerce_result(result) -def _splitparams(url): +def _urlparse(url, scheme=None, allow_fragments=True): + scheme, netloc, url, query, fragment = _urlsplit(url, scheme, allow_fragments) + if (scheme or '') in uses_params and ';' in url: + url, params = _splitparams(url, allow_none=True) + else: + params = None + return (scheme, netloc, url, params, query, fragment) + +def _splitparams(url, allow_none=False): if '/' in url: i = url.find(';', url.rfind('/')) if i < 0: - return url, '' + return url, None if allow_none else '' else: i = url.find(';') return url[:i], url[i+1:] @@ -457,7 +460,7 @@ def _check_bracketed_netloc(netloc): # https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ def _check_bracketed_host(hostname): if hostname.startswith('v'): - if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname): + if not re.match(r"\Av[a-fA-F0-9]+\..+\z", hostname): raise ValueError(f"IPvFuture address is invalid") else: ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4 @@ -489,17 +492,23 @@ def urlsplit(url, scheme='', allow_fragments=True): """ url, scheme, _coerce_result = _coerce_args(url, scheme) + scheme, netloc, url, query, fragment = _urlsplit(url, scheme, allow_fragments) + v = SplitResult(scheme or '', netloc or '', url, query or '', fragment or '') + return _coerce_result(v) + +def _urlsplit(url, scheme=None, allow_fragments=True): # Only lstrip url as some applications rely on preserving 
trailing space. # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both) url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE) - scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE) - for b in _UNSAFE_URL_BYTES_TO_REMOVE: url = url.replace(b, "") - scheme = scheme.replace(b, "") + if scheme is not None: + scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE) + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + scheme = scheme.replace(b, "") allow_fragments = bool(allow_fragments) - netloc = query = fragment = '' + netloc = query = fragment = None i = url.find(':') if i > 0 and url[0].isascii() and url[0].isalpha(): for c in url[:i]: @@ -519,8 +528,7 @@ def urlsplit(url, scheme='', allow_fragments=True): if '?' in url: url, query = url.split('?', 1) _checknetloc(netloc) - v = SplitResult(scheme, netloc, url, query, fragment) - return _coerce_result(v) + return (scheme, netloc, url, query, fragment) def urlunparse(components): """Put a parsed URL back together again. This may result in a @@ -529,9 +537,15 @@ def urlunparse(components): (the draft states that these are equivalent).""" scheme, netloc, url, params, query, fragment, _coerce_result = ( _coerce_args(*components)) + if not netloc: + if scheme and scheme in uses_netloc and (not url or url[:1] == '/'): + netloc = '' + else: + netloc = None if params: url = "%s;%s" % (url, params) - return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) + return _coerce_result(_urlunsplit(scheme or None, netloc, url, + query or None, fragment or None)) def urlunsplit(components): """Combine the elements of a tuple as returned by urlsplit() into a @@ -541,20 +555,27 @@ def urlunsplit(components): empty query; the RFC states that these are equivalent).""" scheme, netloc, url, query, fragment, _coerce_result = ( _coerce_args(*components)) - if netloc: + if not netloc: + if scheme and scheme in uses_netloc and (not url or url[:1] == '/'): + netloc = '' + else: + netloc = None + return _coerce_result(_urlunsplit(scheme 
or None, netloc, url, + query or None, fragment or None)) + +def _urlunsplit(scheme, netloc, url, query, fragment): + if netloc is not None: if url and url[:1] != '/': url = '/' + url url = '//' + netloc + url elif url[:2] == '//': url = '//' + url - elif scheme and scheme in uses_netloc and (not url or url[:1] == '/'): - url = '//' + url if scheme: url = scheme + ':' + url - if query: + if query is not None: url = url + '?' + query - if fragment: + if fragment is not None: url = url + '#' + fragment - return _coerce_result(url) + return url def urljoin(base, url, allow_fragments=True): """Join a base URL and a possibly relative URL to form an absolute @@ -565,26 +586,29 @@ def urljoin(base, url, allow_fragments=True): return base base, url, _coerce_result = _coerce_args(base, url) - bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ - urlparse(base, '', allow_fragments) - scheme, netloc, path, params, query, fragment = \ - urlparse(url, bscheme, allow_fragments) - - if scheme != bscheme or scheme not in uses_relative: + bscheme, bnetloc, bpath, bquery, bfragment = \ + _urlsplit(base, None, allow_fragments) + scheme, netloc, path, query, fragment = \ + _urlsplit(url, None, allow_fragments) + + if scheme is None: + scheme = bscheme + if scheme != bscheme or (scheme and scheme not in uses_relative): return _coerce_result(url) - if scheme in uses_netloc: + if not scheme or scheme in uses_netloc: if netloc: - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) + return _coerce_result(_urlunsplit(scheme, netloc, path, + query, fragment)) netloc = bnetloc - if not path and not params: + if not path: path = bpath - params = bparams - if not query: + if query is None: query = bquery - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) + if fragment is None: + fragment = bfragment + return _coerce_result(_urlunsplit(scheme, netloc, path, + query, fragment)) base_parts = bpath.split('/') if 
base_parts[-1] != '': @@ -621,8 +645,8 @@ def urljoin(base, url, allow_fragments=True): # then we need to append the trailing '/' resolved_path.append('') - return _coerce_result(urlunparse((scheme, netloc, '/'.join( - resolved_path) or '/', params, query, fragment))) + return _coerce_result(_urlunsplit(scheme, netloc, '/'.join( + resolved_path) or '/', query, fragment)) def urldefrag(url): @@ -634,12 +658,12 @@ def urldefrag(url): """ url, _coerce_result = _coerce_args(url) if '#' in url: - s, n, p, a, q, frag = urlparse(url) - defrag = urlunparse((s, n, p, a, q, '')) + s, n, p, q, frag = _urlsplit(url) + defrag = _urlunsplit(s, n, p, q, None) else: frag = '' defrag = url - return _coerce_result(DefragResult(defrag, frag)) + return _coerce_result(DefragResult(defrag, frag or '')) _hexdig = '0123456789ABCDEFabcdef' _hextobyte = None @@ -745,7 +769,8 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, parsed_result = {} pairs = parse_qsl(qs, keep_blank_values, strict_parsing, encoding=encoding, errors=errors, - max_num_fields=max_num_fields, separator=separator) + max_num_fields=max_num_fields, separator=separator, + _stacklevel=2) for name, value in pairs: if name in parsed_result: parsed_result[name].append(value) @@ -755,7 +780,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): + encoding='utf-8', errors='replace', max_num_fields=None, separator='&', *, _stacklevel=1): """Parse a query given as a string argument. Arguments: @@ -783,7 +808,6 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, Returns a list, as G-d intended. 
""" - if not separator or not isinstance(separator, (str, bytes)): raise ValueError("Separator must be of type string or bytes.") if isinstance(qs, str): @@ -792,12 +816,21 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, eq = '=' def _unquote(s): return unquote_plus(s, encoding=encoding, errors=errors) + elif qs is None: + return [] else: - if not qs: - return [] - # Use memoryview() to reject integers and iterables, - # acceptable by the bytes constructor. - qs = bytes(memoryview(qs)) + try: + # Use memoryview() to reject integers and iterables, + # acceptable by the bytes constructor. + qs = bytes(memoryview(qs)) + except TypeError: + if not qs: + warnings.warn(f"Accepting {type(qs).__name__} objects with " + f"false value in urllib.parse.parse_qsl() is " + f"deprecated as of 3.14", + DeprecationWarning, stacklevel=_stacklevel + 1) + return [] + raise if isinstance(separator, str): separator = bytes(separator, 'ascii') eq = b'=' @@ -842,14 +875,6 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): b'_.-~') _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) -def __getattr__(name): - if name == 'Quoter': - warnings.warn('Deprecated in 3.11. ' - 'urllib.parse.Quoter will be removed in Python 3.14. ' - 'It was not intended to be a public API.', - DeprecationWarning, stacklevel=2) - return _Quoter - raise AttributeError(f'module {__name__!r} has no attribute {name!r}') class _Quoter(dict): """A mapping from bytes numbers (in range(0,256)) to strings. 
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 21d76913feb..566b8087aec 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -83,6 +83,7 @@ import base64 import bisect +import contextlib import email import hashlib import http.client @@ -94,21 +95,19 @@ import sys import time import tempfile -import contextlib -import warnings from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, _splittype, _splithost, _splitport, _splituser, _splitpasswd, - _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes, + _splitattr, _splitvalue, _splittag, unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook # check for SSL try: - import ssl + import ssl # noqa: F401 except ImportError: _have_ssl = False else: @@ -128,7 +127,7 @@ 'urlopen', 'install_opener', 'build_opener', 'pathname2url', 'url2pathname', 'getproxies', # Legacy interface - 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', + 'urlretrieve', 'urlcleanup', ] # used in User-Agent header sent @@ -165,8 +164,7 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, the reason phrase returned by the server --- instead of the response headers as it is specified in the documentation for HTTPResponse. - For FTP, file, and data URLs and requests explicitly handled by legacy - URLopener and FancyURLopener classes, this function returns a + For FTP, file, and data URLs, this function returns a urllib.response.addinfourl object. 
Note that None may be returned if no handler handles the request (though @@ -940,6 +938,7 @@ def _parse_realm(self, header): for mo in AbstractBasicAuthHandler.rx.finditer(header): scheme, quote, realm = mo.groups() if quote not in ['"', "'"]: + import warnings warnings.warn("Basic Auth Realm was unquoted", UserWarning, 3) @@ -1049,7 +1048,7 @@ def http_error_407(self, req, fp, code, msg, headers): class AbstractDigestAuthHandler: - # Digest authentication is specified in RFC 2617. + # Digest authentication is specified in RFC 2617/7616. # XXX The client does not inspect the Authentication-Info header # in a successful response. @@ -1177,11 +1176,14 @@ def get_authorization(self, req, chal): return base def get_algorithm_impls(self, algorithm): + # algorithm names taken from RFC 7616 Section 6.1 # lambdas assume digest modules are imported at the top level if algorithm == 'MD5': H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest() - elif algorithm == 'SHA': + elif algorithm == 'SHA': # non-standard, retained for compatibility. 
H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest() + elif algorithm == 'SHA-256': + H = lambda x: hashlib.sha256(x.encode("ascii")).hexdigest() # XXX MD5-sess else: raise ValueError("Unsupported digest authentication " @@ -1448,16 +1450,6 @@ def parse_http_list(s): return [part.strip() for part in res] class FileHandler(BaseHandler): - # Use local file or FTP depending on form of URL - def file_open(self, req): - url = req.selector - if url[:2] == '//' and url[2:3] != '/' and (req.host and - req.host != 'localhost'): - if not req.host in self.get_names(): - raise URLError("file:// scheme is supported only on localhost") - else: - return self.open_local_file(req) - # names for the localhost names = None def get_names(self): @@ -1474,35 +1466,41 @@ def get_names(self): def open_local_file(self, req): import email.utils import mimetypes - host = req.host - filename = req.selector - localfile = url2pathname(filename) + localfile = url2pathname(req.full_url, require_scheme=True, resolve_host=True) try: stats = os.stat(localfile) size = stats.st_size modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(filename)[0] + mtype = mimetypes.guess_file_type(localfile)[0] headers = email.message_from_string( 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified)) - if host: - host, port = _splitport(host) - if not host or \ - (not port and _safe_gethostbyname(host) in self.get_names()): - if host: - origurl = 'file://' + host + filename - else: - origurl = 'file://' + filename - return addinfourl(open(localfile, 'rb'), headers, origurl) + origurl = pathname2url(localfile, add_scheme=True) + return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: - raise URLError(exp) - raise URLError('file not on local host') + raise URLError(exp, exp.filename) -def _safe_gethostbyname(host): + file_open = open_local_file + +def _is_local_authority(authority, resolve): + # 
Compare hostnames + if not authority or authority == 'localhost': + return True try: - return socket.gethostbyname(host) - except socket.gaierror: - return None + hostname = socket.gethostname() + except (socket.gaierror, AttributeError): + pass + else: + if authority == hostname: + return True + # Compare IP addresses + if not resolve: + return False + try: + address = socket.gethostbyname(authority) + except (socket.gaierror, AttributeError, UnicodeEncodeError): + return False + return address in FileHandler().get_names() class FTPHandler(BaseHandler): def ftp_open(self, req): @@ -1559,7 +1557,7 @@ def ftp_open(self, req): if fw is not None and not fw.keepalive: fw.close() if isinstance(exp, ftplib.all_errors): - raise URLError(exp) from exp + raise URLError(f"ftp error: {exp}") from exp raise def connect_ftp(self, user, passwd, host, port, dirs, timeout): @@ -1651,710 +1649,80 @@ def data_open(self, req): return addinfourl(io.BytesIO(data), headers, url) -# Code move from the old urllib module - -MAXFTPCACHE = 10 # Trim the ftp cache beyond this size - -# Helper for non-unix systems -if os.name == 'nt': - from nturl2path import url2pathname, pathname2url -else: - def url2pathname(pathname): - """OS-specific conversion from a relative URL of the 'file' scheme - to a file system path; not recommended for general use.""" - if pathname[:3] == '///': - # URL has an empty authority section, so the path begins on the - # third character. - pathname = pathname[2:] - elif pathname[:12] == '//localhost/': - # Skip past 'localhost' authority. 
- pathname = pathname[11:] - encoding = sys.getfilesystemencoding() - errors = sys.getfilesystemencodeerrors() - return unquote(pathname, encoding=encoding, errors=errors) - - def pathname2url(pathname): - """OS-specific conversion from a file system path to a relative URL - of the 'file' scheme; not recommended for general use.""" - if pathname[:2] == '//': - # Add explicitly empty authority to avoid interpreting the path - # as authority. - pathname = '//' + pathname - encoding = sys.getfilesystemencoding() - errors = sys.getfilesystemencodeerrors() - return quote(pathname, encoding=encoding, errors=errors) - - -ftpcache = {} - - -class URLopener: - """Class to open URLs. - This is a class rather than just a subroutine because we may need - more than one set of global protocol-specific options. - Note -- this is a base class for those who don't want the - automatic handling of errors type 302 (relocated) and 401 - (authorization needed).""" - - __tempfiles = None - - version = "Python-urllib/%s" % __version__ - - # Constructor - def __init__(self, proxies=None, **x509): - msg = "%(class)s style of invoking requests is deprecated. " \ - "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} - warnings.warn(msg, DeprecationWarning, stacklevel=3) - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'keys'), "proxies must be a mapping" - self.proxies = proxies - self.key_file = x509.get('key_file') - self.cert_file = x509.get('cert_file') - self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')] - self.__tempfiles = [] - self.__unlink = os.unlink # See cleanup() - self.tempcache = None - # Undocumented feature: if you assign {} to tempcache, - # it is used to cache files retrieved with - # self.retrieve(). This is not enabled by default - # since it does not work for changing documents (and I - # haven't got the logic to check expiration headers - # yet). 
- self.ftpcache = ftpcache - # Undocumented feature: you can use a different - # ftp cache by assigning to the .ftpcache member; - # in case you want logically independent URL openers - # XXX This is not threadsafe. Bah. - - def __del__(self): - self.close() - - def close(self): - self.cleanup() - - def cleanup(self): - # This code sometimes runs when the rest of this module - # has already been deleted, so it can't use any globals - # or import anything. - if self.__tempfiles: - for file in self.__tempfiles: - try: - self.__unlink(file) - except OSError: - pass - del self.__tempfiles[:] - if self.tempcache: - self.tempcache.clear() - - def addheader(self, *args): - """Add a header to be used by the HTTP interface only - e.g. u.addheader('Accept', 'sound/basic')""" - self.addheaders.append(args) - - # External interface - def open(self, fullurl, data=None): - """Use URLopener().open(file) instead of open(file, 'r').""" - fullurl = unwrap(_to_bytes(fullurl)) - fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") - if self.tempcache and fullurl in self.tempcache: - filename, headers = self.tempcache[fullurl] - fp = open(filename, 'rb') - return addinfourl(fp, headers, fullurl) - urltype, url = _splittype(fullurl) - if not urltype: - urltype = 'file' - if urltype in self.proxies: - proxy = self.proxies[urltype] - urltype, proxyhost = _splittype(proxy) - host, selector = _splithost(proxyhost) - url = (host, fullurl) # Signal special case to open_*() - else: - proxy = None - name = 'open_' + urltype - self.type = urltype - name = name.replace('-', '_') - if not hasattr(self, name) or name == 'open_local_file': - if proxy: - return self.open_unknown_proxy(proxy, fullurl, data) - else: - return self.open_unknown(fullurl, data) - try: - if data is None: - return getattr(self, name)(url) - else: - return getattr(self, name)(url, data) - except (HTTPError, URLError): - raise - except OSError as msg: - raise OSError('socket error', msg) from msg - - def open_unknown(self, 
fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = _splittype(fullurl) - raise OSError('url error', 'unknown url type', type) - - def open_unknown_proxy(self, proxy, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = _splittype(fullurl) - raise OSError('url error', 'invalid proxy for %s' % type, proxy) - - # External interface - def retrieve(self, url, filename=None, reporthook=None, data=None): - """retrieve(url) returns (filename, headers) for a local object - or (tempfilename, headers) for a remote object.""" - url = unwrap(_to_bytes(url)) - if self.tempcache and url in self.tempcache: - return self.tempcache[url] - type, url1 = _splittype(url) - if filename is None and (not type or type == 'file'): - try: - fp = self.open_local_file(url1) - hdrs = fp.info() - fp.close() - return url2pathname(_splithost(url1)[1]), hdrs - except OSError: - pass - fp = self.open(url, data) - try: - headers = fp.info() - if filename: - tfp = open(filename, 'wb') - else: - garbage, path = _splittype(url) - garbage, path = _splithost(path or "") - path, garbage = _splitquery(path or "") - path, garbage = _splitattr(path or "") - suffix = os.path.splitext(path)[1] - (fd, filename) = tempfile.mkstemp(suffix) - self.__tempfiles.append(filename) - tfp = os.fdopen(fd, 'wb') - try: - result = filename, headers - if self.tempcache is not None: - self.tempcache[url] = result - bs = 1024*8 - size = -1 - read = 0 - blocknum = 0 - if "content-length" in headers: - size = int(headers["Content-Length"]) - if reporthook: - reporthook(blocknum, bs, size) - while block := fp.read(bs): - read += len(block) - tfp.write(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, bs, size) - finally: - tfp.close() - finally: - fp.close() - - # raise exception if actual size does not match content-length header - if size >= 0 and read < size: - raise ContentTooShortError( - "retrieval incomplete: got only %i out of %i 
bytes" - % (read, size), result) - - return result - - # Each method named open_ knows how to open that type of URL - - def _open_generic_http(self, connection_factory, url, data): - """Make an HTTP connection using connection_class. - - This is an internal method that should be called from - open_http() or open_https(). - - Arguments: - - connection_factory should take a host name and return an - HTTPConnection instance. - - url is the url to retrieval or a host, relative-path pair. - - data is payload for a POST request or None. - """ - - user_passwd = None - proxy_passwd= None - if isinstance(url, str): - host, selector = _splithost(url) - if host: - user_passwd, host = _splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # check whether the proxy contains authorization information - proxy_passwd, host = _splituser(host) - # now we proceed with the url we want to obtain - urltype, rest = _splittype(selector) - url = rest - user_passwd = None - if urltype.lower() != 'http': - realhost = None - else: - realhost, rest = _splithost(rest) - if realhost: - user_passwd, realhost = _splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - if proxy_bypass(realhost): - host = realhost - - if not host: raise OSError('http error', 'no host given') - - if proxy_passwd: - proxy_passwd = unquote(proxy_passwd) - proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') - else: - proxy_auth = None - - if user_passwd: - user_passwd = unquote(user_passwd) - auth = base64.b64encode(user_passwd.encode()).decode('ascii') - else: - auth = None - http_conn = connection_factory(host) - headers = {} - if proxy_auth: - headers["Proxy-Authorization"] = "Basic %s" % proxy_auth - if auth: - headers["Authorization"] = "Basic %s" % auth - if realhost: - headers["Host"] = realhost - - # Add Connection:close as we don't support persistent connections yet. 
- # This helps in closing the socket and avoiding ResourceWarning - - headers["Connection"] = "close" - - for header, value in self.addheaders: - headers[header] = value - - if data is not None: - headers["Content-Type"] = "application/x-www-form-urlencoded" - http_conn.request("POST", selector, data, headers) - else: - http_conn.request("GET", selector, headers=headers) - - try: - response = http_conn.getresponse() - except http.client.BadStatusLine: - # something went wrong with the HTTP status line - raise URLError("http protocol error: bad status line") - - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if 200 <= response.status < 300: - return addinfourl(response, response.msg, "http:" + url, - response.status) - else: - return self.http_error( - url, response.fp, - response.status, response.reason, response.msg, data) - - def open_http(self, url, data=None): - """Use HTTP protocol.""" - return self._open_generic_http(http.client.HTTPConnection, url, data) - - def http_error(self, url, fp, errcode, errmsg, headers, data=None): - """Handle http errors. 
- - Derived class can override this, or provide specific handlers - named http_error_DDD where DDD is the 3-digit error code.""" - # First check if there's a specific handler for this error - name = 'http_error_%d' % errcode - if hasattr(self, name): - method = getattr(self, name) - if data is None: - result = method(url, fp, errcode, errmsg, headers) - else: - result = method(url, fp, errcode, errmsg, headers, data) - if result: return result - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise OSError.""" - fp.close() - raise HTTPError(url, errcode, errmsg, headers, None) - - if _have_ssl: - def _https_connection(self, host): - if self.key_file or self.cert_file: - http_version = http.client.HTTPSConnection._http_vsn - context = http.client._create_https_context(http_version) - context.load_cert_chain(self.cert_file, self.key_file) - # cert and key file means the user wants to authenticate. - # enable TLS 1.3 PHA implicitly even for custom contexts. 
- if context.post_handshake_auth is not None: - context.post_handshake_auth = True - else: - context = None - return http.client.HTTPSConnection(host, context=context) - - def open_https(self, url, data=None): - """Use HTTPS protocol.""" - return self._open_generic_http(self._https_connection, url, data) - - def open_file(self, url): - """Use local file or FTP depending on form of URL.""" - if not isinstance(url, str): - raise URLError('file error: proxy support for file protocol currently not implemented') - if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': - raise ValueError("file:// scheme is supported only on localhost") - else: - return self.open_local_file(url) - - def open_local_file(self, url): - """Use local file.""" - import email.utils - import mimetypes - host, file = _splithost(url) - localname = url2pathname(file) - try: - stats = os.stat(localname) - except OSError as e: - raise URLError(e.strerror, e.filename) - size = stats.st_size - modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(url)[0] - headers = email.message_from_string( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified)) - if not host: - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - return addinfourl(open(localname, 'rb'), headers, urlfile) - host, port = _splitport(host) - if (not port - and socket.gethostbyname(host) in ((localhost(),) + thishost())): - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - elif file[:2] == './': - raise ValueError("local file url may start with / or file:. 
Unknown url of type: %s" % url) - return addinfourl(open(localname, 'rb'), headers, urlfile) - raise URLError('local file error: not on local host') - - def open_ftp(self, url): - """Use FTP protocol.""" - if not isinstance(url, str): - raise URLError('ftp error: proxy support for ftp protocol currently not implemented') - import mimetypes - host, path = _splithost(url) - if not host: raise URLError('ftp error: no host given') - host, port = _splitport(host) - user, host = _splituser(host) - if user: user, passwd = _splitpasswd(user) - else: passwd = None - host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') - host = socket.gethostbyname(host) - if not port: - import ftplib - port = ftplib.FTP_PORT - else: - port = int(port) - path, attrs = _splitattr(path) - path = unquote(path) - dirs = path.split('/') - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: dirs = dirs[1:] - if dirs and not dirs[0]: dirs[0] = '/' - key = user, host, port, '/'.join(dirs) - # XXX thread unsafe! 
- if len(self.ftpcache) > MAXFTPCACHE: - # Prune the cache, rather arbitrarily - for k in list(self.ftpcache): - if k != key: - v = self.ftpcache[k] - del self.ftpcache[k] - v.close() - try: - if key not in self.ftpcache: - self.ftpcache[key] = \ - ftpwrapper(user, passwd, host, port, dirs) - if not file: type = 'D' - else: type = 'I' - for attr in attrs: - attr, value = _splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - (fp, retrlen) = self.ftpcache[key].retrfile(file, type) - mtype = mimetypes.guess_type("ftp:" + url)[0] - headers = "" - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - headers = email.message_from_string(headers) - return addinfourl(fp, headers, "ftp:" + url) - except ftperrors() as exp: - raise URLError(f'ftp error: {exp}') from exp - - def open_data(self, url, data=None): - """Use "data" URL.""" - if not isinstance(url, str): - raise URLError('data error: proxy support for data protocol currently not implemented') - # ignore POSTed data - # - # syntax of data URLs: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - try: - [type, data] = url.split(',', 1) - except ValueError: - raise OSError('data error', 'bad data URL') - if not type: - type = 'text/plain;charset=US-ASCII' - semi = type.rfind(';') - if semi >= 0 and '=' not in type[semi:]: - encoding = type[semi+1:] - type = type[:semi] - else: - encoding = '' - msg = [] - msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', - time.gmtime(time.time()))) - msg.append('Content-type: %s' % type) - if encoding == 'base64': - # XXX is this encoding/decoding ok? 
- data = base64.decodebytes(data.encode('ascii')).decode('latin-1') - else: - data = unquote(data) - msg.append('Content-Length: %d' % len(data)) - msg.append('') - msg.append(data) - msg = '\n'.join(msg) - headers = email.message_from_string(msg) - f = io.StringIO(msg) - #f.fileno = None # needed for addinfourl - return addinfourl(f, headers, url) - - -class FancyURLopener(URLopener): - """Derived class with handlers for errors we can handle (perhaps).""" - - def __init__(self, *args, **kwargs): - URLopener.__init__(self, *args, **kwargs) - self.auth_cache = {} - self.tries = 0 - self.maxtries = 10 - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handling -- don't raise an exception.""" - return addinfourl(fp, headers, "http:" + url, errcode) - - def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): - """Error 302 -- relocated (temporarily).""" - self.tries += 1 - try: - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", - headers) - result = self.redirect_internal(url, fp, errcode, errmsg, - headers, data) - return result - finally: - self.tries = 0 - - def redirect_internal(self, url, fp, errcode, errmsg, headers, data): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - return - fp.close() - - # In case the server sent a relative URL, join with original: - newurl = urljoin(self.type + ":" + url, newurl) - - urlparts = urlparse(newurl) - - # For security reasons, we don't allow redirection to anything other - # than http, https and ftp. 
+# Code moved from the old urllib module - # We are using newer HTTPError with older redirect_internal method - # This older method will get deprecated in 3.3 - - if urlparts.scheme not in ('http', 'https', 'ftp', ''): - raise HTTPError(newurl, errcode, - errmsg + - " Redirection to url '%s' is not allowed." % newurl, - headers, fp) +def url2pathname(url, *, require_scheme=False, resolve_host=False): + """Convert the given file URL to a local file system path. - return self.open(newurl) + The 'file:' scheme prefix must be omitted unless *require_scheme* + is set to true. - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - """Error 301 -- also relocated (permanently).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): - """Error 303 -- also relocated (essentially identical to 302).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): - """Error 307 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_308(self, url, fp, errcode, errmsg, headers, data=None): - """Error 308 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_301(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None, - retry=False): - """Error 401 -- authentication required. 
- This function supports Basic authentication only.""" - if 'www-authenticate' not in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['www-authenticate'] - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - if not retry: - URLopener.http_error_default(self, url, fp, errcode, errmsg, - headers) - name = 'retry_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def http_error_407(self, url, fp, errcode, errmsg, headers, data=None, - retry=False): - """Error 407 -- proxy authentication required. - This function supports Basic authentication only.""" - if 'proxy-authenticate' not in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['proxy-authenticate'] - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - if not retry: - URLopener.http_error_default(self, url, fp, errcode, errmsg, - headers) - name = 'retry_proxy_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def retry_proxy_http_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - newurl = 'http://' + host + selector - proxy = self.proxies['http'] - urltype, proxyhost = _splittype(proxy) - proxyhost, proxyselector = _splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = 
self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), proxyhost) - self.proxies['http'] = 'http://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_proxy_https_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - newurl = 'https://' + host + selector - proxy = self.proxies['https'] - urltype, proxyhost = _splittype(proxy) - proxyhost, proxyselector = _splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), proxyhost) - self.proxies['https'] = 'https://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_http_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), host) - newurl = 'http://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_https_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), host) - newurl = 'https://' + host + selector - if data is None: - return self.open(newurl) + The URL authority may be resolved with gethostbyname() if + *resolve_host* is set to true. 
+ """ + if not require_scheme: + url = 'file:' + url + scheme, authority, url = urlsplit(url)[:3] # Discard query and fragment. + if scheme != 'file': + raise URLError("URL is missing a 'file:' scheme") + if os.name == 'nt': + if authority[1:2] == ':': + # e.g. file://c:/file.txt + url = authority + url + elif not _is_local_authority(authority, resolve_host): + # e.g. file://server/share/file.txt + url = '//' + authority + url + elif url[:3] == '///': + # e.g. file://///server/share/file.txt + url = url[1:] else: - return self.open(newurl, data) - - def get_user_passwd(self, host, realm, clear_cache=0): - key = realm + '@' + host.lower() - if key in self.auth_cache: - if clear_cache: - del self.auth_cache[key] - else: - return self.auth_cache[key] - user, passwd = self.prompt_user_passwd(host, realm) - if user or passwd: self.auth_cache[key] = (user, passwd) - return user, passwd - - def prompt_user_passwd(self, host, realm): - """Override this in a GUI environment!""" - import getpass - try: - user = input("Enter username for %s at %s: " % (realm, host)) - passwd = getpass.getpass("Enter password for %s in %s at %s: " % - (user, realm, host)) - return user, passwd - except KeyboardInterrupt: - print() - return None, None + if url[:1] == '/' and url[2:3] in (':', '|'): + # Skip past extra slash before DOS drive in URL path. + url = url[1:] + if url[1:2] == '|': + # Older URLs use a pipe after a drive letter + url = url[:1] + ':' + url[2:] + url = url.replace('/', '\\') + elif not _is_local_authority(authority, resolve_host): + raise URLError("file:// scheme is supported only on localhost") + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + return unquote(url, encoding=encoding, errors=errors) + + +def pathname2url(pathname, *, add_scheme=False): + """Convert the given local file system path to a file URL. + + The 'file:' scheme prefix is omitted unless *add_scheme* + is set to true. 
+ """ + if os.name == 'nt': + pathname = pathname.replace('\\', '/') + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + scheme = 'file:' if add_scheme else '' + drive, root, tail = os.path.splitroot(pathname) + if drive: + # First, clean up some special forms. We are going to sacrifice the + # additional information anyway + if drive[:4] == '//?/': + drive = drive[4:] + if drive[:4].upper() == 'UNC/': + drive = '//' + drive[4:] + if drive[1:] == ':': + # DOS drive specified. Add three slashes to the start, producing + # an authority section with a zero-length authority, and a path + # section starting with a single slash. + drive = '///' + drive + drive = quote(drive, encoding=encoding, errors=errors, safe='/:') + elif root: + # Add explicitly empty authority to absolute path. If the path + # starts with exactly one slash then this change is mostly + # cosmetic, but if it begins with two or more slashes then this + # avoids interpreting the path as a URL authority. + root = '//' + root + tail = quote(tail, encoding=encoding, errors=errors) + return scheme + drive + root + tail # Utility functions @@ -2502,9 +1870,7 @@ def getproxies_environment(): """Return a dictionary of scheme -> proxy server URL mappings. Scan the environment for variables named <scheme>_proxy; - this seems to be the standard convention. If you need a - different way, you can pass a proxies dictionary to the - [Fancy]URLopener constructor. + this seems to be the standard convention. 
""" # in order to prefer lowercase variables, process environment in # two passes: first matches any, second pass matches lowercase only diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py index 63689816f30..4009fd6b58f 100644 --- a/Lib/urllib/robotparser.py +++ b/Lib/urllib/robotparser.py @@ -181,8 +181,10 @@ def can_fetch(self, useragent, url): return False # search for given user agent matches # the first match counts - parsed_url = urllib.parse.urlsplit(url) - url = urllib.parse.urlunsplit(('', '', *parsed_url[2:])) + # TODO: The private API is used in order to preserve an empty query. + # This is temporary until the public API starts supporting this feature. + parsed_url = urllib.parse._urlsplit(url, '') + url = urllib.parse._urlunsplit(None, None, *parsed_url[2:]) url = normalize_path(url) if not url: url = "/"