diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 48c8f770f81..116ce4f37ec 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -16,15 +16,16 @@ _setmode = None import io -from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) +from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END, Reader, Writer) # noqa: F401 valid_seek_flags = {0, 1, 2} # Hardwired values if hasattr(os, 'SEEK_HOLE') : valid_seek_flags.add(os.SEEK_HOLE) valid_seek_flags.add(os.SEEK_DATA) -# open() uses st_blksize whenever we can -DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes +# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) +# when the device block size is available. +DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want @@ -123,10 +124,10 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: - * Binary files are buffered in fixed-size chunks; the size of the buffer - is chosen using a heuristic trying to determine the underlying device's - "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. - On many systems, the buffer will typically be 4096 or 8192 bytes long. + * Binary files are buffered in fixed-size chunks; the size of the buffer + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) + when the device block size is available. + On most systems, the buffer will typically be 128 kilobytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -238,18 +239,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, result = raw try: line_buffering = False - if buffering == 1 or buffering < 0 and raw.isatty(): + if buffering == 1 or buffering < 0 and raw._isatty_open_only(): buffering = -1 line_buffering = True if buffering < 0: - buffering = DEFAULT_BUFFER_SIZE - try: - bs = os.fstat(raw.fileno()).st_blksize - except (OSError, AttributeError): - pass - else: - if bs > 1: - buffering = bs + buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: @@ -941,22 +935,22 @@ def read1(self, size=-1): return self.read(size) def write(self, b): - if self.closed: - raise ValueError("write to closed file") if isinstance(b, str): raise TypeError("can't write str to binary stream") with memoryview(b) as view: + if self.closed: + raise ValueError("write to closed file") + n = view.nbytes # Size of any bytes-like object - if n == 0: - return 0 - pos = self._pos - if pos > len(self._buffer): - # Inserts null bytes between the current end of the file - # and the new write position. - padding = b'\x00' * (pos - len(self._buffer)) - self._buffer += padding - self._buffer[pos:pos + n] = b - self._pos += n + if n == 0: + return 0 + + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = view + self._pos += n return n def seek(self, pos, whence=0): @@ -1470,6 +1464,17 @@ def write(self, b): return BufferedWriter.write(self, b) +def _new_buffersize(bytes_read): + # Parallels _io/fileio.c new_buffersize + if bytes_read > 65536: + addend = bytes_read >> 3 + else: + addend = 256 + bytes_read + if addend < DEFAULT_BUFFER_SIZE: + addend = DEFAULT_BUFFER_SIZE + return bytes_read + addend + + class FileIO(RawIOBase): _fd = -1 _created = False @@ -1494,6 +1499,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None): """ if self._fd >= 0: # Have to close the existing file first. + self._stat_atopen = None try: if self._closefd: os.close(self._fd) @@ -1573,18 +1579,15 @@ def __init__(self, file, mode='r', closefd=True, opener=None): os.set_inheritable(fd, False) self._closefd = closefd - fdfstat = os.fstat(fd) + self._stat_atopen = os.fstat(fd) try: - if stat.S_ISDIR(fdfstat.st_mode): + if stat.S_ISDIR(self._stat_atopen.st_mode): raise IsADirectoryError(errno.EISDIR, os.strerror(errno.EISDIR), file) except AttributeError: # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR # don't exist. pass - self._blksize = getattr(fdfstat, 'st_blksize', 0) - if self._blksize <= 1: - self._blksize = DEFAULT_BUFFER_SIZE if _setmode: # don't translate newlines (\r\n <=> \n) @@ -1601,6 +1604,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if e.errno != errno.ESPIPE: raise except: + self._stat_atopen = None if owned_fd is not None: os.close(owned_fd) raise @@ -1629,6 +1633,17 @@ def __repr__(self): return ('<%s name=%r mode=%r closefd=%r>' % (class_name, name, self.mode, self._closefd)) + @property + def _blksize(self): + if self._stat_atopen is None: + return DEFAULT_BUFFER_SIZE + + blksize = getattr(self._stat_atopen, "st_blksize", 0) + # WASI sets blsize to 0 + if not blksize: + return DEFAULT_BUFFER_SIZE + return blksize + def _checkReadable(self): if not self._readable: raise UnsupportedOperation('File not open for reading') @@ -1640,7 +1655,13 @@ def _checkWritable(self, msg=None): def read(self, size=None): """Read at most size bytes, returned as bytes. - Only makes one system call, so less data may be returned than requested + If size is less than 0, read all bytes in the file making + multiple read calls. See ``FileIO.readall``. + + Attempts to make only one system call, retrying only per + PEP 475 (EINTR). This means less data may be returned than + requested. + In non-blocking mode, returns None if no data is available. Return an empty bytes object at EOF. """ @@ -1656,45 +1677,57 @@ def read(self, size=None): def readall(self): """Read all data from the file, returned as bytes. - In non-blocking mode, returns as much as is immediately available, - or None if no data is available. Return an empty bytes object at EOF. + Reads until either there is an error or read() returns size 0 + (indicates EOF). If the file is already at EOF, returns an + empty bytes object. + + In non-blocking mode, returns as much data as could be read + before EAGAIN. If no data is available (EAGAIN is returned + before bytes are read) returns None. """ self._checkClosed() self._checkReadable() - bufsize = DEFAULT_BUFFER_SIZE - try: - pos = os.lseek(self._fd, 0, SEEK_CUR) - end = os.fstat(self._fd).st_size - if end >= pos: - bufsize = end - pos + 1 - except OSError: - pass + if self._stat_atopen is None or self._stat_atopen.st_size <= 0: + bufsize = DEFAULT_BUFFER_SIZE + else: + # In order to detect end of file, need a read() of at least 1 + # byte which returns size 0. Oversize the buffer by 1 byte so the + # I/O can be completed with two read() calls (one for all data, one + # for EOF) without needing to resize the buffer. + bufsize = self._stat_atopen.st_size + 1 - result = bytearray() - while True: - if len(result) >= bufsize: - bufsize = len(result) - bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) - n = bufsize - len(result) - try: - chunk = os.read(self._fd, n) - except BlockingIOError: - if result: - break + if self._stat_atopen.st_size > 65536: + try: + pos = os.lseek(self._fd, 0, SEEK_CUR) + if self._stat_atopen.st_size >= pos: + bufsize = self._stat_atopen.st_size - pos + 1 + except OSError: + pass + + result = bytearray(bufsize) + bytes_read = 0 + try: + while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): + bytes_read += n + if bytes_read >= len(result): + result.resize(_new_buffersize(bytes_read)) + except BlockingIOError: + if not bytes_read: return None - if not chunk: # reached the end of the file - break - result += chunk + assert len(result) - bytes_read >= 1, \ + "os.readinto buffer size 0 will result in erroneous EOF / returns 0" + result.resize(bytes_read) return bytes(result) - def readinto(self, b): + def readinto(self, buffer): """Same as RawIOBase.readinto().""" - m = memoryview(b).cast('B') - data = self.read(len(m)) - n = len(data) - m[:n] = data - return n + self._checkClosed() + self._checkReadable() + try: + return os.readinto(self._fd, buffer) + except BlockingIOError: + return None def write(self, b): """Write bytes b to file, return number written. @@ -1744,6 +1777,7 @@ def truncate(self, size=None): if size is None: size = self.tell() os.ftruncate(self._fd, size) + self._stat_atopen = None return size def close(self): @@ -1753,6 +1787,7 @@ def close(self): called more than once without error. """ if not self.closed: + self._stat_atopen = None try: if self._closefd and self._fd >= 0: os.close(self._fd) @@ -1791,6 +1826,21 @@ def isatty(self): self._checkClosed() return os.isatty(self._fd) + def _isatty_open_only(self): + """Checks whether the file is a TTY using an open-only optimization. + + TTYs are always character devices. If the interpreter knows a file is + not a character device when it would call ``isatty``, can skip that + call. Inside ``open()`` there is a fresh stat result that contains that + information. Use the stat result to skip a system call. Outside of that + context TOCTOU issues (the fd could be arbitrarily modified by + surrounding code). + """ + if (self._stat_atopen is not None + and not stat.S_ISCHR(self._stat_atopen.st_mode)): + return False + return os.isatty(self._fd) + @property def closefd(self): """True if the file descriptor will be closed by close().""" @@ -2015,8 +2065,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None, raise ValueError("invalid encoding: %r" % encoding) if not codecs.lookup(encoding)._is_text_encoding: - msg = ("%r is not a text encoding; " - "use codecs.open() to handle arbitrary codecs") + msg = "%r is not a text encoding" raise LookupError(msg % encoding) if errors is None: @@ -2524,9 +2573,12 @@ def read(self, size=None): size = size_index() decoder = self._decoder or self._get_decoder() if size < 0: + chunk = self.buffer.read() + if chunk is None: + raise BlockingIOError("Read returned None.") # Read everything. result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) + decoder.decode(chunk, final=True)) if self._snapshot is not None: self._set_decoded_chars('') self._snapshot = None diff --git a/Lib/io.py b/Lib/io.py index f0e2fa15d5a..63ffadb1d38 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -46,21 +46,20 @@ "BufferedReader", "BufferedWriter", "BufferedRWPair", "BufferedRandom", "TextIOBase", "TextIOWrapper", "UnsupportedOperation", "SEEK_SET", "SEEK_CUR", "SEEK_END", - "DEFAULT_BUFFER_SIZE", "text_encoding", "IncrementalNewlineDecoder"] + "DEFAULT_BUFFER_SIZE", "text_encoding", "IncrementalNewlineDecoder", + "Reader", "Writer"] import _io import abc +from _collections_abc import _check_methods from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, open, open_code, FileIO, BytesIO, StringIO, BufferedReader, BufferedWriter, BufferedRWPair, BufferedRandom, IncrementalNewlineDecoder, text_encoding, TextIOWrapper) -# Pretend this exception was created here. -UnsupportedOperation.__module__ = "io" - # for seek() SEEK_SET = 0 SEEK_CUR = 1 @@ -97,3 +96,55 @@ class TextIOBase(_io._TextIOBase, IOBase): pass else: RawIOBase.register(_WindowsConsoleIO) + +# +# Static Typing Support +# + +GenericAlias = type(list[int]) + + +class Reader(metaclass=abc.ABCMeta): + """Protocol for simple I/O reader instances. + + This protocol only supports blocking I/O. + """ + + __slots__ = () + + @abc.abstractmethod + def read(self, size=..., /): + """Read data from the input stream and return it. + + If *size* is specified, at most *size* items (bytes/characters) will be + read. + """ + + @classmethod + def __subclasshook__(cls, C): + if cls is Reader: + return _check_methods(C, "read") + return NotImplemented + + __class_getitem__ = classmethod(GenericAlias) + + +class Writer(metaclass=abc.ABCMeta): + """Protocol for simple I/O writer instances. + + This protocol only supports blocking I/O. + """ + + __slots__ = () + + @abc.abstractmethod + def write(self, data, /): + """Write *data* to the output stream and return the number of items written.""" + + @classmethod + def __subclasshook__(cls, C): + if cls is Writer: + return _check_methods(C, "write") + return NotImplemented + + __class_getitem__ = classmethod(GenericAlias) diff --git a/Lib/test/test_bufio.py b/Lib/test/test_bufio.py index dc9a82dc635..cb9cb4d0bc7 100644 --- a/Lib/test/test_bufio.py +++ b/Lib/test/test_bufio.py @@ -28,7 +28,7 @@ def try_one(self, s): f.write(b"\n") f.write(s) f.close() - f = open(os_helper.TESTFN, "rb") + f = self.open(os_helper.TESTFN, "rb") line = f.readline() self.assertEqual(line, s + b"\n") line = f.readline() diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index fdb36ed997d..ac57e71c46e 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -10,8 +10,8 @@ from functools import wraps from test.support import ( - cpython_only, swap_attr, gc_collect, is_emscripten, is_wasi, - infinite_recursion, + cpython_only, swap_attr, gc_collect, is_wasi, + infinite_recursion, strace_helper ) from test.support.os_helper import ( TESTFN, TESTFN_ASCII, TESTFN_UNICODE, make_bad_fd, @@ -24,6 +24,9 @@ import _pyio # Python implementation of io +_strace_flags=["--trace=%file,%desc"] + + class AutoFileTests: # file tests for which a test file is automatically set up @@ -359,6 +362,144 @@ def testErrnoOnClosedReadinto(self, f): a = array('b', b'x'*10) f.readinto(a) + @unittest.skip("TODO: RUSTPYTHON; extra ioctl(TCGETS) syscall for isatty check") + @strace_helper.requires_strace() + def test_syscalls_read(self): + """Check set of system calls during common I/O patterns + + It's expected as bits of the I/O implementation change, this will need + to change. The goal is to catch changes that unintentionally add + additional systemcalls (ex. additional calls have been looked at in + bpo-21679 and gh-120754). + """ + self.f.write(b"Hello, World!") + self.f.close() + + + def check_readall(name, code, prelude="", cleanup="", + extra_checks=None): + with self.subTest(name=name): + syscalls = strace_helper.get_events(code, _strace_flags, + prelude=prelude, + cleanup=cleanup) + + # Some system calls (ex. mmap) can be used for both File I/O and + # memory allocation. Filter out the ones used for memory + # allocation. + syscalls = strace_helper.filter_memory(syscalls) + + # The first call should be an open that returns a + # file descriptor (fd). Afer that calls may vary. Once the file + # is opened, check calls refer to it by fd as the filename + # could be removed from the filesystem, renamed, etc. See: + # Time-of-check time-of-use (TOCTOU) software bug class. + # + # There are a number of related but distinct open system calls + # so not checking precise name here. + self.assertGreater( + len(syscalls), + 1, + f"Should have had at least an open call|calls={syscalls}") + fd_str = syscalls[0].returncode + + # All other calls should contain the fd in their argument set. + for ev in syscalls[1:]: + self.assertIn( + fd_str, + ev.args, + f"Looking for file descriptor in arguments|ev={ev}" + ) + + # There are a number of related syscalls used to implement + # behaviors in a libc (ex. fstat, newfstatat, statx, open, openat). + # Allow any that use the same substring. + def count_similarname(name): + return len([ev for ev in syscalls if name in ev.syscall]) + + checks = [ + # Should open and close the file exactly once + ("open", 1), + ("close", 1), + # There should no longer be an isatty call (All files being + # tested are block devices / not character devices). + ('ioctl', 0), + # Should only have one fstat (bpo-21679, gh-120754) + # note: It's important this uses a fd rather than filename, + # That is validated by the `fd` check above. + # note: fstat, newfstatat, and statx have all been observed + # here in the underlying C library implementations. + ("stat", 1) + ] + + if extra_checks: + checks += extra_checks + + for call, count in checks: + self.assertEqual( + count_similarname(call), + count, + msg=f"call={call}|count={count}|syscalls={syscalls}" + ) + + # "open, read, close" file using different common patterns. + check_readall( + "open builtin with default options", + f""" + f = open('{TESTFN}') + f.read() + f.close() + """ + ) + + check_readall( + "open in binary mode", + f""" + f = open('{TESTFN}', 'rb') + f.read() + f.close() + """ + ) + + check_readall( + "open in text mode", + f""" + f = open('{TESTFN}', 'rt') + f.read() + f.close() + """, + # GH-122111: read_text uses BufferedIO which requires looking up + # position in file. `read_bytes` disables that buffering and avoids + # these calls which is tested the `pathlib read_bytes` case. + extra_checks=[("seek", 1)] + ) + + check_readall( + "pathlib read_bytes", + "p.read_bytes()", + prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""", + # GH-122111: Buffering is disabled so these calls are avoided. + extra_checks=[("seek", 0)] + ) + + check_readall( + "pathlib read_text", + "p.read_text()", + prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" + ) + + # Focus on just `read()`. + calls = strace_helper.get_syscalls( + prelude=f"f = open('{TESTFN}')", + code="f.read()", + cleanup="f.close()", + strace_flags=_strace_flags + ) + # One to read all the bytes + # One to read the EOF and get a size 0 return. + self.assertEqual(calls.count("read"), 2) + + + class CAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _io.FileIO modulename = '_io' @@ -367,6 +508,10 @@ class PyAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _pyio.FileIO modulename = '_pyio' + @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON; _blksize mismatch on Windows') + def testBlksize(self): + return super().testBlksize() + class OtherFileTests: @@ -391,7 +536,7 @@ def testAbles(self): self.assertEqual(f.isatty(), False) f.close() - if sys.platform != "win32" and not is_emscripten: + if sys.platform != "win32": try: f = self.FileIO("/dev/tty", "a") except OSError: diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index ba54349f41d..e747b9dd03f 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4071,8 +4071,6 @@ def __setstate__(slf, state): self.assertEqual(newtxt.tag, 'ham') del MyTextIO - # TODO: RUSTPYTHON; TypeError: a bytes-like object is required, not 'NoneType' - @unittest.expectedFailure @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def test_read_non_blocking(self): import os @@ -4113,6 +4111,11 @@ class CTextIOWrapperTest(TextIOWrapperTest): io = io shutdown_error = "LookupError: unknown encoding: ascii" + @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") + @unittest.expectedFailure + def test_read_non_blocking(self): + return super().test_read_non_blocking() + def test_initialization(self): r = self.BytesIO(b"\xc3\xa9\n\n") b = self.BufferedReader(r, 1000) @@ -4231,15 +4234,29 @@ def closed(self): def test_seek_with_encoder_state(self): return super().test_seek_with_encoder_state() + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: a bytes-like object is required, not 'NoneType' + def test_read_non_blocking(self): + return super().test_read_non_blocking() + class PyTextIOWrapperTest(TextIOWrapperTest): io = pyio shutdown_error = "LookupError: unknown encoding: ascii" + @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON; os.set_blocking not available on Windows") + def test_read_non_blocking(self): + return super().test_read_non_blocking() + @unittest.expectedFailure # TODO: RUSTPYTHON; LookupError: unknown encoding: euc_jis_2004 def test_seek_with_encoder_state(self): return super().test_seek_with_encoder_state() + if sys.platform == "win32": + @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") + @unittest.expectedFailure + def test_read_non_blocking(self): + return super().test_read_non_blocking() + class IncrementalNewlineDecoderTest(unittest.TestCase): @@ -5097,12 +5114,10 @@ class MyWriter: def write(self, b: bytes): pass - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'io' has no attribute 'Reader' def test_reader_subclass(self): self.assertIsSubclass(self.MyReader, io.Reader) self.assertNotIsSubclass(str, io.Reader) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'io' has no attribute 'Writer' def test_writer_subclass(self): self.assertIsSubclass(self.MyWriter, io.Writer) self.assertNotIsSubclass(str, io.Writer) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index b6b95143621..00f646e5a94 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -6,10 +6,12 @@ import unittest from test import support +import gc import io import _pyio as pyio import pickle import sys +import weakref class IntLike: def __init__(self, num): @@ -52,6 +54,12 @@ def testSeek(self): self.assertEqual(buf[3:], bytesIo.read()) self.assertRaises(TypeError, bytesIo.seek, 0.0) + self.assertEqual(sys.maxsize, bytesIo.seek(sys.maxsize)) + self.assertEqual(self.EOF, bytesIo.read(4)) + + self.assertEqual(sys.maxsize - 2, bytesIo.seek(sys.maxsize - 2)) + self.assertEqual(self.EOF, bytesIo.read(4)) + def testTell(self): buf = self.buftype("1234567890") bytesIo = self.ioclass(buf) @@ -263,8 +271,8 @@ def test_iterator(self): memio = self.ioclass(buf * 10) self.assertEqual(iter(memio), memio) - self.assertTrue(hasattr(memio, '__iter__')) - self.assertTrue(hasattr(memio, '__next__')) + self.assertHasAttr(memio, '__iter__') + self.assertHasAttr(memio, '__next__') i = 0 for line in memio: self.assertEqual(line, buf) @@ -463,6 +471,40 @@ def test_getbuffer(self): memio.close() self.assertRaises(ValueError, memio.getbuffer) + def test_getbuffer_empty(self): + memio = self.ioclass() + buf = memio.getbuffer() + self.assertEqual(bytes(buf), b"") + # Trying to change the size of the BytesIO while a buffer is exported + # raises a BufferError. + self.assertRaises(BufferError, memio.write, b'x') + buf2 = memio.getbuffer() + self.assertRaises(BufferError, memio.write, b'x') + buf.release() + self.assertRaises(BufferError, memio.write, b'x') + buf2.release() + memio.write(b'x') + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: is not None + def test_getbuffer_gc_collect(self): + memio = self.ioclass(b"1234567890") + buf = memio.getbuffer() + memiowr = weakref.ref(memio) + bufwr = weakref.ref(buf) + # Create a reference loop. + a = [buf] + a.append(a) + # The Python implementation emits an unraisable exception. + with support.catch_unraisable_exception(): + del memio + del buf + del a + # The C implementation emits an unraisable exception. + with support.catch_unraisable_exception(): + gc.collect() + self.assertIsNone(memiowr()) + self.assertIsNone(bufwr()) + def test_read1(self): buf = self.buftype("1234567890") self.assertEqual(self.ioclass(buf).read1(), buf) @@ -517,6 +559,14 @@ def test_relative_seek(self): memio.seek(1, 1) self.assertEqual(memio.read(), buf[1:]) + def test_issue141311(self): + memio = self.ioclass() + # Seek allows PY_SSIZE_T_MAX, read should handle that. + # Past end of buffer read should always return 0 (EOF). + self.assertEqual(sys.maxsize, memio.seek(sys.maxsize)) + buf = bytearray(2) + self.assertEqual(0, memio.readinto(buf)) + def test_unicode(self): memio = self.ioclass() @@ -538,6 +588,75 @@ def test_issue5449(self): self.ioclass(initial_bytes=buf) self.assertRaises(TypeError, self.ioclass, buf, foo=None) + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: a bytes-like object is required, not 'B' + def test_write_concurrent_close(self): + class B: + def __buffer__(self, flags): + memio.close() + return memoryview(b"A") + + memio = self.ioclass() + self.assertRaises(ValueError, memio.write, B()) + + # Prevent crashes when memio.write() or memio.writelines() + # concurrently mutates (e.g., closes or exports) 'memio'. + # See: https://github.com/python/cpython/issues/143378. + + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: a bytes-like object is required, not 'B' + def test_writelines_concurrent_close(self): + class B: + def __buffer__(self, flags): + memio.close() + return memoryview(b"A") + + memio = self.ioclass() + self.assertRaises(ValueError, memio.writelines, [B()]) + + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: a bytes-like object is required, not 'B' + def test_write_concurrent_export(self): + class B: + buf = None + def __buffer__(self, flags): + self.buf = memio.getbuffer() + return memoryview(b"A") + + memio = self.ioclass() + self.assertRaises(BufferError, memio.write, B()) + + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: a bytes-like object is required, not 'B' + def test_writelines_concurrent_export(self): + class B: + buf = None + def __buffer__(self, flags): + self.buf = memio.getbuffer() + return memoryview(b"A") + + memio = self.ioclass() + self.assertRaises(BufferError, memio.writelines, [B()]) + + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: a bytes-like object is required, not 'B' + def test_write_mutating_buffer(self): + # Test that buffer is exported only once during write(). + # See: https://github.com/python/cpython/issues/143602. + class B: + count = 0 + def __buffer__(self, flags): + self.count += 1 + if self.count == 1: + return memoryview(b"AAA") + else: + return memoryview(b"BBBBBBBBB") + + memio = self.ioclass(b'0123456789') + memio.seek(2) + b = B() + n = memio.write(b) + + self.assertEqual(b.count, 1) + self.assertEqual(n, 3) + self.assertEqual(memio.getvalue(), b"01AAA56789") + self.assertEqual(memio.tell(), 5) + class TextIOTestMixin: @@ -724,54 +843,6 @@ class CBytesIOTest(PyBytesIOTest): ioclass = io.BytesIO UnsupportedOperation = io.UnsupportedOperation - def test_bytes_array(self): - super().test_bytes_array() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_flags(self): - super().test_flags() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_getbuffer(self): - super().test_getbuffer() - - def test_init(self): - super().test_init() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_issue5449(self): - super().test_issue5449() - - def test_read(self): - super().test_read() - - def test_readline(self): - super().test_readline() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_relative_seek(self): - super().test_relative_seek() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_seek(self): - super().test_seek() - - def test_subclassing(self): - super().test_subclassing() - - def test_truncate(self): - super().test_truncate() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_write(self): - super().test_write() - def test_getstate(self): memio = self.ioclass() state = memio.__getstate__() @@ -783,8 +854,7 @@ def test_getstate(self): memio.close() self.assertRaises(ValueError, memio.__getstate__) - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Expected type 'bytes' but 'bytearray' found. def test_setstate(self): # This checks whether __setstate__ does proper input validation. memio = self.ioclass() @@ -816,7 +886,7 @@ def test_sizeof(self): def _test_cow_mutation(self, mutation): # Common code for all BytesIO copy-on-write mutation tests. - imm = b' ' * 1024 + imm = (' ' * 1024).encode("ascii") old_rc = sys.getrefcount(imm) memio = self.ioclass(imm) self.assertEqual(sys.getrefcount(imm), old_rc + 1) @@ -857,79 +927,25 @@ def test_cow_mutable(self): memio = self.ioclass(ba) self.assertEqual(sys.getrefcount(ba), old_rc) -class CStringIOTest(PyStringIOTest): - ioclass = io.StringIO - UnsupportedOperation = io.UnsupportedOperation - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_detach(self): - super().test_detach() - - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: ValueError not raised by writable def test_flags(self): - super().test_flags() - - def test_init(self): - super().test_init() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_issue5265(self): - super().test_issue5265() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_newline_cr(self): - super().test_newline_cr() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_newline_crlf(self): - super().test_newline_crlf() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_newline_empty(self): - super().test_newline_empty() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_newline_none(self): - super().test_newline_none() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_newlines_property(self): - super().test_newlines_property() - - def test_read(self): - super().test_read() - - def test_readline(self): - super().test_readline() + return super().test_flags() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_relative_seek(self): - super().test_relative_seek() + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: ValueError not raised by write + def test_write(self): + return super().test_write() - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; OverflowError: Python int too large to convert to Rust u64 def test_seek(self): - super().test_seek() - - def test_textio_properties(self): - super().test_textio_properties() + return super().test_seek() - def test_truncate(self): - super().test_truncate() +class CStringIOTest(PyStringIOTest): + ioclass = io.StringIO + UnsupportedOperation = io.UnsupportedOperation # XXX: For the Python version of io.StringIO, this is highly # dependent on the encoding used for the underlying buffer. - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 8 != 2 def test_widechar(self): buf = self.buftype("\U0002030a\U00020347") memio = self.ioclass(buf) @@ -954,8 +970,7 @@ def test_getstate(self): memio.close() self.assertRaises(ValueError, memio.__getstate__) - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: ValueError not raised by __setstate__ def test_setstate(self): # This checks whether __setstate__ does proper input validation. memio = self.ioclass() @@ -973,47 +988,49 @@ def test_setstate(self): memio.close() self.assertRaises(ValueError, memio.__setstate__, ("closed", "", 0, None)) - -class CStringIOPickleTest(PyStringIOPickleTest): - UnsupportedOperation = io.UnsupportedOperation - - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; + def test_issue5265(self): - super().test_issue5265() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_newline_cr(self): - super().test_newline_cr() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_newline_crlf(self): - super().test_newline_crlf() + return super().test_issue5265() - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; ? ++++ def test_newline_empty(self): - super().test_newline_empty() + return super().test_newline_empty() - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; ? ^^^^^ def test_newline_none(self): - super().test_newline_none() + return super().test_newline_none() - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: OSError not raised by seek + def test_relative_seek(self): + return super().test_relative_seek() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: ValueError not raised by writable + def test_flags(self): + return super().test_flags() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'StringIO' object has no attribute 'detach' + def test_detach(self): + return super().test_detach() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'StringIO' object has no attribute 'newlines'. Did you mean: 'readlines'? def test_newlines_property(self): - super().test_newlines_property() + return super().test_newlines_property() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_relative_seek(self): - super().test_relative_seek() + @unittest.expectedFailure # TODO: RUSTPYTHON; OverflowError: Python int too large to convert to Rust u64 + def test_seek(self): + return super().test_seek() - def test_textio_properties(self): - super().test_textio_properties() + @unittest.expectedFailure # TODO: RUSTPYTHON; d + def test_newline_cr(self): + return super().test_newline_cr() + + @unittest.expectedFailure # TODO: RUSTPYTHON; d + def test_newline_crlf(self): + return super().test_newline_crlf() + + +class CStringIOPickleTest(PyStringIOPickleTest): + UnsupportedOperation = io.UnsupportedOperation class ioclass(io.StringIO): def __new__(cls, *args, **kwargs): @@ -1021,6 +1038,34 @@ def __new__(cls, *args, **kwargs): def __init__(self, *args, **kwargs): pass + @unittest.expectedFailure # TODO: RUSTPYTHON; + + def test_issue5265(self): + return super().test_issue5265() + + @unittest.expectedFailure # TODO: RUSTPYTHON; ? ++++ + def test_newline_empty(self): + return super().test_newline_empty() + + @unittest.expectedFailure # TODO: RUSTPYTHON; ? ^^^^^ + def test_newline_none(self): + return super().test_newline_none() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: OSError not raised by seek + def test_relative_seek(self): + return super().test_relative_seek() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'StringIO' object has no attribute 'newlines'. Did you mean: 'readlines'? + def test_newlines_property(self): + return super().test_newlines_property() + + @unittest.expectedFailure # TODO: RUSTPYTHON; d + def test_newline_cr(self): + return super().test_newline_cr() + + @unittest.expectedFailure # TODO: RUSTPYTHON; d + def test_newline_crlf(self): + return super().test_newline_crlf() + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 1038e8c1d1d..7ec64ea5ccb 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -4359,7 +4359,6 @@ def __release_buffer__(self, mv: memoryview) -> None: self.assertNotIsSubclass(C, ReleasableBuffer) self.assertNotIsInstance(C(), ReleasableBuffer) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'io' has no attribute 'Reader' def test_io_reader_protocol_allowed(self): @runtime_checkable class CustomReader(io.Reader[bytes], Protocol): @@ -4377,7 +4376,6 @@ def close(self): self.assertNotIsSubclass(A, CustomReader) self.assertNotIsInstance(A(), CustomReader) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'io' has no attribute 'Writer' def test_io_writer_protocol_allowed(self): @runtime_checkable class CustomWriter(io.Writer[bytes], Protocol): diff --git a/crates/vm/src/builtins/bytearray.rs b/crates/vm/src/builtins/bytearray.rs index 91369624f5d..83143070e07 100644 --- a/crates/vm/src/builtins/bytearray.rs +++ b/crates/vm/src/builtins/bytearray.rs @@ -538,6 +538,15 @@ impl PyByteArray { self.borrow_buf_mut().reverse(); } + #[pymethod] + fn resize(&self, size: isize, vm: &VirtualMachine) -> PyResult<()> { + if size < 0 { + return Err(vm.new_value_error("bytearray.resize(): new size must be >= 0".to_owned())); + } + self.try_resizable(vm)?.elements.resize(size as usize, 0); + Ok(()) + } + // TODO: Uncomment when Python adds __class_getitem__ to bytearray // #[pyclassmethod] fn __class_getitem__(cls: PyTypeRef, args: PyObjectRef, vm: &VirtualMachine) -> PyGenericAlias { diff --git a/crates/vm/src/stdlib/io.rs b/crates/vm/src/stdlib/io.rs index 89f567d8665..6b4fb9abc0a 100644 --- a/crates/vm/src/stdlib/io.rs +++ b/crates/vm/src/stdlib/io.rs @@ -4393,6 +4393,12 @@ mod _io { } } + #[derive(FromArgs)] + struct BytesIOArgs { + #[pyarg(any, optional)] + initial_bytes: OptionalArg>, + } + #[pyattr] #[pyclass(name = "BytesIO", base = _BufferedIOBase)] #[derive(Debug)] @@ -4417,15 +4423,17 @@ mod _io { } impl Initializer for BytesIO { - type Args = OptionalArg>; + type Args = BytesIOArgs; - fn init(zelf: PyRef, object: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + fn init(zelf: PyRef, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { if zelf.exports.load() > 0 { return Err(vm.new_buffer_error( "Existing exports of data: object cannot be re-sized".to_owned(), )); } - let raw_bytes = object + + let raw_bytes = args + .initial_bytes .flatten() .map_or_else(Vec::new, |input| input.borrow_buf().to_vec()); *zelf.buffer.write() = BufferedIO::new(Cursor::new(raw_bytes)); @@ -4503,9 +4511,20 @@ mod _io { how: OptionalArg, vm: &VirtualMachine, ) -> PyResult { - self.buffer(vm)? - .seek(seekfrom(vm, offset, how)?) - .map_err(|err| os_err(vm, err)) + let seek_from = seekfrom(vm, offset, how)?; + let mut buffer = self.buffer(vm)?; + + // Handle negative positions by clamping to 0 + match seek_from { + SeekFrom::Current(offset) if offset < 0 => { + let current = buffer.tell(); + let new_pos = current.saturating_add_signed(offset); + buffer + .seek(SeekFrom::Start(new_pos)) + .map_err(|err| os_err(vm, err)) + } + _ => buffer.seek(seek_from).map_err(|err| os_err(vm, err)), + } } #[pymethod] @@ -4534,8 +4553,14 @@ mod _io { } #[pymethod] - fn close(&self) { + fn close(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.exports.load() > 0 { + return Err(vm.new_buffer_error( + "Existing exports of data: object cannot be closed".to_owned(), + )); + } self.closed.store(true); + Ok(()) } #[pymethod] @@ -4602,6 +4627,9 @@ mod _io { impl PyRef { #[pymethod] fn getbuffer(self, vm: &VirtualMachine) -> PyResult { + if self.closed.load() { + return Err(vm.new_value_error("I/O operation on closed file.".to_owned())); + } let len = self.buffer.read().cursor.get_ref().len(); let buffer = PyBuffer::new( self.into(), @@ -4931,14 +4959,19 @@ mod _io { } fn create_unsupported_operation(ctx: &Context) -> PyTypeRef { + use crate::builtins::type_::PyAttributes; use crate::types::PyTypeSlots; + + let mut attrs = PyAttributes::default(); + attrs.insert(identifier!(ctx, __module__), ctx.new_str("io").into()); + PyType::new_heap( "UnsupportedOperation", vec![ ctx.exceptions.os_error.to_owned(), ctx.exceptions.value_error.to_owned(), ], - Default::default(), + attrs, PyTypeSlots::heap_default(), ctx.types.type_type.to_owned(), ctx, diff --git a/scripts/update_lib/cmd_quick.py b/scripts/update_lib/cmd_quick.py index 319d4aeec3e..c43e0761518 100644 --- a/scripts/update_lib/cmd_quick.py +++ b/scripts/update_lib/cmd_quick.py @@ -406,7 +406,7 @@ def main(argv: list[str] | None = None) -> int: # Collect hard dependencies for commit lib_deps = DEPENDENCIES.get(module_name, {}) for dep_name in lib_deps.get("hard_deps", []): - dep_lib_path = construct_lib_path("Lib", dep_name) + dep_lib_path = pathlib.Path("Lib") / dep_name if dep_lib_path.exists(): hard_deps_for_commit.append(dep_lib_path) diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index 99fe5154620..58b259c8a14 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -516,6 +516,7 @@ def clear_import_graph_caches() -> None: ], }, "io": { + "hard_deps": ["_pyio.py"], "test": [ "test_io.py", "test_bufio.py",