diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt index 1f3fc4864cd..a13f345eece 100644 --- a/.cspell.dict/python-more.txt +++ b/.cspell.dict/python-more.txt @@ -178,6 +178,7 @@ PYTHONHASHSEED PYTHONHOME PYTHONINSPECT PYTHONINTMAXSTRDIGITS +PYTHONIOENCODING PYTHONNODEBUGRANGES PYTHONNOUSERSITE PYTHONOPTIMIZE diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 4cf233d5f8b..d29c6918234 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2494,8 +2494,6 @@ def test_test_command_invalid_file(self): finally: os_helper.unlink(tmpname) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_list_command(self): for tar_name in testtarnames: with support.captured_stdout() as t: @@ -2507,8 +2505,6 @@ def test_list_command(self): PYTHONIOENCODING='ascii') self.assertEqual(out, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_list_command_verbose(self): for tar_name in testtarnames: with support.captured_stdout() as t: diff --git a/crates/vm/Lib/core_modules/encodings_ascii.py b/crates/vm/Lib/core_modules/encodings_ascii.py new file mode 120000 index 00000000000..c0507e75b03 --- /dev/null +++ b/crates/vm/Lib/core_modules/encodings_ascii.py @@ -0,0 +1 @@ +../../../../Lib/encodings/ascii.py \ No newline at end of file diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 2eef479d1b4..7f7c5ea7674 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -261,17 +261,28 @@ impl VirtualMachine { Ok(()) } - fn import_utf8_encodings(&mut self) -> PyResult<()> { + fn import_ascii_utf8_encodings(&mut self) -> PyResult<()> { import::import_frozen(self, "codecs")?; - // FIXME: See corresponding part of `core_frozen_inits` - // let encoding_module_name = if cfg!(feature = "freeze-stdlib") { - // "encodings.utf_8" - // } else { - // "encodings_utf_8" - // }; - let encoding_module_name = "encodings_utf_8"; - let encoding_module = import::import_frozen(self, encoding_module_name)?; - 
let getregentry = encoding_module.get_attr("getregentry", self)?; + + // Use dotted names when freeze-stdlib is enabled (modules come from Lib/encodings/), + // otherwise use underscored names (modules come from core_modules/). + let (ascii_module_name, utf8_module_name) = if cfg!(feature = "freeze-stdlib") { + ("encodings.ascii", "encodings.utf_8") + } else { + ("encodings_ascii", "encodings_utf_8") + }; + + // Register ascii encoding + let ascii_module = import::import_frozen(self, ascii_module_name)?; + let getregentry = ascii_module.get_attr("getregentry", self)?; + let codec_info = getregentry.call((), self)?; + self.state + .codec_registry + .register_manual("ascii", codec_info.try_into_value(self)?)?; + + // Register utf-8 encoding + let utf8_module = import::import_frozen(self, utf8_module_name)?; + let getregentry = utf8_module.get_attr("getregentry", self)?; let codec_info = getregentry.call((), self)?; self.state .codec_registry @@ -298,7 +309,7 @@ impl VirtualMachine { #[cfg(not(feature = "threading"))] import::import_frozen(self, "_thread")?; let importlib = import::init_importlib_base(self)?; - self.import_utf8_encodings()?; + self.import_ascii_utf8_encodings()?; #[cfg(any(not(target_arch = "wasm32"), target_os = "wasi"))] { @@ -327,17 +338,25 @@ impl VirtualMachine { let line_buffering = buffered_stdio && (isatty || fd == 2); let newline = if cfg!(windows) { None } else { Some("\n") }; - // stderr uses backslashreplace error handler - let errors: Option<&str> = if fd == 2 { + let encoding = self.state.config.settings.stdio_encoding.as_deref(); + // stderr always uses backslashreplace (ignores stdio_errors) + let errors = if fd == 2 { Some("backslashreplace") } else { - None + self.state.config.settings.stdio_errors.as_deref() }; let stdio = self.call_method( &io, "TextIOWrapper", - (buf, (), errors, newline, line_buffering, write_through), + ( + buf, + encoding, + errors, + newline, + line_buffering, + write_through, + ), )?; let mode = if write { 
"w" } else { "r" }; stdio.set_attr("mode", self.ctx.new_str(mode), self)?; @@ -1007,6 +1026,8 @@ pub fn resolve_frozen_alias(name: &str) -> &str { match name { "_frozen_importlib" => "importlib._bootstrap", "_frozen_importlib_external" => "importlib._bootstrap_external", + "encodings_ascii" => "encodings.ascii", + "encodings_utf_8" => "encodings.utf_8", _ => name, } } diff --git a/crates/vm/src/vm/setting.rs b/crates/vm/src/vm/setting.rs index 8a307d1852b..9be21b4484e 100644 --- a/crates/vm/src/vm/setting.rs +++ b/crates/vm/src/vm/setting.rs @@ -112,9 +112,11 @@ pub struct Settings { /// -u, PYTHONUNBUFFERED=x pub buffered_stdio: bool, - // wchar_t *stdio_encoding; + /// PYTHONIOENCODING - stdio encoding + pub stdio_encoding: Option<String>, + /// PYTHONIOENCODING - stdio error handler + pub stdio_errors: Option<String>, pub utf8_mode: u8, - // wchar_t *stdio_errors; /// --check-hash-based-pycs pub check_hash_pycs_mode: CheckHashPycsMode, @@ -197,6 +199,8 @@ impl Default for Settings { buffered_stdio: true, check_hash_pycs_mode: CheckHashPycsMode::Default, allow_external_library: cfg!(feature = "importlib"), + stdio_encoding: None, + stdio_errors: None, utf8_mode: 1, int_max_str_digits: 4300, #[cfg(feature = "flame-it")] diff --git a/src/settings.rs b/src/settings.rs index 54e66086932..7dd3c1a7714 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -298,6 +298,23 @@ pub fn parse_opts() -> Result<(Settings, RunMode), lexopt::Error> { settings.code_debug_ranges = false; } + // Parse PYTHONIOENCODING=encoding[:errors] + if let Some(val) = get_env("PYTHONIOENCODING") + && let Some(val_str) = val.to_str() + && !val_str.is_empty() + { + if let Some((enc, err)) = val_str.split_once(':') { + if !enc.is_empty() { + settings.stdio_encoding = Some(enc.to_owned()); + } + if !err.is_empty() { + settings.stdio_errors = Some(err.to_owned()); + } + } else { + settings.stdio_encoding = Some(val_str.to_owned()); + } + } + if settings.dev_mode { settings.warnoptions.push("default".to_owned()); 
settings.faulthandler = true;