From a3acff119124cedbd2c2c25681b79967238b17a8 Mon Sep 17 00:00:00 2001 From: changjoon-park Date: Mon, 20 Apr 2026 02:12:46 +0900 Subject: [PATCH 1/5] Fix process abort on large float format precision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Formatting a float with large precision (>= ~65535) aborted the interpreter instead of raising a Python exception. CPython handles the same input by returning a clean string. # Before ./rustpython -c "print(f'{1.5:.1000000}')" thread 'main' panicked at crates/literal/src/float.rs:135: Formatting argument out of range (exit 101, abort) # After ./rustpython -c "print(f'{1.5:.1000000}')" 1.5 Root cause: Rust's `format!("{:.*}", n, x)` panics when `n` exceeds the fmt runtime's internal precision limit. `format_fixed` already caps `n` at u16::MAX, but `format_general` and `format_exponent` (and the `%` branch in `crates/common/src/format.rs`) passed user-supplied precision straight through to `format!`. Fix: * Introduce `FMT_MAX_PRECISION` + `clamp_fmt_precision()` in crates/literal/src/float.rs. Cap is `u16::MAX - 1` because `{:.*e}` hits a second panic (`ndigits > 0` in core flt2dec) at exactly u16::MAX; the smaller value covers both paths. * Apply the helper to `format_fixed` (replacing the existing ad-hoc cap), `format_exponent` (entry), and `format_general` (three separate format! calls with saturating arithmetic on derived precision values). * Apply the helper in the `FormatType::Percentage` branch in crates/common/src/format.rs. This is harmless for all normal inputs — f64 carries only ~17 significant digits, so precision beyond 65K is padding zeros at best. Complex-number and old-style `%`-formatting paths transitively benefit because they dispatch to the same library functions. Verified: * cargo run -- -m test test_float test_fstring test_format: 144 passed, 0 regressed. 
* extra_tests/snippets/builtin_format.py: all assertions pass, including 7 new regression cases covering e / E / g / G / f / % at precision 1_000_000. * Probed with 10 magnitude values (0, ±1.5, ±inf, nan, 1e-300, 1e300, f64::MAX, 5e-324) x 4 format types = 40 combinations, plus precision 0/1/2 boundary, complex formatting, old-style `%` formatting, and combined specs (fill/align/sign/grouping/ zero-pad). All return clean strings; no process abort. --- crates/common/src/format.rs | 1 + crates/literal/src/float.rs | 37 +++++++++++++++++++++++--- extra_tests/snippets/builtin_format.py | 28 +++++++++++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/crates/common/src/format.rs b/crates/common/src/format.rs index 930c764acf3..0d192bbba81 100644 --- a/crates/common/src/format.rs +++ b/crates/common/src/format.rs @@ -722,6 +722,7 @@ impl FormatSpec { magnitude if magnitude.is_nan() => Ok("nan%".to_owned()), magnitude if magnitude.is_infinite() => Ok("inf%".to_owned()), _ => { + let precision = float::clamp_fmt_precision(precision); let result = format!("{:.*}", precision, magnitude * 100.0); let point = float::decimal_point_or_empty(precision, self.alternate_form); Ok(format!("{result}{point}%")) diff --git a/crates/literal/src/float.rs b/crates/literal/src/float.rs index 0fc51782438..0d834403579 100644 --- a/crates/literal/src/float.rs +++ b/crates/literal/src/float.rs @@ -54,11 +54,29 @@ pub const fn decimal_point_or_empty(precision: usize, alternate_form: bool) -> & } } +/// Rust's `format!("{:.*}", n, x)` panics when `n` exceeds the fmt runtime's +/// internal precision limit. User-supplied precision can legally reach far +/// higher values (e.g. `f"{1.5:.1000000}"`) — clamp here so we produce a +/// (truncated-but-valid) output instead of aborting the interpreter. Harmless +/// in practice: f64 carries only ~17 significant digits, so precision beyond +/// 65K is padding zeros at best. 
+/// +/// The cap is `u16::MAX - 1` because `{:.*e}` (exponential) hits a tighter +/// internal assertion (`ndigits > 0` in `core::num::flt2dec`) at exactly +/// `u16::MAX` — plain `{:.*}` accepts `u16::MAX` but using the smaller cap +/// uniformly keeps the code simple and covers both paths. +pub const FMT_MAX_PRECISION: usize = u16::MAX as usize - 1; + +#[inline] +pub fn clamp_fmt_precision(precision: usize) -> usize { + core::cmp::min(precision, FMT_MAX_PRECISION) +} + pub fn format_fixed(precision: usize, magnitude: f64, case: Case, alternate_form: bool) -> String { match magnitude { magnitude if magnitude.is_finite() => { let point = decimal_point_or_empty(precision, alternate_form); - let precision = core::cmp::min(precision, u16::MAX as usize); + let precision = clamp_fmt_precision(precision); format!("{magnitude:.precision$}{point}") } magnitude if magnitude.is_nan() => format_nan(case), @@ -77,6 +95,7 @@ pub fn format_exponent( ) -> String { match magnitude { magnitude if magnitude.is_finite() => { + let precision = clamp_fmt_precision(precision); let r_exp = format!("{magnitude:.precision$e}"); let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); @@ -132,7 +151,11 @@ pub fn format_general( ) -> String { match magnitude { magnitude if magnitude.is_finite() => { - let r_exp = format!("{:.*e}", precision.saturating_sub(1), magnitude); + let r_exp = format!( + "{:.*e}", + clamp_fmt_precision(precision.saturating_sub(1)), + magnitude, + ); let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); let exponent = parts.next().unwrap().parse::<i64>().unwrap(); @@ -141,12 +164,18 @@ pub fn format_general( Case::Lower => 'e', Case::Upper => 'E', }; - let magnitude = format!("{:.*}", precision + 1, base); + let magnitude = format!( + "{:.*}", + clamp_fmt_precision(precision.saturating_add(1)), + base, + ); let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form); let point = 
decimal_point_or_empty(precision.saturating_sub(1), alternate_form); format!("{base}{point}{e}{exponent:+#03}") } else { - let precision = ((precision as i64) - 1 - exponent) as usize; + let precision = clamp_fmt_precision( + ((precision as i64) - 1 - exponent).max(0) as usize, + ); let magnitude = format!("{magnitude:.precision$}"); let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form); let point = decimal_point_or_empty(precision, alternate_form); diff --git a/extra_tests/snippets/builtin_format.py b/extra_tests/snippets/builtin_format.py index a5edcc89523..cbbef835a4c 100644 --- a/extra_tests/snippets/builtin_format.py +++ b/extra_tests/snippets/builtin_format.py @@ -199,3 +199,31 @@ def test_zero_padding(): for i in range(0, 30): format(x, ",") x = x // 10 + + +# Large float precision must not abort the interpreter. +# Previously these paths hit unguarded `format!("{:.*e}", ...)` in +# crates/literal/src/float.rs and `crates/common/src/format.rs` (the `%` +# branch), which panic past Rust's fmt precision limit and killed the +# process instead of raising a Python exception. +_big = 1_000_000 +# f-string default (general format) — g-format trims trailing zeros, so +# high precision returns the short natural representation. +assert f"{1.5:.{_big}}" == "1.5" +assert "{:.{}g}".format(1.5, _big) == "1.5" +assert "{:.{}G}".format(1.5, _big) == "1.5" +# Exponential and percent types emit padded zeros up to the (internally +# capped) precision. We don't pin exact length; we only require the call +# to return a str and not crash the runtime. +for spec_type in ("e", "E", "%", "f"): + out = ("{:." + str(_big) + spec_type + "}").format(1.5) + assert isinstance(out, str) and len(out) > 0 + +# Shallow cases unchanged. 
+assert f"{1.5:.5}" == "1.5" +assert "{:.3f}".format(1.5) == "1.500" +assert "{:.2%}".format(0.25) == "25.00%" +assert "{:.4e}".format(1234.5) == "1.2345e+03" +assert "{:.3g}".format(1234.5) == "1.23e+03" +assert f"{float('nan'):.10f}" == "nan" +assert f"{float('inf'):.10f}" == "inf" From 70e85d95740298649951d76e929477c0e474bcad Mon Sep 17 00:00:00 2001 From: changjoon-park Date: Mon, 20 Apr 2026 03:00:26 +0900 Subject: [PATCH 2/5] Address CodeRabbit review: split cap + drop redundant clamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two refinements after CodeRabbit review: 1. Drop the redundant `format!("{:.*}", precision + 1, base)` in `format_general`'s scientific branch. It was a no-op pre-fix (magnitude is `.abs()`-ed at the caller, so `base` has no sign and its length was exactly `precision + 1`), but after I added the cap it turned into an active truncate — dropping 1 char of precision at the cap boundary. Reuse `base` directly and extract `exp_precision` for reuse by `decimal_point_or_empty`. 2. Split the cap into two helpers. `FMT_MAX_PRECISION = u16::MAX` — for plain `{:.*}` (format_fixed, %-branch, format_general's non-scientific branch). `FMT_MAX_EXP_PRECISION = u16::MAX - 1` — for `{:.*e}` (format_exponent, format_general's scientific entry). The second value is one lower because `{:.*e}` trips an additional `ndigits > 0` assertion in `core::num::flt2dec` at exactly `u16::MAX`. The first commit used the tighter cap uniformly, which silently regressed `format_fixed` by 1 char at `precision == u16::MAX` (it previously capped at exactly that value). Two helpers restore byte-identical CPython parity for fixed / percent / general-non-scientific paths up through `precision == u16::MAX`. Verification: * precision 5 .. 65534: 360 outputs byte-identical to CPython across 8 magnitudes x 9 precisions x 5 types. * precision == 65535: f / g / G / % now match CPython (0 diff). 
e / E remain 1 char shorter — unavoidable within the `u16::MAX - 1` exp cap. * precision > 65535: output stops at cap; CPython emits full padding — same design divergence as before. * No panic regression: f-string default, e/E, g/G, %, f at precision 1_000_000 all return cleanly. * Test suite: test_float + test_fstring + test_format, 162 passed, 0 regressed. --- crates/literal/src/float.rs | 41 +++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/crates/literal/src/float.rs b/crates/literal/src/float.rs index 0d834403579..6a1a977a9fa 100644 --- a/crates/literal/src/float.rs +++ b/crates/literal/src/float.rs @@ -61,17 +61,24 @@ pub const fn decimal_point_or_empty(precision: usize, alternate_form: bool) -> & /// in practice: f64 carries only ~17 significant digits, so precision beyond /// 65K is padding zeros at best. /// -/// The cap is `u16::MAX - 1` because `{:.*e}` (exponential) hits a tighter -/// internal assertion (`ndigits > 0` in `core::num::flt2dec`) at exactly -/// `u16::MAX` — plain `{:.*}` accepts `u16::MAX` but using the smaller cap -/// uniformly keeps the code simple and covers both paths. -pub const FMT_MAX_PRECISION: usize = u16::MAX as usize - 1; +/// The two caps differ by 1: `{:.*}` (plain) accepts `u16::MAX`, but `{:.*e}` +/// (exponential) hits a tighter assertion (`ndigits > 0` in +/// `core::num::flt2dec`) at exactly `u16::MAX`. Keeping plain at the higher +/// cap preserves byte-identical output with CPython up through +/// `precision == u16::MAX` for fixed / percent / general-non-scientific paths. 
+pub const FMT_MAX_PRECISION: usize = u16::MAX as usize; +pub const FMT_MAX_EXP_PRECISION: usize = u16::MAX as usize - 1; #[inline] pub fn clamp_fmt_precision(precision: usize) -> usize { core::cmp::min(precision, FMT_MAX_PRECISION) } +#[inline] +pub fn clamp_exp_precision(precision: usize) -> usize { + core::cmp::min(precision, FMT_MAX_EXP_PRECISION) +} + pub fn format_fixed(precision: usize, magnitude: f64, case: Case, alternate_form: bool) -> String { match magnitude { magnitude if magnitude.is_finite() => { @@ -95,7 +102,7 @@ pub fn format_exponent( ) -> String { match magnitude { magnitude if magnitude.is_finite() => { - let precision = clamp_fmt_precision(precision); + let precision = clamp_exp_precision(precision); let r_exp = format!("{magnitude:.precision$e}"); let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); @@ -151,11 +158,8 @@ pub fn format_general( ) -> String { match magnitude { magnitude if magnitude.is_finite() => { - let r_exp = format!( - "{:.*e}", - clamp_fmt_precision(precision.saturating_sub(1)), - magnitude, - ); + let exp_precision = clamp_exp_precision(precision.saturating_sub(1)); + let r_exp = format!("{:.*e}", exp_precision, magnitude); let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); let exponent = parts.next().unwrap().parse::<i64>().unwrap(); @@ -164,13 +168,14 @@ pub fn format_general( Case::Lower => 'e', Case::Upper => 'E', }; - let magnitude = format!( - "{:.*}", - clamp_fmt_precision(precision.saturating_add(1)), - base, - ); - let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form); - let point = decimal_point_or_empty(precision.saturating_sub(1), alternate_form); + // `base` is already produced at the clamped precision via + // `r_exp`. 
The previous `format!("{:.*}", precision + 1, base)` + // call was a no-op (magnitude is `.abs()`-ed at the caller, so + // base has no sign and its length was exactly `precision + 1`) + // — reuse `base` directly to avoid double-clamping that would + // drop the last 1-2 chars at high precision. + let base = maybe_remove_trailing_redundant_chars(base.to_owned(), alternate_form); + let point = decimal_point_or_empty(exp_precision, alternate_form); format!("{base}{point}{e}{exponent:+#03}") } else { let precision = clamp_fmt_precision( From b142cf228442c9743fbcaa2b07a3adae0a52cb7c Mon Sep 17 00:00:00 2001 From: changjoon-park Date: Mon, 20 Apr 2026 03:16:22 +0900 Subject: [PATCH 3/5] Fix rustfmt formatting: single-line precision clamp --- crates/literal/src/float.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/literal/src/float.rs b/crates/literal/src/float.rs index 6a1a977a9fa..6740ee9c886 100644 --- a/crates/literal/src/float.rs +++ b/crates/literal/src/float.rs @@ -178,9 +178,8 @@ pub fn format_general( let point = decimal_point_or_empty(exp_precision, alternate_form); format!("{base}{point}{e}{exponent:+#03}") } else { - let precision = clamp_fmt_precision( - ((precision as i64) - 1 - exponent).max(0) as usize, - ); + let precision = + clamp_fmt_precision(((precision as i64) - 1 - exponent).max(0) as usize); let magnitude = format!("{magnitude:.precision$}"); let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form); let point = decimal_point_or_empty(precision, alternate_form); From d4c8316abf6da6a58084db36cd7166e323db0d26 Mon Sep 17 00:00:00 2001 From: changjoon-park Date: Mon, 20 Apr 2026 13:55:44 +0900 Subject: [PATCH 4/5] Address @youknowone review: byte-identical CPython parity at boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review comment on `extra_tests/snippets/builtin_format.py:209`: the patch declares `FMT_MAX_PRECISION = u16::MAX`, so the tests must 
cover 65535 and 65536 and demonstrate CPython parity at the boundary. The previous version only avoided panic — at the cap it silently truncated 1 char short of CPython for e / E, and thousands of chars short for f / % at precision beyond the cap. This commit restores byte-identical CPython output at every precision up to the format- spec parser's own `i32::MAX` ceiling. Fix: pad the Rust-format result with '0's up to the user-requested precision. Why this is correct, not a workaround: IEEE 754 double has at most ~767 significant decimal digits; past that, every digit is deterministically '0' in both CPython and the native Rust output. Our cap (65534 for exp, 65535 for plain) sits far above 767, so appending zeros reconstructs precisely what CPython would have produced. Verified on hard inputs: `1e-100`, `5e-324` (subnormal boundary), `f64::MAX`, mixed magnitudes — the last 100 chars of Rust-format output at precision 65534 are all '0' for every case. Changes: * `format_fixed`: after format!(), extend with (precision - capped) '0' chars before appending the optional decimal point. * `format_exponent`: same, applied to the parsed mantissa before reassembling with the exponent marker. * `FormatType::Percentage` branch: same. Also fixed a bug the boundary audit surfaced: the finite-input overflow guard used `return Ok("inf%")`, which bypasses the outer sign handler. Changed to a match-arm value so `format_sign_and_align` still runs and produces "-inf%" for `-f64::MAX`, matching CPython. Verification: * 7 magnitudes × 5 precisions × 6 format types = 210 comparisons against CPython at precisions {65534, 65535, 65536, 100000, 200000}. All 210 byte-identical. * Gap audit (complex formatting, old-style % formatting, negative magnitudes, -0.0, combined specs with fill / sign / alternate / grouping) at boundary precisions. All but 20 byte-identical. 
The 20 remaining diffs all stem from a pre-existing complex-imaginary-part repr bug (`1e100j` expands to 100 '0's in RustPython vs CPython's `1e+100j`) which reproduces on upstream main without any part of this patch and is out of scope here. * `cargo run -- -m test test_float test_fstring test_format`: 162 passed, 0 regressed. * `extra_tests/snippets/builtin_format.py` now pins exact expected strings at 65534 / 65535 / 65536 / 1_000_000 for every format type, plus the `f64::MAX × 100 → 'inf%'` overflow case. * `cargo fmt --check`: pass. --- crates/common/src/format.rs | 22 ++++++++++++--- crates/literal/src/float.rs | 27 ++++++++++++++---- extra_tests/snippets/builtin_format.py | 39 +++++++++++++++++--------- 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/crates/common/src/format.rs b/crates/common/src/format.rs index 0d192bbba81..4d31acf7a3f 100644 --- a/crates/common/src/format.rs +++ b/crates/common/src/format.rs @@ -722,10 +722,24 @@ impl FormatSpec { magnitude if magnitude.is_nan() => Ok("nan%".to_owned()), magnitude if magnitude.is_infinite() => Ok("inf%".to_owned()), _ => { - let precision = float::clamp_fmt_precision(precision); - let result = format!("{:.*}", precision, magnitude * 100.0); - let point = float::decimal_point_or_empty(precision, self.alternate_form); - Ok(format!("{result}{point}%")) + let scaled = magnitude * 100.0; + // `magnitude * 100` can overflow a finite input to +inf + // (e.g. f64::MAX). Emit "inf%" so the outer sign handler + // produces "-inf%" or "inf%" consistently with CPython. + if scaled.is_infinite() { + Ok("inf%".to_owned()) + } else { + let capped = float::clamp_fmt_precision(precision); + let mut result = format!("{:.*}", capped, scaled); + // Pad with '0's up to the requested precision to match + // CPython byte-identically past the internal cap. 
+ let missing = precision.saturating_sub(capped); + if missing > 0 { + result.extend(core::iter::repeat_n('0', missing)); + } + let point = float::decimal_point_or_empty(precision, self.alternate_form); + Ok(format!("{result}{point}%")) + } } }, None => match magnitude { diff --git a/crates/literal/src/float.rs b/crates/literal/src/float.rs index 6740ee9c886..0856f646b22 100644 --- a/crates/literal/src/float.rs +++ b/crates/literal/src/float.rs @@ -83,8 +83,17 @@ pub fn format_fixed(precision: usize, magnitude: f64, case: Case, alternate_form match magnitude { magnitude if magnitude.is_finite() => { let point = decimal_point_or_empty(precision, alternate_form); - let precision = clamp_fmt_precision(precision); - format!("{magnitude:.precision$}{point}") + let capped = clamp_fmt_precision(precision); + let mut out = format!("{magnitude:.capped$}"); + // Pad with '0's up to the requested precision to match CPython + // byte-identically. `f64` has at most ~767 significant decimal + // digits, so any digit past `capped` is deterministically '0'. 
+ let missing = precision.saturating_sub(capped); + if missing > 0 { + out.extend(core::iter::repeat_n('0', missing)); + } + out.push_str(point); + out } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), @@ -102,8 +111,8 @@ pub fn format_exponent( ) -> String { match magnitude { magnitude if magnitude.is_finite() => { - let precision = clamp_exp_precision(precision); - let r_exp = format!("{magnitude:.precision$e}"); + let capped = clamp_exp_precision(precision); + let r_exp = format!("{magnitude:.capped$e}"); let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); let exponent = parts.next().unwrap().parse::<i64>().unwrap(); @@ -112,7 +121,15 @@ pub fn format_exponent( Case::Upper => 'E', }; let point = decimal_point_or_empty(precision, alternate_form); - format!("{base}{point}{e}{exponent:+#03}") + // Pad with '0's up to the requested precision to match CPython + // byte-identically past our internal cap; see `format_fixed`. + let missing = precision.saturating_sub(capped); + let mut mantissa = String::with_capacity(base.len() + missing); + mantissa.push_str(base); + if missing > 0 { + mantissa.extend(core::iter::repeat_n('0', missing)); + } + format!("{mantissa}{point}{e}{exponent:+#03}") } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), diff --git a/extra_tests/snippets/builtin_format.py b/extra_tests/snippets/builtin_format.py index cbbef835a4c..2eff9ccca31 100644 --- a/extra_tests/snippets/builtin_format.py +++ b/extra_tests/snippets/builtin_format.py @@ -205,19 +205,32 @@ def test_zero_padding(): # Previously these paths hit unguarded `format!("{:.*e}", ...)` in # crates/literal/src/float.rs and `crates/common/src/format.rs` (the `%` # branch), which panic past Rust's fmt precision limit and killed the -# process instead of raising a Python exception. 
-_big = 1_000_000 -# f-string default (general format) — g-format trims trailing zeros, so -# high precision returns the short natural representation. -assert f"{1.5:.{_big}}" == "1.5" -assert "{:.{}g}".format(1.5, _big) == "1.5" -assert "{:.{}G}".format(1.5, _big) == "1.5" -# Exponential and percent types emit padded zeros up to the (internally -# capped) precision. We don't pin exact length; we only require the call -# to return a str and not crash the runtime. -for spec_type in ("e", "E", "%", "f"): - out = ("{:." + str(_big) + spec_type + "}").format(1.5) - assert isinstance(out, str) and len(out) > 0 +# process instead of raising a Python exception. Internally the limit is +# u16::MAX; output is zero-padded past that boundary to match CPython +# byte-identically. + +# Boundary values around the internal cap (u16::MAX = 65535). Output must +# match what CPython would produce. +# f-format pads with trailing zeros up to the requested precision. +assert "{:.65534f}".format(1.5) == "1." + "5" + "0" * 65533 +assert "{:.65535f}".format(1.5) == "1." + "5" + "0" * 65534 +assert "{:.65536f}".format(1.5) == "1." + "5" + "0" * 65535 +# e-format emits a fixed mantissa width + 'e+00'. +assert "{:.65534e}".format(1.5) == "1." + "5" + "0" * 65533 + "e+00" +assert "{:.65535e}".format(1.5) == "1." + "5" + "0" * 65534 + "e+00" +assert "{:.65536e}".format(1.5) == "1." + "5" + "0" * 65535 + "e+00" +# %-format multiplies by 100 then applies f-format. +assert "{:.65534%}".format(1.5) == "150." + "0" * 65534 + "%" +assert "{:.65535%}".format(1.5) == "150." + "0" * 65535 + "%" +assert "{:.65536%}".format(1.5) == "150." + "0" * 65536 + "%" +# g-format strips trailing zeros, so the short form is the natural +# representation regardless of precision. +for p in (65534, 65535, 65536, 1_000_000): + assert ("{:." + str(p) + "g}").format(1.5) == "1.5" + +# Percent overflow: finite input whose *100 is +inf produces "inf%" +# rather than crashing. CPython does the same. 
+assert "{:.100000%}".format(1.7976931348623157e308) == "inf%" # Shallow cases unchanged. assert f"{1.5:.5}" == "1.5" From 58c59d47027b20927928d827ddf6ea6736bfd7b9 Mon Sep 17 00:00:00 2001 From: changjoon-park Date: Mon, 20 Apr 2026 22:11:05 +0900 Subject: [PATCH 5/5] Clarify boundary test labels + add past-cap depth assertions Rename the boundary-test section so the three precision points per format type are labeled below / at / past the cap inline, making the "past MAX_PRECISION" unhappy-case coverage explicit. Add len-based assertions at precision 1_000_000 for f, e, and % to exercise the cap-then-pad path at a depth far beyond the boundary. --- extra_tests/snippets/builtin_format.py | 33 +++++++++++++++++--------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/extra_tests/snippets/builtin_format.py b/extra_tests/snippets/builtin_format.py index 2eff9ccca31..250d8ad6cac 100644 --- a/extra_tests/snippets/builtin_format.py +++ b/extra_tests/snippets/builtin_format.py @@ -209,25 +209,36 @@ def test_zero_padding(): # u16::MAX; output is zero-padded past that boundary to match CPython # byte-identically. -# Boundary values around the internal cap (u16::MAX = 65535). Output must -# match what CPython would produce. +# Three precision points per format type — below the cap (uncapped +# path), exactly at the cap (boundary), and one past the cap (the +# unhappy case, where internal clamping plus zero-padding has to +# reconstruct CPython's output). All must byte-match CPython. + # f-format pads with trailing zeros up to the requested precision. -assert "{:.65534f}".format(1.5) == "1." + "5" + "0" * 65533 -assert "{:.65535f}".format(1.5) == "1." + "5" + "0" * 65534 -assert "{:.65536f}".format(1.5) == "1." + "5" + "0" * 65535 +assert "{:.65534f}".format(1.5) == "1." + "5" + "0" * 65533 # below cap +assert "{:.65535f}".format(1.5) == "1." + "5" + "0" * 65534 # at cap +assert "{:.65536f}".format(1.5) == "1." 
+ "5" + "0" * 65535 # past cap → padding # e-format emits a fixed mantissa width + 'e+00'. -assert "{:.65534e}".format(1.5) == "1." + "5" + "0" * 65533 + "e+00" -assert "{:.65535e}".format(1.5) == "1." + "5" + "0" * 65534 + "e+00" -assert "{:.65536e}".format(1.5) == "1." + "5" + "0" * 65535 + "e+00" +assert "{:.65534e}".format(1.5) == "1." + "5" + "0" * 65533 + "e+00" # at exp cap +assert "{:.65535e}".format(1.5) == "1." + "5" + "0" * 65534 + "e+00" # past exp cap +assert ( + "{:.65536e}".format(1.5) == "1." + "5" + "0" * 65535 + "e+00" +) # past cap → padding # %-format multiplies by 100 then applies f-format. -assert "{:.65534%}".format(1.5) == "150." + "0" * 65534 + "%" -assert "{:.65535%}".format(1.5) == "150." + "0" * 65535 + "%" -assert "{:.65536%}".format(1.5) == "150." + "0" * 65536 + "%" +assert "{:.65534%}".format(1.5) == "150." + "0" * 65534 + "%" # below +assert "{:.65535%}".format(1.5) == "150." + "0" * 65535 + "%" # at cap +assert "{:.65536%}".format(1.5) == "150." + "0" * 65536 + "%" # past cap → padding # g-format strips trailing zeros, so the short form is the natural # representation regardless of precision. for p in (65534, 65535, 65536, 1_000_000): assert ("{:." + str(p) + "g}").format(1.5) == "1.5" +# Far past the cap — verifies the pad path handles arbitrary precision, +# not just one-off values near the boundary. +assert len("{:.1000000f}".format(1.5)) == 1_000_002 # "1." + 1M digits +assert len("{:.1000000e}".format(1.5)) == 1_000_006 # + "e+00" +assert len("{:.1000000%}".format(1.5)) == 1_000_005 # "150." + 1M zeros + "%" + # Percent overflow: finite input whose *100 is +inf produces "inf%" # rather than crashing. CPython does the same. assert "{:.100000%}".format(1.7976931348623157e308) == "inf%"