diff --git a/crates/common/src/format.rs b/crates/common/src/format.rs index 930c764acf3..4d31acf7a3f 100644 --- a/crates/common/src/format.rs +++ b/crates/common/src/format.rs @@ -722,9 +722,24 @@ impl FormatSpec { magnitude if magnitude.is_nan() => Ok("nan%".to_owned()), magnitude if magnitude.is_infinite() => Ok("inf%".to_owned()), _ => { - let result = format!("{:.*}", precision, magnitude * 100.0); - let point = float::decimal_point_or_empty(precision, self.alternate_form); - Ok(format!("{result}{point}%")) + let scaled = magnitude * 100.0; + // `magnitude * 100` can overflow a finite input to +inf + // (e.g. f64::MAX). Emit "inf%" so the outer sign handler + // produces "-inf%" or "inf%" consistently with CPython. + if scaled.is_infinite() { + Ok("inf%".to_owned()) + } else { + let capped = float::clamp_fmt_precision(precision); + let mut result = format!("{:.*}", capped, scaled); + // Pad with '0's up to the requested precision to match + // CPython byte-identically past the internal cap. + let missing = precision.saturating_sub(capped); + if missing > 0 { + result.extend(core::iter::repeat_n('0', missing)); + } + let point = float::decimal_point_or_empty(precision, self.alternate_form); + Ok(format!("{result}{point}%")) + } } }, None => match magnitude { diff --git a/crates/literal/src/float.rs b/crates/literal/src/float.rs index 0fc51782438..0856f646b22 100644 --- a/crates/literal/src/float.rs +++ b/crates/literal/src/float.rs @@ -54,12 +54,46 @@ pub const fn decimal_point_or_empty(precision: usize, alternate_form: bool) -> & } } +/// Rust's `format!("{:.*}", n, x)` panics when `n` exceeds the fmt runtime's +/// internal precision limit. User-supplied precision can legally reach far +/// higher values (e.g. `f"{1.5:.1000000}"`) — clamp here so the `format!` call +/// stays in range instead of aborting the interpreter. Clamping alone would +/// truncate the output, so the fixed / exponent / percent callers re-pad with +/// '0' up to the requested precision (digits past the cap are always zero). 
+/// +/// The two caps differ by 1: `{:.*}` (plain) accepts `u16::MAX`, but `{:.*e}` +/// (exponential) hits a tighter assertion (`ndigits > 0` in +/// `core::num::flt2dec`) at exactly `u16::MAX`. Keeping plain at the higher +/// cap preserves byte-identical output with CPython up through +/// `precision == u16::MAX` for fixed / percent / general-non-scientific paths. +pub const FMT_MAX_PRECISION: usize = u16::MAX as usize; +pub const FMT_MAX_EXP_PRECISION: usize = u16::MAX as usize - 1; + +#[inline] +pub fn clamp_fmt_precision(precision: usize) -> usize { + core::cmp::min(precision, FMT_MAX_PRECISION) +} + +#[inline] +pub fn clamp_exp_precision(precision: usize) -> usize { + core::cmp::min(precision, FMT_MAX_EXP_PRECISION) +} + pub fn format_fixed(precision: usize, magnitude: f64, case: Case, alternate_form: bool) -> String { match magnitude { magnitude if magnitude.is_finite() => { let point = decimal_point_or_empty(precision, alternate_form); - let precision = core::cmp::min(precision, u16::MAX as usize); - format!("{magnitude:.precision$}{point}") + let capped = clamp_fmt_precision(precision); + let mut out = format!("{magnitude:.capped$}"); + // Pad with '0's up to the requested precision to match CPython + // byte-identically. `f64` has at most ~767 significant decimal + // digits, so any digit past `capped` is deterministically '0'. 
+ let missing = precision.saturating_sub(capped); + if missing > 0 { + out.extend(core::iter::repeat_n('0', missing)); + } + out.push_str(point); + out } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), @@ -77,7 +111,8 @@ pub fn format_exponent( ) -> String { match magnitude { magnitude if magnitude.is_finite() => { - let r_exp = format!("{magnitude:.precision$e}"); + let capped = clamp_exp_precision(precision); + let r_exp = format!("{magnitude:.capped$e}"); let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); let exponent = parts.next().unwrap().parse::().unwrap(); @@ -86,7 +121,15 @@ pub fn format_exponent( Case::Upper => 'E', }; let point = decimal_point_or_empty(precision, alternate_form); - format!("{base}{point}{e}{exponent:+#03}") + // Pad with '0's up to the requested precision to match CPython + // byte-identically past our internal cap; see `format_fixed`. + let missing = precision.saturating_sub(capped); + let mut mantissa = String::with_capacity(base.len() + missing); + mantissa.push_str(base); + if missing > 0 { + mantissa.extend(core::iter::repeat_n('0', missing)); + } + format!("{mantissa}{point}{e}{exponent:+#03}") } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), @@ -132,7 +175,8 @@ pub fn format_general( ) -> String { match magnitude { magnitude if magnitude.is_finite() => { - let r_exp = format!("{:.*e}", precision.saturating_sub(1), magnitude); + let exp_precision = clamp_exp_precision(precision.saturating_sub(1)); + let r_exp = format!("{:.*e}", exp_precision, magnitude); let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); let exponent = parts.next().unwrap().parse::().unwrap(); @@ -141,12 +185,18 @@ pub fn format_general( Case::Lower => 'e', Case::Upper => 'E', }; - let magnitude = format!("{:.*}", precision + 1, base); - let base = maybe_remove_trailing_redundant_chars(magnitude, 
+ alternate_form); - let point = decimal_point_or_empty(precision.saturating_sub(1), alternate_form); + // `base` is already produced at the clamped precision via + // `r_exp`. The previous `format!("{:.*}", precision + 1, base)` + // call was a no-op (magnitude is `.abs()`-ed at the caller, so + // base has no sign and is at most `precision + 1` chars long) + // — reuse `base` directly; re-clamping would drop its last 1-2 chars. + // NOTE(review): nothing re-pads past the cap here, unlike `format_fixed` — confirm `#g` vs CPython. + let base = maybe_remove_trailing_redundant_chars(base.to_owned(), alternate_form); + let point = decimal_point_or_empty(exp_precision, alternate_form); format!("{base}{point}{e}{exponent:+#03}") } else { - let precision = ((precision as i64) - 1 - exponent) as usize; + let precision = + clamp_fmt_precision(((precision as i64) - 1 - exponent).max(0) as usize); let magnitude = format!("{magnitude:.precision$}"); let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form); let point = decimal_point_or_empty(precision, alternate_form); diff --git a/extra_tests/snippets/builtin_format.py b/extra_tests/snippets/builtin_format.py index a5edcc89523..250d8ad6cac 100644 --- a/extra_tests/snippets/builtin_format.py +++ b/extra_tests/snippets/builtin_format.py @@ -199,3 +199,55 @@ def test_zero_padding(): for i in range(0, 30): format(x, ",") x = x // 10 + + +# Large float precision must not abort the interpreter. +# Previously these paths hit unguarded `format!("{:.*e}", ...)` in +# crates/literal/src/float.rs and `crates/common/src/format.rs` (the `%` +# branch), which panic past Rust's fmt precision limit and killed the +# process instead of raising a Python exception. Internally the limit is +# u16::MAX; output is zero-padded past that boundary to match CPython +# byte-identically. 
+ +# Three precision points per format type — below the cap (uncapped +# path), exactly at the cap (boundary), and one past the cap (the +# unhappy case, where internal clamping plus zero-padding has to +# reconstruct CPython's output). All must byte-match CPython. + +# f-format pads with trailing zeros up to the requested precision. +assert "{:.65534f}".format(1.5) == "1." + "5" + "0" * 65533 # below cap +assert "{:.65535f}".format(1.5) == "1." + "5" + "0" * 65534 # at cap +assert "{:.65536f}".format(1.5) == "1." + "5" + "0" * 65535 # past cap → padding +# e-format emits a fixed mantissa width + 'e+00' (its internal cap is u16::MAX - 1). +assert "{:.65534e}".format(1.5) == "1." + "5" + "0" * 65533 + "e+00" # at exp cap +assert "{:.65535e}".format(1.5) == "1." + "5" + "0" * 65534 + "e+00" # past exp cap → padding +assert ( + "{:.65536e}".format(1.5) == "1." + "5" + "0" * 65535 + "e+00" +) # past cap → padding +# %-format multiplies by 100 then applies f-format. +assert "{:.65534%}".format(1.5) == "150." + "0" * 65534 + "%" # below +assert "{:.65535%}".format(1.5) == "150." + "0" * 65535 + "%" # at cap +assert "{:.65536%}".format(1.5) == "150." + "0" * 65536 + "%" # past cap → padding +# g-format strips trailing zeros, so the short form is the natural +# representation regardless of precision (`#g`, which keeps the zeros, is not exercised here). +for p in (65534, 65535, 65536, 1_000_000): + assert ("{:." + str(p) + "g}").format(1.5) == "1.5" + +# Far past the cap — verifies the pad path handles arbitrary precision, +# not just one-off values near the boundary. +assert len("{:.1000000f}".format(1.5)) == 1_000_002 # "1." + 1M digits +assert len("{:.1000000e}".format(1.5)) == 1_000_006 # + "e+00" +assert len("{:.1000000%}".format(1.5)) == 1_000_005 # "150." + 1M zeros + "%" + +# Percent overflow: finite input whose *100 is +inf produces "inf%" +# rather than crashing. CPython does the same. +assert "{:.100000%}".format(1.7976931348623157e308) == "inf%" + +# Shallow cases unchanged. 
+assert f"{1.5:.5}" == "1.5" +assert "{:.3f}".format(1.5) == "1.500" +assert "{:.2%}".format(0.25) == "25.00%" +assert "{:.4e}".format(1234.5) == "1.2345e+03" +assert "{:.3g}".format(1234.5) == "1.23e+03" +assert f"{float('nan'):.10f}" == "nan" +assert f"{float('inf'):.10f}" == "inf"