Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions crates/common/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -722,9 +722,24 @@ impl FormatSpec {
magnitude if magnitude.is_nan() => Ok("nan%".to_owned()),
magnitude if magnitude.is_infinite() => Ok("inf%".to_owned()),
_ => {
let result = format!("{:.*}", precision, magnitude * 100.0);
let point = float::decimal_point_or_empty(precision, self.alternate_form);
Ok(format!("{result}{point}%"))
let scaled = magnitude * 100.0;
// `magnitude * 100` can overflow a finite input to +inf
// (e.g. f64::MAX). Emit "inf%" so the outer sign handler
// produces "-inf%" or "inf%" consistently with CPython.
if scaled.is_infinite() {
Ok("inf%".to_owned())
} else {
let capped = float::clamp_fmt_precision(precision);
let mut result = format!("{:.*}", capped, scaled);
// Pad with '0's up to the requested precision to match
// CPython byte-identically past the internal cap.
let missing = precision.saturating_sub(capped);
if missing > 0 {
result.extend(core::iter::repeat_n('0', missing));
}
let point = float::decimal_point_or_empty(precision, self.alternate_form);
Ok(format!("{result}{point}%"))
}
}
},
None => match magnitude {
Expand Down
68 changes: 59 additions & 9 deletions crates/literal/src/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,46 @@ pub const fn decimal_point_or_empty(precision: usize, alternate_form: bool) -> &
}
}

/// Upper bound on the precision handed to plain `{:.*}` float formatting.
///
/// Rust's `format!("{:.*}", n, x)` panics once `n` exceeds the fmt runtime's
/// internal precision limit, yet user-supplied precision can legally be far
/// larger (e.g. `f"{1.5:.1000000}"`). Callers such as `format_fixed` and
/// `format_exponent` clamp the precision they pass to `format!` and then
/// append the remaining `'0'` digits themselves, so the interpreter emits the
/// full-width output instead of aborting. Nothing is lost by clamping: the
/// exact decimal expansion of a finite `f64` ends long before 65K digits, so
/// every digit past the cap is a zero.
///
/// The plain and exponential caps differ by one: `{:.*}` accepts `u16::MAX`,
/// whereas `{:.*e}` trips a tighter assertion (`ndigits > 0` in
/// `core::num::flt2dec`) at exactly `u16::MAX`. Keeping the plain cap at the
/// higher value means fixed / percent / general-non-scientific output needs
/// no padding step up through `precision == u16::MAX`.
pub const FMT_MAX_PRECISION: usize = u16::MAX as usize;

/// Upper bound on the precision handed to exponential `{:.*e}` formatting;
/// one below [`FMT_MAX_PRECISION`].
pub const FMT_MAX_EXP_PRECISION: usize = FMT_MAX_PRECISION - 1;

/// Returns `precision` limited to [`FMT_MAX_PRECISION`], for use with plain
/// `{:.*}` formatting. Values at or below the cap are returned unchanged.
#[inline]
pub fn clamp_fmt_precision(precision: usize) -> usize {
    precision.min(FMT_MAX_PRECISION)
}

/// Returns `precision` limited to [`FMT_MAX_EXP_PRECISION`], for use with
/// exponential `{:.*e}` formatting. Values at or below the cap are returned
/// unchanged.
#[inline]
pub fn clamp_exp_precision(precision: usize) -> usize {
    precision.min(FMT_MAX_EXP_PRECISION)
}

pub fn format_fixed(precision: usize, magnitude: f64, case: Case, alternate_form: bool) -> String {
match magnitude {
magnitude if magnitude.is_finite() => {
let point = decimal_point_or_empty(precision, alternate_form);
let precision = core::cmp::min(precision, u16::MAX as usize);
format!("{magnitude:.precision$}{point}")
let capped = clamp_fmt_precision(precision);
let mut out = format!("{magnitude:.capped$}");
// Pad with '0's up to the requested precision to match CPython
// byte-identically. `f64` has at most ~767 significant decimal
// digits, so any digit past `capped` is deterministically '0'.
let missing = precision.saturating_sub(capped);
if missing > 0 {
out.extend(core::iter::repeat_n('0', missing));
}
out.push_str(point);
out
}
magnitude if magnitude.is_nan() => format_nan(case),
magnitude if magnitude.is_infinite() => format_inf(case),
Expand All @@ -77,7 +111,8 @@ pub fn format_exponent(
) -> String {
match magnitude {
magnitude if magnitude.is_finite() => {
let r_exp = format!("{magnitude:.precision$e}");
let capped = clamp_exp_precision(precision);
let r_exp = format!("{magnitude:.capped$e}");
let mut parts = r_exp.splitn(2, 'e');
let base = parts.next().unwrap();
let exponent = parts.next().unwrap().parse::<i64>().unwrap();
Expand All @@ -86,7 +121,15 @@ pub fn format_exponent(
Case::Upper => 'E',
};
let point = decimal_point_or_empty(precision, alternate_form);
format!("{base}{point}{e}{exponent:+#03}")
// Pad with '0's up to the requested precision to match CPython
// byte-identically past our internal cap; see `format_fixed`.
let missing = precision.saturating_sub(capped);
let mut mantissa = String::with_capacity(base.len() + missing);
mantissa.push_str(base);
if missing > 0 {
mantissa.extend(core::iter::repeat_n('0', missing));
}
format!("{mantissa}{point}{e}{exponent:+#03}")
}
magnitude if magnitude.is_nan() => format_nan(case),
magnitude if magnitude.is_infinite() => format_inf(case),
Expand Down Expand Up @@ -132,7 +175,8 @@ pub fn format_general(
) -> String {
match magnitude {
magnitude if magnitude.is_finite() => {
let r_exp = format!("{:.*e}", precision.saturating_sub(1), magnitude);
let exp_precision = clamp_exp_precision(precision.saturating_sub(1));
let r_exp = format!("{:.*e}", exp_precision, magnitude);
let mut parts = r_exp.splitn(2, 'e');
let base = parts.next().unwrap();
let exponent = parts.next().unwrap().parse::<i64>().unwrap();
Expand All @@ -141,12 +185,18 @@ pub fn format_general(
Case::Lower => 'e',
Case::Upper => 'E',
};
let magnitude = format!("{:.*}", precision + 1, base);
let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form);
let point = decimal_point_or_empty(precision.saturating_sub(1), alternate_form);
// `base` is already produced at the clamped precision via
// `r_exp`. The previous `format!("{:.*}", precision + 1, base)`
// call was a no-op (magnitude is `.abs()`-ed at the caller, so
// base has no sign and its length was exactly `precision + 1`)
// — reuse `base` directly to avoid double-clamping that would
// drop the last 1-2 chars at high precision.
let base = maybe_remove_trailing_redundant_chars(base.to_owned(), alternate_form);
let point = decimal_point_or_empty(exp_precision, alternate_form);
format!("{base}{point}{e}{exponent:+#03}")
} else {
let precision = ((precision as i64) - 1 - exponent) as usize;
let precision =
clamp_fmt_precision(((precision as i64) - 1 - exponent).max(0) as usize);
let magnitude = format!("{magnitude:.precision$}");
let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form);
let point = decimal_point_or_empty(precision, alternate_form);
Expand Down
52 changes: 52 additions & 0 deletions extra_tests/snippets/builtin_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,55 @@ def test_zero_padding():
for i in range(0, 30):
format(x, ",")
x = x // 10


# Large float precision must not abort the interpreter.
# Previously these paths hit unguarded `format!("{:.*e}", ...)` in
# crates/literal/src/float.rs and `crates/common/src/format.rs` (the `%`
# branch), which panic past Rust's fmt precision limit and killed the
# process instead of raising a Python exception. Internally the limit is
# u16::MAX; output is zero-padded past that boundary to match CPython
# byte-identically.

# Three precision points per format type around u16::MAX — 65534 (below;
# uncapped for f/%), 65535 (the boundary), and 65536 (past it: the
# unhappy case, where internal clamping plus zero-padding has to
# reconstruct CPython's output). The exponential path caps one lower, so
# for e-format 65535 already exercises padding. All must byte-match CPython.

# f-format pads with trailing zeros up to the requested precision.
assert "{:.65534f}".format(1.5) == "1." + "5" + "0" * 65533 # below cap
assert "{:.65535f}".format(1.5) == "1." + "5" + "0" * 65534 # at cap
assert "{:.65536f}".format(1.5) == "1." + "5" + "0" * 65535 # past cap → padding
# e-format emits a fixed mantissa width + 'e+00'. The exponential path
# clamps one lower than the fixed path (65534 rather than 65535), so here
# 65534 is the boundary and 65535 is the first precision that needs padding.
assert "{:.65534e}".format(1.5) == "1." + "5" + "0" * 65533 + "e+00" # at the exponential cap
assert "{:.65535e}".format(1.5) == "1." + "5" + "0" * 65534 + "e+00" # one past the exponential cap → padding
assert (
"{:.65536e}".format(1.5) == "1." + "5" + "0" * 65535 + "e+00"
) # past cap → padding
# %-format multiplies by 100 then applies f-format.
assert "{:.65534%}".format(1.5) == "150." + "0" * 65534 + "%" # below
assert "{:.65535%}".format(1.5) == "150." + "0" * 65535 + "%" # at cap
assert "{:.65536%}".format(1.5) == "150." + "0" * 65536 + "%" # past cap → padding
# g-format strips trailing zeros, so the short form is the natural
# representation regardless of precision.
for p in (65534, 65535, 65536, 1_000_000):
assert ("{:." + str(p) + "g}").format(1.5) == "1.5"

# Far past the cap — verifies the pad path handles arbitrary precision,
# not just one-off values near the boundary. Only lengths are checked to
# keep the expected values short.
assert len("{:.1000000f}".format(1.5)) == 1_000_002 # "1." + "5" + 999_999 zeros
assert len("{:.1000000e}".format(1.5)) == 1_000_006 # same mantissa + "e+00"
assert len("{:.1000000%}".format(1.5)) == 1_000_005 # "150." + 1M zeros + "%"

# Percent overflow: finite input whose *100 is +inf produces "inf%"
# rather than crashing. CPython does the same.
assert "{:.100000%}".format(1.7976931348623157e308) == "inf%"

# Shallow cases unchanged.
assert f"{1.5:.5}" == "1.5"
assert "{:.3f}".format(1.5) == "1.500"
assert "{:.2%}".format(0.25) == "25.00%"
assert "{:.4e}".format(1234.5) == "1.2345e+03"
assert "{:.3g}".format(1234.5) == "1.23e+03"
assert f"{float('nan'):.10f}" == "nan"
assert f"{float('inf'):.10f}" == "inf"
Comment on lines +217 to +253
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which test is about unhappy cases, like exceeding the MAX_PRECISION?

Loading