Fix complex repr to use scientific notation for large integer-valued components (#7634)

* Fix complex repr to use scientific notation for large integer-valued components

repr of a complex number whose real or imaginary part is an integer-valued
float with |x| >= 1e16 emitted the full decimal expansion instead of
scientific notation, diverging from CPython:

  Before (RustPython):
    repr(1e100 + 1e100j)
      (10000000000000000000000000000000000000000000000000000000000
       0000000000000000000000000000000000000000000+1000000000000000
       000000000000000000000000000000000000000000000000000000000000
       00000000000000000000000000000000000000j)

  After / CPython:
    (1e+100+1e+100j)

Root cause in crates/literal/src/complex.rs::to_string — it bifurcated
each component by .fract() == 0.0:

  if im.fract() == 0.0 { im.to_string() }       // Rust's default Display
  else                 { float::to_string(im) } // scientific for large/small

Rust's Display never uses scientific notation, so any integer-valued f64
(including 1e16, 1e17, 1e100 which are exactly representable as integers)
routed through the wrong branch and produced the full decimal expansion.
Non-integer magnitudes reached float::to_string and rendered correctly.

The fix is to use one helper per component that implements CPython's
actual PyOS_double_to_string(format='r') rule: scientific notation when
|x| < 1e-4 or |x| >= 1e16, otherwise Rust's default Display (which drops
the trailing '.0' for integer-valued floats — matching CPython's
(1+2j) convention rather than (1.0+2.0j)). The threshold matches
float::to_string; the only behavioral difference is that complex
components render 1.0 as "1" rather than "1.0".

Verified:
  * 29 CPython reference cases (normal / boundary / extremes / special /
    signed-zero) — all byte-identical after fix.
  * 18 additional edge cases (subnormal 5e-324, f64::MAX, MIN_POSITIVE,
    DBL_EPSILON, threshold-straddling values) — all byte-identical.
  * Lib/test/test_complex.py::test_repr_str /
    test_negative_zero_repr_str / test_repr_roundtrip — all pass.
  * cargo run -- -m test test_complex — 37 passed.
  * cargo run -- -m test test_float test_long — 101 passed.
  * ast.unparse() round-trip of source containing complex literals
    (e.g. 1e100 + 1e-100j, 1e17 + 1j) produces CPython-identical output.
  * extra_tests/snippets/builtin_complex.py — 20+ new regression cases.

* Address CodeRabbit review: clarify threshold boundary test comment

The comment claimed all three assertions stay in non-scientific form,
but the 1e-5 case explicitly verifies scientific notation (since
|1e-5| < 1e-4 falls outside the decimal-form range). Reworded the
header to describe the axis being tested (threshold boundary) and
added per-case inline notes indicating each assertion's expected
form.
This commit is contained in:
Changjoon
2026-04-20 21:56:41 +09:00
committed by GitHub
parent 175f12b664
commit af41d11faf
2 changed files with 64 additions and 9 deletions

View File

@@ -2,14 +2,39 @@ use crate::float;
use alloc::borrow::ToOwned;
use alloc::string::{String, ToString};
/// Format a single complex component (real or imag) for `repr`.
/// Uses scientific notation when `|value| < 1e-4` or `|value| >= 1e16`
/// (matching CPython's `PyOS_double_to_string(format='r')`), otherwise
/// Rust's default `Display`, which drops the trailing `.0` for
/// integer-valued floats.
///
/// This differs from `float::to_string` only in that integer values in
/// the normal range render as `"1"` rather than `"1.0"` — complex repr
/// formats `1+2j` as `"(1+2j)"`, not `"(1.0+2.0j)"`.
fn component_to_string(value: f64) -> String {
let lit = alloc::format!("{value:e}");
if let Some(position) = lit.find('e') {
let significand = &lit[..position];
let exponent = lit[position + 1..].parse::<i32>().unwrap();
if exponent < 16 && exponent > -5 {
// Normal magnitude — Rust's default Display emits "1" for 1.0,
// "1.5" for 1.5, "1000000000000000" for 1e15, etc.
value.to_string()
} else {
alloc::format!("{significand}e{exponent:+#03}")
}
} else {
// nan / inf / -inf — `format!("{x:e}")` produces e.g. "NaN" with no
// exponent marker; lowercase to match Python.
let mut s = value.to_string();
s.make_ascii_lowercase();
s
}
}
/// Convert a complex number to a string.
pub fn to_string(re: f64, im: f64) -> String {
// integer => drop ., fractional => float_ops
let mut im_part = if im.fract() == 0.0 {
im.to_string()
} else {
float::to_string(im)
};
let mut im_part = component_to_string(im);
im_part.push('j');
// positive empty => return im_part, integer => drop ., fractional => float_ops
@@ -19,10 +44,8 @@ pub fn to_string(re: f64, im: f64) -> String {
} else {
"-0".to_owned()
}
} else if re.fract() == 0.0 {
re.to_string()
} else {
float::to_string(re)
component_to_string(re)
};
let mut result =
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);

View File

@@ -236,3 +236,35 @@ class complex_subclass(complex):
z = complex_subclass(3 + 4j)
assert z.__complex__() == 3 + 4j
assert type(z.__complex__()) == complex
# repr must use scientific notation for |value| >= 1e16 or < 1e-4, matching
# CPython. Previously integer-valued large magnitudes (e.g. 1e16, 1e100) hit
# a `fract() == 0.0` branch in rustpython_literal::complex::to_string that
# used Rust's default Display — which emits the full decimal expansion
# (`10000...000`) instead of `1e+16`.
assert repr(1e16 + 1j) == "(1e+16+1j)"
assert repr(1e17 + 1j) == "(1e+17+1j)"
assert repr(1e100 + 1e100j) == "(1e+100+1e+100j)"
assert repr(-1e100 - 1e100j) == "(-1e+100-1e+100j)"
assert repr(1e-100 + 1e100j) == "(1e-100+1e+100j)"
assert repr(1 + 1e100j) == "(1+1e+100j)"
assert repr(1e100 + 1j) == "(1e+100+1j)"
# Threshold boundary: |x| in [1e-4, 1e16) renders in decimal form; values
# outside that range use scientific notation. These three assertions pin
# the exact transition points.
assert repr(1e15 + 1j) == "(1000000000000000+1j)" # below 1e16 -> decimal
assert repr(1e-4 + 1j) == "(0.0001+1j)" # at 1e-4 (inclusive) -> decimal
assert repr(1e-5 + 1j) == "(1e-05+1j)" # below 1e-4 -> scientific
# Integer-valued components render without trailing ".0".
assert repr(1 + 2j) == "(1+2j)"
assert repr(1.0 + 2.0j) == "(1+2j)"
# Special values still round-trip correctly.
assert repr(float("nan") + 1j) == "(nan+1j)"
assert repr(float("inf") + 1j) == "(inf+1j)"
assert repr(float("-inf") + 1j) == "(-inf+1j)"
assert repr(complex(1, float("nan"))) == "(1+nanj)"
assert repr(complex(1, float("inf"))) == "(1+infj)"