Fix complex repr to use scientific notation for large integer-valued components (#7634)

* Fix complex repr to use scientific notation for large integer-valued components

  repr of a complex number whose real or imaginary part is an integer-valued
  float with |x| >= 1e16 emitted the full decimal expansion instead of
  scientific notation, diverging from CPython:

  Before (RustPython):

      repr(1e100 + 1e100j)
      (10000000000000000000000000000000000000000000000000000000000
      0000000000000000000000000000000000000000000+1000000000000000
      000000000000000000000000000000000000000000000000000000000000
      00000000000000000000000000000000000000j)

  After / CPython:

      (1e+100+1e+100j)

  Root cause in crates/literal/src/complex.rs::to_string — it bifurcated each
  component by .fract() == 0.0:

      if im.fract() == 0.0 { im.to_string() }  // Rust's default Display
      else { float::to_string(im) }            // scientific for large/small

  Rust's Display never uses scientific notation, so any integer-valued f64
  (including 1e16, 1e17, 1e100, which are exactly representable as integers)
  routed through the wrong branch and produced the full decimal expansion.
  Non-integer magnitudes reached float::to_string and rendered correctly.

  The fix is to route both components through a single helper that implements
  CPython's actual PyOS_double_to_string(format='r') rule: scientific notation
  when a nonzero |x| < 1e-4 or |x| >= 1e16, otherwise Rust's default Display
  (which drops the trailing '.0' for integer-valued floats — matching
  CPython's (1+2j) convention rather than (1.0+2.0j)). The threshold matches
  float::to_string; the only behavioral difference is that complex components
  render 1.0 as "1" rather than "1.0".

  Verified:

  * 29 CPython reference cases (normal / boundary / extremes / special /
    signed-zero) — all byte-identical after fix.
  * 18 additional edge cases (subnormal 5e-324, f64::MAX, MIN_POSITIVE,
    DBL_EPSILON, threshold-straddling values) — all byte-identical.
  * Lib/test/test_complex.py::test_repr_str / test_negative_zero_repr_str /
    test_repr_roundtrip — all pass.
  * cargo run -- -m test test_complex — 37 passed.
  * cargo run -- -m test test_float test_long — 101 passed.
  * ast.unparse() round-trip of source containing complex literals
    (e.g. 1e100 + 1e-100j, 1e17 + 1j) produces CPython-identical output.
  * extra_tests/snippets/builtin_complex.py — 17 new regression assertions.

* Address CodeRabbit review: clarify threshold boundary test comment

  The comment claimed all three assertions stay in non-scientific form, but
  the 1e-5 case explicitly verifies scientific notation (since |1e-5| < 1e-4
  falls outside the decimal-form range). Reworded the header to describe the
  axis being tested (threshold boundary) and added per-case inline notes
  indicating each assertion's expected form.
2026-06-02 19:39:49 +09:00 · 2026-04-20 21:56:41 +09:00
parent 175f12b664
commit af41d11faf
2 changed files with 64 additions and 9 deletions
--- a/crates/literal/src/complex.rs
+++ b/crates/literal/src/complex.rs
@@ -2,14 +2,39 @@ use crate::float;
 use alloc::borrow::ToOwned;
 use alloc::string::{String, ToString};

+/// Format a single complex component (real or imag) for `repr`.
+/// Nonzero values with `|value| < 1e-4` or `|value| >= 1e16` use scientific
+/// notation (`1e+16`, `1e-05`), like CPython's
+/// `PyOS_double_to_string(format='r')`; other finite values, zero included,
+/// use Rust's default `Display` (no trailing `.0`); NaN/inf are lowercased.
+///
+/// This differs from `float::to_string` only in that integer values in
+/// the normal range render as `"1"` rather than `"1.0"` — complex repr
+/// formats `1+2j` as `"(1+2j)"`, not `"(1.0+2.0j)"`.
+fn component_to_string(value: f64) -> String {
+    let lit = alloc::format!("{value:e}");
+    if let Some(position) = lit.find('e') {
+        let significand = &lit[..position];
+        let exponent = lit[position + 1..].parse::<i32>().unwrap();
+        if exponent < 16 && exponent > -5 {
+            // Decimal exponent in -4..=15 (zero formats as "0e0", so it lands
+            // here too): Display emits "1" for 1.0, "1.5" for 1.5, and so on.
+            value.to_string()
+        } else {
+            alloc::format!("{significand}e{exponent:+#03}")
+        }
+    } else {
+        // nan / inf / -inf — `format!("{x:e}")` produces e.g. "NaN" with no
+        // exponent marker; lowercase to match Python.
+        let mut s = value.to_string();
+        s.make_ascii_lowercase();
+        s
+    }
+}
+
 /// Convert a complex number to a string.
 pub fn to_string(re: f64, im: f64) -> String {
-    // integer => drop ., fractional => float_ops
-    let mut im_part = if im.fract() == 0.0 {
-        im.to_string()
-    } else {
-        float::to_string(im)
-    };
+    let mut im_part = component_to_string(im);
    im_part.push('j');

    // positive empty => return im_part, integer => drop ., fractional => float_ops
@@ -19,10 +44,8 @@ pub fn to_string(re: f64, im: f64) -> String {
        } else {
            "-0".to_owned()
        }
-    } else if re.fract() == 0.0 {
-        re.to_string()
    } else {
-        float::to_string(re)
+        component_to_string(re)
    };
    let mut result =
        String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
--- a/extra_tests/snippets/builtin_complex.py
+++ b/extra_tests/snippets/builtin_complex.py
@@ -236,3 +236,35 @@ class complex_subclass(complex):
 z = complex_subclass(3 + 4j)
 assert z.__complex__() == 3 + 4j
 assert type(z.__complex__()) == complex
+
+
+# repr must use scientific notation for |value| >= 1e16 or < 1e-4, matching
+# CPython. Previously integer-valued large magnitudes (e.g. 1e16, 1e100) hit
+# a `fract() == 0.0` branch in rustpython_literal::complex::to_string that
+# used Rust's default Display — which emits the full decimal expansion
+# (`10000...000`) instead of `1e+16`.
+assert repr(1e16 + 1j) == "(1e+16+1j)"
+assert repr(1e17 + 1j) == "(1e+17+1j)"
+assert repr(1e100 + 1e100j) == "(1e+100+1e+100j)"
+assert repr(-1e100 - 1e100j) == "(-1e+100-1e+100j)"
+assert repr(1e-100 + 1e100j) == "(1e-100+1e+100j)"
+assert repr(1 + 1e100j) == "(1+1e+100j)"
+assert repr(1e100 + 1j) == "(1e+100+1j)"
+
+# Threshold boundary: |x| in [1e-4, 1e16) renders in decimal form; values
+# outside that range use scientific notation. These assertions pin the last
+# decimal decade (1e15) and both sides of the lower bound (1e-4 vs 1e-5).
+assert repr(1e15 + 1j) == "(1000000000000000+1j)"  # below 1e16 -> decimal
+assert repr(1e-4 + 1j) == "(0.0001+1j)"  # at 1e-4 (inclusive) -> decimal
+assert repr(1e-5 + 1j) == "(1e-05+1j)"  # below 1e-4 -> scientific
+
+# Integer-valued components render without trailing ".0".
+assert repr(1 + 2j) == "(1+2j)"
+assert repr(1.0 + 2.0j) == "(1+2j)"
+
+# Special values (nan / inf) still render in lowercase with no exponent.
+assert repr(float("nan") + 1j) == "(nan+1j)"
+assert repr(float("inf") + 1j) == "(inf+1j)"
+assert repr(float("-inf") + 1j) == "(-inf+1j)"
+assert repr(complex(1, float("nan"))) == "(1+nanj)"
+assert repr(complex(1, float("inf"))) == "(1+infj)"