mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Fix complex repr to use scientific notation for large integer-valued components (#7634)
* Fix complex repr to use scientific notation for large integer-valued components
repr of a complex number whose real or imaginary part is an integer-valued
float with |x| >= 1e16 emitted the full decimal expansion instead of
scientific notation, diverging from CPython:
Before (RustPython):
repr(1e100 + 1e100j)
(10000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000+1000000000000000
000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000j)
After / CPython:
(1e+100+1e+100j)
Root cause in crates/literal/src/complex.rs::to_string — it bifurcated
each component by .fract() == 0.0:
if im.fract() == 0.0 { im.to_string() } // Rust's default Display
else { float::to_string(im) } // scientific for large/small
Rust's Display never uses scientific notation, so any integer-valued f64
(including 1e16, 1e17, 1e100 which are exactly representable as integers)
routed through the wrong branch and produced the full decimal expansion.
Non-integer magnitudes reached float::to_string and rendered correctly.
The fix is to use one helper per component that implements CPython's
actual PyOS_double_to_string(format='r') rule: scientific notation when
|x| < 1e-4 or |x| >= 1e16, otherwise Rust's default Display (which drops
the trailing '.0' for integer-valued floats — matching CPython's
(1+2j) convention rather than (1.0+2.0j)). The threshold matches
float::to_string; the only behavioral difference is that complex
components render 1.0 as "1" rather than "1.0".
Verified:
* 29 CPython reference cases (normal / boundary / extremes / special /
signed-zero) — all byte-identical after fix.
* 18 additional edge cases (subnormal 5e-324, f64::MAX, MIN_POSITIVE,
DBL_EPSILON, threshold-straddling values) — all byte-identical.
* Lib/test/test_complex.py::test_repr_str /
test_negative_zero_repr_str / test_repr_roundtrip — all pass.
* cargo run -- -m test test_complex — 37 passed.
* cargo run -- -m test test_float test_long — 101 passed.
* ast.unparse() round-trip of source containing complex literals
(e.g. 1e100 + 1e-100j, 1e17 + 1j) produces CPython-identical output.
* extra_tests/snippets/builtin_complex.py — 20+ new regression cases.
* Address CodeRabbit review: clarify threshold boundary test comment
The comment claimed all three assertions stay in non-scientific form,
but the 1e-5 case explicitly verifies scientific notation (since
|1e-5| < 1e-4 falls outside the decimal-form range). Reworded the
header to describe the axis being tested (threshold boundary) and
added per-case inline notes indicating each assertion's expected
form.
This commit is contained in:
@@ -2,14 +2,39 @@ use crate::float;
|
||||
use alloc::borrow::ToOwned;
|
||||
use alloc::string::{String, ToString};
|
||||
|
||||
/// Format a single complex component (real or imaginary) for `repr`.
///
/// The value is first rendered with Rust's `{:e}` formatter to obtain its
/// shortest significand and its decimal exponent, then one of three forms
/// is chosen:
///
/// * exponent in `-4..16` (i.e. `1e-4 <= |value| < 1e16`, plus zero):
///   Rust's default `Display`, which drops the trailing `.0` for
///   integer-valued floats (`1.0` -> `"1"`, `1e15` -> `"1000000000000000"`);
/// * any other finite exponent: scientific notation with an explicit sign
///   and at least two exponent digits (`"1e+16"`, `"1e-05"`, `"1e+100"`),
///   matching CPython's `PyOS_double_to_string(format='r')`;
/// * `nan` / `inf` / `-inf`: the lowercase spelling Python uses.
///
/// This differs from `float::to_string` only in that integer values in
/// the normal range render as `"1"` rather than `"1.0"` — complex repr
/// formats `1+2j` as `"(1+2j)"`, not `"(1.0+2.0j)"`.
fn component_to_string(value: f64) -> String {
    let lit = alloc::format!("{value:e}");
    match lit.split_once('e') {
        Some((significand, exponent)) => {
            let exponent: i32 = exponent
                .parse()
                .expect("`{:e}` output of a finite f64 always ends in an integer exponent");
            if (-4..16).contains(&exponent) {
                // Normal magnitude — Rust's default Display emits "1" for 1.0,
                // "1.5" for 1.5, "1000000000000000" for 1e15, etc.
                value.to_string()
            } else {
                // Python always writes the exponent sign and pads the
                // exponent to two digits: 1e+16, 1e-05, 1e+100.
                alloc::format!("{significand}e{exponent:+#03}")
            }
        }
        None => {
            // nan / inf / -inf — `format!("{x:e}")` produces e.g. "NaN" with
            // no exponent marker; lowercase to match Python.
            let mut s = value.to_string();
            s.make_ascii_lowercase();
            s
        }
    }
}
|
||||
|
||||
/// Convert a complex number to a string.
|
||||
pub fn to_string(re: f64, im: f64) -> String {
|
||||
// integer => drop ., fractional => float_ops
|
||||
let mut im_part = if im.fract() == 0.0 {
|
||||
im.to_string()
|
||||
} else {
|
||||
float::to_string(im)
|
||||
};
|
||||
let mut im_part = component_to_string(im);
|
||||
im_part.push('j');
|
||||
|
||||
// positive empty => return im_part, integer => drop ., fractional => float_ops
|
||||
@@ -19,10 +44,8 @@ pub fn to_string(re: f64, im: f64) -> String {
|
||||
} else {
|
||||
"-0".to_owned()
|
||||
}
|
||||
} else if re.fract() == 0.0 {
|
||||
re.to_string()
|
||||
} else {
|
||||
float::to_string(re)
|
||||
component_to_string(re)
|
||||
};
|
||||
let mut result =
|
||||
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
|
||||
|
||||
@@ -236,3 +236,35 @@ class complex_subclass(complex):
|
||||
z = complex_subclass(3 + 4j)
|
||||
assert z.__complex__() == 3 + 4j
|
||||
assert type(z.__complex__()) == complex
|
||||
|
||||
|
||||
# repr must use scientific notation for |value| >= 1e16 or < 1e-4, matching
|
||||
# CPython. Previously integer-valued large magnitudes (e.g. 1e16, 1e100) hit
|
||||
# a `fract() == 0.0` branch in rustpython_literal::complex::to_string that
|
||||
# used Rust's default Display — which emits the full decimal expansion
|
||||
# (`10000...000`) instead of `1e+16`.
|
||||
assert repr(1e16 + 1j) == "(1e+16+1j)"
|
||||
assert repr(1e17 + 1j) == "(1e+17+1j)"
|
||||
assert repr(1e100 + 1e100j) == "(1e+100+1e+100j)"
|
||||
assert repr(-1e100 - 1e100j) == "(-1e+100-1e+100j)"
|
||||
assert repr(1e-100 + 1e100j) == "(1e-100+1e+100j)"
|
||||
assert repr(1 + 1e100j) == "(1+1e+100j)"
|
||||
assert repr(1e100 + 1j) == "(1e+100+1j)"
|
||||
|
||||
# Threshold boundary: |x| in [1e-4, 1e16) renders in decimal form; values
|
||||
# outside that range use scientific notation. These three assertions pin
|
||||
# the exact transition points.
|
||||
assert repr(1e15 + 1j) == "(1000000000000000+1j)" # below 1e16 -> decimal
|
||||
assert repr(1e-4 + 1j) == "(0.0001+1j)" # at 1e-4 (inclusive) -> decimal
|
||||
assert repr(1e-5 + 1j) == "(1e-05+1j)" # below 1e-4 -> scientific
|
||||
|
||||
# Integer-valued components render without trailing ".0".
|
||||
assert repr(1 + 2j) == "(1+2j)"
|
||||
assert repr(1.0 + 2.0j) == "(1+2j)"
|
||||
|
||||
# Special values still round-trip correctly.
|
||||
assert repr(float("nan") + 1j) == "(nan+1j)"
|
||||
assert repr(float("inf") + 1j) == "(inf+1j)"
|
||||
assert repr(float("-inf") + 1j) == "(-inf+1j)"
|
||||
assert repr(complex(1, float("nan"))) == "(1+nanj)"
|
||||
assert repr(complex(1, float("inf"))) == "(1+infj)"
|
||||
|
||||
Reference in New Issue
Block a user