mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Fix float parsing (#5643)
* Fix float parsing * Add rustpython_literal::complex * Don't call .to_string() on a constant
This commit is contained in:
12
Cargo.lock
generated
12
Cargo.lock
generated
@@ -1270,9 +1270,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "lexical-parse-float"
|
||||
version = "0.8.5"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
|
||||
checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2"
|
||||
dependencies = [
|
||||
"lexical-parse-integer",
|
||||
"lexical-util",
|
||||
@@ -1281,9 +1281,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lexical-parse-integer"
|
||||
version = "0.8.6"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
|
||||
checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e"
|
||||
dependencies = [
|
||||
"lexical-util",
|
||||
"static_assertions",
|
||||
@@ -1291,9 +1291,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lexical-util"
|
||||
version = "0.8.5"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
|
||||
checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3"
|
||||
dependencies = [
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
2
Lib/test/test_float.py
vendored
2
Lib/test/test_float.py
vendored
@@ -35,8 +35,6 @@ class OtherFloatSubclass(float):
|
||||
|
||||
class GeneralFloatCases(unittest.TestCase):
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_float(self):
|
||||
self.assertEqual(float(3.14), 3.14)
|
||||
self.assertEqual(float(314), 314.0)
|
||||
|
||||
@@ -609,6 +609,49 @@ macro_rules! ascii {
|
||||
}
|
||||
pub use ascii;
|
||||
|
||||
// TODO: this should probably live in a crate like unic or unicode-properties
|
||||
/// Decimal digit characters, listed in ascending code point order.
///
/// The table is laid out as consecutive runs of ten characters (the digits
/// zero through nine of one script), so the numeric value of an entry is its
/// index modulo ten. `char_to_decimal` binary-searches this slice, which means
/// the entries must stay strictly sorted and every run must stay complete.
const UNICODE_DECIMAL_VALUES: &[char] = &[
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '٠', '١', '٢', '٣', '٤', '٥', '٦', '٧', '٨',
    '٩', '۰', '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹', '߀', '߁', '߂', '߃', '߄', '߅', '߆', '߇',
    '߈', '߉', '०', '१', '२', '३', '४', '५', '६', '७', '८', '९', '০', '১', '২', '৩', '৪', '৫', '৬',
    '৭', '৮', '৯', '੦', '੧', '੨', '੩', '੪', '੫', '੬', '੭', '੮', '੯', '૦', '૧', '૨', '૩', '૪', '૫',
    '૬', '૭', '૮', '૯', '୦', '୧', '୨', '୩', '୪', '୫', '୬', '୭', '୮', '୯', '௦', '௧', '௨', '௩', '௪',
    '௫', '௬', '௭', '௮', '௯', '౦', '౧', '౨', '౩', '౪', '౫', '౬', '౭', '౮', '౯', '೦', '೧', '೨', '೩',
    '೪', '೫', '೬', '೭', '೮', '೯', '൦', '൧', '൨', '൩', '൪', '൫', '൬', '൭', '൮', '൯', '෦', '෧', '෨',
    '෩', '෪', '෫', '෬', '෭', '෮', '෯', '๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗', '๘', '๙', '໐', '໑',
    '໒', '໓', '໔', '໕', '໖', '໗', '໘', '໙', '༠', '༡', '༢', '༣', '༤', '༥', '༦', '༧', '༨', '༩', '၀',
    '၁', '၂', '၃', '၄', '၅', '၆', '၇', '၈', '၉', '႐', '႑', '႒', '႓', '႔', '႕', '႖', '႗', '႘', '႙',
    '០', '១', '២', '៣', '៤', '៥', '៦', '៧', '៨', '៩', '᠐', '᠑', '᠒', '᠓', '᠔', '᠕', '᠖', '᠗', '᠘',
    '᠙', '᥆', '᥇', '᥈', '᥉', '᥊', '᥋', '᥌', '᥍', '᥎', '᥏', '᧐', '᧑', '᧒', '᧓', '᧔', '᧕', '᧖', '᧗',
    '᧘', '᧙', '᪀', '᪁', '᪂', '᪃', '᪄', '᪅', '᪆', '᪇', '᪈', '᪉', '᪐', '᪑', '᪒', '᪓', '᪔', '᪕', '᪖',
    '᪗', '᪘', '᪙', '᭐', '᭑', '᭒', '᭓', '᭔', '᭕', '᭖', '᭗', '᭘', '᭙', '᮰', '᮱', '᮲', '᮳', '᮴', '᮵',
    '᮶', '᮷', '᮸', '᮹', '᱀', '᱁', '᱂', '᱃', '᱄', '᱅', '᱆', '᱇', '᱈', '᱉', '᱐', '᱑', '᱒', '᱓', '᱔',
    '᱕', '᱖', '᱗', '᱘', '᱙', '꘠', '꘡', '꘢', '꘣', '꘤', '꘥', '꘦', '꘧', '꘨', '꘩', '꣐', '꣑', '꣒', '꣓',
    '꣔', '꣕', '꣖', '꣗', '꣘', '꣙', '꤀', '꤁', '꤂', '꤃', '꤄', '꤅', '꤆', '꤇', '꤈', '꤉', '꧐', '꧑', '꧒',
    '꧓', '꧔', '꧕', '꧖', '꧗', '꧘', '꧙', '꧰', '꧱', '꧲', '꧳', '꧴', '꧵', '꧶', '꧷', '꧸', '꧹', '꩐', '꩑',
    '꩒', '꩓', '꩔', '꩕', '꩖', '꩗', '꩘', '꩙', '꯰', '꯱', '꯲', '꯳', '꯴', '꯵', '꯶', '꯷', '꯸', '꯹',
    // Fullwidth digits U+FF10..=U+FF19. Written as escapes so they cannot be
    // mistaken for, or normalized into, the ASCII digits at the top of the
    // table; ASCII duplicates here would break the sort order.
    '\u{FF10}', '\u{FF11}', '\u{FF12}', '\u{FF13}', '\u{FF14}', '\u{FF15}', '\u{FF16}', '\u{FF17}',
    '\u{FF18}', '\u{FF19}',
    '𐒠', '𐒡', '𐒢', '𐒣', '𐒤', '𐒥', '𐒦', '𐒧',
    '𐒨', '𐒩', '𑁦', '𑁧', '𑁨', '𑁩', '𑁪', '𑁫', '𑁬', '𑁭', '𑁮', '𑁯', '𑃰', '𑃱', '𑃲', '𑃳', '𑃴', '𑃵', '𑃶',
    '𑃷', '𑃸', '𑃹', '𑄶', '𑄷', '𑄸', '𑄹', '𑄺', '𑄻', '𑄼', '𑄽', '𑄾', '𑄿', '𑇐', '𑇑', '𑇒', '𑇓', '𑇔', '𑇕',
    '𑇖', '𑇗', '𑇘', '𑇙', '𑋰', '𑋱', '𑋲', '𑋳', '𑋴', '𑋵', '𑋶', '𑋷', '𑋸', '𑋹', '𑑐', '𑑑', '𑑒', '𑑓', '𑑔',
    '𑑕', '𑑖', '𑑗', '𑑘', '𑑙', '𑓐', '𑓑', '𑓒', '𑓓', '𑓔', '𑓕', '𑓖', '𑓗', '𑓘', '𑓙', '𑙐', '𑙑', '𑙒', '𑙓',
    '𑙔', '𑙕', '𑙖', '𑙗', '𑙘', '𑙙', '𑛀', '𑛁', '𑛂', '𑛃', '𑛄', '𑛅', '𑛆', '𑛇', '𑛈', '𑛉', '𑜰', '𑜱', '𑜲',
    '𑜳', '𑜴', '𑜵', '𑜶', '𑜷', '𑜸', '𑜹', '𑣠', '𑣡', '𑣢', '𑣣', '𑣤', '𑣥', '𑣦', '𑣧', '𑣨', '𑣩', '𑱐', '𑱑',
    '𑱒', '𑱓', '𑱔', '𑱕', '𑱖', '𑱗', '𑱘', '𑱙', '𑵐', '𑵑', '𑵒', '𑵓', '𑵔', '𑵕', '𑵖', '𑵗', '𑵘', '𑵙', '𖩠',
    '𖩡', '𖩢', '𖩣', '𖩤', '𖩥', '𖩦', '𖩧', '𖩨', '𖩩', '𖭐', '𖭑', '𖭒', '𖭓', '𖭔', '𖭕', '𖭖', '𖭗', '𖭘', '𖭙',
    '𝟎', '𝟏', '𝟐', '𝟑', '𝟒', '𝟓', '𝟔', '𝟕', '𝟖', '𝟗', '𝟘', '𝟙', '𝟚', '𝟛', '𝟜', '𝟝', '𝟞', '𝟟', '𝟠',
    '𝟡', '𝟢', '𝟣', '𝟤', '𝟥', '𝟦', '𝟧', '𝟨', '𝟩', '𝟪', '𝟫', '𝟬', '𝟭', '𝟮', '𝟯', '𝟰', '𝟱', '𝟲', '𝟳',
    '𝟴', '𝟵', '𝟶', '𝟷', '𝟸', '𝟹', '𝟺', '𝟻', '𝟼', '𝟽', '𝟾', '𝟿', '𞥐', '𞥑', '𞥒', '𞥓', '𞥔', '𞥕', '𞥖',
    '𞥗', '𞥘', '𞥙',
];

/// Returns the decimal value (`0..=9`) of `ch` when it is one of the digit
/// characters in `UNICODE_DECIMAL_VALUES`, and `None` for any other character.
pub fn char_to_decimal(ch: char) -> Option<u8> {
    UNICODE_DECIMAL_VALUES
        .binary_search(&ch)
        .ok()
        // Each run of ten entries is one script's 0..=9, so the position
        // within the run is the digit value (always < 10, so the cast is lossless).
        .map(|i| (i % 10) as u8)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -366,7 +366,7 @@ impl<'a, 'b, 'c> Unparser<'a, 'b, 'c> {
|
||||
}
|
||||
}
|
||||
&ruff::Number::Complex { real, imag } => self
|
||||
.p(&rustpython_literal::float::complex_to_string(real, imag)
|
||||
.p(&rustpython_literal::complex::to_string(real, imag)
|
||||
.replace("inf", inf_str))?,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ rustpython-wtf8 = { workspace = true }
|
||||
|
||||
hexf-parse = "0.2.1"
|
||||
is-macro.workspace = true
|
||||
lexical-parse-float = { version = "0.8.0", features = ["format"] }
|
||||
lexical-parse-float = { version = "1.0.4", features = ["format"] }
|
||||
num-traits = { workspace = true }
|
||||
unic-ucd-category = { workspace = true }
|
||||
|
||||
|
||||
73
compiler/literal/src/complex.rs
Normal file
73
compiler/literal/src/complex.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
use crate::float;
|
||||
|
||||
/// Convert a complex number to a string.
|
||||
pub fn to_string(re: f64, im: f64) -> String {
|
||||
// integer => drop ., fractional => float_ops
|
||||
let mut im_part = if im.fract() == 0.0 {
|
||||
im.to_string()
|
||||
} else {
|
||||
float::to_string(im)
|
||||
};
|
||||
im_part.push('j');
|
||||
|
||||
// positive empty => return im_part, integer => drop ., fractional => float_ops
|
||||
let re_part = if re == 0.0 {
|
||||
if re.is_sign_positive() {
|
||||
return im_part;
|
||||
} else {
|
||||
"-0".to_owned()
|
||||
}
|
||||
} else if re.fract() == 0.0 {
|
||||
re.to_string()
|
||||
} else {
|
||||
float::to_string(re)
|
||||
};
|
||||
let mut result =
|
||||
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
|
||||
result.push('(');
|
||||
result.push_str(&re_part);
|
||||
if im.is_sign_positive() || im.is_nan() {
|
||||
result.push('+');
|
||||
}
|
||||
result.push_str(&im_part);
|
||||
result.push(')');
|
||||
result
|
||||
}
|
||||
|
||||
/// Parse a complex number from a string.
|
||||
///
|
||||
/// Returns `Some((re, im))` on success.
|
||||
pub fn parse_str(s: &str) -> Option<(f64, f64)> {
|
||||
let s = s.trim();
|
||||
// Handle parentheses
|
||||
let s = match s.strip_prefix('(') {
|
||||
None => s,
|
||||
Some(s) => s.strip_suffix(')')?.trim(),
|
||||
};
|
||||
|
||||
let value = match s.strip_suffix(|c| c == 'j' || c == 'J') {
|
||||
None => (float::parse_str(s)?, 0.0),
|
||||
Some(mut s) => {
|
||||
let mut real = 0.0;
|
||||
// Find the central +/- operator. If it exists, parse the real part.
|
||||
for (i, w) in s.as_bytes().windows(2).enumerate() {
|
||||
if (w[1] == b'+' || w[1] == b'-') && !(w[0] == b'e' || w[0] == b'E') {
|
||||
real = float::parse_str(&s[..=i])?;
|
||||
s = &s[i + 1..];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let imag = match s {
|
||||
// "j", "+j"
|
||||
"" | "+" => 1.0,
|
||||
// "-j"
|
||||
"-" => -1.0,
|
||||
s => float::parse_str(s)?,
|
||||
};
|
||||
|
||||
(real, imag)
|
||||
}
|
||||
};
|
||||
Some(value)
|
||||
}
|
||||
@@ -6,49 +6,8 @@ pub fn parse_str(literal: &str) -> Option<f64> {
|
||||
parse_inner(literal.trim().as_bytes())
|
||||
}
|
||||
|
||||
/// Copies `literal` with every `_` removed, validating underscore placement.
///
/// Returns `None` when an underscore is not immediately preceded by an ASCII
/// digit, is not immediately followed by an ASCII digit, or is the last byte.
fn strip_underlines(literal: &[u8]) -> Option<Vec<u8>> {
    let mut cleaned = Vec::<u8>::new();
    // Previously seen byte; NUL stands for "nothing seen yet".
    let mut last = b'\0';

    for &byte in literal {
        match (last, byte) {
            // An underscore must come right after a digit.
            (before, b'_') if !before.is_ascii_digit() => return None,
            // A well-placed underscore is simply dropped.
            (_, b'_') => {}
            // Whatever follows an underscore must be a digit.
            (b'_', after) if !after.is_ascii_digit() => return None,
            (_, other) => cleaned.push(other),
        }
        last = byte;
    }

    // A trailing underscore has no digit after it.
    (last != b'_').then_some(cleaned)
}
|
||||
|
||||
pub fn parse_bytes(literal: &[u8]) -> Option<f64> {
|
||||
parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace()))
|
||||
}
|
||||
|
||||
fn trim_slice<T>(v: &[T], mut trim: impl FnMut(&T) -> bool) -> &[T] {
|
||||
let mut it = v.iter();
|
||||
// it.take_while_ref(&mut trim).for_each(drop);
|
||||
// hmm.. `&mut slice::Iter<_>` is not `Clone`
|
||||
// it.by_ref().rev().take_while_ref(&mut trim).for_each(drop);
|
||||
while it.clone().next().is_some_and(&mut trim) {
|
||||
it.next();
|
||||
}
|
||||
while it.clone().next_back().is_some_and(&mut trim) {
|
||||
it.next_back();
|
||||
}
|
||||
it.as_slice()
|
||||
parse_inner(literal.trim_ascii())
|
||||
}
|
||||
|
||||
fn parse_inner(literal: &[u8]) -> Option<f64> {
|
||||
@@ -56,15 +15,11 @@ fn parse_inner(literal: &[u8]) -> Option<f64> {
|
||||
FromLexicalWithOptions, NumberFormatBuilder, Options, format::PYTHON3_LITERAL,
|
||||
};
|
||||
|
||||
// Use custom function for underline handling for now.
|
||||
// For further information see https://github.com/Alexhuszagh/rust-lexical/issues/96.
|
||||
let stripped = strip_underlines(literal)?;
|
||||
|
||||
// lexical-core's format::PYTHON_STRING is inaccurate
|
||||
const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL)
|
||||
.no_special(false)
|
||||
.build();
|
||||
f64::from_lexical_with_options::<PYTHON_STRING>(&stripped, &Options::new()).ok()
|
||||
f64::from_lexical_with_options::<PYTHON_STRING>(literal, &Options::new()).ok()
|
||||
}
|
||||
|
||||
pub fn is_integer(v: f64) -> bool {
|
||||
@@ -223,39 +178,6 @@ pub fn to_string(value: f64) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn complex_to_string(re: f64, im: f64) -> String {
|
||||
// integer => drop ., fractional => float_ops
|
||||
let mut im_part = if im.fract() == 0.0 {
|
||||
im.to_string()
|
||||
} else {
|
||||
to_string(im)
|
||||
};
|
||||
im_part.push('j');
|
||||
|
||||
// positive empty => return im_part, integer => drop ., fractional => float_ops
|
||||
let re_part = if re == 0.0 {
|
||||
if re.is_sign_positive() {
|
||||
return im_part;
|
||||
} else {
|
||||
re.to_string()
|
||||
}
|
||||
} else if re.fract() == 0.0 {
|
||||
re.to_string()
|
||||
} else {
|
||||
to_string(re)
|
||||
};
|
||||
let mut result =
|
||||
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
|
||||
result.push('(');
|
||||
result.push_str(&re_part);
|
||||
if im.is_sign_positive() || im.is_nan() {
|
||||
result.push('+');
|
||||
}
|
||||
result.push_str(&im_part);
|
||||
result.push(')');
|
||||
result
|
||||
}
|
||||
|
||||
pub fn from_hex(s: &str) -> Option<f64> {
|
||||
if let Ok(f) = hexf_parse::parse_hexf64(s, false) {
|
||||
return Some(f);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
pub mod char;
|
||||
pub mod complex;
|
||||
pub mod escape;
|
||||
pub mod float;
|
||||
pub mod format;
|
||||
|
||||
@@ -179,13 +179,13 @@ impl Constructor for PyComplex {
|
||||
"complex() can't take second arg if first is a string".to_owned(),
|
||||
));
|
||||
}
|
||||
let value = s
|
||||
let (re, im) = s
|
||||
.to_str()
|
||||
.and_then(|s| parse_str(s.trim()))
|
||||
.and_then(rustpython_literal::complex::parse_str)
|
||||
.ok_or_else(|| {
|
||||
vm.new_value_error("complex() arg is a malformed string".to_owned())
|
||||
})?;
|
||||
return Self::from(value)
|
||||
return Self::from(Complex64 { re, im })
|
||||
.into_ref_with_type(vm, cls)
|
||||
.map(Into::into);
|
||||
} else {
|
||||
@@ -494,7 +494,7 @@ impl Representable for PyComplex {
|
||||
// TODO: when you fix this, move it to rustpython_common::complex::repr and update
|
||||
// ast/src/unparse.rs + impl Display for Constant in ast/src/constant.rs
|
||||
let Complex64 { re, im } = zelf.value;
|
||||
Ok(rustpython_literal::float::complex_to_string(re, im))
|
||||
Ok(rustpython_literal::complex::to_string(re, im))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -519,40 +519,3 @@ pub struct ComplexArgs {
|
||||
#[pyarg(any, optional)]
|
||||
imag: OptionalArg<PyObjectRef>,
|
||||
}
|
||||
|
||||
fn parse_str(s: &str) -> Option<Complex64> {
|
||||
// Handle parentheses
|
||||
let s = match s.strip_prefix('(') {
|
||||
None => s,
|
||||
Some(s) => match s.strip_suffix(')') {
|
||||
None => return None,
|
||||
Some(s) => s.trim(),
|
||||
},
|
||||
};
|
||||
|
||||
let value = match s.strip_suffix(|c| c == 'j' || c == 'J') {
|
||||
None => Complex64::new(crate::literal::float::parse_str(s)?, 0.0),
|
||||
Some(mut s) => {
|
||||
let mut real = 0.0;
|
||||
// Find the central +/- operator. If it exists, parse the real part.
|
||||
for (i, w) in s.as_bytes().windows(2).enumerate() {
|
||||
if (w[1] == b'+' || w[1] == b'-') && !(w[0] == b'e' || w[0] == b'E') {
|
||||
real = crate::literal::float::parse_str(&s[..=i])?;
|
||||
s = &s[i + 1..];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let imag = match s {
|
||||
// "j", "+j"
|
||||
"" | "+" => 1.0,
|
||||
// "-j"
|
||||
"-" => -1.0,
|
||||
s => crate::literal::float::parse_str(s)?,
|
||||
};
|
||||
|
||||
Complex64::new(real, imag)
|
||||
}
|
||||
};
|
||||
Some(value)
|
||||
}
|
||||
|
||||
@@ -159,9 +159,31 @@ impl Constructor for PyFloat {
|
||||
}
|
||||
|
||||
fn float_from_string(val: PyObjectRef, vm: &VirtualMachine) -> PyResult<f64> {
|
||||
let (bytearray, buffer, buffer_lock);
|
||||
let (bytearray, buffer, buffer_lock, mapped_string);
|
||||
let b = if let Some(s) = val.payload_if_subclass::<PyStr>(vm) {
|
||||
s.as_wtf8().trim().as_bytes()
|
||||
use crate::common::str::PyKindStr;
|
||||
match s.as_str_kind() {
|
||||
PyKindStr::Ascii(s) => s.trim().as_bytes(),
|
||||
PyKindStr::Utf8(s) => {
|
||||
mapped_string = s
|
||||
.trim()
|
||||
.chars()
|
||||
.map(|c| {
|
||||
if let Some(n) = rustpython_common::str::char_to_decimal(c) {
|
||||
char::from_digit(n.into(), 10).unwrap()
|
||||
} else if c.is_whitespace() {
|
||||
' '
|
||||
} else {
|
||||
c
|
||||
}
|
||||
})
|
||||
.collect::<String>();
|
||||
mapped_string.as_bytes()
|
||||
}
|
||||
// if there are surrogates, it's not gonna parse anyway,
|
||||
// so we can just choose a known bad value
|
||||
PyKindStr::Wtf8(_) => b"",
|
||||
}
|
||||
} else if let Some(bytes) = val.payload_if_subclass::<PyBytes>(vm) {
|
||||
bytes.as_bytes()
|
||||
} else if let Some(buf) = val.payload_if_subclass::<PyByteArray>(vm) {
|
||||
|
||||
Reference in New Issue
Block a user