From 916f39c2b5dcc856acc4112a2745537c51fcd1c9 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Mon, 1 Nov 2021 21:32:48 -0400 Subject: [PATCH 1/9] cformat.rs: address memory allocation error with %*s / %.*f The previous code assumed that, after parsing was complete, both the padding and the precision integers would always be positive: it therefore cast them to usize unconditionally. This meant that when a negative integer was encountered, it would underflow and attempt to generate a padded string of around usize::MAX, or a float representation of that precision: unsurprisingly, these fail to allocate. For simple format strings (e.g. `%-3s`), this works fine: `-` is detected as a separate token during parsing, so only the positive integer (3) is passed through. However, `%*s` expresses "take the first value as the padding integer, and the second as the string to be padded" (`"%*s" % (-3, ...)` is analogous to the previous example). In this codepath, the parsing does not handle the padding integer, so it can't strip the leading `-`: the padding integer is negative. A negative precision can also be given in similar fashion, with `"%.*f" % (-3, ...)`. N.B. This commit does not cause the code to produce the correct _output_ for negative padding integers or precisions provided this way, it addresses only the crash. --- vm/src/cformat.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index 473d3c65cf..05ac38d874 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -177,7 +177,7 @@ impl CFormatSpec { Some(CFormatQuantity::Amount(width)) => cmp::max(width, num_chars), _ => num_chars, }; - let fill_chars_needed = width - num_chars; + let fill_chars_needed = width.saturating_sub(num_chars); let fill_string = CFormatSpec::compute_fill_string(fill_char, fill_chars_needed); if !fill_string.is_empty() { @@ -563,7 +563,7 @@ fn try_update_quantity_from_tuple<'a, I: Iterator>( Some(CFormatQuantity::FromValuesTuple) => match elements.next() { Some(width_obj) => { if let Some(i) = width_obj.payload::() { - let i = i.try_to_primitive::(vm)? as usize; + let i = i.try_to_primitive::(vm)?.abs() as usize; *q = Some(CFormatQuantity::Amount(i)); Ok(()) } else { @@ -917,7 +917,7 @@ where break; } } - return Ok(Some(CFormatQuantity::Amount(num as usize))); + return Ok(Some(CFormatQuantity::Amount(num.abs() as usize))); } } Ok(None) From 380653b6cbceaa4014aa5c7a8f42c84cd3905081 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Thu, 28 Oct 2021 15:35:05 -0400 Subject: [PATCH 2/9] cformat.rs: implement %a correctly, using builtins::ascii The repr() of Unicode strings is the Unicode string, so the previous code was incorrect for that case (at least). --- Lib/test/test_format.py | 2 -- vm/src/cformat.rs | 8 +++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 9ad3eb8269..33a52bc4fc 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -284,8 +284,6 @@ class FormatTest(unittest.TestCase): test_exc_common('%x', 3.14, TypeError, "%x format: an integer is required, not float") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_str_format(self): testformat("%r", "\u0378", "'\\u0378'") # non printable testformat("%a", "\u0378", "'\\u0378'") # non printable diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index 05ac38d874..7b59c0e026 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -6,6 +6,7 @@ use crate::{ builtins::{try_f64_to_bigint, tuple, PyBytes, PyFloat, PyInt, PyStr}, function::ArgIntoFloat, protocol::PyBuffer, + stdlib::builtins, ItemProtocol, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject, TypeProtocol, VirtualMachine, }; @@ -460,15 +461,16 @@ impl CFormatSpec { match &self.format_type { CFormatType::String(preconversor) => { let result = match preconversor { - CFormatPreconversor::Str => obj.str(vm)?, - CFormatPreconversor::Repr | CFormatPreconversor::Ascii => obj.repr(vm)?, + CFormatPreconversor::Ascii => builtins::ascii(obj, vm)?.into(), + CFormatPreconversor::Str => obj.str(vm)?.as_str().to_owned(), + CFormatPreconversor::Repr => obj.repr(vm)?.as_str().to_owned(), CFormatPreconversor::Bytes => { return Err(vm.new_value_error( "unsupported format character 'b' (0x62)".to_owned(), )); } }; - Ok(self.format_string(result.as_str().to_owned())) + Ok(self.format_string(result)) } CFormatType::Number(number_type) => match number_type { CNumberType::Decimal => match_class!(match &obj { From dcc0973b03c3dad5bcc53204764cafe5adbe75f8 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Thu, 28 Oct 2021 16:14:53 -0400 Subject: [PATCH 3/9] cformat.rs: handle bytearrays when formatting b'%c' --- vm/src/cformat.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index 7b59c0e026..c0192dc21e 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -3,7 +3,7 @@ use crate::common::float_ops; use crate::{ - builtins::{try_f64_to_bigint, tuple, PyBytes, PyFloat, PyInt, PyStr}, + builtins::{try_f64_to_bigint, tuple, PyByteArray, PyBytes, PyFloat, PyInt, PyStr}, function::ArgIntoFloat, protocol::PyBuffer, stdlib::builtins, @@ -447,12 +447,19 @@ impl CFormatSpec { })?; return Ok(self.format_char(ch).into_bytes()); } - if let Some(s) = obj.payload::() { - if let Ok(ch) = s.as_str().chars().exactly_one() { - return Ok(self.format_char(ch).into_bytes()); + if let Some(b) = obj.payload::() { + if b.len() == 1 { + return Ok(self.format_char(b.as_bytes()[0] as char).into_bytes()); + } + } else if let Some(ba) = obj.payload::() { + let buf = ba.borrow_buf(); + if buf.len() == 1 { + return Ok(self.format_char(buf[0] as char).into_bytes()); } } - Err(vm.new_type_error("%c requires int or char".to_owned())) + Err(vm.new_type_error( + "%c requires an integer in range(256) or a single byte".to_owned(), + )) } } } From 3e19da833181c6e3d8d29900b61dcb5bfc88087f Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Thu, 28 Oct 2021 16:33:45 -0400 Subject: [PATCH 4/9] cformat.rs: implement b'%a'/b'%r' correctly Unlike strings, b'%a' and b'%r' are equivalent, and they match the '%a' behaviour of strings, not '%r'. Thanks to @youknowone for improving this implementation. --- vm/src/cformat.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index c0192dc21e..aade49bb01 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -360,10 +360,11 @@ impl CFormatSpec { fn bytes_format(&self, vm: &VirtualMachine, obj: PyObjectRef) -> PyResult> { match &self.format_type { CFormatType::String(preconversor) => match preconversor { + // Unlike strings, %r and %a are identical for bytes: the behaviour corresponds to + // %a for strings (not %r) CFormatPreconversor::Repr | CFormatPreconversor::Ascii => { - let s = obj.repr(vm)?; - let s = self.format_string(s.as_str().to_owned()); - Ok(s.into_bytes()) + let b = builtins::ascii(obj, vm)?.into(); + Ok(b) } CFormatPreconversor::Str | CFormatPreconversor::Bytes => { if let Ok(buffer) = PyBuffer::try_from_borrowed_object(vm, &obj) { From 8594a656c731d2ede87504db5b096f12c795e235 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Thu, 28 Oct 2021 16:45:33 -0400 Subject: [PATCH 5/9] cformat.rs: s/string/bytes/ in byte formatting error messages --- vm/src/cformat.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index aade49bb01..79149a9672 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -684,7 +684,7 @@ impl CFormatBytes { Ok(result) } else { Err(vm.new_type_error( - "not all arguments converted during string formatting".to_owned(), + "not all arguments converted during bytes formatting".to_owned(), )) }; } @@ -740,8 +740,7 @@ impl CFormatBytes { // check that all arguments were converted if value_iter.next().is_some() && !is_mapping { - Err(vm - .new_type_error("not all arguments converted during string formatting".to_owned())) + Err(vm.new_type_error("not all arguments converted during bytes formatting".to_owned())) } else { Ok(result) } From 1ed3a6ca816bd3e228f20a6863ca37650558a155 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Fri, 29 Oct 2021 08:40:09 -0400 Subject: [PATCH 6/9] cformat.rs: emit correct "not all arguments" errors for byte formatting Without this, the code would accept `b"eggs" % b"ham"` and `b"eggs" % bytearray(b"ham")` because it would detect bytes and bytearray as mappings (which _are_ permitted to have leftover, unprocessed arguments). --- vm/src/cformat.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index 79149a9672..6fb383c71e 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -666,7 +666,8 @@ impl CFormatBytes { let is_mapping = values_obj.class().has_attr("__getitem__") && !values_obj.isinstance(&vm.ctx.types.tuple_type) - && !values_obj.isinstance(&vm.ctx.types.str_type); + && !values_obj.isinstance(&vm.ctx.types.bytes_type) + && !values_obj.isinstance(&vm.ctx.types.bytearray_type); if num_specifiers == 0 { // literal only From 4d2747e4b8c8588b7d34d11dcd622e89b8babfde Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Fri, 29 Oct 2021 08:56:44 -0400 Subject: [PATCH 7/9] cformat.rs: reject %c args > 255 when byte formatting This matches CPython's behaviour. Thanks to @DimitrisJim for the improved implementation here. --- vm/src/cformat.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index 6fb383c71e..6da6a1bbc1 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -440,12 +440,9 @@ impl CFormatSpec { CFormatType::Character => { if let Some(i) = obj.payload::() { let ch = i - .as_bigint() - .to_u32() - .and_then(std::char::from_u32) - .ok_or_else(|| { - vm.new_overflow_error("%c arg not in range(0x110000)".to_owned()) - })?; + .try_to_primitive::(vm) + .map_err(|_| vm.new_overflow_error("%c arg not in range(256)".to_owned()))? + as char; return Ok(self.format_char(ch).into_bytes()); } if let Some(b) = obj.payload::() { From c5eba153700ee373764f9bb033a6f39b9dd20044 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Fri, 29 Oct 2021 09:26:34 -0400 Subject: [PATCH 8/9] test_format: run test_bytes_and_bytearray_format All the errors have now been addressed. --- Lib/test/test_format.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 33a52bc4fc..abacda2acc 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -317,8 +317,6 @@ class FormatTest(unittest.TestCase): else: raise TestFailed('"%*d"%(maxsize, -127) should fail') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_bytes_and_bytearray_format(self): # %c will insert a single byte, either from an int in range(256), or # from a bytes argument of length 1, not from a str. From 655bfbc6cff7fbd730ba0624d1aed78d671930d0 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Fri, 29 Oct 2021 09:39:33 -0400 Subject: [PATCH 9/9] cformat.rs: align exception text with CPython test_format.py doesn't _fail_ if the text is different, but it does emit a warning: this fixes those warnings. --- vm/src/cformat.rs | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index 6da6a1bbc1..c166b1fd81 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -434,7 +434,18 @@ impl CFormatSpec { } }, CFormatType::Float(_) => { - let value = ArgIntoFloat::try_from_object(vm, obj)?.to_f64(); + let type_name = obj.class().name().to_string(); + let value = ArgIntoFloat::try_from_object(vm, obj) + .map_err(|e| { + if e.isinstance(&vm.ctx.exceptions.type_error) { + // formatfloat in bytesobject.c generates its own specific exception + // text in this case, mirror it here. + vm.new_type_error(format!("float argument required, not {}", type_name)) + } else { + e + } + })? + .to_f64(); Ok(self.format_float(value).into_bytes()) } CFormatType::Character => { @@ -462,7 +473,12 @@ impl CFormatSpec { } } - fn string_format(&self, vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { + fn string_format( + &self, + vm: &VirtualMachine, + obj: PyObjectRef, + idx: &usize, + ) -> PyResult { match &self.format_type { CFormatType::String(preconversor) => { let result = match preconversor { @@ -470,9 +486,11 @@ impl CFormatSpec { CFormatPreconversor::Str => obj.str(vm)?.as_str().to_owned(), CFormatPreconversor::Repr => obj.repr(vm)?.as_str().to_owned(), CFormatPreconversor::Bytes => { - return Err(vm.new_value_error( - "unsupported format character 'b' (0x62)".to_owned(), - )); + // idx is the position of the %, we want the position of the b + return Err(vm.new_value_error(format!( + "unsupported format character 'b' (0x62) at index {}", + idx + 1 + ))); } }; Ok(self.format_string(result)) @@ -841,7 +859,7 @@ impl CFormatString { if mapping_required { // dict return if is_mapping { - for (_, part) in &self.parts { + for (idx, part) in &self.parts { match part { CFormatPart::Literal(literal) => result.push_str(literal), CFormatPart::Spec(spec) => { @@ -849,7 +867,7 @@ impl CFormatString { Some(key) => values_obj.get_item(key, vm)?, None => unreachable!(), }; - let part_result = spec.string_format(vm, value)?; + let part_result = spec.string_format(vm, value, idx)?; result.push_str(&part_result); } } @@ -868,7 +886,7 @@ impl CFormatString { }; let mut value_iter = values.iter(); - for (_, part) in &mut self.parts { + for (idx, part) in &mut self.parts { match part { CFormatPart::Literal(literal) => result.push_str(literal), CFormatPart::Spec(spec) => { @@ -881,7 +899,7 @@ impl CFormatString { vm.new_type_error("not enough arguments for format string".to_owned()) ), }?; - let part_result = spec.string_format(vm, value)?; + let part_result = spec.string_format(vm, value, idx)?; result.push_str(&part_result); } }