From 916f39c2b5dcc856acc4112a2745537c51fcd1c9 Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Mon, 1 Nov 2021 21:32:48 -0400
Subject: [PATCH 1/9] cformat.rs: address memory allocation error with %*s /
 %.*f

The previous code assumed that, after parsing was complete, both the
padding and the precision integers would always be positive: it
therefore cast them to usize unconditionally.  This meant that when a
negative integer was encountered, it would underflow and attempt to
generate a padded string of around usize::MAX, or a float representation
of that precision: unsurprisingly, these fail to allocate.

For simple format strings (e.g. `%-3s`), this works fine: `-` is
detected as a separate token during parsing, so only the positive
integer (3) is passed through.

However, `%*s` expresses "take the first value as the padding integer,
and the second as the string to be padded" (`"%*s" % (-3, ...)` is
analogous to the previous example).  In this codepath, the parsing does
not handle the padding integer, so it can't strip the leading `-`: the
padding integer is negative.  A negative precision can also be given in
similar fashion, with `"%.*f" % (-3, ...)`.

N.B. This commit does not cause the code to produce the correct _output_
for negative padding integers or precisions provided this way, it
addresses only the crash.
---
 vm/src/cformat.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index 473d3c65cf..05ac38d874 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -177,7 +177,7 @@ impl CFormatSpec {
             Some(CFormatQuantity::Amount(width)) => cmp::max(width, num_chars),
             _ => num_chars,
         };
-        let fill_chars_needed = width - num_chars;
+        let fill_chars_needed = width.saturating_sub(num_chars);
         let fill_string = CFormatSpec::compute_fill_string(fill_char, fill_chars_needed);
 
         if !fill_string.is_empty() {
@@ -563,7 +563,7 @@ fn try_update_quantity_from_tuple<'a, I: Iterator<Item = &'a PyObjectRef>>(
         Some(CFormatQuantity::FromValuesTuple) => match elements.next() {
             Some(width_obj) => {
                 if let Some(i) = width_obj.payload::<PyInt>() {
-                    let i = i.try_to_primitive::<isize>(vm)? as usize;
+                    let i = i.try_to_primitive::<isize>(vm)?.abs() as usize;
                     *q = Some(CFormatQuantity::Amount(i));
                     Ok(())
                 } else {
@@ -917,7 +917,7 @@ where
                     break;
                 }
             }
-            return Ok(Some(CFormatQuantity::Amount(num as usize)));
+            return Ok(Some(CFormatQuantity::Amount(num.abs() as usize)));
         }
     }
     Ok(None)

From 380653b6cbceaa4014aa5c7a8f42c84cd3905081 Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Thu, 28 Oct 2021 15:35:05 -0400
Subject: [PATCH 2/9] cformat.rs: implement %a correctly, using builtins::ascii

The repr() of Unicode strings is the Unicode string, so the previous
code was incorrect for that case (at least).
---
 Lib/test/test_format.py | 2 --
 vm/src/cformat.rs       | 8 +++++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
index 9ad3eb8269..33a52bc4fc 100644
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -284,8 +284,6 @@ class FormatTest(unittest.TestCase):
         test_exc_common('%x', 3.14, TypeError,
                         "%x format: an integer is required, not float")
 
-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
     def test_str_format(self):
         testformat("%r", "\u0378", "'\\u0378'")  # non printable
         testformat("%a", "\u0378", "'\\u0378'")  # non printable
diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index 05ac38d874..7b59c0e026 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -6,6 +6,7 @@ use crate::{
     builtins::{try_f64_to_bigint, tuple, PyBytes, PyFloat, PyInt, PyStr},
     function::ArgIntoFloat,
     protocol::PyBuffer,
+    stdlib::builtins,
     ItemProtocol, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject, TypeProtocol,
     VirtualMachine,
 };
@@ -460,15 +461,16 @@ impl CFormatSpec {
         match &self.format_type {
             CFormatType::String(preconversor) => {
                 let result = match preconversor {
-                    CFormatPreconversor::Str => obj.str(vm)?,
-                    CFormatPreconversor::Repr | CFormatPreconversor::Ascii => obj.repr(vm)?,
+                    CFormatPreconversor::Ascii => builtins::ascii(obj, vm)?.into(),
+                    CFormatPreconversor::Str => obj.str(vm)?.as_str().to_owned(),
+                    CFormatPreconversor::Repr => obj.repr(vm)?.as_str().to_owned(),
                     CFormatPreconversor::Bytes => {
                         return Err(vm.new_value_error(
                             "unsupported format character 'b' (0x62)".to_owned(),
                         ));
                     }
                 };
-                Ok(self.format_string(result.as_str().to_owned()))
+                Ok(self.format_string(result))
             }
             CFormatType::Number(number_type) => match number_type {
                 CNumberType::Decimal => match_class!(match &obj {

From dcc0973b03c3dad5bcc53204764cafe5adbe75f8 Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Thu, 28 Oct 2021 16:14:53 -0400
Subject: [PATCH 3/9] cformat.rs: handle bytearrays when formatting b'%c'

---
 vm/src/cformat.rs | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index 7b59c0e026..c0192dc21e 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -3,7 +3,7 @@
 
 use crate::common::float_ops;
 use crate::{
-    builtins::{try_f64_to_bigint, tuple, PyBytes, PyFloat, PyInt, PyStr},
+    builtins::{try_f64_to_bigint, tuple, PyByteArray, PyBytes, PyFloat, PyInt, PyStr},
     function::ArgIntoFloat,
     protocol::PyBuffer,
     stdlib::builtins,
@@ -447,12 +447,19 @@ impl CFormatSpec {
                         })?;
                     return Ok(self.format_char(ch).into_bytes());
                 }
-                if let Some(s) = obj.payload::<PyStr>() {
-                    if let Ok(ch) = s.as_str().chars().exactly_one() {
-                        return Ok(self.format_char(ch).into_bytes());
+                if let Some(b) = obj.payload::<PyBytes>() {
+                    if b.len() == 1 {
+                        return Ok(self.format_char(b.as_bytes()[0] as char).into_bytes());
+                    }
+                } else if let Some(ba) = obj.payload::<PyByteArray>() {
+                    let buf = ba.borrow_buf();
+                    if buf.len() == 1 {
+                        return Ok(self.format_char(buf[0] as char).into_bytes());
                     }
                 }
-                Err(vm.new_type_error("%c requires int or char".to_owned()))
+                Err(vm.new_type_error(
+                    "%c requires an integer in range(256) or a single byte".to_owned(),
+                ))
             }
         }
     }

From 3e19da833181c6e3d8d29900b61dcb5bfc88087f Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Thu, 28 Oct 2021 16:33:45 -0400
Subject: [PATCH 4/9] cformat.rs: implement b'%a'/b'%r' correctly

Unlike strings, b'%a' and b'%r' are equivalent, and they match the '%a'
behaviour of strings, not '%r'.

Thanks to @youknowone for improving this implementation.
---
 vm/src/cformat.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index c0192dc21e..aade49bb01 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -360,10 +360,11 @@ impl CFormatSpec {
     fn bytes_format(&self, vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Vec<u8>> {
         match &self.format_type {
             CFormatType::String(preconversor) => match preconversor {
+                // Unlike strings, %r and %a are identical for bytes: the behaviour corresponds to
+                // %a for strings (not %r)
                 CFormatPreconversor::Repr | CFormatPreconversor::Ascii => {
-                    let s = obj.repr(vm)?;
-                    let s = self.format_string(s.as_str().to_owned());
-                    Ok(s.into_bytes())
+                    let b = builtins::ascii(obj, vm)?.into();
+                    Ok(b)
                 }
                 CFormatPreconversor::Str | CFormatPreconversor::Bytes => {
                     if let Ok(buffer) = PyBuffer::try_from_borrowed_object(vm, &obj) {

From 8594a656c731d2ede87504db5b096f12c795e235 Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Thu, 28 Oct 2021 16:45:33 -0400
Subject: [PATCH 5/9] cformat.rs: s/string/bytes/ in byte formatting error
 messages

---
 vm/src/cformat.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index aade49bb01..79149a9672 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -684,7 +684,7 @@ impl CFormatBytes {
                 Ok(result)
             } else {
                 Err(vm.new_type_error(
-                    "not all arguments converted during string formatting".to_owned(),
+                    "not all arguments converted during bytes formatting".to_owned(),
                 ))
             };
         }
@@ -740,8 +740,7 @@ impl CFormatBytes {
 
         // check that all arguments were converted
         if value_iter.next().is_some() && !is_mapping {
-            Err(vm
-                .new_type_error("not all arguments converted during string formatting".to_owned()))
+            Err(vm.new_type_error("not all arguments converted during bytes formatting".to_owned()))
         } else {
             Ok(result)
         }

From 1ed3a6ca816bd3e228f20a6863ca37650558a155 Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Fri, 29 Oct 2021 08:40:09 -0400
Subject: [PATCH 6/9] cformat.rs: emit correct "not all arguments" errors for
 byte formatting

Without this, the code would accept `b"eggs" % b"ham"` and `b"eggs" %
bytearray(b"ham")` because it would detect bytes and bytearray as
mappings (which _are_ permitted to have leftover, unprocessed
arguments).
---
 vm/src/cformat.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index 79149a9672..6fb383c71e 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -666,7 +666,8 @@ impl CFormatBytes {
 
         let is_mapping = values_obj.class().has_attr("__getitem__")
             && !values_obj.isinstance(&vm.ctx.types.tuple_type)
-            && !values_obj.isinstance(&vm.ctx.types.str_type);
+            && !values_obj.isinstance(&vm.ctx.types.bytes_type)
+            && !values_obj.isinstance(&vm.ctx.types.bytearray_type);
 
         if num_specifiers == 0 {
             // literal only

From 4d2747e4b8c8588b7d34d11dcd622e89b8babfde Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Fri, 29 Oct 2021 08:56:44 -0400
Subject: [PATCH 7/9] cformat.rs: reject %c args > 255 when byte formatting

This matches CPython's behaviour.  Thanks to @DimitrisJim for the
improved implementation here.
---
 vm/src/cformat.rs | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index 6fb383c71e..6da6a1bbc1 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -440,12 +440,9 @@ impl CFormatSpec {
             CFormatType::Character => {
                 if let Some(i) = obj.payload::<PyInt>() {
                     let ch = i
-                        .as_bigint()
-                        .to_u32()
-                        .and_then(std::char::from_u32)
-                        .ok_or_else(|| {
-                            vm.new_overflow_error("%c arg not in range(0x110000)".to_owned())
-                        })?;
+                        .try_to_primitive::<u8>(vm)
+                        .map_err(|_| vm.new_overflow_error("%c arg not in range(256)".to_owned()))?
+                        as char;
                     return Ok(self.format_char(ch).into_bytes());
                 }
                 if let Some(b) = obj.payload::<PyBytes>() {

From c5eba153700ee373764f9bb033a6f39b9dd20044 Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Fri, 29 Oct 2021 09:26:34 -0400
Subject: [PATCH 8/9] test_format: run test_bytes_and_bytearray_format

All the errors have now been addressed.
---
 Lib/test/test_format.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
index 33a52bc4fc..abacda2acc 100644
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -317,8 +317,6 @@ class FormatTest(unittest.TestCase):
             else:
                 raise TestFailed('"%*d"%(maxsize, -127) should fail')
 
-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
     def test_bytes_and_bytearray_format(self):
         # %c will insert a single byte, either from an int in range(256), or
         # from a bytes argument of length 1, not from a str.

From 655bfbc6cff7fbd730ba0624d1aed78d671930d0 Mon Sep 17 00:00:00 2001
From: Daniel Watkins <daniel@daniel-watkins.co.uk>
Date: Fri, 29 Oct 2021 09:39:33 -0400
Subject: [PATCH 9/9] cformat.rs: align exception text with CPython

test_format.py doesn't _fail_ if the text is different, but it does emit
a warning: this fixes those warnings.
---
 vm/src/cformat.rs | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
index 6da6a1bbc1..c166b1fd81 100644
--- a/vm/src/cformat.rs
+++ b/vm/src/cformat.rs
@@ -434,7 +434,18 @@ impl CFormatSpec {
                 }
             },
             CFormatType::Float(_) => {
-                let value = ArgIntoFloat::try_from_object(vm, obj)?.to_f64();
+                let type_name = obj.class().name().to_string();
+                let value = ArgIntoFloat::try_from_object(vm, obj)
+                    .map_err(|e| {
+                        if e.isinstance(&vm.ctx.exceptions.type_error) {
+                            // formatfloat in bytesobject.c generates its own specific exception
+                            // text in this case, mirror it here.
+                            vm.new_type_error(format!("float argument required, not {}", type_name))
+                        } else {
+                            e
+                        }
+                    })?
+                    .to_f64();
                 Ok(self.format_float(value).into_bytes())
             }
             CFormatType::Character => {
@@ -462,7 +473,12 @@ impl CFormatSpec {
         }
     }
 
-    fn string_format(&self, vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<String> {
+    fn string_format(
+        &self,
+        vm: &VirtualMachine,
+        obj: PyObjectRef,
+        idx: &usize,
+    ) -> PyResult<String> {
         match &self.format_type {
             CFormatType::String(preconversor) => {
                 let result = match preconversor {
@@ -470,9 +486,11 @@ impl CFormatSpec {
                     CFormatPreconversor::Str => obj.str(vm)?.as_str().to_owned(),
                     CFormatPreconversor::Repr => obj.repr(vm)?.as_str().to_owned(),
                     CFormatPreconversor::Bytes => {
-                        return Err(vm.new_value_error(
-                            "unsupported format character 'b' (0x62)".to_owned(),
-                        ));
+                        // idx is the position of the %, we want the position of the b
+                        return Err(vm.new_value_error(format!(
+                            "unsupported format character 'b' (0x62) at index {}",
+                            idx + 1
+                        )));
                     }
                 };
                 Ok(self.format_string(result))
@@ -841,7 +859,7 @@ impl CFormatString {
         if mapping_required {
             // dict
             return if is_mapping {
-                for (_, part) in &self.parts {
+                for (idx, part) in &self.parts {
                     match part {
                         CFormatPart::Literal(literal) => result.push_str(literal),
                         CFormatPart::Spec(spec) => {
@@ -849,7 +867,7 @@ impl CFormatString {
                                 Some(key) => values_obj.get_item(key, vm)?,
                                 None => unreachable!(),
                             };
-                            let part_result = spec.string_format(vm, value)?;
+                            let part_result = spec.string_format(vm, value, idx)?;
                             result.push_str(&part_result);
                         }
                     }
@@ -868,7 +886,7 @@ impl CFormatString {
         };
         let mut value_iter = values.iter();
 
-        for (_, part) in &mut self.parts {
+        for (idx, part) in &mut self.parts {
             match part {
                 CFormatPart::Literal(literal) => result.push_str(literal),
                 CFormatPart::Spec(spec) => {
@@ -881,7 +899,7 @@ impl CFormatString {
                             vm.new_type_error("not enough arguments for format string".to_owned())
                         ),
                     }?;
-                    let part_result = spec.string_format(vm, value)?;
+                    let part_result = spec.string_format(vm, value, idx)?;
                     result.push_str(&part_result);
                 }
             }