diff --git a/common/src/bytes.rs b/common/src/bytes.rs deleted file mode 100644 index f281da2090..0000000000 --- a/common/src/bytes.rs +++ /dev/null @@ -1,62 +0,0 @@ -use crate::str::ReprOverflowError; - -pub fn repr(b: &[u8]) -> Result { - repr_with(b, &[], "") -} - -pub fn repr_with(b: &[u8], prefixes: &[&str], suffix: &str) -> Result { - use std::fmt::Write; - - let mut out_len = 0usize; - let mut squote = 0; - let mut dquote = 0; - - for &ch in b { - let incr = match ch { - b'\'' => { - squote += 1; - 1 - } - b'"' => { - dquote += 1; - 1 - } - b'\\' | b'\t' | b'\r' | b'\n' => 2, - 0x20..=0x7e => 1, - _ => 4, // \xHH - }; - out_len = out_len.checked_add(incr).ok_or(ReprOverflowError)?; - } - - let (quote, num_escaped_quotes) = crate::str::choose_quotes_for_repr(squote, dquote); - // we'll be adding backslashes in front of the existing inner quotes - out_len += num_escaped_quotes; - - // 3 is for b prefix + outer quotes - out_len += 3 + prefixes.iter().map(|s| s.len()).sum::() + suffix.len(); - - let mut res = String::with_capacity(out_len); - res.extend(prefixes.iter().copied()); - res.push('b'); - res.push(quote); - for &ch in b { - match ch { - b'\t' => res.push_str("\\t"), - b'\n' => res.push_str("\\n"), - b'\r' => res.push_str("\\r"), - // printable ascii range - 0x20..=0x7e => { - let ch = ch as char; - if ch == quote || ch == '\\' { - res.push('\\'); - } - res.push(ch); - } - _ => write!(res, "\\x{ch:02x}").unwrap(), - } - } - res.push(quote); - res.push_str(suffix); - - Ok(res) -} diff --git a/common/src/escape.rs b/common/src/escape.rs new file mode 100644 index 0000000000..de7a51433e --- /dev/null +++ b/common/src/escape.rs @@ -0,0 +1,414 @@ +#[derive(Debug, Clone, Copy)] +pub enum Quote { + Single, + Double, +} + +impl Quote { + #[inline] + pub const fn swap(self) -> Quote { + match self { + Quote::Single => Quote::Double, + Quote::Double => Quote::Single, + } + } + + #[inline] + pub const fn to_byte(&self) -> u8 { + match self { + Quote::Single => b'\'', + Quote::Double => b'"', + } + } + + #[inline] + pub const fn to_char(&self) -> char { + match self { + Quote::Single => '\'', + Quote::Double => '"', + } + } +} + +pub struct EscapeLayout { + pub quote: Quote, + pub len: Option, +} + +pub trait Escape { + fn source_len(&self) -> usize; + fn layout(&self) -> &EscapeLayout; + fn changed(&self) -> bool { + self.layout().len != Some(self.source_len()) + } + + fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result; + fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result; + fn write_body(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + if self.changed() { + self.write_body_slow(formatter) + } else { + self.write_source(formatter) + } + } +} + +/// Returns the outer quotes to use and the number of quotes that need to be +/// escaped. +pub(crate) const fn choose_quote( + single_count: usize, + double_count: usize, + preferred_quote: Quote, +) -> (Quote, usize) { + let (primary_count, secondary_count) = match preferred_quote { + Quote::Single => (single_count, double_count), + Quote::Double => (double_count, single_count), + }; + + // always use primary unless we have primary but no seconday + let use_secondary = primary_count > 0 && secondary_count == 0; + if use_secondary { + (preferred_quote.swap(), secondary_count) + } else { + (preferred_quote, primary_count) + } +} + +pub struct UnicodeEscape<'a> { + source: &'a str, + layout: EscapeLayout, +} + +impl<'a> UnicodeEscape<'a> { + pub fn with_forced_quote(source: &'a str, quote: Quote) -> Self { + let layout = EscapeLayout { quote, len: None }; + Self { source, layout } + } + pub fn new_repr(source: &'a str) -> Self { + let layout = Self::repr_layout(source, Quote::Single); + Self { source, layout } + } + + pub fn str_repr<'r>(&'a self) -> StrRepr<'r, 'a> { + StrRepr(self) + } +} + +pub struct StrRepr<'r, 'a>(&'r UnicodeEscape<'a>); + +impl StrRepr<'_, '_> { + pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + let quote = self.0.layout().quote.to_char(); + formatter.write_char(quote)?; + self.0.write_body(formatter)?; + formatter.write_char(quote) + } + + pub fn to_string(&self) -> Option { + let mut s = String::with_capacity(self.0.layout().len?); + self.write(&mut s).unwrap(); + Some(s) + } +} + +impl std::fmt::Display for StrRepr<'_, '_> { + fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.write(formatter) + } +} + +impl UnicodeEscape<'_> { + const REPR_RESERVED_LEN: usize = 2; // for quotes + + pub fn repr_layout(source: &str, preferred_quote: Quote) -> EscapeLayout { + Self::output_layout_with_checker(source, preferred_quote, |a, b| { + Some((a as isize).checked_add(b as isize)? as usize) + }) + } + + fn output_layout_with_checker( + source: &str, + preferred_quote: Quote, + length_add: impl Fn(usize, usize) -> Option, + ) -> EscapeLayout { + let mut out_len = Self::REPR_RESERVED_LEN; + let mut single_count = 0; + let mut double_count = 0; + + for ch in source.chars() { + let incr = match ch { + '\'' => { + single_count += 1; + 1 + } + '"' => { + double_count += 1; + 1 + } + c => Self::escaped_char_len(c), + }; + let Some(new_len) = length_add(out_len, incr) else { + #[cold] + fn stop(single_count: usize, double_count: usize, preferred_quote: Quote) -> EscapeLayout { + EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None } + } + return stop(single_count, double_count, preferred_quote); + }; + out_len = new_len; + } + + let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote); + // we'll be adding backslashes in front of the existing inner quotes + let Some(out_len) = length_add(out_len, num_escaped_quotes) else { + return EscapeLayout { quote, len: None }; + }; + + EscapeLayout { + quote, + len: Some(out_len - Self::REPR_RESERVED_LEN), + } + } + + fn escaped_char_len(ch: char) -> usize { + match ch { + '\\' | '\t' | '\r' | '\n' => 2, + ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH + ch if ch.is_ascii() => 1, + ch if crate::char::is_printable(ch) => { + // max = std::cmp::max(ch, max); + ch.len_utf8() + } + ch if (ch as u32) < 0x100 => 4, // \xHH + ch if (ch as u32) < 0x10000 => 6, // \uHHHH + _ => 10, // \uHHHHHHHH + } + } + + fn write_char( + ch: char, + quote: Quote, + formatter: &mut impl std::fmt::Write, + ) -> std::fmt::Result { + match ch { + '\n' => formatter.write_str("\\n"), + '\t' => formatter.write_str("\\t"), + '\r' => formatter.write_str("\\r"), + // these 2 branches *would* be handled below, but we shouldn't have to do a + // unicodedata lookup just for ascii characters + '\x20'..='\x7e' => { + // printable ascii range + if ch == quote.to_char() || ch == '\\' { + formatter.write_char('\\')?; + } + formatter.write_char(ch) + } + ch if ch.is_ascii() => { + write!(formatter, "\\x{:02x}", ch as u8) + } + ch if crate::char::is_printable(ch) => formatter.write_char(ch), + '\0'..='\u{ff}' => { + write!(formatter, "\\x{:02x}", ch as u32) + } + '\0'..='\u{ffff}' => { + write!(formatter, "\\u{:04x}", ch as u32) + } + _ => { + write!(formatter, "\\U{:08x}", ch as u32) + } + } + } +} + +impl<'a> Escape for UnicodeEscape<'a> { + fn source_len(&self) -> usize { + self.source.len() + } + + fn layout(&self) -> &EscapeLayout { + &self.layout + } + + fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + formatter.write_str(self.source) + } + + #[cold] + fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + for ch in self.source.chars() { + Self::write_char(ch, self.layout().quote, formatter)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod unicode_escapse_tests { + use super::*; + + #[test] + fn changed() { + fn test(s: &str) -> bool { + UnicodeEscape::new_repr(s).changed() + } + assert!(!test("hello")); + assert!(!test("'hello'")); + assert!(!test("\"hello\"")); + + assert!(test("'\"hello")); + assert!(test("hello\n")); + } +} + +pub struct AsciiEscape<'a> { + source: &'a [u8], + layout: EscapeLayout, +} + +impl<'a> AsciiEscape<'a> { + pub fn new(source: &'a [u8], layout: EscapeLayout) -> Self { + Self { source, layout } + } + pub fn with_forced_quote(source: &'a [u8], quote: Quote) -> Self { + let layout = EscapeLayout { quote, len: None }; + Self { source, layout } + } + pub fn new_repr(source: &'a [u8]) -> Self { + let layout = Self::repr_layout(source, Quote::Single); + Self { source, layout } + } + + pub fn bytes_repr<'r>(&'a self) -> BytesRepr<'r, 'a> { + BytesRepr(self) + } +} + +impl AsciiEscape<'_> { + pub fn repr_layout(source: &[u8], preferred_quote: Quote) -> EscapeLayout { + Self::output_layout_with_checker(source, preferred_quote, 3, |a, b| { + Some((a as isize).checked_add(b as isize)? as usize) + }) + } + + pub fn named_repr_layout(source: &[u8], name: &str) -> EscapeLayout { + Self::output_layout_with_checker(source, Quote::Single, name.len() + 2 + 3, |a, b| { + Some((a as isize).checked_add(b as isize)? as usize) + }) + } + + fn output_layout_with_checker( + source: &[u8], + preferred_quote: Quote, + reserved_len: usize, + length_add: impl Fn(usize, usize) -> Option, + ) -> EscapeLayout { + let mut out_len = reserved_len; + let mut single_count = 0; + let mut double_count = 0; + + for ch in source.iter() { + let incr = match ch { + b'\'' => { + single_count += 1; + 1 + } + b'"' => { + double_count += 1; + 1 + } + c => Self::escaped_char_len(*c), + }; + let Some(new_len) = length_add(out_len, incr) else { + #[cold] + fn stop(single_count: usize, double_count: usize, preferred_quote: Quote) -> EscapeLayout { + EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None } + } + return stop(single_count, double_count, preferred_quote); + }; + out_len = new_len; + } + + let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote); + // we'll be adding backslashes in front of the existing inner quotes + let Some(out_len) = length_add(out_len, num_escaped_quotes) else { + return EscapeLayout { quote, len: None }; + }; + + EscapeLayout { + quote, + len: Some(out_len - reserved_len), + } + } + + fn escaped_char_len(ch: u8) -> usize { + match ch { + b'\\' | b'\t' | b'\r' | b'\n' => 2, + 0x20..=0x7e => 1, + _ => 4, // \xHH + } + } + + fn write_char(ch: u8, quote: Quote, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + match ch { + b'\t' => formatter.write_str("\\t"), + b'\n' => formatter.write_str("\\n"), + b'\r' => formatter.write_str("\\r"), + 0x20..=0x7e => { + // printable ascii range + if ch == quote.to_byte() || ch == b'\\' { + formatter.write_char('\\')?; + } + formatter.write_char(ch as char) + } + ch => write!(formatter, "\\x{ch:02x}"), + } + } +} + +impl<'a> Escape for AsciiEscape<'a> { + fn source_len(&self) -> usize { + self.source.len() + } + + fn layout(&self) -> &EscapeLayout { + &self.layout + } + + fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + formatter.write_str(unsafe { + // SAFETY: this function must be called only when source is printable ascii characters + std::str::from_utf8_unchecked(self.source) + }) + } + + #[cold] + fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + for ch in self.source.iter() { + Self::write_char(*ch, self.layout().quote, formatter)?; + } + Ok(()) + } +} + +pub struct BytesRepr<'r, 'a>(&'r AsciiEscape<'a>); + +impl BytesRepr<'_, '_> { + pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { + let quote = self.0.layout().quote.to_char(); + formatter.write_char('b')?; + formatter.write_char(quote)?; + self.0.write_body(formatter)?; + formatter.write_char(quote) + } + + pub fn to_string(&self) -> Option { + let mut s = String::with_capacity(self.0.layout().len?); + self.write(&mut s).unwrap(); + Some(s) + } +} + +impl std::fmt::Display for BytesRepr<'_, '_> { + fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.write(formatter) + } +} diff --git a/common/src/lib.rs b/common/src/lib.rs index 250f8de217..ae6e55a793 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -7,13 +7,13 @@ pub use macros::*; pub mod atomic; pub mod borrow; pub mod boxvec; -pub mod bytes; pub mod cformat; pub mod char; pub mod cmp; #[cfg(any(unix, windows, target_os = "wasi"))] pub mod crt_fd; pub mod encodings; +pub mod escape; pub mod float_ops; pub mod format; pub mod hash; diff --git a/common/src/str.rs b/common/src/str.rs index 7e2024d0f3..3c01b755bf 100644 --- a/common/src/str.rs +++ b/common/src/str.rs @@ -3,11 +3,7 @@ use crate::{ hash::PyHash, }; use ascii::AsciiString; -use once_cell::unsync::OnceCell; -use std::{ - fmt, - ops::{Bound, RangeBounds}, -}; +use std::ops::{Bound, RangeBounds}; #[cfg(not(target_arch = "wasm32"))] #[allow(non_camel_case_types)] @@ -342,158 +338,6 @@ macro_rules! ascii { }}; } -/// Get a Display-able type that formats to the python `repr()` of the string value -#[inline] -pub fn repr(s: &str) -> Repr<'_> { - Repr { - s, - info: OnceCell::new(), - } -} - -#[derive(Debug, Copy, Clone)] -#[non_exhaustive] -pub struct ReprOverflowError; -impl fmt::Display for ReprOverflowError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str("string is too long to generate repr") - } -} - -#[derive(Copy, Clone)] -struct ReprInfo { - dquoted: bool, - out_len: usize, -} -impl ReprInfo { - fn get(s: &str) -> Result { - let mut out_len = 0usize; - let mut squote = 0; - let mut dquote = 0; - - for ch in s.chars() { - let incr = match ch { - '\'' => { - squote += 1; - 1 - } - '"' => { - dquote += 1; - 1 - } - '\\' | '\t' | '\r' | '\n' => 2, - ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH - ch if ch.is_ascii() => 1, - ch if crate::char::is_printable(ch) => { - // max = std::cmp::max(ch, max); - ch.len_utf8() - } - ch if (ch as u32) < 0x100 => 4, // \xHH - ch if (ch as u32) < 0x10000 => 6, // \uHHHH - _ => 10, // \uHHHHHHHH - }; - out_len += incr; - if out_len > std::isize::MAX as usize { - return Err(ReprOverflowError); - } - } - - let (quote, num_escaped_quotes) = choose_quotes_for_repr(squote, dquote); - // we'll be adding backslashes in front of the existing inner quotes - out_len += num_escaped_quotes; - - // start and ending quotes - out_len += 2; - - let dquoted = quote == '"'; - - Ok(ReprInfo { dquoted, out_len }) - } -} - -pub struct Repr<'a> { - s: &'a str, - // the tuple is dquouted, out_len - info: OnceCell>, -} -impl Repr<'_> { - fn get_info(&self) -> Result { - *self.info.get_or_init(|| ReprInfo::get(self.s)) - } - - /// Same as `::to_string()`, but checks for a possible OverflowError. - pub fn to_string_checked(&self) -> Result { - let info = self.get_info()?; - let mut repr = String::with_capacity(info.out_len); - self._fmt(&mut repr, info).unwrap(); - Ok(repr) - } - - fn _fmt(&self, repr: &mut W, info: ReprInfo) -> fmt::Result { - let s = self.s; - let in_len = s.len(); - let ReprInfo { dquoted, out_len } = info; - - let quote = if dquoted { '"' } else { '\'' }; - // if we don't need to escape anything we can just copy - let unchanged = out_len == in_len; - - repr.write_char(quote)?; - if unchanged { - repr.write_str(s)?; - } else { - for ch in s.chars() { - match ch { - '\n' => repr.write_str("\\n"), - '\t' => repr.write_str("\\t"), - '\r' => repr.write_str("\\r"), - // these 2 branches *would* be handled below, but we shouldn't have to do a - // unicodedata lookup just for ascii characters - '\x20'..='\x7e' => { - // printable ascii range - if ch == quote || ch == '\\' { - repr.write_char('\\')?; - } - repr.write_char(ch) - } - ch if ch.is_ascii() => { - write!(repr, "\\x{:02x}", ch as u8) - } - ch if crate::char::is_printable(ch) => repr.write_char(ch), - '\0'..='\u{ff}' => { - write!(repr, "\\x{:02x}", ch as u32) - } - '\0'..='\u{ffff}' => { - write!(repr, "\\u{:04x}", ch as u32) - } - _ => { - write!(repr, "\\U{:08x}", ch as u32) - } - }?; - } - } - repr.write_char(quote) - } -} - -impl fmt::Display for Repr<'_> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let info = self.get_info().unwrap(); - self._fmt(f, info) - } -} - -/// returns the outer quotes to use and the number of quotes that need to be escaped -pub(crate) fn choose_quotes_for_repr(num_squotes: usize, num_dquotes: usize) -> (char, usize) { - // always use squote unless we have squotes but no dquotes - let use_dquote = num_squotes > 0 && num_dquotes == 0; - if use_dquote { - ('"', num_dquotes) - } else { - ('\'', num_squotes) - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/compiler/ast/src/constant.rs b/compiler/ast/src/constant.rs index 6099272efd..d81d7fbede 100644 --- a/compiler/ast/src/constant.rs +++ b/compiler/ast/src/constant.rs @@ -41,9 +41,13 @@ impl std::fmt::Display for Constant { match self { Constant::None => f.pad("None"), Constant::Bool(b) => f.pad(if *b { "True" } else { "False" }), - Constant::Str(s) => rustpython_common::str::repr(s).fmt(f), + Constant::Str(s) => rustpython_common::escape::UnicodeEscape::new_repr(s.as_str()) + .str_repr() + .write(f), Constant::Bytes(b) => { - f.pad(&rustpython_common::bytes::repr(b).map_err(|_err| std::fmt::Error)?) + let escape = rustpython_common::escape::AsciiEscape::new_repr(b); + let repr = escape.bytes_repr().to_string().unwrap(); + f.pad(&repr) } Constant::Int(i) => i.fmt(f), Constant::Tuple(tup) => { diff --git a/compiler/ast/src/unparse.rs b/compiler/ast/src/unparse.rs index 081c2a9241..e7e1bced18 100644 --- a/compiler/ast/src/unparse.rs +++ b/compiler/ast/src/unparse.rs @@ -511,7 +511,9 @@ impl<'a> Unparser<'a> { } else { self.p("f")?; let body = to_string_fmt(|f| Unparser::new(f).unparse_fstring_body(values, is_spec)); - fmt::Display::fmt(&rustpython_common::str::repr(&body), &mut self.f) + rustpython_common::escape::UnicodeEscape::new_repr(&body) + .str_repr() + .write(&mut self.f) } } } diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index 762573ba67..31c4f32072 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -1292,11 +1292,9 @@ mod array { if zelf.len() == 0 { return Ok(format!("{class_name}('u')")); } - return Ok(format!( - "{}('u', {})", - class_name, - crate::common::str::repr(&zelf.tounicode(vm)?) - )); + let to_unicode = zelf.tounicode(vm)?; + let escape = crate::common::escape::UnicodeEscape::new_repr(&to_unicode); + return Ok(format!("{}('u', {})", class_name, escape.str_repr())); } zelf.read().repr(&class_name, vm) } diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index f1279389e4..9ac0fb54f2 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -871,7 +871,7 @@ impl Representable for PyByteArray { fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { let class = zelf.class(); let class_name = class.name(); - zelf.inner().repr(Some(&class_name), vm) + zelf.inner().repr_with_name(&class_name, vm) } } diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index b27a8a2df3..351eb7ba8a 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -672,7 +672,7 @@ impl Iterable for PyBytes { impl Representable for PyBytes { #[inline] fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { - zelf.inner.repr(None, vm) + zelf.inner.repr_bytes(vm) } } diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index bdb32981de..7449aef52f 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -500,9 +500,12 @@ impl PyStr { #[inline] pub(crate) fn repr(&self, vm: &VirtualMachine) -> PyResult { - rustpython_common::str::repr(self.as_str()) - .to_string_checked() - .map_err(|err| vm.new_overflow_error(err.to_string())) + use rustpython_common::escape::UnicodeEscape; + let escape = UnicodeEscape::new_repr(self.as_str()); + escape + .str_repr() + .to_string() + .ok_or_else(|| vm.new_overflow_error("string is too long to generate repr".to_owned())) } #[pymethod] diff --git a/vm/src/bytesinner.rs b/vm/src/bytesinner.rs index 410b7d1759..2e0bf1ae2e 100644 --- a/vm/src/bytesinner.rs +++ b/vm/src/bytesinner.rs @@ -1,11 +1,12 @@ use crate::{ anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper}, builtins::{ - pystr, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef, PyTypeRef, + pystr, PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, + PyStrRef, PyTypeRef, }, byte::bytes_from_object, cformat::cformat_bytes, - convert::ToPyException, + common::{escape::Escape, hash}, function::{ArgIterable, Either, OptionalArg, OptionalOption, PyComparisonValue}, identifier, protocol::PyBuffer, @@ -17,7 +18,6 @@ use bstr::ByteSlice; use itertools::Itertools; use num_bigint::BigInt; use num_traits::ToPrimitive; -use rustpython_common::hash; #[derive(Debug, Default, Clone)] pub struct PyBytesInner { @@ -247,13 +247,36 @@ impl PyBytesInner { &self.elements } - pub fn repr(&self, class_name: Option<&str>, vm: &VirtualMachine) -> PyResult { - let repr = if let Some(class_name) = class_name { - rustpython_common::bytes::repr_with(&self.elements, &[class_name, "("], ")") - } else { - rustpython_common::bytes::repr(&self.elements) - }; - repr.map_err(|err| err.to_pyexception(vm)) + fn new_repr_overflow_error(vm: &VirtualMachine) -> PyBaseExceptionRef { + vm.new_overflow_error("bytes object is too large to make repr".to_owned()) + } + + pub fn repr_with_name(&self, class_name: &str, vm: &VirtualMachine) -> PyResult { + let escape = rustpython_common::escape::AsciiEscape::new_repr(&self.elements); + let len = escape + .layout() + .len + .and_then(|len| (len as isize).checked_add(2 + class_name.len() as isize)) + .ok_or_else(|| Self::new_repr_overflow_error(vm))? as usize; + let mut buf = String::with_capacity(len); + buf.push_str(class_name); + buf.push('('); + escape.bytes_repr().write(&mut buf).unwrap(); + buf.push(')'); + debug_assert_eq!(buf.len(), len); + Ok(buf) + } + + pub fn repr_bytes(&self, vm: &VirtualMachine) -> PyResult { + let escape = rustpython_common::escape::AsciiEscape::new_repr(&self.elements); + let len = escape + .layout() + .len + .ok_or_else(|| Self::new_repr_overflow_error(vm))?; + let mut buf = String::with_capacity(len); + escape.bytes_repr().write(&mut buf).unwrap(); + debug_assert_eq!(buf.len(), len); + Ok(buf) } #[inline] diff --git a/vm/src/exceptions.rs b/vm/src/exceptions.rs index 9de24bb67b..912b212235 100644 --- a/vm/src/exceptions.rs +++ b/vm/src/exceptions.rs @@ -1,5 +1,5 @@ use self::types::{PyBaseException, PyBaseExceptionRef}; -use crate::common::{lock::PyRwLock, str::ReprOverflowError}; +use crate::common::lock::PyRwLock; use crate::object::{Traverse, TraverseFn}; use crate::{ builtins::{ @@ -1535,9 +1535,3 @@ pub(super) mod types { #[derive(Debug)] pub struct PyEncodingWarning {} } - -impl ToPyException for ReprOverflowError { - fn to_pyexception(&self, vm: &VirtualMachine) -> PyBaseExceptionRef { - vm.new_overflow_error(self.to_string()) - } -}