From 6070af32406a78c9b15a2a285eba6b9d34f4f128 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Thu, 7 Oct 2021 22:24:02 +0900 Subject: [PATCH 1/2] Use buffer args for _struct --- vm/src/builtins/memory.rs | 2 +- vm/src/stdlib/pystruct.rs | 82 +++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 48 deletions(-) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index d7cdb7b246..4f9b40d610 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -74,7 +74,7 @@ impl PyMemoryView { } fn parse_format(format: &str, vm: &VirtualMachine) -> PyResult { - FormatSpec::parse(format, vm) + FormatSpec::parse(format.as_bytes(), vm) } pub fn from_buffer(buffer: PyBuffer, vm: &VirtualMachine) -> PyResult { diff --git a/vm/src/stdlib/pystruct.rs b/vm/src/stdlib/pystruct.rs index 0e7348b82b..27dfd3957d 100644 --- a/vm/src/stdlib/pystruct.rs +++ b/vm/src/stdlib/pystruct.rs @@ -14,10 +14,11 @@ pub(crate) mod _struct { use crate::{ builtins::{float, PyBaseExceptionRef, PyBytesRef, PyStr, PyStrRef, PyTupleRef, PyTypeRef}, common::str::wchar_t, - function::{ArgBytesLike, ArgIntoBool, ArgMemoryBuffer, IntoPyObject, PosArgs}, + function::{ + ArgAsciiBuffer, ArgBytesLike, ArgIntoBool, ArgMemoryBuffer, IntoPyObject, PosArgs, + }, protocol::PyIterReturn, slots::{IteratorIterable, SlotConstructor, SlotIterator}, - utils::Either, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; @@ -211,24 +212,8 @@ pub(crate) mod _struct { } impl FormatSpec { - fn decode_and_parse( - vm: &VirtualMachine, - fmt: &Either, - ) -> PyResult { - let decoded_fmt = match fmt { - Either::A(string) => string.as_str(), - Either::B(bytes) if bytes.is_ascii() => std::str::from_utf8(bytes).unwrap(), - _ => { - return Err(vm.new_unicode_decode_error( - "Struct format must be a ascii string".to_owned(), - )) - } - }; - FormatSpec::parse(decoded_fmt, vm) - } - - pub fn parse(fmt: &str, vm: &VirtualMachine) -> PyResult { - let mut chars = fmt.bytes().peekable(); + pub fn parse(fmt: &[u8], vm: &VirtualMachine) -> PyResult { + let mut chars = fmt.iter().copied().peekable(); // First determine "@", "<", ">","!" or "=" let endianness = parse_endianness(&mut chars); @@ -399,10 +384,10 @@ pub(crate) mod _struct { let mut repeat = 0isize; while let Some(b'0'..=b'9') = chars.peek() { if let Some(c) = chars.next() { - let current_digit = (c as char).to_digit(10).unwrap() as isize; + let current_digit = c - b'0'; repeat = repeat .checked_mul(10) - .and_then(|r| r.checked_add(current_digit)) + .and_then(|r| r.checked_add(current_digit as _)) .ok_or_else(|| OVERFLOW_MSG.to_owned())?; } } @@ -486,12 +471,18 @@ pub(crate) mod _struct { } buffer_len - (-offset as usize) } else { - if offset as usize >= buffer_len { + let offset = offset as usize; + let (op, op_action) = if is_pack { + ("pack_into", "packing") + } else { + ("unpack_from", "unpacking") + }; + if offset >= buffer_len { let msg = format!( "{op} requires a buffer of at least {required} bytes for {op_action} {needed} \ bytes at offset {offset} (actual buffer size is {buffer_len})", - op = if is_pack { "pack_into" } else { "unpack_from" }, - op_action = if is_pack { "packing" } else { "unpacking" }, + op = op, + op_action = op_action, required = needed + offset as usize, needed = needed, offset = offset, @@ -499,7 +490,7 @@ pub(crate) mod _struct { ); return Err(new_struct_error(vm, msg)); } - offset as usize + offset }; if (buffer_len - offset_from_start) < needed { @@ -717,24 +708,20 @@ pub(crate) mod _struct { } #[pyfunction] - fn pack( - fmt: Either, - args: PosArgs, - vm: &VirtualMachine, - ) -> PyResult> { - let format_spec = FormatSpec::decode_and_parse(vm, &fmt)?; + fn pack(fmt: ArgAsciiBuffer, args: PosArgs, vm: &VirtualMachine) -> PyResult> { + let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; format_spec.pack(args.into_vec(), vm) } #[pyfunction] fn pack_into( - fmt: Either, + fmt: ArgAsciiBuffer, buffer: ArgMemoryBuffer, offset: isize, args: PosArgs, vm: &VirtualMachine, ) -> PyResult<()> { - let format_spec = FormatSpec::decode_and_parse(vm, &fmt)?; + let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; let offset = get_buffer_offset(buffer.len(), offset, format_spec.size, true, vm)?; buffer.with_ref(|data| format_spec.pack_into(&mut data[offset..], args.into_vec(), vm)) } @@ -757,11 +744,11 @@ pub(crate) mod _struct { #[pyfunction] fn unpack( - fmt: Either, + fmt: ArgAsciiBuffer, buffer: ArgBytesLike, vm: &VirtualMachine, ) -> PyResult { - let format_spec = FormatSpec::decode_and_parse(vm, &fmt)?; + let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; buffer.with_ref(|buf| format_spec.unpack(buf, vm)) } @@ -774,11 +761,11 @@ pub(crate) mod _struct { #[pyfunction] fn unpack_from( - fmt: Either, + fmt: ArgAsciiBuffer, args: UpdateFromArgs, vm: &VirtualMachine, ) -> PyResult { - let format_spec = FormatSpec::decode_and_parse(vm, &fmt)?; + let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; let offset = get_buffer_offset(args.buffer.len(), args.offset, format_spec.size, false, vm)?; args.buffer @@ -849,17 +836,17 @@ pub(crate) mod _struct { #[pyfunction] fn iter_unpack( - fmt: Either, + fmt: ArgAsciiBuffer, buffer: ArgBytesLike, vm: &VirtualMachine, ) -> PyResult { - let format_spec = FormatSpec::decode_and_parse(vm, &fmt)?; + let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; UnpackIterator::new(vm, format_spec, buffer) } #[pyfunction] - fn calcsize(fmt: Either, vm: &VirtualMachine) -> PyResult { - let format_spec = FormatSpec::decode_and_parse(vm, &fmt)?; + fn calcsize(fmt: ArgAsciiBuffer, vm: &VirtualMachine) -> PyResult { + let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; Ok(format_spec.size) } @@ -872,14 +859,15 @@ pub(crate) mod _struct { } impl SlotConstructor for PyStruct { - type Args = Either; + type Args = ArgAsciiBuffer; fn py_new(cls: PyTypeRef, fmt: Self::Args, vm: &VirtualMachine) -> PyResult { - let spec = FormatSpec::decode_and_parse(vm, &fmt)?; + let spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; let fmt_str = match fmt { - Either::A(s) => s, - Either::B(b) => PyStr::from(std::str::from_utf8(b.as_bytes()).unwrap()) - .into_ref_with_type(vm, vm.ctx.types.str_type.clone())?, + ArgAsciiBuffer::String(s) => s, + buffer => buffer + .with_ref(|bytes| PyStr::from(std::str::from_utf8(bytes).unwrap())) + .into_ref(vm), }; PyStruct { spec, fmt_str }.into_pyresult_with_type(vm, cls) } From fbbefed81379f7fe6ba3e869e53d941e7f80ae00 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 10 Oct 2021 20:09:26 +0900 Subject: [PATCH 2/2] pystruct::IntoStructFormatBytes to take str or bytes for format --- vm/src/builtins/pystr.rs | 2 +- vm/src/stdlib/pystruct.rs | 86 +++++++++++++++++++++++++-------------- 2 files changed, 57 insertions(+), 31 deletions(-) diff --git a/vm/src/builtins/pystr.rs b/vm/src/builtins/pystr.rs index acf788111f..92c9d29d1c 100644 --- a/vm/src/builtins/pystr.rs +++ b/vm/src/builtins/pystr.rs @@ -283,7 +283,7 @@ impl PyStr { } /// SAFETY: Given 'bytes' must be ascii - unsafe fn new_ascii_unchecked(bytes: Vec) -> Self { + pub(crate) unsafe fn new_ascii_unchecked(bytes: Vec) -> Self { Self::new_str_unchecked(bytes, PyStrKind::Ascii) } diff --git a/vm/src/stdlib/pystruct.rs b/vm/src/stdlib/pystruct.rs index 27dfd3957d..b16df69e63 100644 --- a/vm/src/stdlib/pystruct.rs +++ b/vm/src/stdlib/pystruct.rs @@ -12,14 +12,14 @@ #[pymodule] pub(crate) mod _struct { use crate::{ - builtins::{float, PyBaseExceptionRef, PyBytesRef, PyStr, PyStrRef, PyTupleRef, PyTypeRef}, - common::str::wchar_t, - function::{ - ArgAsciiBuffer, ArgBytesLike, ArgIntoBool, ArgMemoryBuffer, IntoPyObject, PosArgs, + builtins::{ + float, PyBaseExceptionRef, PyBytes, PyBytesRef, PyStr, PyStrRef, PyTupleRef, PyTypeRef, }, + common::str::wchar_t, + function::{ArgBytesLike, ArgIntoBool, ArgMemoryBuffer, IntoPyObject, PosArgs}, protocol::PyIterReturn, slots::{IteratorIterable, SlotConstructor, SlotIterator}, - PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, VirtualMachine, + PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; use half::f16; @@ -203,6 +203,39 @@ pub(crate) mod _struct { const OVERFLOW_MSG: &str = "total struct size too long"; + struct IntoStructFormatBytes(PyStrRef); + + impl TryFromObject for IntoStructFormatBytes { + fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { + // CPython turns str to bytes but we do reversed way here + // The only performance difference is this transition cost + let fmt = match_class! { + match obj { + s @ PyStr => if s.is_ascii() { + Some(s) + } else { + None + }, + b @ PyBytes => if b.is_ascii() { + Some(unsafe { + PyStr::new_ascii_unchecked(b.as_bytes().to_vec()) + }.into_ref(vm)) + } else { + None + }, + other => return Err(vm.new_type_error(format!("Struct() argument 1 must be a str or bytes object, not {}", other.class().name()))), + } + }.ok_or_else(|| vm.new_unicode_decode_error("Struct format must be a ascii string".to_owned()))?; + Ok(IntoStructFormatBytes(fmt)) + } + } + + impl IntoStructFormatBytes { + fn format_spec(&self, vm: &VirtualMachine) -> PyResult { + FormatSpec::parse(self.0.as_str().as_bytes(), vm) + } + } + #[derive(Debug, Clone)] pub(crate) struct FormatSpec { endianness: Endianness, @@ -708,20 +741,19 @@ pub(crate) mod _struct { } #[pyfunction] - fn pack(fmt: ArgAsciiBuffer, args: PosArgs, vm: &VirtualMachine) -> PyResult> { - let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; - format_spec.pack(args.into_vec(), vm) + fn pack(fmt: IntoStructFormatBytes, args: PosArgs, vm: &VirtualMachine) -> PyResult> { + fmt.format_spec(vm)?.pack(args.into_vec(), vm) } #[pyfunction] fn pack_into( - fmt: ArgAsciiBuffer, + fmt: IntoStructFormatBytes, buffer: ArgMemoryBuffer, offset: isize, args: PosArgs, vm: &VirtualMachine, ) -> PyResult<()> { - let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; + let format_spec = fmt.format_spec(vm)?; let offset = get_buffer_offset(buffer.len(), offset, format_spec.size, true, vm)?; buffer.with_ref(|data| format_spec.pack_into(&mut data[offset..], args.into_vec(), vm)) } @@ -744,11 +776,11 @@ pub(crate) mod _struct { #[pyfunction] fn unpack( - fmt: ArgAsciiBuffer, + fmt: IntoStructFormatBytes, buffer: ArgBytesLike, vm: &VirtualMachine, ) -> PyResult { - let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; + let format_spec = fmt.format_spec(vm)?; buffer.with_ref(|buf| format_spec.unpack(buf, vm)) } @@ -761,11 +793,11 @@ pub(crate) mod _struct { #[pyfunction] fn unpack_from( - fmt: ArgAsciiBuffer, + fmt: IntoStructFormatBytes, args: UpdateFromArgs, vm: &VirtualMachine, ) -> PyResult { - let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; + let format_spec = fmt.format_spec(vm)?; let offset = get_buffer_offset(args.buffer.len(), args.offset, format_spec.size, false, vm)?; args.buffer @@ -836,18 +868,17 @@ pub(crate) mod _struct { #[pyfunction] fn iter_unpack( - fmt: ArgAsciiBuffer, + fmt: IntoStructFormatBytes, buffer: ArgBytesLike, vm: &VirtualMachine, ) -> PyResult { - let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; + let format_spec = fmt.format_spec(vm)?; UnpackIterator::new(vm, format_spec, buffer) } #[pyfunction] - fn calcsize(fmt: ArgAsciiBuffer, vm: &VirtualMachine) -> PyResult { - let format_spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; - Ok(format_spec.size) + fn calcsize(fmt: IntoStructFormatBytes, vm: &VirtualMachine) -> PyResult { + Ok(fmt.format_spec(vm)?.size) } #[pyattr] @@ -855,21 +886,16 @@ pub(crate) mod _struct { #[derive(Debug, PyValue)] struct PyStruct { spec: FormatSpec, - fmt_str: PyStrRef, + format: PyStrRef, } impl SlotConstructor for PyStruct { - type Args = ArgAsciiBuffer; + type Args = IntoStructFormatBytes; fn py_new(cls: PyTypeRef, fmt: Self::Args, vm: &VirtualMachine) -> PyResult { - let spec = fmt.with_ref(|bytes| FormatSpec::parse(bytes, vm))?; - let fmt_str = match fmt { - ArgAsciiBuffer::String(s) => s, - buffer => buffer - .with_ref(|bytes| PyStr::from(std::str::from_utf8(bytes).unwrap())) - .into_ref(vm), - }; - PyStruct { spec, fmt_str }.into_pyresult_with_type(vm, cls) + let spec = fmt.format_spec(vm)?; + let format = fmt.0; + PyStruct { spec, format }.into_pyresult_with_type(vm, cls) } } @@ -877,7 +903,7 @@ pub(crate) mod _struct { impl PyStruct { #[pyproperty] fn format(&self) -> PyStrRef { - self.fmt_str.clone() + self.format.clone() } #[pyproperty]