From 0dcb4d4b2e1c8b024fa806250b2647251902817a Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Thu, 20 Feb 2020 19:44:45 -0600 Subject: [PATCH] Reorganize struct module, add struct.Struct --- Lib/struct.py | 15 +++ vm/src/function.rs | 6 ++ vm/src/stdlib/mod.rs | 2 +- vm/src/stdlib/pystruct.rs | 215 ++++++++++++++++++++++---------------- 4 files changed, 148 insertions(+), 90 deletions(-) create mode 100644 Lib/struct.py diff --git a/Lib/struct.py b/Lib/struct.py new file mode 100644 index 0000000000..d6bba58863 --- /dev/null +++ b/Lib/struct.py @@ -0,0 +1,15 @@ +__all__ = [ + # Functions + 'calcsize', 'pack', 'pack_into', 'unpack', 'unpack_from', + 'iter_unpack', + + # Classes + 'Struct', + + # Exceptions + 'error' + ] + +from _struct import * +from _struct import _clearcache +from _struct import __doc__ diff --git a/vm/src/function.rs b/vm/src/function.rs index f2f574fc6f..b993e8ee31 100644 --- a/vm/src/function.rs +++ b/vm/src/function.rs @@ -294,6 +294,12 @@ impl Args { } } +impl AsRef<[T]> for Args { + fn as_ref(&self) -> &[T] { + &self.0 + } +} + impl Args> { pub fn into_tuple(self, vm: &VirtualMachine) -> PyObjectRef { vm.ctx diff --git a/vm/src/stdlib/mod.rs b/vm/src/stdlib/mod.rs index 4289bcc9c9..c709de3e0c 100644 --- a/vm/src/stdlib/mod.rs +++ b/vm/src/stdlib/mod.rs @@ -80,7 +80,7 @@ pub fn get_module_inits() -> HashMap { "regex_crate".to_owned() => Box::new(re::make_module), "_random".to_owned() => Box::new(random::make_module), "_string".to_owned() => Box::new(string::make_module), - "struct".to_owned() => Box::new(pystruct::make_module), + "_struct".to_owned() => Box::new(pystruct::make_module), "_thread".to_owned() => Box::new(thread::make_module), "time".to_owned() => Box::new(time_module::make_module), "_weakref".to_owned() => Box::new(weakref::make_module), diff --git a/vm/src/stdlib/pystruct.rs b/vm/src/stdlib/pystruct.rs index 2c61784da6..abba7398ba 100644 --- a/vm/src/stdlib/pystruct.rs +++ b/vm/src/stdlib/pystruct.rs @@ -14,21 +14,13 @@ use std::iter::Peekable; use byteorder::{ReadBytesExt, WriteBytesExt}; -use crate::function::PyFuncArgs; +use crate::function::Args; use crate::obj::{ - objbytes::PyBytesRef, - objstr::{self, PyStringRef}, - objtype, + objbytes::PyBytesRef, objstr::PyStringRef, objtuple::PyTuple, objtype::PyClassRef, }; -use crate::pyobject::{PyObjectRef, PyResult, TryFromObject}; +use crate::pyobject::{PyClassImpl, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject}; use crate::VirtualMachine; -#[derive(Debug)] -struct FormatSpec { - endianness: Endianness, - codes: Vec, -} - #[derive(Debug)] enum Endianness { Native, @@ -56,16 +48,78 @@ impl FormatCode { } } -fn parse_format_string(fmt: String) -> Result { - let mut chars = fmt.chars().peekable(); +#[derive(Debug)] +struct FormatSpec { + endianness: Endianness, + codes: Vec, +} - // First determine "<", ">","!" or "=" - let endianness = parse_endiannes(&mut chars); +impl FormatSpec { + fn parse(fmt: &str) -> Result { + let mut chars = fmt.chars().peekable(); - // Now, analyze struct string furter: - let codes = parse_format_codes(&mut chars)?; + // First determine "<", ">","!" or "=" + let endianness = parse_endiannes(&mut chars); - Ok(FormatSpec { endianness, codes }) + // Now, analyze struct string furter: + let codes = parse_format_codes(&mut chars)?; + + Ok(FormatSpec { endianness, codes }) + } + + fn pack(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult> { + if self.codes.len() != args.len() { + return Err(vm.new_exception_msg( + vm.try_class("_struct", "error")?, + format!( + "pack expected {} items for packing (got {})", + self.codes.len(), + args.len() + ), + )); + } + + // Create data vector: + let mut data = Vec::::new(); + // Loop over all opcodes: + for (code, arg) in self.codes.iter().zip(args.iter()) { + debug!("code: {:?}", code); + match self.endianness { + Endianness::Little => { + pack_item::(vm, code, arg, &mut data)? + } + Endianness::Big => pack_item::(vm, code, arg, &mut data)?, + Endianness::Network => { + pack_item::(vm, code, arg, &mut data)? + } + Endianness::Native => { + pack_item::(vm, code, arg, &mut data)? + } + } + } + + Ok(data) + } + + fn unpack(&self, data: &[u8], vm: &VirtualMachine) -> PyResult { + let mut rdr = Cursor::new(data); + + let mut items = vec![]; + for code in &self.codes { + debug!("unpack code: {:?}", code); + let item = match self.endianness { + Endianness::Little => unpack_code::(vm, &code, &mut rdr)?, + Endianness::Big => unpack_code::(vm, &code, &mut rdr)?, + Endianness::Network => { + unpack_code::(vm, &code, &mut rdr)? + } + Endianness::Native => unpack_code::(vm, &code, &mut rdr)?, + }; + items.push(item); + } + + Ok(PyTuple::from(items)) + } } /// Parse endianness @@ -222,53 +276,9 @@ where Ok(()) } -fn struct_pack(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { - if args.args.is_empty() { - Err(vm.new_type_error(format!( - "Expected at least 1 argument (got: {})", - args.args.len() - ))) - } else { - let fmt_arg = args.args[0].clone(); - if objtype::isinstance(&fmt_arg, &vm.ctx.str_type()) { - let fmt_str = objstr::clone_value(&fmt_arg); - - let format_spec = parse_format_string(fmt_str).map_err(|e| vm.new_value_error(e))?; - - if format_spec.codes.len() + 1 == args.args.len() { - // Create data vector: - let mut data = Vec::::new(); - // Loop over all opcodes: - for (code, arg) in format_spec.codes.iter().zip(args.args.iter().skip(1)) { - debug!("code: {:?}", code); - match format_spec.endianness { - Endianness::Little => { - pack_item::(vm, code, arg, &mut data)? - } - Endianness::Big => { - pack_item::(vm, code, arg, &mut data)? - } - Endianness::Network => { - pack_item::(vm, code, arg, &mut data)? - } - Endianness::Native => { - pack_item::(vm, code, arg, &mut data)? - } - } - } - - Ok(vm.ctx.new_bytes(data)) - } else { - Err(vm.new_type_error(format!( - "Expected {} arguments (got: {})", - format_spec.codes.len() + 1, - args.args.len() - ))) - } - } else { - Err(vm.new_type_error("First argument must be of str type".to_owned())) - } - } +fn struct_pack(fmt: PyStringRef, args: Args, vm: &VirtualMachine) -> PyResult> { + let format_spec = FormatSpec::parse(fmt.as_str()).map_err(|e| vm.new_value_error(e))?; + format_spec.pack(args.as_ref(), vm) } fn unpack_i8(vm: &VirtualMachine, rdr: &mut dyn Read) -> PyResult { @@ -372,26 +382,10 @@ where } } -fn struct_unpack(fmt: PyStringRef, buffer: PyBytesRef, vm: &VirtualMachine) -> PyResult { - let fmt_str = fmt.as_str().to_owned(); - - let format_spec = parse_format_string(fmt_str).map_err(|e| vm.new_value_error(e))?; - let data = buffer.get_value().to_vec(); - let mut rdr = Cursor::new(data); - - let mut items = vec![]; - for code in format_spec.codes { - debug!("unpack code: {:?}", code); - let item = match format_spec.endianness { - Endianness::Little => unpack_code::(vm, &code, &mut rdr)?, - Endianness::Big => unpack_code::(vm, &code, &mut rdr)?, - Endianness::Network => unpack_code::(vm, &code, &mut rdr)?, - Endianness::Native => unpack_code::(vm, &code, &mut rdr)?, - }; - items.push(item); - } - - Ok(vm.ctx.new_tuple(items)) +fn struct_unpack(fmt: PyStringRef, buffer: PyBytesRef, vm: &VirtualMachine) -> PyResult { + let fmt_str = fmt.as_str(); + let format_spec = FormatSpec::parse(fmt_str).map_err(|e| vm.new_value_error(e))?; + format_spec.unpack(buffer.get_value(), vm) } fn unpack_code(vm: &VirtualMachine, code: &FormatCode, rdr: &mut dyn Read) -> PyResult @@ -417,20 +411,63 @@ where } fn struct_calcsize(fmt: PyStringRef, vm: &VirtualMachine) -> PyResult { - let fmt_str = fmt.as_str().to_owned(); - let format_spec = parse_format_string(fmt_str).map_err(|e| vm.new_value_error(e))?; + let fmt_str = fmt.as_str(); + let format_spec = FormatSpec::parse(fmt_str).map_err(|e| vm.new_value_error(e))?; Ok(format_spec.codes.iter().map(|code| code.size()).sum()) } +#[pyclass(name = "Struct")] +#[derive(Debug)] +struct PyStruct { + spec: FormatSpec, + fmt_str: PyStringRef, +} + +impl PyValue for PyStruct { + fn class(vm: &VirtualMachine) -> PyClassRef { + vm.class("_struct", "Struct") + } +} + +#[pyimpl] +impl PyStruct { + #[pyslot] + fn tp_new(cls: PyClassRef, fmt_str: PyStringRef, vm: &VirtualMachine) -> PyResult> { + let spec = FormatSpec::parse(fmt_str.as_str()).map_err(|e| vm.new_value_error(e))?; + + PyStruct { spec, fmt_str }.into_ref_with_type(vm, cls) + } + + #[pyproperty] + fn format(&self) -> PyStringRef { + self.fmt_str.clone() + } + + #[pymethod] + fn pack(&self, args: Args, vm: &VirtualMachine) -> PyResult> { + self.spec.pack(args.as_ref(), vm) + } + #[pymethod] + fn unpack(&self, data: PyBytesRef, vm: &VirtualMachine) -> PyResult { + self.spec.unpack(data.get_value(), vm) + } +} + +// seems weird that this is part of the "public" API, but whatever +// TODO: implement a format code->spec cache like CPython does? +fn clearcache() {} + pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { let ctx = &vm.ctx; - let struct_error = ctx.new_class("struct.error", ctx.object()); + let struct_error = ctx.new_class("struct.error", ctx.exceptions.exception_type.clone()); - py_module!(vm, "struct", { + py_module!(vm, "_struct", { + "_clearcache" => ctx.new_function(clearcache), "pack" => ctx.new_function(struct_pack), "unpack" => ctx.new_function(struct_unpack), "calcsize" => ctx.new_function(struct_calcsize), "error" => struct_error, + "Struct" => PyStruct::make_class(ctx), }) }