From 456bc80697713158a5e5e2c379ef0a86df10c147 Mon Sep 17 00:00:00 2001 From: Noa Date: Fri, 13 Jan 2023 22:25:43 -0600 Subject: [PATCH] Rework frozen modules and directly deserialize to CodeObject --- compiler/core/src/bytecode.rs | 167 ++++++++++++++++++---------- compiler/core/src/marshal.rs | 33 ++++-- derive-impl/src/compile_bytecode.rs | 35 +++--- jit/tests/common.rs | 1 + pylib/src/lib.rs | 5 +- src/interpreter.rs | 2 +- vm/src/builtins/code.rs | 23 +++- vm/src/frozen.rs | 21 ++-- vm/src/import.rs | 2 +- vm/src/stdlib/imp.rs | 17 +-- vm/src/vm/mod.rs | 6 +- wasm/lib/src/vm_class.rs | 2 +- 12 files changed, 198 insertions(+), 116 deletions(-) diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index 3b3203186b..c01c4b80f0 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -1,7 +1,6 @@ //! Implement python as a virtual machine with bytecodes. This module //! implements bytecode structure. -use crate::marshal::MarshalError; use crate::{marshal, Location}; use bitflags::bitflags; use itertools::Itertools; @@ -46,6 +45,19 @@ pub trait ConstantBag: Sized + Copy { fn make_name(&self, name: &str) -> ::Name; } +pub trait AsBag { + type Bag: ConstantBag; + #[allow(clippy::wrong_self_convention)] + fn as_bag(self) -> Self::Bag; +} + +impl AsBag for Bag { + type Bag = Self; + fn as_bag(self) -> Self { + self + } +} + #[derive(Clone, Copy)] pub struct BasicBag; @@ -1077,27 +1089,6 @@ impl CodeObject { } } -impl CodeObject { - /// Load a code object from bytes - pub fn from_bytes(data: &[u8]) -> Result { - use lz4_flex::block::DecompressError; - let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e { - DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => { - MarshalError::Eof - } - _ => MarshalError::InvalidBytecode, - })?; - marshal::deserialize_code(&mut &raw_bincode[..], BasicBag) - } - - /// Serialize this bytecode to bytes. - pub fn to_bytes(&self) -> Vec { - let mut data = Vec::new(); - marshal::serialize_code(&mut data, self); - lz4_flex::compress_prepend_size(&data) - } -} - impl fmt::Display for CodeObject { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.display_inner(f, false, 1)?; @@ -1483,32 +1474,81 @@ impl fmt::Debug for CodeObject { } } -/// A frozen module. Holds a code object and whether it is part of a package -#[derive(Debug)] -pub struct FrozenModule { - pub code: CodeObject, - pub package: bool, -} - pub mod frozen_lib { use super::*; - use marshal::{Read, Write}; + use marshal::{Read, ReadBorrowed, Write}; - /// Decode a library to a iterable of frozen modules - pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter { - let data = lz4_flex::decompress_size_prepended(bytes).unwrap(); - let mut data = marshal::Cursor { data, position: 0 }; - let remaining = data.read_u32().unwrap(); - FrozenModulesIter { remaining, data } + /// A frozen module. Holds a frozen code object and whether it is part of a package + #[derive(Copy, Clone)] + pub struct FrozenModule { + pub code: FrozenCodeObject, + pub package: bool, } - pub struct FrozenModulesIter { + #[derive(Copy, Clone)] + pub struct FrozenCodeObject { + pub bytes: B, + } + + impl> FrozenCodeObject { + /// Decode a frozen codeobject + #[inline] + pub fn decode( + &self, + bag: Bag, + ) -> CodeObject<::Constant> { + Self::_decode(self.bytes.as_ref(), bag.as_bag()) + } + fn _decode(data: &[u8], bag: Bag) -> CodeObject { + let decompressed = lz4_flex::decompress_size_prepended(data) + .expect("deserialize frozen CodeObject failed"); + marshal::deserialize_code(&mut &decompressed[..], bag) + .expect("deserializing frozen CodeObject failed") + } + } + + impl FrozenCodeObject> { + pub fn encode(code: &CodeObject) -> Self { + let mut data = Vec::new(); + marshal::serialize_code(&mut data, code); + let bytes = lz4_flex::compress_prepend_size(&data); + FrozenCodeObject { bytes } + } + } + + #[repr(transparent)] + pub struct FrozenLib { + pub bytes: B, + } + + impl + ?Sized> FrozenLib { + pub const fn from_ref(b: &B) -> &FrozenLib { + unsafe { &*(b as *const B as *const FrozenLib) } + } + + /// Decode a library to a iterable of frozen modules + pub fn decode(&self) -> FrozenModulesIter<'_> { + let mut data = self.bytes.as_ref(); + let remaining = data.read_u32().unwrap(); + FrozenModulesIter { remaining, data } + } + } + + impl<'a, B: AsRef<[u8]> + ?Sized> IntoIterator for &'a FrozenLib { + type Item = (&'a str, FrozenModule<&'a [u8]>); + type IntoIter = FrozenModulesIter<'a>; + fn into_iter(self) -> Self::IntoIter { + self.decode() + } + } + + pub struct FrozenModulesIter<'a> { remaining: u32, - data: marshal::Cursor>, + data: &'a [u8], } - impl Iterator for FrozenModulesIter { - type Item = (String, FrozenModule); + impl<'a> Iterator for FrozenModulesIter<'a> { + type Item = (&'a str, FrozenModule<&'a [u8]>); fn next(&mut self) -> Option { if self.remaining > 0 { @@ -1524,31 +1564,37 @@ pub mod frozen_lib { (self.remaining as usize, Some(self.remaining as usize)) } } - impl ExactSizeIterator for FrozenModulesIter {} + impl ExactSizeIterator for FrozenModulesIter<'_> {} - fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> { + fn read_entry<'a>( + rdr: &mut &'a [u8], + ) -> Result<(&'a str, FrozenModule<&'a [u8]>), marshal::MarshalError> { let len = rdr.read_u32()?; - let name = rdr.read_str(len)?.to_owned(); - let code = marshal::deserialize_code(rdr, BasicBag)?; + let name = rdr.read_str_borrow(len)?; + let len = rdr.read_u32()?; + let code_slice = rdr.read_slice_borrow(len)?; + let code = FrozenCodeObject { bytes: code_slice }; let package = rdr.read_u8()? != 0; Ok((name, FrozenModule { code, package })) } - /// Encode the given iterator of frozen modules into a compressed vector of bytes - pub fn encode_lib<'a, I>(lib: I) -> Vec - where - I: IntoIterator, - I::IntoIter: ExactSizeIterator + Clone, - { - let iter = lib.into_iter(); - let mut data = Vec::new(); - write_lib(&mut data, iter); - lz4_flex::compress_prepend_size(&data) + impl FrozenLib> { + /// Encode the given iterator of frozen modules into a compressed vector of bytes + pub fn encode<'a, I, B: AsRef<[u8]>>(lib: I) -> FrozenLib> + where + I: IntoIterator)>, + I::IntoIter: ExactSizeIterator + Clone, + { + let iter = lib.into_iter(); + let mut bytes = Vec::new(); + write_lib(&mut bytes, iter); + Self { bytes } + } } - fn write_lib<'a>( - buf: &mut impl Write, - lib: impl ExactSizeIterator, + fn write_lib<'a, B: AsRef<[u8]>>( + buf: &mut Vec, + lib: impl ExactSizeIterator)>, ) { marshal::write_len(buf, lib.len()); for (name, module) in lib { @@ -1556,10 +1602,9 @@ pub mod frozen_lib { } } - fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) { - marshal::write_len(buf, name.len()); - buf.write_slice(name.as_bytes()); - marshal::serialize_code(buf, &module.code); + fn write_entry(buf: &mut Vec, name: &str, module: FrozenModule>) { + marshal::write_vec(buf, name.as_bytes()); + marshal::write_vec(buf, module.code.bytes.as_ref()); buf.write_u8(module.package as u8); } } diff --git a/compiler/core/src/marshal.rs b/compiler/core/src/marshal.rs index 69d887934c..e9f962fd14 100644 --- a/compiler/core/src/marshal.rs +++ b/compiler/core/src/marshal.rs @@ -130,8 +130,21 @@ pub trait Read { } } +pub(crate) trait ReadBorrowed<'a>: Read { + fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>; + fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> { + Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?) + } +} + impl Read for &[u8] { fn read_slice(&mut self, n: u32) -> Result<&[u8]> { + self.read_slice_borrow(n) + } +} + +impl<'a> ReadBorrowed<'a> for &'a [u8] { + fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> { let data = self.get(..n as usize).ok_or(MarshalError::Eof)?; *self = &self[n as usize..]; Ok(data) @@ -474,6 +487,11 @@ pub(crate) fn write_len(buf: &mut W, len: usize) { buf.write_u32(len); } +pub(crate) fn write_vec(buf: &mut W, slice: &[u8]) { + write_len(buf, slice.len()); + buf.write_slice(slice); +} + pub fn serialize_value( buf: &mut W, constant: DumpableValue<'_, D>, @@ -501,13 +519,11 @@ pub fn serialize_value( } DumpableValue::Str(s) => { buf.write_u8(Type::Unicode as u8); - write_len(buf, s.len()); - buf.write_slice(s.as_bytes()); + write_vec(buf, s.as_bytes()); } DumpableValue::Bytes(b) => { buf.write_u8(Type::Bytes as u8); - write_len(buf, b.len()); - buf.write_slice(b); + write_vec(buf, b); } DumpableValue::Code(c) => { buf.write_u8(Type::Code as u8); @@ -580,14 +596,12 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) buf.write_u32(code.arg_count); buf.write_u32(code.kwonlyarg_count); - write_len(buf, code.source_path.as_ref().len()); - buf.write_slice(code.source_path.as_ref().as_bytes()); + write_vec(buf, code.source_path.as_ref().as_bytes()); buf.write_u32(code.first_line_number); buf.write_u32(code.max_stackdepth); - write_len(buf, code.obj_name.as_ref().len()); - buf.write_slice(code.obj_name.as_ref().as_bytes()); + write_vec(buf, code.obj_name.as_ref().as_bytes()); let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]); write_len(buf, cell2arg.len()); @@ -603,8 +617,7 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) let mut write_names = |names: &[C::Name]| { write_len(buf, names.len()); for name in names { - write_len(buf, name.as_ref().len()); - buf.write_slice(name.as_ref().as_bytes()); + write_vec(buf, name.as_ref().as_bytes()); } }; diff --git a/derive-impl/src/compile_bytecode.rs b/derive-impl/src/compile_bytecode.rs index e72ad2bf9d..30962326ed 100644 --- a/derive-impl/src/compile_bytecode.rs +++ b/derive-impl/src/compile_bytecode.rs @@ -17,7 +17,7 @@ use crate::{extract_spans, Diagnostic}; use once_cell::sync::Lazy; use proc_macro2::{Span, TokenStream}; use quote::quote; -use rustpython_compiler_core::{CodeObject, FrozenModule, Mode}; +use rustpython_compiler_core::{frozen_lib, CodeObject, Mode}; use std::{ collections::HashMap, env, fs, @@ -44,6 +44,11 @@ enum CompilationSourceKind { Dir(PathBuf), } +struct CompiledModule { + code: CodeObject, + package: bool, +} + struct CompilationSource { kind: CompilationSourceKind, span: (Span, Span), @@ -80,7 +85,7 @@ impl CompilationSource { mode: Mode, module_name: String, compiler: &dyn Compiler, - ) -> Result, Diagnostic> { + ) -> Result, Diagnostic> { match &self.kind { CompilationSourceKind::Dir(rel_path) => self.compile_dir( &CARGO_MANIFEST_DIR.join(rel_path), @@ -89,7 +94,7 @@ impl CompilationSource { compiler, ), _ => Ok(hashmap! { - module_name.clone() => FrozenModule { + module_name.clone() => CompiledModule { code: self.compile_single(mode, module_name, compiler)?, package: false, }, @@ -131,7 +136,7 @@ impl CompilationSource { parent: String, mode: Mode, compiler: &dyn Compiler, - ) -> Result, Diagnostic> { + ) -> Result, Diagnostic> { let mut code_map = HashMap::new(); let paths = fs::read_dir(path) .or_else(|e| { @@ -217,7 +222,7 @@ impl CompilationSource { code_map.insert( module_name, - FrozenModule { + CompiledModule { code, package: is_init, }, @@ -369,12 +374,11 @@ pub fn impl_py_compile( .source .compile_single(args.mode, args.module_name, compiler)?; - let bytes = code.to_bytes(); - let bytes = LitByteStr::new(&bytes, Span::call_site()); + let frozen = frozen_lib::FrozenCodeObject::encode(&code); + let bytes = LitByteStr::new(&frozen.bytes, Span::call_site()); let output = quote! { - #crate_name::CodeObject::from_bytes(#bytes) - .expect("Deserializing CodeObject failed") + #crate_name::frozen_lib::FrozenCodeObject { bytes: &#bytes[..] } }; Ok(output) @@ -390,12 +394,17 @@ pub fn impl_py_freeze( let crate_name = args.crate_name; let code_map = args.source.compile(args.mode, args.module_name, compiler)?; - let data = - rustpython_compiler_core::frozen_lib::encode_lib(code_map.iter().map(|(k, v)| (&**k, v))); - let bytes = LitByteStr::new(&data, Span::call_site()); + let data = frozen_lib::FrozenLib::encode(code_map.iter().map(|(k, v)| { + let v = frozen_lib::FrozenModule { + code: frozen_lib::FrozenCodeObject::encode(&v.code), + package: v.package, + }; + (&**k, v) + })); + let bytes = LitByteStr::new(&data.bytes, Span::call_site()); let output = quote! { - #crate_name::frozen_lib::decode_lib(#bytes) + #crate_name::frozen_lib::FrozenLib::from_ref(#bytes) }; Ok(output) diff --git a/jit/tests/common.rs b/jit/tests/common.rs index f2f8eabc31..6b96c14185 100644 --- a/jit/tests/common.rs +++ b/jit/tests/common.rs @@ -165,6 +165,7 @@ macro_rules! jit_function { crate_name = "rustpython_compiler_core", source = $($t)* ); + let code = code.decode(rustpython_compiler_core::BasicBag); let mut machine = $crate::common::StackMachine::new(); machine.run(code); machine.get_function(stringify!($func_name)).compile() diff --git a/pylib/src/lib.rs b/pylib/src/lib.rs index 7f52aca999..7ae9a4b23f 100644 --- a/pylib/src/lib.rs +++ b/pylib/src/lib.rs @@ -10,6 +10,5 @@ pub const LIB_PATH: &str = match option_env!("win_lib_path") { }; #[cfg(feature = "freeze-stdlib")] -pub fn frozen_stdlib() -> impl Iterator { - rustpython_derive::py_freeze!(dir = "./Lib", crate_name = "rustpython_compiler_core") -} +pub const FROZEN_STDLIB: &rustpython_compiler_core::frozen_lib::FrozenLib = + rustpython_derive::py_freeze!(dir = "./Lib", crate_name = "rustpython_compiler_core"); diff --git a/src/interpreter.rs b/src/interpreter.rs index 33eeb80884..89b0bc6e00 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -41,7 +41,7 @@ pub fn init_stdlib(vm: &mut VirtualMachine) { // if we're on freeze-stdlib, the core stdlib modules will be included anyway #[cfg(feature = "freeze-stdlib")] - vm.add_frozen(rustpython_pylib::frozen_stdlib()); + vm.add_frozen(rustpython_pylib::FROZEN_STDLIB); #[cfg(not(feature = "freeze-stdlib"))] { diff --git a/vm/src/builtins/code.rs b/vm/src/builtins/code.rs index fc987ea001..e0296fa31e 100644 --- a/vm/src/builtins/code.rs +++ b/vm/src/builtins/code.rs @@ -5,7 +5,7 @@ use super::{PyStrRef, PyTupleRef, PyType, PyTypeRef}; use crate::{ builtins::PyStrInterned, - bytecode::{self, BorrowedConstant, CodeFlags, Constant, ConstantBag}, + bytecode::{self, AsBag, BorrowedConstant, CodeFlags, Constant, ConstantBag}, class::{PyClassImpl, StaticType}, convert::ToPyObject, function::{FuncArgs, OptionalArg}, @@ -97,8 +97,21 @@ impl Constant for Literal { } } +impl<'a> AsBag for &'a Context { + type Bag = PyObjBag<'a>; + fn as_bag(self) -> PyObjBag<'a> { + PyObjBag(self) + } +} +impl<'a> AsBag for &'a VirtualMachine { + type Bag = PyObjBag<'a>; + fn as_bag(self) -> PyObjBag<'a> { + PyObjBag(&self.ctx) + } +} + #[derive(Clone, Copy)] -pub(crate) struct PyObjBag<'a>(pub &'a Context); +pub struct PyObjBag<'a>(pub &'a Context); impl ConstantBag for PyObjBag<'_> { type Constant = Literal; @@ -166,6 +179,12 @@ impl IntoCodeObject for bytecode::CodeObject { } } +impl> IntoCodeObject for bytecode::frozen_lib::FrozenCodeObject { + fn into_code_object(self, ctx: &Context) -> CodeObject { + self.decode(ctx) + } +} + #[pyclass(module = false, name = "code")] pub struct PyCode { pub code: CodeObject, diff --git a/vm/src/frozen.rs b/vm/src/frozen.rs index 8c809aa372..c1d2fa5b05 100644 --- a/vm/src/frozen.rs +++ b/vm/src/frozen.rs @@ -1,13 +1,10 @@ -use crate::bytecode::FrozenModule; +use crate::bytecode::frozen_lib::FrozenModule; -pub fn core_frozen_inits() -> impl Iterator { +pub fn core_frozen_inits() -> impl Iterator { let iter = std::iter::empty(); macro_rules! ext_modules { - ($iter:ident, ($modules:expr)) => { - let $iter = $iter.chain($modules); - }; ($iter:ident, $($t:tt)*) => { - ext_modules!($iter, (py_freeze!($($t)*))) + let $iter = $iter.chain(py_freeze!($($t)*)); }; } @@ -23,10 +20,8 @@ pub fn core_frozen_inits() -> impl Iterator { // Includes _importlib_bootstrap and _importlib_bootstrap_external ext_modules!( iter, - (rustpython_derive::py_freeze!( - dir = "./Lib/python_builtins", - crate_name = "rustpython_compiler_core" - )) + dir = "./Lib/python_builtins", + crate_name = "rustpython_compiler_core" ); // core stdlib Python modules that the vm calls into, but are still used in Python @@ -34,10 +29,8 @@ pub fn core_frozen_inits() -> impl Iterator { #[cfg(not(feature = "freeze-stdlib"))] ext_modules!( iter, - (rustpython_derive::py_freeze!( - dir = "./Lib/core_modules", - crate_name = "rustpython_compiler_core" - )) + dir = "./Lib/core_modules", + crate_name = "rustpython_compiler_core" ); iter diff --git a/vm/src/import.rs b/vm/src/import.rs index 9fc8e56ea4..780feea46e 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -77,7 +77,7 @@ pub fn make_frozen(vm: &VirtualMachine, name: &str) -> PyResult> { vm.state.frozen.get(name).ok_or_else(|| { vm.new_import_error(format!("No such frozen object named {name}"), name) })?; - Ok(vm.ctx.new_code(frozen.code.clone())) + Ok(vm.ctx.new_code(frozen.code)) } pub fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult { diff --git a/vm/src/stdlib/imp.rs b/vm/src/stdlib/imp.rs index c3a376a088..0c79213166 100644 --- a/vm/src/stdlib/imp.rs +++ b/vm/src/stdlib/imp.rs @@ -1,4 +1,5 @@ -use crate::{builtins::PyBaseExceptionRef, bytecode::FrozenModule, PyObjectRef, VirtualMachine}; +use crate::bytecode::frozen_lib::FrozenModule; +use crate::{builtins::PyBaseExceptionRef, PyObjectRef, VirtualMachine}; pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { let module = _imp::make_module(vm); @@ -73,8 +74,12 @@ impl FrozenError { } // find_frozen in frozen.c -fn find_frozen<'a>(name: &str, vm: &'a VirtualMachine) -> Result<&'a FrozenModule, FrozenError> { - vm.state.frozen.get(name).ok_or(FrozenError::NotFound) +fn find_frozen(name: &str, vm: &VirtualMachine) -> Result { + vm.state + .frozen + .get(name) + .copied() + .ok_or(FrozenError::NotFound) } #[pymodule] @@ -139,11 +144,9 @@ mod _imp { #[pyfunction] fn is_frozen_package(name: PyStrRef, vm: &VirtualMachine) -> PyResult { - vm.state - .frozen - .get(name.as_str()) + super::find_frozen(name.as_str(), vm) .map(|frozen| frozen.package) - .ok_or_else(|| vm.new_import_error(format!("No such frozen object named {name}"), name)) + .map_err(|e| e.to_pyexception(name.as_str(), vm)) } #[pyfunction] diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 2b32b6db04..9db3c436ea 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -21,7 +21,7 @@ use crate::{ tuple::{PyTuple, PyTupleTyped}, PyBaseExceptionRef, PyDictRef, PyInt, PyList, PyModule, PyStrInterned, PyStrRef, PyTypeRef, }, - bytecode, + bytecode::frozen_lib::FrozenModule, codecs::CodecsRegistry, common::{hash::HashSecret, lock::PyMutex, rc::PyRc}, convert::ToPyObject, @@ -88,7 +88,7 @@ struct ExceptionStack { pub struct PyGlobalState { pub settings: Settings, pub module_inits: stdlib::StdlibMap, - pub frozen: HashMap, + pub frozen: HashMap<&'static str, FrozenModule, ahash::RandomState>, pub stacksize: AtomicCell, pub thread_count: AtomicCell, pub hash_secret: HashSecret, @@ -330,7 +330,7 @@ impl VirtualMachine { /// Can only be used in the initialization closure passed to [`Interpreter::with_init`] pub fn add_frozen(&mut self, frozen: I) where - I: IntoIterator, + I: IntoIterator, { self.state_mut().frozen.extend(frozen); } diff --git a/wasm/lib/src/vm_class.rs b/wasm/lib/src/vm_class.rs index efc6070aef..229aaae818 100644 --- a/wasm/lib/src/vm_class.rs +++ b/wasm/lib/src/vm_class.rs @@ -46,7 +46,7 @@ impl StoredVirtualMachine { vm.add_native_modules(rustpython_stdlib::get_module_inits()); #[cfg(feature = "freeze-stdlib")] - vm.add_frozen(rustpython_pylib::frozen_stdlib()); + vm.add_frozen(rustpython_pylib::FROZEN_STDLIB); vm.wasm_id = Some(id);