diff --git a/bytecode/src/lib.rs b/bytecode/src/lib.rs index 691bd507a8..b4cfa42922 100644 --- a/bytecode/src/lib.rs +++ b/bytecode/src/lib.rs @@ -1075,7 +1075,7 @@ impl fmt::Debug for CodeObject { } } -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Debug)] pub struct FrozenModule { #[serde(bound( deserialize = "C: serde::Deserialize<'de>, C::Name: serde::Deserialize<'de>", @@ -1084,3 +1084,132 @@ pub struct FrozenModule { pub code: CodeObject, pub package: bool, } + +pub mod frozen_lib { + use super::*; + use bincode::{options, Options}; + use std::convert::TryInto; + use std::io; + + pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter { + let data = lz4_flex::decompress_size_prepended(bytes).unwrap(); + let r = VecReader { data, pos: 0 }; + let mut de = bincode::Deserializer::with_bincode_read(r, options()); + let len = u64::deserialize(&mut de).unwrap().try_into().unwrap(); + FrozenModulesIter { len, de } + } + + pub struct FrozenModulesIter { + len: usize, + // ideally this could be a SeqAccess, but I think that would require existential types + de: bincode::Deserializer, + } + + impl Iterator for FrozenModulesIter { + type Item = (String, FrozenModule); + + fn next(&mut self) -> Option { + // manually mimic bincode's seq encoding, which is a u64 element count followed by that many elements. + // This probably won't change (bincode doesn't require padding or anything), but + // it's not guaranteed by semver as far as I can tell + if self.len > 0 { + let entry = Deserialize::deserialize(&mut self.de).unwrap(); + self.len -= 1; + Some(entry) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + (self.len, Some(self.len)) + } + } + + impl ExactSizeIterator for FrozenModulesIter {} + + pub fn encode_lib<'a, I>(lib: I) -> Vec + where + I: IntoIterator, + I::IntoIter: ExactSizeIterator + Clone, + { + let iter = lib.into_iter(); + let data = options().serialize(&SerializeLib { iter }).unwrap(); + lz4_flex::compress_prepend_size(&data) + } + + struct SerializeLib { + iter: I, + }
+ + impl<'a, I> Serialize for SerializeLib + where + I: ExactSizeIterator + Clone, + { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.collect_seq(self.iter.clone()) + } + } + + /// Owned version of bincode::de::read::SliceReader<'a> + struct VecReader { + data: Vec, + pos: usize, + } + + impl io::Read for VecReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut subslice = &self.data[self.pos..]; + let n = io::Read::read(&mut subslice, buf)?; + self.pos += n; + Ok(n) + } + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + self.get_byte_slice(buf.len()) + .map(|data| buf.copy_from_slice(data)) + } + } + + impl VecReader { + #[inline(always)] + fn get_byte_slice(&mut self, length: usize) -> io::Result<&[u8]> { + let subslice = &self.data[self.pos..]; + match subslice.get(..length) { + Some(ret) => { + self.pos += length; + Ok(ret) + } + None => Err(io::ErrorKind::UnexpectedEof.into()), + } + } + } + + impl<'storage> bincode::BincodeRead<'storage> for VecReader { + fn forward_read_str(&mut self, length: usize, visitor: V) -> bincode::Result + where + V: serde::de::Visitor<'storage>, + { + let bytes = self.get_byte_slice(length)?; + match ::std::str::from_utf8(bytes) { + Ok(s) => visitor.visit_str(s), + Err(e) => Err(bincode::ErrorKind::InvalidUtf8Encoding(e).into()), + } + } + + fn get_byte_buffer(&mut self, length: usize) -> bincode::Result> { + self.get_byte_slice(length) + .map(|x| x.to_vec()) + .map_err(Into::into) + } + + fn forward_read_bytes(&mut self, length: usize, visitor: V) -> bincode::Result + where + V: serde::de::Visitor<'storage>, + { + visitor.visit_bytes(self.get_byte_slice(length)?) 
+ } + } +} diff --git a/derive/src/compile_bytecode.rs b/derive/src/compile_bytecode.rs index 852df198a3..bca2e76213 100644 --- a/derive/src/compile_bytecode.rs +++ b/derive/src/compile_bytecode.rs @@ -330,30 +330,11 @@ pub fn impl_py_freeze(input: TokenStream2) -> Result { let crate_name = args.crate_name; let code_map = args.source.compile(args.mode, args.module_name)?; - let modules_len = code_map.len(); - - let modules = code_map - .into_iter() - .map(|(module_name, FrozenModule { code, package })| { - let module_name = LitStr::new(&module_name, Span::call_site()); - let bytes = code.to_bytes(); - let bytes = LitByteStr::new(&bytes, Span::call_site()); - quote! { - m.insert(#module_name.into(), #crate_name::FrozenModule { - code: #crate_name::CodeObject::from_bytes( - #bytes - ).expect("Deserializing CodeObject failed"), - package: #package, - }); - } - }); + let data = rustpython_bytecode::frozen_lib::encode_lib(code_map.iter().map(|(k, v)| (&**k, v))); + let bytes = LitByteStr::new(&data, Span::call_site()); let output = quote! 
{ - { - let mut m = ::std::collections::HashMap::with_capacity(#modules_len); - #(#modules)* - m - } + #crate_name::frozen_lib::decode_lib(#bytes) }; Ok(output) diff --git a/examples/freeze/main.rs b/examples/freeze/main.rs index d9cc11778c..b406296bac 100644 --- a/examples/freeze/main.rs +++ b/examples/freeze/main.rs @@ -1,5 +1,3 @@ -use std::collections::HashMap; - use rustpython_vm as vm; fn main() -> vm::pyobject::PyResult<()> { @@ -11,13 +9,9 @@ fn run(vm: &vm::VirtualMachine) -> vm::pyobject::PyResult<()> { // the file parameter is relevant to the directory where the crate's Cargo.toml is located, see $CARGO_MANIFEST_DIR: // https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates - let modules: HashMap = - vm::py_freeze!(file = "examples/freeze/freeze.py"); + let module = vm::py_compile!(file = "examples/freeze/freeze.py"); - let res = vm.run_code_obj( - vm.new_code_object(modules.get("frozen").unwrap().code.clone()), - scope, - ); + let res = vm.run_code_obj(vm.new_code_object(module), scope); if let Err(err) = res { vm::exceptions::print_exception(&vm, err); diff --git a/vm/pylib-crate/src/lib.rs b/vm/pylib-crate/src/lib.rs index 895654c57b..d53aabe2f1 100644 --- a/vm/pylib-crate/src/lib.rs +++ b/vm/pylib-crate/src/lib.rs @@ -5,8 +5,8 @@ pub const LIB_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/Lib"); #[cfg(feature = "compiled-bytecode")] -use {rustpython_bytecode::FrozenModule, std::collections::HashMap}; +use rustpython_bytecode::FrozenModule; #[cfg(feature = "compiled-bytecode")] -pub fn frozen_stdlib() -> HashMap { +pub fn frozen_stdlib() -> impl Iterator { rustpython_derive::py_freeze!(dir = "Lib", crate_name = "rustpython_bytecode") } diff --git a/vm/src/frozen.rs b/vm/src/frozen.rs index d98d9c9187..ec94b7ecdb 100644 --- a/vm/src/frozen.rs +++ b/vm/src/frozen.rs @@ -1,7 +1,6 @@ use crate::builtins::code; use crate::bytecode; use crate::VirtualMachine; -use std::collections::HashMap; pub 
fn map_frozen<'a>( vm: &'a VirtualMachine, @@ -19,18 +18,19 @@ pub fn map_frozen<'a>( }) } -pub fn get_module_inits( - vm: &VirtualMachine, -) -> HashMap { - let mut modules = HashMap::default(); - +pub fn get_module_inits() -> impl Iterator { + let iter = std::iter::empty(); macro_rules! ext_modules { - ($($t:tt)*) => { - modules.extend(map_frozen(vm, py_freeze!($($t)*))); + ($iter:ident, ($modules:expr)) => { + let $iter = $iter.chain($modules); + }; + ($iter:ident, $($t:tt)*) => { + ext_modules!($iter, (py_freeze!($($t)*))) }; } ext_modules!( + iter, source = "initialized = True; print(\"Hello world!\")\n", module_name = "__hello__", ); @@ -39,19 +39,15 @@ pub fn get_module_inits( // in theory be implemented in Rust, but are easiest to do in Python for one reason or another. // Includes _importlib_bootstrap and _importlib_bootstrap_external // For Windows: did you forget to run `powershell scripts\symlinks-to-hardlinks.ps1`? - ext_modules!(dir = "Lib/python_builtins/"); + ext_modules!(iter, dir = "Lib/python_builtins/"); #[cfg(not(feature = "freeze-stdlib"))] - { - // core stdlib Python modules that the vm calls into, but are still used in Python - // application code, e.g. copyreg - ext_modules!(dir = "Lib/core_modules/"); - } + // core stdlib Python modules that the vm calls into, but are still used in Python + // application code, e.g. 
copyreg + ext_modules!(iter, dir = "Lib/core_modules/"); // if we're on freeze-stdlib, the core stdlib modules will be included anyway #[cfg(feature = "freeze-stdlib")] - { - modules.extend(map_frozen(vm, rustpython_pylib::frozen_stdlib())); - } + ext_modules!(iter, (rustpython_pylib::frozen_stdlib())); - modules + iter } diff --git a/vm/src/vm.rs b/vm/src/vm.rs index 77e51b3cf3..8980b80c7c 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -282,7 +282,7 @@ impl VirtualMachine { initialized: false, }; - let frozen = frozen::get_module_inits(&vm); + let frozen = frozen::map_frozen(&vm, frozen::get_module_inits()).collect(); PyRc::get_mut(&mut vm.state).unwrap().frozen = frozen; module::init_module_dict(