Serialize entire frozen hashmap to bytes

This commit is contained in:
Noah
2021-01-19 18:09:11 -06:00
parent 2ab02e34f1
commit 58503cb0fa
6 changed files with 152 additions and 52 deletions

View File

@@ -1075,7 +1075,7 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
}
}
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Debug)]
pub struct FrozenModule<C: Constant = ConstantData> {
#[serde(bound(
deserialize = "C: serde::Deserialize<'de>, C::Name: serde::Deserialize<'de>",
@@ -1084,3 +1084,132 @@ pub struct FrozenModule<C: Constant = ConstantData> {
pub code: CodeObject<C>,
pub package: bool,
}
pub mod frozen_lib {
use super::*;
use bincode::{options, Options};
use std::convert::TryInto;
use std::io;
/// Decode a frozen library previously produced by `encode_lib`.
///
/// `bytes` is decompressed with lz4_flex's size-prepended format, then the
/// leading element count is read with bincode. The returned iterator decodes
/// the individual `(name, module)` entries on demand.
///
/// # Panics
///
/// Panics if decompression fails, if the element count cannot be
/// deserialized, or if the count does not fit in a `usize`.
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
    let reader = VecReader {
        data: lz4_flex::decompress_size_prepended(bytes).unwrap(),
        pos: 0,
    };
    let mut de = bincode::Deserializer::with_bincode_read(reader, options());
    // The sequence starts with its element count; the entries follow.
    let count = u64::deserialize(&mut de).unwrap();
    let len = count.try_into().unwrap();
    FrozenModulesIter { len, de }
}
/// Iterator over the `(name, module)` entries of an encoded frozen library.
///
/// Entries are deserialized one at a time from the underlying bincode
/// deserializer as the iterator is advanced.
pub struct FrozenModulesIter {
    /// Number of entries that have not been yielded yet.
    len: usize,
    // ideally this could be a SeqAccess, but I think that would require existential types
    de: bincode::Deserializer<VecReader, bincode::DefaultOptions>,
}

impl Iterator for FrozenModulesIter {
    type Item = (String, FrozenModule);

    /// Deserialize and return the next entry, or `None` once all entries
    /// have been yielded.
    ///
    /// # Panics
    ///
    /// Panics if the next entry fails to deserialize.
    fn next(&mut self) -> Option<Self::Item> {
        // manually mimic bincode's seq encoding, which is <len:u64> <element*len>
        // This probably won't change (bincode doesn't require padding or anything), but
        // it's not guaranteed by semver as far as I can tell
        let left_after = self.len.checked_sub(1)?;
        let entry = Deserialize::deserialize(&mut self.de).unwrap();
        self.len = left_after;
        Some(entry)
    }

    /// Exact number of remaining entries, as both lower and upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.len;
        (remaining, Some(remaining))
    }
}

impl ExactSizeIterator for FrozenModulesIter {}
/// Encode a collection of frozen modules into a compressed byte buffer.
///
/// The `(name, module)` pairs are serialized as a single bincode sequence
/// and the result is compressed with lz4_flex, with the uncompressed size
/// prepended.
///
/// # Panics
///
/// Panics if bincode serialization fails.
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
where
    I: IntoIterator<Item = (&'a str, &'a FrozenModule)>,
    I::IntoIter: ExactSizeIterator + Clone,
{
    let wrapper = SerializeLib {
        iter: lib.into_iter(),
    };
    let serialized = options().serialize(&wrapper).unwrap();
    lz4_flex::compress_prepend_size(&serialized)
}
/// Adapter that lets an iterator of `(name, module)` pairs be serialized
/// as a sequence without first collecting it into a container.
struct SerializeLib<I> {
    iter: I,
}

impl<'a, I> Serialize for SerializeLib<I>
where
    I: ExactSizeIterator<Item = (&'a str, &'a FrozenModule)> + Clone,
{
    /// Serialize every item of the wrapped iterator as one sequence.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // `serialize` only has `&self`, so iterate over a clone of the iterator.
        let entries = self.iter.clone();
        serializer.collect_seq(entries)
    }
}
/// Owned version of bincode::de::read::SliceReader<'a>
///
/// Holds the full byte buffer together with a cursor marking how many
/// bytes have been consumed so far.
struct VecReader {
    /// Backing buffer that is read from.
    data: Vec<u8>,
    /// Offset into `data` of the next unread byte.
    pos: usize,
}

impl io::Read for VecReader {
    /// Copy as many unread bytes as fit into `buf`, advance the cursor by
    /// that amount and return it. Returns `Ok(0)` once the buffer is exhausted.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let remaining = &self.data[self.pos..];
        let count = remaining.len().min(buf.len());
        buf[..count].copy_from_slice(&remaining[..count]);
        self.pos += count;
        Ok(count)
    }

    /// Fill `buf` completely, or fail with `UnexpectedEof` (consuming
    /// nothing) when fewer than `buf.len()` unread bytes remain.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        let src = self.get_byte_slice(buf.len())?;
        buf.copy_from_slice(src);
        Ok(())
    }
}

impl VecReader {
    /// Borrow the next `length` unread bytes and advance the cursor past them.
    ///
    /// Returns an `UnexpectedEof` error, leaving the cursor untouched, when
    /// fewer than `length` unread bytes remain.
    #[inline(always)]
    fn get_byte_slice(&mut self, length: usize) -> io::Result<&[u8]> {
        let start = self.pos;
        let end = start
            .checked_add(length)
            .filter(|&end| end <= self.data.len())
            .ok_or(io::ErrorKind::UnexpectedEof)?;
        self.pos = end;
        Ok(&self.data[start..end])
    }
}
impl<'storage> bincode::BincodeRead<'storage> for VecReader {
    /// Take the next `length` bytes, validate them as UTF-8 and pass the
    /// resulting string to `visitor.visit_str`.
    ///
    /// Fails with `InvalidUtf8Encoding` if the bytes are not valid UTF-8,
    /// or with the error from `get_byte_slice` if too few bytes remain.
    fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
    where
        V: serde::de::Visitor<'storage>,
    {
        let raw = self.get_byte_slice(length)?;
        let text = match ::std::str::from_utf8(raw) {
            Ok(text) => text,
            Err(e) => return Err(bincode::ErrorKind::InvalidUtf8Encoding(e).into()),
        };
        visitor.visit_str(text)
    }

    /// Take the next `length` bytes and return them as a freshly allocated `Vec`.
    fn get_byte_buffer(&mut self, length: usize) -> bincode::Result<Vec<u8>> {
        let raw = self.get_byte_slice(length)?;
        Ok(raw.to_vec())
    }

    /// Take the next `length` bytes and pass them to `visitor.visit_bytes`.
    fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
    where
        V: serde::de::Visitor<'storage>,
    {
        let raw = self.get_byte_slice(length)?;
        visitor.visit_bytes(raw)
    }
}
}

View File

@@ -330,30 +330,11 @@ pub fn impl_py_freeze(input: TokenStream2) -> Result<TokenStream2, Diagnostic> {
let crate_name = args.crate_name;
let code_map = args.source.compile(args.mode, args.module_name)?;
let modules_len = code_map.len();
let modules = code_map
.into_iter()
.map(|(module_name, FrozenModule { code, package })| {
let module_name = LitStr::new(&module_name, Span::call_site());
let bytes = code.to_bytes();
let bytes = LitByteStr::new(&bytes, Span::call_site());
quote! {
m.insert(#module_name.into(), #crate_name::FrozenModule {
code: #crate_name::CodeObject::from_bytes(
#bytes
).expect("Deserializing CodeObject failed"),
package: #package,
});
}
});
let data = rustpython_bytecode::frozen_lib::encode_lib(code_map.iter().map(|(k, v)| (&**k, v)));
let bytes = LitByteStr::new(&data, Span::call_site());
let output = quote! {
{
let mut m = ::std::collections::HashMap::with_capacity(#modules_len);
#(#modules)*
m
}
#crate_name::frozen_lib::decode_lib(#bytes)
};
Ok(output)

View File

@@ -1,5 +1,3 @@
use std::collections::HashMap;
use rustpython_vm as vm;
fn main() -> vm::pyobject::PyResult<()> {
@@ -11,13 +9,9 @@ fn run(vm: &vm::VirtualMachine) -> vm::pyobject::PyResult<()> {
// the file parameter is relevant to the directory where the crate's Cargo.toml is located, see $CARGO_MANIFEST_DIR:
// https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates
let modules: HashMap<String, vm::bytecode::FrozenModule> =
vm::py_freeze!(file = "examples/freeze/freeze.py");
let module = vm::py_compile!(file = "examples/freeze/freeze.py");
let res = vm.run_code_obj(
vm.new_code_object(modules.get("frozen").unwrap().code.clone()),
scope,
);
let res = vm.run_code_obj(vm.new_code_object(module), scope);
if let Err(err) = res {
vm::exceptions::print_exception(&vm, err);

View File

@@ -5,8 +5,8 @@
pub const LIB_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/Lib");
#[cfg(feature = "compiled-bytecode")]
use {rustpython_bytecode::FrozenModule, std::collections::HashMap};
use rustpython_bytecode::FrozenModule;
#[cfg(feature = "compiled-bytecode")]
pub fn frozen_stdlib() -> HashMap<String, FrozenModule> {
pub fn frozen_stdlib() -> impl Iterator<Item = (String, FrozenModule)> {
rustpython_derive::py_freeze!(dir = "Lib", crate_name = "rustpython_bytecode")
}

View File

@@ -1,7 +1,6 @@
use crate::builtins::code;
use crate::bytecode;
use crate::VirtualMachine;
use std::collections::HashMap;
pub fn map_frozen<'a>(
vm: &'a VirtualMachine,
@@ -19,18 +18,19 @@ pub fn map_frozen<'a>(
})
}
pub fn get_module_inits(
vm: &VirtualMachine,
) -> HashMap<String, code::FrozenModule, ahash::RandomState> {
let mut modules = HashMap::default();
pub fn get_module_inits() -> impl Iterator<Item = (String, bytecode::FrozenModule)> {
let iter = std::iter::empty();
macro_rules! ext_modules {
($($t:tt)*) => {
modules.extend(map_frozen(vm, py_freeze!($($t)*)));
($iter:ident, ($modules:expr)) => {
let $iter = $iter.chain($modules);
};
($iter:ident, $($t:tt)*) => {
ext_modules!($iter, (py_freeze!($($t)*)))
};
}
ext_modules!(
iter,
source = "initialized = True; print(\"Hello world!\")\n",
module_name = "__hello__",
);
@@ -39,19 +39,15 @@ pub fn get_module_inits(
// in theory be implemented in Rust, but are easiest to do in Python for one reason or another.
// Includes _importlib_bootstrap and _importlib_bootstrap_external
// For Windows: did you forget to run `powershell scripts\symlinks-to-hardlinks.ps1`?
ext_modules!(dir = "Lib/python_builtins/");
ext_modules!(iter, dir = "Lib/python_builtins/");
#[cfg(not(feature = "freeze-stdlib"))]
{
// core stdlib Python modules that the vm calls into, but are still used in Python
// application code, e.g. copyreg
ext_modules!(dir = "Lib/core_modules/");
}
// core stdlib Python modules that the vm calls into, but are still used in Python
// application code, e.g. copyreg
ext_modules!(iter, dir = "Lib/core_modules/");
// if we're on freeze-stdlib, the core stdlib modules will be included anyway
#[cfg(feature = "freeze-stdlib")]
{
modules.extend(map_frozen(vm, rustpython_pylib::frozen_stdlib()));
}
ext_modules!(iter, (rustpython_pylib::frozen_stdlib()));
modules
iter
}

View File

@@ -282,7 +282,7 @@ impl VirtualMachine {
initialized: false,
};
let frozen = frozen::get_module_inits(&vm);
let frozen = frozen::map_frozen(&vm, frozen::get_module_inits()).collect();
PyRc::get_mut(&mut vm.state).unwrap().frozen = frozen;
module::init_module_dict(