Rework frozen modules and directly deserialize to CodeObject<Literal>

This commit is contained in:
Noa
2023-01-13 22:25:43 -06:00
parent 22bc2d24ab
commit 456bc80697
12 changed files with 198 additions and 116 deletions

View File

@@ -1,7 +1,6 @@
//! Implement python as a virtual machine with bytecodes. This module
//! implements bytecode structure.
use crate::marshal::MarshalError;
use crate::{marshal, Location};
use bitflags::bitflags;
use itertools::Itertools;
@@ -46,6 +45,19 @@ pub trait ConstantBag: Sized + Copy {
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name;
}
/// Conversion of a value into a [`ConstantBag`].
///
/// `as_bag` consumes `self` and returns the associated `Bag` type, which must
/// itself implement [`ConstantBag`].
pub trait AsBag {
/// The bag type produced by [`AsBag::as_bag`].
type Bag: ConstantBag;
/// Consume `self` and return the corresponding bag.
// `as_bag` takes `self` by value, which clippy's `wrong_self_convention`
// lint flags for `as_*` method names; the lint is silenced for this method.
#[allow(clippy::wrong_self_convention)]
fn as_bag(self) -> Self::Bag;
}
/// Every [`ConstantBag`] is its own bag: `as_bag` is the identity conversion.
impl<T: ConstantBag> AsBag for T {
    type Bag = T;

    fn as_bag(self) -> T {
        self
    }
}
/// A field-less, `Copy` marker type that is passed by value where a constant
/// bag is expected.
// NOTE(review): its `ConstantBag` impl is not visible in this view; which
// constant type it produces should be confirmed there.
#[derive(Clone, Copy)]
pub struct BasicBag;
@@ -1077,27 +1089,6 @@ impl<C: Constant> CodeObject<C> {
}
}
impl CodeObject<ConstantData> {
/// Load a code object from bytes
pub fn from_bytes(data: &[u8]) -> Result<Self, MarshalError> {
use lz4_flex::block::DecompressError;
let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e {
DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => {
MarshalError::Eof
}
_ => MarshalError::InvalidBytecode,
})?;
marshal::deserialize_code(&mut &raw_bincode[..], BasicBag)
}
/// Serialize this bytecode to bytes.
pub fn to_bytes(&self) -> Vec<u8> {
let mut data = Vec::new();
marshal::serialize_code(&mut data, self);
lz4_flex::compress_prepend_size(&data)
}
}
impl<C: Constant> fmt::Display for CodeObject<C> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display_inner(f, false, 1)?;
@@ -1483,32 +1474,81 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
}
}
/// A frozen module. Holds a code object and whether it is part of a package
#[derive(Debug)]
pub struct FrozenModule {
pub code: CodeObject<ConstantData>,
pub package: bool,
}
pub mod frozen_lib {
use super::*;
use marshal::{Read, Write};
use marshal::{Read, ReadBorrowed, Write};
/// Decode a library to a iterable of frozen modules
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
let data = lz4_flex::decompress_size_prepended(bytes).unwrap();
let mut data = marshal::Cursor { data, position: 0 };
let remaining = data.read_u32().unwrap();
FrozenModulesIter { remaining, data }
/// A frozen module. Holds a frozen code object and whether it is part of a package
///
/// `B` is the byte storage of the code object and defaults to `&'static [u8]`.
#[derive(Copy, Clone)]
pub struct FrozenModule<B = &'static [u8]> {
/// The still-encoded code object of the module.
pub code: FrozenCodeObject<B>,
/// Whether the module is part of a package.
pub package: bool,
}
pub struct FrozenModulesIter {
/// A code object kept in its encoded byte form.
///
/// `B` is the storage type holding the encoded bytes.
#[derive(Copy, Clone)]
pub struct FrozenCodeObject<B> {
/// The encoded bytes of the code object.
// NOTE(review): presumably the size-prepended LZ4 output of the marshal
// serialization, matching `encode`/`decode` — the field is public, so
// nothing enforces that here.
pub bytes: B,
}
impl<B: AsRef<[u8]>> FrozenCodeObject<B> {
    /// Decode a frozen codeobject, building its constants with `bag`.
    ///
    /// `bag` may be anything implementing [`AsBag`]; it is converted with
    /// `as_bag` before deserialization.
    ///
    /// # Panics
    ///
    /// Panics if the stored bytes cannot be LZ4-decompressed, or if the
    /// decompressed data does not deserialize into a code object.
    #[inline]
    pub fn decode<Bag: AsBag>(
        &self,
        bag: Bag,
    ) -> CodeObject<<Bag::Bag as ConstantBag>::Constant> {
        Self::_decode(self.bytes.as_ref(), bag.as_bag())
    }

    /// Worker for [`Self::decode`]: decompress `data`, then deserialize it
    /// with the already-resolved `bag`.
    fn _decode<Bag: ConstantBag>(data: &[u8], bag: Bag) -> CodeObject<Bag::Constant> {
        // The two failure modes get distinct messages so a panic identifies
        // which stage (decompression vs. deserialization) rejected the bytes.
        let decompressed = lz4_flex::decompress_size_prepended(data)
            .expect("decompressing frozen CodeObject failed");
        marshal::deserialize_code(&mut &decompressed[..], bag)
            .expect("deserializing frozen CodeObject failed")
    }
}
impl FrozenCodeObject<Vec<u8>> {
    /// Serialize `code` with `marshal::serialize_code` and store the
    /// size-prepended LZ4 compression of the result.
    pub fn encode<C: Constant>(code: &CodeObject<C>) -> Self {
        let mut marshalled = Vec::new();
        marshal::serialize_code(&mut marshalled, code);
        Self {
            bytes: lz4_flex::compress_prepend_size(&marshalled),
        }
    }
}
/// A library of frozen modules in its encoded byte form.
///
/// `B` is the byte storage; it may be unsized and defaults to `[u8]`.
// `repr(transparent)` gives `FrozenLib<B>` the same layout as its single
// field; the pointer cast in `from_ref` relies on this.
#[repr(transparent)]
pub struct FrozenLib<B: ?Sized = [u8]> {
/// The encoded library bytes.
pub bytes: B,
}
impl<B: AsRef<[u8]> + ?Sized> FrozenLib<B> {
    /// Reinterpret a reference to library bytes as a reference to a
    /// `FrozenLib` wrapping them.
    ///
    /// This is only a cast; the bytes are not inspected or validated.
    pub const fn from_ref(b: &B) -> &FrozenLib<B> {
        // SAFETY: `FrozenLib<B>` is `#[repr(transparent)]` over its only
        // field `bytes: B`, so `B` and `FrozenLib<B>` share the same layout
        // (including pointer metadata when `B` is unsized). The returned
        // reference is derived from `b` and keeps its lifetime.
        unsafe { &*(b as *const B as *const FrozenLib<B>) }
    }

    /// Decode a library to an iterable of frozen modules
    ///
    /// # Panics
    ///
    /// Panics if the leading `u32` entry count cannot be read from the bytes.
    pub fn decode(&self) -> FrozenModulesIter<'_> {
        let mut data = self.bytes.as_ref();
        // The library starts with the number of entries; the iterator then
        // continues reading from the bytes that follow it.
        let remaining = data
            .read_u32()
            .expect("frozen library is missing its entry-count header");
        FrozenModulesIter { remaining, data }
    }
}
impl<'a, B: AsRef<[u8]> + ?Sized> IntoIterator for &'a FrozenLib<B> {
type Item = (&'a str, FrozenModule<&'a [u8]>);
type IntoIter = FrozenModulesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.decode()
}
}
pub struct FrozenModulesIter<'a> {
remaining: u32,
data: marshal::Cursor<Vec<u8>>,
data: &'a [u8],
}
impl Iterator for FrozenModulesIter {
type Item = (String, FrozenModule);
impl<'a> Iterator for FrozenModulesIter<'a> {
type Item = (&'a str, FrozenModule<&'a [u8]>);
fn next(&mut self) -> Option<Self::Item> {
if self.remaining > 0 {
@@ -1524,31 +1564,37 @@ pub mod frozen_lib {
(self.remaining as usize, Some(self.remaining as usize))
}
}
impl ExactSizeIterator for FrozenModulesIter {}
impl ExactSizeIterator for FrozenModulesIter<'_> {}
fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> {
fn read_entry<'a>(
rdr: &mut &'a [u8],
) -> Result<(&'a str, FrozenModule<&'a [u8]>), marshal::MarshalError> {
let len = rdr.read_u32()?;
let name = rdr.read_str(len)?.to_owned();
let code = marshal::deserialize_code(rdr, BasicBag)?;
let name = rdr.read_str_borrow(len)?;
let len = rdr.read_u32()?;
let code_slice = rdr.read_slice_borrow(len)?;
let code = FrozenCodeObject { bytes: code_slice };
let package = rdr.read_u8()? != 0;
Ok((name, FrozenModule { code, package }))
}
/// Encode the given iterator of frozen modules into a compressed vector of bytes
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
where
I: IntoIterator<Item = (&'a str, &'a FrozenModule)>,
I::IntoIter: ExactSizeIterator + Clone,
{
let iter = lib.into_iter();
let mut data = Vec::new();
write_lib(&mut data, iter);
lz4_flex::compress_prepend_size(&data)
impl FrozenLib<Vec<u8>> {
/// Encode the given iterator of frozen modules into a vector of bytes.
///
/// The library buffer is not compressed as a whole here: the body only
/// calls `write_lib` and wraps the resulting `Vec<u8>`.
// NOTE(review): each module's code bytes are presumably already compressed
// by `FrozenCodeObject::encode` before they reach this function — confirm
// against callers.
pub fn encode<'a, I, B: AsRef<[u8]>>(lib: I) -> FrozenLib<Vec<u8>>
where
I: IntoIterator<Item = (&'a str, FrozenModule<B>)>,
I::IntoIter: ExactSizeIterator + Clone,
{
let iter = lib.into_iter();
let mut bytes = Vec::new();
write_lib(&mut bytes, iter);
Self { bytes }
}
}
fn write_lib<'a>(
buf: &mut impl Write,
lib: impl ExactSizeIterator<Item = (&'a str, &'a FrozenModule)>,
fn write_lib<'a, B: AsRef<[u8]>>(
buf: &mut Vec<u8>,
lib: impl ExactSizeIterator<Item = (&'a str, FrozenModule<B>)>,
) {
marshal::write_len(buf, lib.len());
for (name, module) in lib {
@@ -1556,10 +1602,9 @@ pub mod frozen_lib {
}
}
fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) {
marshal::write_len(buf, name.len());
buf.write_slice(name.as_bytes());
marshal::serialize_code(buf, &module.code);
fn write_entry(buf: &mut Vec<u8>, name: &str, module: FrozenModule<impl AsRef<[u8]>>) {
marshal::write_vec(buf, name.as_bytes());
marshal::write_vec(buf, module.code.bytes.as_ref());
buf.write_u8(module.package as u8);
}
}

View File

@@ -130,8 +130,21 @@ pub trait Read {
}
}
/// A [`Read`] source whose returned slices carry the lifetime `'a` of the
/// underlying data rather than the lifetime of the `&mut self` borrow.
pub(crate) trait ReadBorrowed<'a>: Read {
/// Read `n` bytes and return them as a slice with lifetime `'a`.
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>;
/// Read `len` bytes with [`Self::read_slice_borrow`] and interpret them as
/// UTF-8; a read failure or invalid UTF-8 is propagated as an error via `?`.
fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> {
Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?)
}
}
impl Read for &[u8] {
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
self.read_slice_borrow(n)
}
}
impl<'a> ReadBorrowed<'a> for &'a [u8] {
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> {
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
*self = &self[n as usize..];
Ok(data)
@@ -474,6 +487,11 @@ pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
buf.write_u32(len);
}
/// Write `slice` length-prefixed: first its length via `write_len`, then the
/// bytes themselves.
pub(crate) fn write_vec<W: Write>(buf: &mut W, slice: &[u8]) {
    let byte_count = slice.len();
    write_len(buf, byte_count);
    buf.write_slice(slice);
}
pub fn serialize_value<W: Write, D: Dumpable>(
buf: &mut W,
constant: DumpableValue<'_, D>,
@@ -501,13 +519,11 @@ pub fn serialize_value<W: Write, D: Dumpable>(
}
DumpableValue::Str(s) => {
buf.write_u8(Type::Unicode as u8);
write_len(buf, s.len());
buf.write_slice(s.as_bytes());
write_vec(buf, s.as_bytes());
}
DumpableValue::Bytes(b) => {
buf.write_u8(Type::Bytes as u8);
write_len(buf, b.len());
buf.write_slice(b);
write_vec(buf, b);
}
DumpableValue::Code(c) => {
buf.write_u8(Type::Code as u8);
@@ -580,14 +596,12 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
buf.write_u32(code.arg_count);
buf.write_u32(code.kwonlyarg_count);
write_len(buf, code.source_path.as_ref().len());
buf.write_slice(code.source_path.as_ref().as_bytes());
write_vec(buf, code.source_path.as_ref().as_bytes());
buf.write_u32(code.first_line_number);
buf.write_u32(code.max_stackdepth);
write_len(buf, code.obj_name.as_ref().len());
buf.write_slice(code.obj_name.as_ref().as_bytes());
write_vec(buf, code.obj_name.as_ref().as_bytes());
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
write_len(buf, cell2arg.len());
@@ -603,8 +617,7 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
let mut write_names = |names: &[C::Name]| {
write_len(buf, names.len());
for name in names {
write_len(buf, name.as_ref().len());
buf.write_slice(name.as_ref().as_bytes());
write_vec(buf, name.as_ref().as_bytes());
}
};

View File

@@ -17,7 +17,7 @@ use crate::{extract_spans, Diagnostic};
use once_cell::sync::Lazy;
use proc_macro2::{Span, TokenStream};
use quote::quote;
use rustpython_compiler_core::{CodeObject, FrozenModule, Mode};
use rustpython_compiler_core::{frozen_lib, CodeObject, Mode};
use std::{
collections::HashMap,
env, fs,
@@ -44,6 +44,11 @@ enum CompilationSourceKind {
Dir(PathBuf),
}
/// A module compiled by the derive macro, before it is encoded for freezing.
struct CompiledModule {
/// The compiled code object of the module.
code: CodeObject,
/// Whether the module is a package.
package: bool,
}
struct CompilationSource {
kind: CompilationSourceKind,
span: (Span, Span),
@@ -80,7 +85,7 @@ impl CompilationSource {
mode: Mode,
module_name: String,
compiler: &dyn Compiler,
) -> Result<HashMap<String, FrozenModule>, Diagnostic> {
) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
match &self.kind {
CompilationSourceKind::Dir(rel_path) => self.compile_dir(
&CARGO_MANIFEST_DIR.join(rel_path),
@@ -89,7 +94,7 @@ impl CompilationSource {
compiler,
),
_ => Ok(hashmap! {
module_name.clone() => FrozenModule {
module_name.clone() => CompiledModule {
code: self.compile_single(mode, module_name, compiler)?,
package: false,
},
@@ -131,7 +136,7 @@ impl CompilationSource {
parent: String,
mode: Mode,
compiler: &dyn Compiler,
) -> Result<HashMap<String, FrozenModule>, Diagnostic> {
) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
let mut code_map = HashMap::new();
let paths = fs::read_dir(path)
.or_else(|e| {
@@ -217,7 +222,7 @@ impl CompilationSource {
code_map.insert(
module_name,
FrozenModule {
CompiledModule {
code,
package: is_init,
},
@@ -369,12 +374,11 @@ pub fn impl_py_compile(
.source
.compile_single(args.mode, args.module_name, compiler)?;
let bytes = code.to_bytes();
let bytes = LitByteStr::new(&bytes, Span::call_site());
let frozen = frozen_lib::FrozenCodeObject::encode(&code);
let bytes = LitByteStr::new(&frozen.bytes, Span::call_site());
let output = quote! {
#crate_name::CodeObject::from_bytes(#bytes)
.expect("Deserializing CodeObject failed")
#crate_name::frozen_lib::FrozenCodeObject { bytes: &#bytes[..] }
};
Ok(output)
@@ -390,12 +394,17 @@ pub fn impl_py_freeze(
let crate_name = args.crate_name;
let code_map = args.source.compile(args.mode, args.module_name, compiler)?;
let data =
rustpython_compiler_core::frozen_lib::encode_lib(code_map.iter().map(|(k, v)| (&**k, v)));
let bytes = LitByteStr::new(&data, Span::call_site());
let data = frozen_lib::FrozenLib::encode(code_map.iter().map(|(k, v)| {
let v = frozen_lib::FrozenModule {
code: frozen_lib::FrozenCodeObject::encode(&v.code),
package: v.package,
};
(&**k, v)
}));
let bytes = LitByteStr::new(&data.bytes, Span::call_site());
let output = quote! {
#crate_name::frozen_lib::decode_lib(#bytes)
#crate_name::frozen_lib::FrozenLib::from_ref(#bytes)
};
Ok(output)

View File

@@ -165,6 +165,7 @@ macro_rules! jit_function {
crate_name = "rustpython_compiler_core",
source = $($t)*
);
let code = code.decode(rustpython_compiler_core::BasicBag);
let mut machine = $crate::common::StackMachine::new();
machine.run(code);
machine.get_function(stringify!($func_name)).compile()

View File

@@ -10,6 +10,5 @@ pub const LIB_PATH: &str = match option_env!("win_lib_path") {
};
#[cfg(feature = "freeze-stdlib")]
pub fn frozen_stdlib() -> impl Iterator<Item = (String, rustpython_compiler_core::FrozenModule)> {
rustpython_derive::py_freeze!(dir = "./Lib", crate_name = "rustpython_compiler_core")
}
/// The frozen library that `py_freeze!` generates from the `./Lib` directory.
pub const FROZEN_STDLIB: &rustpython_compiler_core::frozen_lib::FrozenLib =
rustpython_derive::py_freeze!(dir = "./Lib", crate_name = "rustpython_compiler_core");

View File

@@ -41,7 +41,7 @@ pub fn init_stdlib(vm: &mut VirtualMachine) {
// if we're on freeze-stdlib, the core stdlib modules will be included anyway
#[cfg(feature = "freeze-stdlib")]
vm.add_frozen(rustpython_pylib::frozen_stdlib());
vm.add_frozen(rustpython_pylib::FROZEN_STDLIB);
#[cfg(not(feature = "freeze-stdlib"))]
{

View File

@@ -5,7 +5,7 @@
use super::{PyStrRef, PyTupleRef, PyType, PyTypeRef};
use crate::{
builtins::PyStrInterned,
bytecode::{self, BorrowedConstant, CodeFlags, Constant, ConstantBag},
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, Constant, ConstantBag},
class::{PyClassImpl, StaticType},
convert::ToPyObject,
function::{FuncArgs, OptionalArg},
@@ -97,8 +97,21 @@ impl Constant for Literal {
}
}
/// A borrowed [`Context`] can be used directly as a bag: it is wrapped in a
/// [`PyObjBag`].
impl<'a> AsBag for &'a Context {
    type Bag = PyObjBag<'a>;

    fn as_bag(self) -> Self::Bag {
        let ctx: &'a Context = self;
        PyObjBag(ctx)
    }
}
/// A borrowed [`VirtualMachine`] acts as a bag through its `ctx` field, which
/// is wrapped in a [`PyObjBag`].
impl<'a> AsBag for &'a VirtualMachine {
    type Bag = PyObjBag<'a>;

    fn as_bag(self) -> Self::Bag {
        let ctx = &self.ctx;
        PyObjBag(ctx)
    }
}
#[derive(Clone, Copy)]
pub(crate) struct PyObjBag<'a>(pub &'a Context);
pub struct PyObjBag<'a>(pub &'a Context);
impl ConstantBag for PyObjBag<'_> {
type Constant = Literal;
@@ -166,6 +179,12 @@ impl IntoCodeObject for bytecode::CodeObject {
}
}
/// A frozen code object becomes a VM code object by decoding it with the
/// given context as the constant bag.
impl<B: AsRef<[u8]>> IntoCodeObject for bytecode::frozen_lib::FrozenCodeObject<B> {
    fn into_code_object(self, ctx: &Context) -> CodeObject {
        let frozen = &self;
        frozen.decode(ctx)
    }
}
#[pyclass(module = false, name = "code")]
pub struct PyCode {
pub code: CodeObject,

View File

@@ -1,13 +1,10 @@
use crate::bytecode::FrozenModule;
use crate::bytecode::frozen_lib::FrozenModule;
pub fn core_frozen_inits() -> impl Iterator<Item = (String, FrozenModule)> {
pub fn core_frozen_inits() -> impl Iterator<Item = (&'static str, FrozenModule)> {
let iter = std::iter::empty();
macro_rules! ext_modules {
($iter:ident, ($modules:expr)) => {
let $iter = $iter.chain($modules);
};
($iter:ident, $($t:tt)*) => {
ext_modules!($iter, (py_freeze!($($t)*)))
let $iter = $iter.chain(py_freeze!($($t)*));
};
}
@@ -23,10 +20,8 @@ pub fn core_frozen_inits() -> impl Iterator<Item = (String, FrozenModule)> {
// Includes _importlib_bootstrap and _importlib_bootstrap_external
ext_modules!(
iter,
(rustpython_derive::py_freeze!(
dir = "./Lib/python_builtins",
crate_name = "rustpython_compiler_core"
))
dir = "./Lib/python_builtins",
crate_name = "rustpython_compiler_core"
);
// core stdlib Python modules that the vm calls into, but are still used in Python
@@ -34,10 +29,8 @@ pub fn core_frozen_inits() -> impl Iterator<Item = (String, FrozenModule)> {
#[cfg(not(feature = "freeze-stdlib"))]
ext_modules!(
iter,
(rustpython_derive::py_freeze!(
dir = "./Lib/core_modules",
crate_name = "rustpython_compiler_core"
))
dir = "./Lib/core_modules",
crate_name = "rustpython_compiler_core"
);
iter

View File

@@ -77,7 +77,7 @@ pub fn make_frozen(vm: &VirtualMachine, name: &str) -> PyResult<PyRef<PyCode>> {
vm.state.frozen.get(name).ok_or_else(|| {
vm.new_import_error(format!("No such frozen object named {name}"), name)
})?;
Ok(vm.ctx.new_code(frozen.code.clone()))
Ok(vm.ctx.new_code(frozen.code))
}
pub fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult {

View File

@@ -1,4 +1,5 @@
use crate::{builtins::PyBaseExceptionRef, bytecode::FrozenModule, PyObjectRef, VirtualMachine};
use crate::bytecode::frozen_lib::FrozenModule;
use crate::{builtins::PyBaseExceptionRef, PyObjectRef, VirtualMachine};
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
let module = _imp::make_module(vm);
@@ -73,8 +74,12 @@ impl FrozenError {
}
// find_frozen in frozen.c
fn find_frozen<'a>(name: &str, vm: &'a VirtualMachine) -> Result<&'a FrozenModule, FrozenError> {
vm.state.frozen.get(name).ok_or(FrozenError::NotFound)
/// Look up `name` in the VM's frozen-module table and return a copy of the
/// entry, or `FrozenError::NotFound` when there is none.
fn find_frozen(name: &str, vm: &VirtualMachine) -> Result<FrozenModule, FrozenError> {
    match vm.state.frozen.get(name) {
        Some(module) => Ok(*module),
        None => Err(FrozenError::NotFound),
    }
}
#[pymodule]
@@ -139,11 +144,9 @@ mod _imp {
#[pyfunction]
fn is_frozen_package(name: PyStrRef, vm: &VirtualMachine) -> PyResult<bool> {
vm.state
.frozen
.get(name.as_str())
super::find_frozen(name.as_str(), vm)
.map(|frozen| frozen.package)
.ok_or_else(|| vm.new_import_error(format!("No such frozen object named {name}"), name))
.map_err(|e| e.to_pyexception(name.as_str(), vm))
}
#[pyfunction]

View File

@@ -21,7 +21,7 @@ use crate::{
tuple::{PyTuple, PyTupleTyped},
PyBaseExceptionRef, PyDictRef, PyInt, PyList, PyModule, PyStrInterned, PyStrRef, PyTypeRef,
},
bytecode,
bytecode::frozen_lib::FrozenModule,
codecs::CodecsRegistry,
common::{hash::HashSecret, lock::PyMutex, rc::PyRc},
convert::ToPyObject,
@@ -88,7 +88,7 @@ struct ExceptionStack {
pub struct PyGlobalState {
pub settings: Settings,
pub module_inits: stdlib::StdlibMap,
pub frozen: HashMap<String, bytecode::FrozenModule, ahash::RandomState>,
pub frozen: HashMap<&'static str, FrozenModule, ahash::RandomState>,
pub stacksize: AtomicCell<usize>,
pub thread_count: AtomicCell<usize>,
pub hash_secret: HashSecret,
@@ -330,7 +330,7 @@ impl VirtualMachine {
/// Can only be used in the initialization closure passed to [`Interpreter::with_init`]
pub fn add_frozen<I>(&mut self, frozen: I)
where
I: IntoIterator<Item = (String, bytecode::FrozenModule)>,
I: IntoIterator<Item = (&'static str, FrozenModule)>,
{
self.state_mut().frozen.extend(frozen);
}

View File

@@ -46,7 +46,7 @@ impl StoredVirtualMachine {
vm.add_native_modules(rustpython_stdlib::get_module_inits());
#[cfg(feature = "freeze-stdlib")]
vm.add_frozen(rustpython_pylib::frozen_stdlib());
vm.add_frozen(rustpython_pylib::FROZEN_STDLIB);
vm.wasm_id = Some(id);