From 43860902e685288e8fade14c460a5b2671551456 Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Thu, 10 Dec 2020 12:36:29 -0600 Subject: [PATCH 01/10] Import ConstantData in compile.rs --- compiler/src/compile.rs | 94 ++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/compiler/src/compile.rs b/compiler/src/compile.rs index da500a604..7204ff3fc 100644 --- a/compiler/src/compile.rs +++ b/compiler/src/compile.rs @@ -12,7 +12,7 @@ use indexmap::IndexSet; use itertools::Itertools; use num_complex::Complex64; use rustpython_ast as ast; -use rustpython_bytecode::bytecode::{self, CallType, CodeObject, Instruction, Label}; +use rustpython_bytecode::bytecode::{self, CallType, CodeObject, ConstantData, Instruction, Label}; type CompileResult = Result; @@ -20,7 +20,7 @@ struct CodeInfo { code: CodeObject, instructions: Vec, locations: Vec, - constants: Vec, + constants: Vec, name_cache: IndexSet, varname_cache: IndexSet, cellvar_cache: IndexSet, @@ -343,7 +343,7 @@ impl Compiler { let (statements, doc) = get_doc(&program.statements); if let Some(value) = doc { - self.emit_constant(bytecode::ConstantData::Str { value }); + self.emit_constant(ConstantData::Str { value }); let doc = self.name("__doc__"); self.emit(Instruction::StoreGlobal(doc)) } @@ -357,7 +357,7 @@ impl Compiler { assert_eq!(self.code_stack.len(), size_before); // Emit None at end: - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::ReturnValue); Ok(()) } @@ -391,7 +391,7 @@ impl Compiler { } if !emitted_return { - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::ReturnValue); } @@ -636,7 +636,7 @@ impl Compiler { if is_async { self.emit(Instruction::BeforeAsyncWith); self.emit(Instruction::GetAwaitable); - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::YieldFrom); self.emit(Instruction::SetupAsyncWith { end: end_label }); } else { @@ -667,7 +667,7 @@ impl Compiler { if is_async { self.emit(Instruction::GetAwaitable); - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::YieldFrom); } @@ -779,7 +779,7 @@ impl Compiler { self.compile_expression(v)?; } None => { - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); } } @@ -862,7 +862,7 @@ impl Compiler { let mut num_kw_only_defaults = 0; for (kw, default) in args.kwonlyargs.iter().zip(&args.kw_defaults) { if let Some(default) = default { - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: kw.arg.clone(), }); self.compile_expression(default)?; @@ -1086,7 +1086,7 @@ impl Compiler { // the last instruction is a ReturnValue already, we don't need to emit it } _ => { - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::ReturnValue); } } @@ -1101,7 +1101,7 @@ impl Compiler { // Return annotation: if let Some(annotation) = returns { // key: - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: "return".to_owned(), }); // value: @@ -1111,7 +1111,7 @@ impl Compiler { let mut visit_arg_annotation = |arg: &ast::Parameter| -> CompileResult<()> { if let Some(annotation) = &arg.annotation { - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: arg.arg.to_owned(), }); self.compile_expression(&annotation)?; @@ -1147,10 +1147,10 @@ impl Compiler { self.build_closure(&code); - self.emit_constant(bytecode::ConstantData::Code { + self.emit_constant(ConstantData::Code { code: Box::new(code), }); - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: qualified_name, }); @@ -1294,7 +1294,7 @@ impl Compiler { self.emit(Instruction::LoadGlobal(dunder_name)); let dunder_module = self.name("__module__"); self.emit(Instruction::StoreLocal(dunder_module)); - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: qualified_name.clone(), }); let qualname = self.name("__qualname__"); @@ -1322,7 +1322,7 @@ impl Compiler { let classcell = self.name("__classcell__"); self.emit(Instruction::StoreLocal(classcell)); } else { - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); } self.emit(Instruction::ReturnValue); @@ -1334,17 +1334,17 @@ impl Compiler { self.build_closure(&code); - self.emit_constant(bytecode::ConstantData::Code { + self.emit_constant(ConstantData::Code { code: Box::new(code), }); - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: name.to_owned(), }); // Turn code object into function object: self.emit(Instruction::MakeFunction); - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: qualified_name, }); @@ -1356,7 +1356,7 @@ impl Compiler { let mut kwarg_names = vec![]; for keyword in keywords { if let Some(name) = &keyword.name { - kwarg_names.push(bytecode::ConstantData::Str { + kwarg_names.push(ConstantData::Str { value: name.to_owned(), }); } else { @@ -1366,7 +1366,7 @@ impl Compiler { self.compile_expression(&keyword.value)?; } - self.emit_constant(bytecode::ConstantData::Tuple { + self.emit_constant(ConstantData::Tuple { elements: kwarg_names, }); self.emit(Instruction::CallFunction { @@ -1390,8 +1390,8 @@ impl Compiler { // Doc string value: self.emit_constant(match doc_str { - Some(doc) => bytecode::ConstantData::Str { value: doc }, - None => bytecode::ConstantData::None, // set docstring None if not declared + Some(doc) => ConstantData::Str { value: doc }, + None => ConstantData::None, // set docstring None if not declared }); } @@ -1461,7 +1461,7 @@ impl Compiler { handler: check_asynciter_label, }); self.emit(Instruction::GetANext); - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::YieldFrom); self.compile_store(target)?; self.emit(Instruction::PopBlock); @@ -1610,7 +1610,7 @@ impl Compiler { // Store as dict entry in __annotations__ dict: let annotations = self.name("__annotations__"); self.emit(Instruction::LoadNameAny(annotations)); - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: name.to_owned(), }); self.emit(Instruction::StoreSubscript); @@ -1909,11 +1909,11 @@ impl Compiler { } Number { value } => { let const_value = match value { - ast::Number::Integer { value } => bytecode::ConstantData::Integer { + ast::Number::Integer { value } => ConstantData::Integer { value: value.clone(), }, - ast::Number::Float { value } => bytecode::ConstantData::Float { value: *value }, - ast::Number::Complex { real, imag } => bytecode::ConstantData::Complex { + ast::Number::Float { value } => ConstantData::Float { value: *value }, + ast::Number::Complex { real, imag } => ConstantData::Complex { value: Complex64::new(*real, *imag), }, }; @@ -1960,7 +1960,7 @@ impl Compiler { self.mark_generator(); match value { Some(expression) => self.compile_expression(expression)?, - Option::None => self.emit_constant(bytecode::ConstantData::None), + Option::None => self.emit_constant(ConstantData::None), }; self.emit(Instruction::YieldValue); } @@ -1970,7 +1970,7 @@ impl Compiler { } self.compile_expression(value)?; self.emit(Instruction::GetAwaitable); - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::YieldFrom); } YieldFrom { value } => { @@ -1986,26 +1986,26 @@ impl Compiler { self.mark_generator(); self.compile_expression(value)?; self.emit(Instruction::GetIter); - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); self.emit(Instruction::YieldFrom); } True => { - self.emit_constant(bytecode::ConstantData::Boolean { value: true }); + self.emit_constant(ConstantData::Boolean { value: true }); } False => { - self.emit_constant(bytecode::ConstantData::Boolean { value: false }); + self.emit_constant(ConstantData::Boolean { value: false }); } ast::ExpressionType::None => { - self.emit_constant(bytecode::ConstantData::None); + self.emit_constant(ConstantData::None); } Ellipsis => { - self.emit_constant(bytecode::ConstantData::Ellipsis); + self.emit_constant(ConstantData::Ellipsis); } ast::ExpressionType::String { value } => { self.compile_string(value)?; } Bytes { value } => { - self.emit_constant(bytecode::ConstantData::Bytes { + self.emit_constant(ConstantData::Bytes { value: value.clone(), }); } @@ -2026,10 +2026,10 @@ impl Compiler { self.emit(Instruction::ReturnValue); let code = self.pop_code_object(); self.build_closure(&code); - self.emit_constant(bytecode::ConstantData::Code { + self.emit_constant(ConstantData::Code { code: Box::new(code), }); - self.emit_constant(bytecode::ConstantData::Str { value: name }); + self.emit_constant(ConstantData::Str { value: name }); // Turn code object into function object: self.emit(Instruction::MakeFunction); @@ -2076,7 +2076,7 @@ impl Compiler { let mut subsize = 0; for keyword in subkeywords { if let Some(name) = &keyword.name { - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: name.to_owned(), }); self.compile_expression(&keyword.value)?; @@ -2138,7 +2138,7 @@ impl Compiler { let mut kwarg_names = vec![]; for keyword in keywords { if let Some(name) = &keyword.name { - kwarg_names.push(bytecode::ConstantData::Str { + kwarg_names.push(ConstantData::Str { value: name.to_owned(), }); } else { @@ -2148,7 +2148,7 @@ impl Compiler { self.compile_expression(&keyword.value)?; } - self.emit_constant(bytecode::ConstantData::Tuple { + self.emit_constant(ConstantData::Tuple { elements: kwarg_names, }); self.emit(Instruction::CallFunction { @@ -2349,12 +2349,12 @@ impl Compiler { self.build_closure(&code); // List comprehension code: - self.emit_constant(bytecode::ConstantData::Code { + self.emit_constant(ConstantData::Code { code: Box::new(code), }); // List comprehension function name: - self.emit_constant(bytecode::ConstantData::Str { value: name }); + self.emit_constant(ConstantData::Str { value: name }); // Turn code object into function object: self.emit(Instruction::MakeFunction); @@ -2374,7 +2374,7 @@ impl Compiler { fn compile_string(&mut self, string: &ast::StringGroup) -> CompileResult<()> { if let Some(value) = try_get_constant_string(string) { - self.emit_constant(bytecode::ConstantData::Str { value }); + self.emit_constant(ConstantData::Str { value }); } else { match string { ast::StringGroup::Joined { values } => { @@ -2384,7 +2384,7 @@ impl Compiler { self.emit(Instruction::BuildString { size: values.len() }) } ast::StringGroup::Constant { value } => { - self.emit_constant(bytecode::ConstantData::Str { + self.emit_constant(ConstantData::Str { value: value.to_owned(), }); } @@ -2395,7 +2395,7 @@ impl Compiler { } => { match spec { Some(spec) => self.compile_string(spec)?, - None => self.emit_constant(bytecode::ConstantData::Str { + None => self.emit_constant(ConstantData::Str { value: String::new(), }), }; @@ -2440,7 +2440,7 @@ impl Compiler { info.locations.push(location); } - fn emit_constant(&mut self, constant: bytecode::ConstantData) { + fn emit_constant(&mut self, constant: ConstantData) { let info = self.current_codeinfo(); let idx = info.constants.len(); info.constants.push(constant); From 3280a1655add8cf0223d80d77cb5cae7868deafc Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Thu, 10 Dec 2020 11:22:21 -0600 Subject: [PATCH 02/10] Optimize the size of Instruction --- Cargo.lock | 1 + bytecode/src/bytecode.rs | 91 +++++---------- compiler/Cargo.toml | 1 + compiler/src/compile.rs | 163 ++++++++++++-------------- compiler/src/error.rs | 4 + src/lib.rs | 4 +- vm/src/builtins/module.rs | 2 +- vm/src/builtins/object.rs | 4 +- vm/src/builtins/pystr.rs | 2 + vm/src/builtins/tuple.rs | 15 +++ vm/src/frame.rs | 237 +++++++++++++++++++------------------- vm/src/import.rs | 4 +- vm/src/pyobject.rs | 1 + vm/src/pyobjectrc.rs | 2 +- vm/src/vm.rs | 42 +++++-- 15 files changed, 286 insertions(+), 287 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a2379c202..23237636a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1814,6 +1814,7 @@ dependencies = [ "itertools", "log", "num-complex", + "num-traits", "rustpython-ast", "rustpython-bytecode", "rustpython-parser", diff --git a/bytecode/src/bytecode.rs b/bytecode/src/bytecode.rs index 778455581..78d07df38 100644 --- a/bytecode/src/bytecode.rs +++ b/bytecode/src/bytecode.rs @@ -169,11 +169,10 @@ pub type NameIdx = usize; /// A Single bytecode instruction. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Instruction { - Import { - name_idx: Option, - symbols_idx: Vec, - level: usize, + ImportName { + idx: NameIdx, }, + ImportNameless, ImportStar, ImportFrom { idx: NameIdx, @@ -210,7 +209,9 @@ pub enum Instruction { }, BinaryOperation { op: BinaryOperator, - inplace: bool, + }, + BinaryOperationInplace { + op: BinaryOperator, }, LoadAttr { idx: NameIdx, @@ -248,8 +249,14 @@ pub enum Instruction { target: Label, }, MakeFunction, - CallFunction { - typ: CallType, + CallFunctionPositional { + nargs: usize, + }, + CallFunctionKeyword { + nargs: usize, + }, + CallFunctionEx { + has_kwargs: bool, }, ForIter { target: Label, @@ -259,7 +266,6 @@ pub enum Instruction { YieldFrom, SetupAnnotation, SetupLoop { - start: Label, end: Label, }, @@ -333,8 +339,8 @@ pub enum Instruction { size: usize, }, UnpackEx { - before: usize, - after: usize, + before: u8, + after: u8, }, FormatValue { conversion: Option, @@ -361,13 +367,6 @@ pub enum Instruction { use self::Instruction::*; -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum CallType { - Positional(usize), - Keyword(usize), - Ex(bool), -} - #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum ConstantData { Integer { value: BigInt }, @@ -452,7 +451,7 @@ impl BorrowedConstant<'_, C> { } } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub enum ComparisonOperator { Greater, GreaterOrEqual, @@ -467,7 +466,7 @@ pub enum ComparisonOperator { ExceptionMatch, } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub enum BinaryOperator { Power, Multiply, @@ -484,7 +483,7 @@ pub enum BinaryOperator { Or, } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub enum UnaryOperator { Not, Invert, @@ -598,29 +597,12 @@ impl CodeObject { | SetupFinally { handler: l } | SetupExcept { handler: l } | SetupWith { end: l } - | SetupAsyncWith { end: l } => { + | SetupAsyncWith { end: l } + | SetupLoop { end: l } => { label_targets.insert(*l); } - SetupLoop { start, end } => { - label_targets.insert(*start); - label_targets.insert(*end); - } - #[rustfmt::skip] - Import { .. } | ImportStar | ImportFrom { .. } | LoadFast(_) | LoadNameAny(_) - | LoadGlobal(_) | LoadDeref(_) | LoadClassDeref(_) | StoreFast(_) | StoreLocal(_) - | StoreGlobal(_) | StoreDeref(_) | DeleteFast(_) | DeleteLocal(_) | DeleteGlobal(_) - | DeleteDeref(_) | LoadClosure(_) | Subscript | StoreSubscript | DeleteSubscript - | StoreAttr { .. } | DeleteAttr { .. } | LoadConst { .. } | UnaryOperation { .. } - | BinaryOperation { .. } | LoadAttr { .. } | CompareOperation { .. } | Pop - | Rotate { .. } | Duplicate | GetIter | Continue | Break | MakeFunction - | CallFunction { .. } | ReturnValue | YieldValue | YieldFrom | SetupAnnotation - | EnterFinally | EndFinally | WithCleanupStart | WithCleanupFinish | PopBlock - | Raise { .. } | BuildString { .. } | BuildTuple { .. } | BuildList { .. } - | BuildSet { .. } | BuildMap { .. } | BuildSlice { .. } | ListAppend { .. } - | SetAdd { .. } | MapAdd { .. } | PrintExpr | LoadBuildClass | UnpackSequence { .. } - | UnpackEx { .. } | FormatValue { .. } | PopException | Reverse { .. } - | GetAwaitable | BeforeAsyncWith | GetAIter | GetANext | MapAddRev { .. } => {} + _ => {} } } label_targets @@ -807,22 +789,8 @@ impl Instruction { }; match self { - Import { - name_idx, - symbols_idx, - level, - } => w!( - Import, - format!("{:?}", name_idx.map(|idx| names[idx].as_ref())), - format!( - "({:?})", - symbols_idx - .iter() - .map(|&idx| names[idx].as_ref()) - .format(", ") - ), - level - ), + ImportName { idx } => w!(ImportName, names[*idx].as_ref()), + ImportNameless => w!(ImportNameless), ImportStar => w!(ImportStar), ImportFrom { idx } => w!(ImportFrom, names[*idx].as_ref()), LoadFast(idx) => w!(LoadFast, *idx, varnames[*idx].as_ref()), @@ -860,7 +828,10 @@ impl Instruction { } } UnaryOperation { op } => w!(UnaryOperation, format!("{:?}", op)), - BinaryOperation { op, inplace } => w!(BinaryOperation, format!("{:?}", op), inplace), + BinaryOperation { op } => w!(BinaryOperation, format!("{:?}", op)), + BinaryOperationInplace { op } => { + w!(BinaryOperationInplace, format!("{:?}", op)) + } LoadAttr { idx } => w!(LoadAttr, names[*idx].as_ref()), CompareOperation { op } => w!(CompareOperation, format!("{:?}", op)), Pop => w!(Pop), @@ -875,13 +846,15 @@ impl Instruction { JumpIfTrueOrPop { target } => w!(JumpIfTrueOrPop, target), JumpIfFalseOrPop { target } => w!(JumpIfFalseOrPop, target), MakeFunction => w!(MakeFunction), - CallFunction { typ } => w!(CallFunction, format!("{:?}", typ)), + CallFunctionPositional { nargs } => w!(CallFunctionPositional, nargs), + CallFunctionKeyword { nargs } => w!(CallFunctionKeyword, nargs), + CallFunctionEx { has_kwargs } => w!(CallFunctionEx, has_kwargs), ForIter { target } => w!(ForIter, target), ReturnValue => w!(ReturnValue), YieldValue => w!(YieldValue), YieldFrom => w!(YieldFrom), SetupAnnotation => w!(SetupAnnotation), - SetupLoop { start, end } => w!(SetupLoop, start, end), + SetupLoop { end } => w!(SetupLoop, end), SetupExcept { handler } => w!(SetupExcept, handler), SetupFinally { handler } => w!(SetupFinally, handler), EnterFinally => w!(EnterFinally), diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index dc44869aa..ba37125e8 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -13,6 +13,7 @@ itertools = "0.9" rustpython-bytecode = { path = "../bytecode", version = "0.1.1" } rustpython-ast = { path = "../ast" } num-complex = { version = "0.3", features = ["serde"] } +num-traits = "0.2" log = "0.4" arrayvec = "0.5" diff --git a/compiler/src/compile.rs b/compiler/src/compile.rs index 7204ff3fc..e953ec83a 100644 --- a/compiler/src/compile.rs +++ b/compiler/src/compile.rs @@ -11,8 +11,9 @@ use crate::symboltable::{make_symbol_table, statements_to_symbol_table, SymbolSc use indexmap::IndexSet; use itertools::Itertools; use num_complex::Complex64; +use num_traits::ToPrimitive; use rustpython_ast as ast; -use rustpython_bytecode::bytecode::{self, CallType, CodeObject, ConstantData, Instruction, Label}; +use rustpython_bytecode::bytecode::{self, CodeObject, ConstantData, Instruction, Label}; type CompileResult = Result; @@ -86,29 +87,12 @@ impl CodeInfo { | SetupFinally { handler: l } | SetupExcept { handler: l } | SetupWith { end: l } - | SetupAsyncWith { end: l } => { + | SetupAsyncWith { end: l } + | SetupLoop { end: l } => { *l = label_map[l.0].expect("label never set"); } - SetupLoop { start, end } => { - *start = label_map[start.0].expect("label never set"); - *end = label_map[end.0].expect("label never set"); - } - #[rustfmt::skip] - Import { .. } | ImportStar | ImportFrom { .. } | LoadFast(_) | LoadNameAny(_) - | LoadGlobal(_) | LoadDeref(_) | LoadClassDeref(_) | StoreFast(_) | StoreLocal(_) - | StoreGlobal(_) | StoreDeref(_) | DeleteFast(_) | DeleteLocal(_) | DeleteGlobal(_) - | DeleteDeref(_) | LoadClosure(_) | Subscript | StoreSubscript | DeleteSubscript - | StoreAttr { .. } | DeleteAttr { .. } | LoadConst { .. } | UnaryOperation { .. } - | BinaryOperation { .. } | LoadAttr { .. } | CompareOperation { .. } | Pop - | Rotate { .. } | Duplicate | GetIter | Continue | Break | MakeFunction - | CallFunction { .. } | ReturnValue | YieldValue | YieldFrom | SetupAnnotation - | EnterFinally | EndFinally | WithCleanupStart | WithCleanupFinish | PopBlock - | Raise { .. } | BuildString { .. } | BuildTuple { .. } | BuildList { .. } - | BuildSet { .. } | BuildMap { .. } | BuildSlice { .. } | ListAppend { .. } - | SetAdd { .. } | MapAdd { .. } | PrintExpr | LoadBuildClass | UnpackSequence { .. } - | UnpackEx { .. } | FormatValue { .. } | PopException | Reverse { .. } - | GetAwaitable | BeforeAsyncWith | GetAIter | GetANext | MapAddRev { .. } => {} + _ => {} } } code @@ -519,12 +503,12 @@ impl Compiler { Import { names } => { // import a, b, c as d for name in names { - let name_idx = Some(self.name(&name.symbol)); - self.emit(Instruction::Import { - name_idx, - symbols_idx: vec![], - level: 0, + self.emit_constant(ConstantData::Integer { + value: num_traits::Zero::zero(), }); + self.emit_constant(ConstantData::None); + let idx = self.name(&name.symbol); + self.emit(Instruction::ImportName { idx }); if let Some(alias) = &name.alias { for part in name.symbol.split('.').skip(1) { let idx = self.name(part); @@ -543,32 +527,43 @@ impl Compiler { } => { let import_star = names.iter().any(|n| n.symbol == "*"); - let module_idx = module.as_ref().map(|s| self.name(s)); - - if import_star { + let from_list = if import_star { if self.ctx.in_func() { return Err(self .error_loc(CompileErrorType::FunctionImportStar, statement.location)); } - let star = self.name("*"); + vec![ConstantData::Str { + value: "*".to_owned(), + }] + } else { + names + .iter() + .map(|n| ConstantData::Str { + value: n.symbol.to_owned(), + }) + .collect() + }; + + let module_idx = module.as_ref().map(|s| self.name(s)); + + // from .... import (*fromlist) + self.emit_constant(ConstantData::Integer { + value: (*level).into(), + }); + self.emit_constant(ConstantData::Tuple { + elements: from_list, + }); + if let Some(idx) = module_idx { + self.emit(Instruction::ImportName { idx }); + } else { + self.emit(Instruction::ImportNameless); + } + + if import_star { // from .... import * - self.emit(Instruction::Import { - name_idx: module_idx, - symbols_idx: vec![star], - level: *level, - }); self.emit(Instruction::ImportStar); } else { // from mod import a, b as c - // First, determine the fromlist (for import lib): - let from_list = names.iter().map(|n| self.name(&n.symbol)).collect(); - - // Load module once: - self.emit(Instruction::Import { - name_idx: module_idx, - symbols_idx: from_list, - level: *level, - }); for name in names { let idx = self.name(&name.symbol); @@ -731,14 +726,10 @@ impl Compiler { match msg { Some(e) => { self.compile_expression(e)?; - self.emit(Instruction::CallFunction { - typ: CallType::Positional(1), - }); + self.emit(Instruction::CallFunctionPositional { nargs: 1 }); } None => { - self.emit(Instruction::CallFunction { - typ: CallType::Positional(0), - }); + self.emit(Instruction::CallFunctionPositional { nargs: 0 }); } } self.emit(Instruction::Raise { argc: 1 }); @@ -927,9 +918,7 @@ impl Compiler { fn apply_decorators(&mut self, decorator_list: &[ast::Expression]) { // Apply decorators: for _ in decorator_list { - self.emit(Instruction::CallFunction { - typ: CallType::Positional(1), - }); + self.emit(Instruction::CallFunctionPositional { nargs: 1 }); } } @@ -1369,12 +1358,12 @@ impl Compiler { self.emit_constant(ConstantData::Tuple { elements: kwarg_names, }); - self.emit(Instruction::CallFunction { - typ: CallType::Keyword(2 + keywords.len() + bases.len()), + self.emit(Instruction::CallFunctionKeyword { + nargs: 2 + keywords.len() + bases.len(), }); } else { - self.emit(Instruction::CallFunction { - typ: CallType::Positional(2 + bases.len()), + self.emit(Instruction::CallFunctionPositional { + nargs: 2 + bases.len(), }); } @@ -1404,11 +1393,8 @@ impl Compiler { let start_label = self.new_label(); let else_label = self.new_label(); let end_label = self.new_label(); - self.emit(Instruction::SetupLoop { - start: start_label, - end: end_label, - }); + self.emit(Instruction::SetupLoop { end: end_label }); self.set_label(start_label); self.compile_jump_if(test, false, else_label)?; @@ -1442,11 +1428,6 @@ impl Compiler { let else_label = self.new_label(); let end_label = self.new_label(); - self.emit(Instruction::SetupLoop { - start: start_label, - end: end_label, - }); - // The thing iterated: self.compile_expression(iter)?; @@ -1456,6 +1437,7 @@ impl Compiler { self.emit(Instruction::GetAIter); + self.emit(Instruction::SetupLoop { end: end_label }); self.set_label(start_label); self.emit(Instruction::SetupExcept { handler: check_asynciter_label, @@ -1486,6 +1468,7 @@ impl Compiler { // Retrieve Iterator self.emit(Instruction::GetIter); + self.emit(Instruction::SetupLoop { end: end_label }); self.set_label(start_label); self.emit(Instruction::ForIter { target: else_label }); @@ -1647,10 +1630,16 @@ impl Compiler { return Err(self.error(CompileErrorType::MultipleStarArgs)); } else { seen_star = true; - self.emit(Instruction::UnpackEx { - before: i, - after: elements.len() - i - 1, - }); + let before = i; + let after = elements.len() - i - 1; + let (before, after) = (|| Some((before.to_u8()?, after.to_u8()?)))() + .ok_or_else(|| { + self.error_loc( + CompileErrorType::TooManyStarUnpack, + target.location, + ) + })?; + self.emit(Instruction::UnpackEx { before, after }); } } } @@ -1683,7 +1672,7 @@ impl Compiler { } fn compile_op(&mut self, op: &ast::Operator, inplace: bool) { - let i = match op { + let op = match op { ast::Operator::Add => bytecode::BinaryOperator::Add, ast::Operator::Sub => bytecode::BinaryOperator::Subtract, ast::Operator::Mult => bytecode::BinaryOperator::Multiply, @@ -1698,7 +1687,12 @@ impl Compiler { ast::Operator::BitXor => bytecode::BinaryOperator::Xor, ast::Operator::BitAnd => bytecode::BinaryOperator::And, }; - self.emit(Instruction::BinaryOperation { op: i, inplace }); + let ins = if inplace { + Instruction::BinaryOperationInplace { op } + } else { + Instruction::BinaryOperation { op } + }; + self.emit(ins); } /// Implement boolean short circuit evaluation logic. @@ -2124,13 +2118,9 @@ impl Compiler { // Create an optional map with kw-args: if !keywords.is_empty() { self.compile_keywords(keywords)?; - self.emit(Instruction::CallFunction { - typ: CallType::Ex(true), - }); + self.emit(Instruction::CallFunctionEx { has_kwargs: true }); } else { - self.emit(Instruction::CallFunction { - typ: CallType::Ex(false), - }); + self.emit(Instruction::CallFunctionEx { has_kwargs: false }); } } else { // Keyword arguments: @@ -2151,13 +2141,9 @@ impl Compiler { self.emit_constant(ConstantData::Tuple { elements: kwarg_names, }); - self.emit(Instruction::CallFunction { - typ: CallType::Keyword(count), - }); + self.emit(Instruction::CallFunctionKeyword { nargs: count }); } else { - self.emit(Instruction::CallFunction { - typ: CallType::Positional(count), - }); + self.emit(Instruction::CallFunctionPositional { nargs: count }); } } Ok(()) @@ -2270,10 +2256,7 @@ impl Compiler { let start_label = self.new_label(); let end_label = self.new_label(); loop_labels.push((start_label, end_label)); - self.emit(Instruction::SetupLoop { - start: start_label, - end: end_label, - }); + self.emit(Instruction::SetupLoop { end: end_label }); self.set_label(start_label); self.emit(Instruction::ForIter { target: end_label }); @@ -2366,9 +2349,7 @@ impl Compiler { self.emit(Instruction::GetIter); // Call just created function: - self.emit(Instruction::CallFunction { - typ: CallType::Positional(1), - }); + self.emit(Instruction::CallFunctionPositional { nargs: 1 }); Ok(()) } diff --git a/compiler/src/error.rs b/compiler/src/error.rs index b5b1d866c..f1fa58e5c 100644 --- a/compiler/src/error.rs +++ b/compiler/src/error.rs @@ -37,6 +37,7 @@ pub enum CompileErrorType { InvalidFuturePlacement, InvalidFutureFeature(String), FunctionImportStar, + TooManyStarUnpack, } impl fmt::Display for CompileErrorType { @@ -70,6 +71,9 @@ impl fmt::Display for CompileErrorType { CompileErrorType::FunctionImportStar => { write!(f, "import * only allowed at module level") } + CompileErrorType::TooManyStarUnpack => { + write!(f, "too many expressions in star-unpacking assignment") + } } } } diff --git a/src/lib.rs b/src/lib.rs index dbfad98b3..492043ee5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -510,7 +510,7 @@ fn run_rustpython(vm: &VirtualMachine, matches: &ArgMatches) -> PyResult<()> { vm.get_attribute(vm.sys_module.clone(), "modules")? .set_item("__main__", main_module, vm)?; - let site_result = vm.import("site", &[], 0); + let site_result = vm.import("site", None, 0); if site_result.is_err() { warn!( @@ -559,7 +559,7 @@ fn run_command(vm: &VirtualMachine, scope: Scope, source: String) -> PyResult<() fn run_module(vm: &VirtualMachine, module: &str) -> PyResult<()> { debug!("Running module {}", module); - let runpy = vm.import("runpy", &[], 0)?; + let runpy = vm.import("runpy", None, 0)?; let run_module_as_main = vm.get_attribute(runpy, "_run_module_as_main")?; vm.invoke(&run_module_as_main, (module,))?; Ok(()) diff --git a/vm/src/builtins/module.rs b/vm/src/builtins/module.rs index 37d0bb638..76ae4e3a3 100644 --- a/vm/src/builtins/module.rs +++ b/vm/src/builtins/module.rs @@ -82,7 +82,7 @@ impl PyModule { #[pymethod(magic)] fn repr(zelf: PyRef, vm: &VirtualMachine) -> PyResult { - let importlib = vm.import("_frozen_importlib", &[], 0)?; + let importlib = vm.import("_frozen_importlib", None, 0)?; let module_repr = vm.get_attribute(importlib, "_module_repr")?; vm.invoke(&module_repr, (zelf,)) } diff --git a/vm/src/builtins/object.rs b/vm/src/builtins/object.rs index decb916dd..f5c886b88 100644 --- a/vm/src/builtins/object.rs +++ b/vm/src/builtins/object.rs @@ -314,11 +314,11 @@ pub fn init(context: &PyContext) { fn common_reduce(obj: PyObjectRef, proto: usize, vm: &VirtualMachine) -> PyResult { if proto >= 2 { - let reducelib = vm.import("__reducelib", &[], 0)?; + let reducelib = vm.import("__reducelib", None, 0)?; let reduce_2 = vm.get_attribute(reducelib, "reduce_2")?; vm.invoke(&reduce_2, (obj,)) } else { - let copyreg = vm.import("copyreg", &[], 0)?; + let copyreg = vm.import("copyreg", None, 0)?; let reduce_ex = vm.get_attribute(copyreg, "_reduce_ex")?; vm.invoke(&reduce_ex, (obj, proto)) } diff --git a/vm/src/builtins/pystr.rs b/vm/src/builtins/pystr.rs index 22787ac23..a57a45975 100644 --- a/vm/src/builtins/pystr.rs +++ b/vm/src/builtins/pystr.rs @@ -95,12 +95,14 @@ impl fmt::Display for PyStr { } impl TryIntoRef for String { + #[inline] fn try_into_ref(self, vm: &VirtualMachine) -> PyResult> { Ok(PyStr::from(self).into_ref(vm)) } } impl TryIntoRef for &str { + #[inline] fn try_into_ref(self, vm: &VirtualMachine) -> PyResult> { Ok(PyStr::from(self).into_ref(vm)) } diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 74991c69d..c4614881f 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -317,6 +317,7 @@ impl TryFromObject for PyTupleTyped { impl<'a, T: TransmuteFromObject + 'a> BorrowValue<'a> for PyTupleTyped { type Borrowed = &'a [T]; + #[inline] fn borrow_value(&'a self) -> Self::Borrowed { unsafe { &*(self.tuple.borrow_value() as *const [PyObjectRef] as *const [T]) } } @@ -327,3 +328,17 @@ impl fmt::Debug for PyTupleTyped { self.borrow_value().fmt(f) } } + +impl From> for PyTupleRef { + #[inline] + fn from(tup: PyTupleTyped) -> Self { + tup.tuple + } +} + +impl IntoPyObject for PyTupleTyped { + #[inline] + fn into_pyobject(self, _vm: &VirtualMachine) -> PyObjectRef { + self.tuple.into_object() + } +} diff --git a/vm/src/frame.rs b/vm/src/frame.rs index bb749f875..ced95d808 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -327,9 +327,8 @@ impl ExecutingFrame<'_> { // Execute until return or exception: loop { let idx = self.lasti.fetch_add(1, Ordering::Relaxed); - let loc = self.code.locations[idx]; let instr = &self.code.instructions[idx]; - let result = self.execute_instruction(instr, vm); + let result = self.execute_instruction(instr, idx, vm); match result { Ok(None) => continue, Ok(Some(value)) => { @@ -341,6 +340,8 @@ impl ExecutingFrame<'_> { // 2. Add new entry with current execution position (filename, lineno, code_object) to traceback. // 3. Unwind block stack till appropriate handler is found. + let loc = self.code.locations[idx]; + let next = exception.traceback(); let new_traceback = @@ -421,6 +422,7 @@ impl ExecutingFrame<'_> { fn execute_instruction( &mut self, instruction: &bytecode::Instruction, + current_idx: usize, vm: &VirtualMachine, ) -> FrameResult { vm.check_signals()?; @@ -445,11 +447,10 @@ impl ExecutingFrame<'_> { self.push_value(self.code.constants[*idx].0.clone()); Ok(None) } - bytecode::Instruction::Import { - name_idx, - symbols_idx, - level, - } => self.import(vm, *name_idx, symbols_idx, *level), + bytecode::Instruction::ImportName { idx } => { + self.import(vm, Some(self.code.names[*idx].clone())) + } + bytecode::Instruction::ImportNameless => self.import(vm, None), bytecode::Instruction::ImportStar => self.import_star(vm), bytecode::Instruction::ImportFrom { idx } => self.import_from(vm, *idx), bytecode::Instruction::LoadFast(idx) => { @@ -639,8 +640,9 @@ impl ExecutingFrame<'_> { PyDictRef::try_from_object(vm, dict_obj)?.set_item(key, value, vm)?; Ok(None) } - bytecode::Instruction::BinaryOperation { ref op, inplace } => { - self.execute_binop(vm, op, *inplace) + bytecode::Instruction::BinaryOperation { op } => self.execute_binop(vm, *op), + bytecode::Instruction::BinaryOperationInplace { op } => { + self.execute_binop_inplace(vm, *op) } bytecode::Instruction::LoadAttr { idx } => self.load_attr(vm, *idx), bytecode::Instruction::StoreAttr { idx } => self.store_attr(vm, *idx), @@ -668,9 +670,9 @@ impl ExecutingFrame<'_> { } Ok(None) } - bytecode::Instruction::SetupLoop { start, end } => { + bytecode::Instruction::SetupLoop { end } => { self.push_block(BlockType::Loop { - start: *start, + start: bytecode::Label(current_idx + 1), end: *end, }); Ok(None) @@ -813,7 +815,15 @@ impl ExecutingFrame<'_> { } bytecode::Instruction::ForIter { target } => self.execute_for_iter(vm, *target), bytecode::Instruction::MakeFunction => self.execute_make_function(vm), - bytecode::Instruction::CallFunction { typ } => self.execute_call_function(vm, typ), + bytecode::Instruction::CallFunctionPositional { nargs } => { + self.execute_call_function_positional(vm, *nargs) + } + bytecode::Instruction::CallFunctionKeyword { nargs } => { + self.execute_call_function_keyword(vm, *nargs) + } + bytecode::Instruction::CallFunctionEx { has_kwargs } => { + self.execute_call_function_ex(vm, *has_kwargs) + } bytecode::Instruction::Jump { target } => { self.jump(*target); Ok(None) @@ -971,22 +981,12 @@ impl ExecutingFrame<'_> { } #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn import( - &mut self, - vm: &VirtualMachine, - module: Option, - symbols: &[bytecode::NameIdx], - level: usize, - ) -> FrameResult { - let module = match module { - Some(idx) => self.code.names[idx].borrow_value(), - None => "", - }; - let from_list = symbols - .iter() - .map(|&idx| self.code.names[idx].clone()) - .collect::>(); - let module = vm.import(&module, &from_list, level)?; + fn import(&mut self, vm: &VirtualMachine, module: Option) -> FrameResult { + let module = module.unwrap_or_else(|| PyStr::from("").into_ref(vm)); + let from_list = >>::try_from_object(vm, self.pop_value())?; + let level = usize::try_from_object(vm, self.pop_value())?; + + let module = vm.import(module, from_list, level)?; self.push_value(module); Ok(None) @@ -1206,59 +1206,61 @@ impl ExecutingFrame<'_> { Ok(None) } - fn execute_call_function( + fn execute_call_function_positional( &mut self, vm: &VirtualMachine, - typ: &bytecode::CallType, + nargs: usize, ) -> FrameResult { - let args = match typ { - bytecode::CallType::Positional(count) => { - let args: Vec = self.pop_multiple(*count); - FuncArgs { - args, - kwargs: IndexMap::new(), - } - } - bytecode::CallType::Keyword(count) => { - let kwarg_names = self.pop_value(); - let args: Vec = self.pop_multiple(*count); - - let kwarg_names = vm - .extract_elements(&kwarg_names)? - .iter() - .map(|pyobj| pystr::clone_value(pyobj)) - .collect(); - FuncArgs::with_kwargs_names(args, kwarg_names) - } - bytecode::CallType::Ex(has_kwargs) => { - let kwargs = if *has_kwargs { - let kw_dict: PyDictRef = self.pop_value().downcast().map_err(|_| { - // TODO: check collections.abc.Mapping - vm.new_type_error("Kwargs must be a dict.".to_owned()) - })?; - let mut kwargs = IndexMap::new(); - for (key, value) in kw_dict.into_iter() { - let key = key.payload_if_subclass::(vm).ok_or_else(|| { - vm.new_type_error("keywords must be strings".to_owned()) - })?; - kwargs.insert(key.borrow_value().to_owned(), value); - } - kwargs - } else { - IndexMap::new() - }; - let args = self.pop_value(); - let args = vm.extract_elements(&args)?; - FuncArgs { args, kwargs } - } + let args: Vec = self.pop_multiple(nargs); + let args = FuncArgs { + args, + kwargs: IndexMap::new(), }; - // Call function: - // eprintln!( - // "calling from {} {:?}", - // self.code.obj_name, - // self.code.locations[self.lasti.load(Ordering::Relaxed)] - // ); + let func_ref = self.pop_value(); + let value = vm.invoke(&func_ref, args)?; + self.push_value(value); + Ok(None) + } + + fn execute_call_function_keyword(&mut self, vm: &VirtualMachine, nargs: usize) -> FrameResult { + let kwarg_names = self.pop_value(); + let args: Vec = self.pop_multiple(nargs); + + let kwarg_names = vm + .extract_elements(&kwarg_names)? + .iter() + .map(|pyobj| pystr::clone_value(pyobj)) + .collect(); + let args = FuncArgs::with_kwargs_names(args, kwarg_names); + + let func_ref = self.pop_value(); + let value = vm.invoke(&func_ref, args)?; + self.push_value(value); + Ok(None) + } + + fn execute_call_function_ex(&mut self, vm: &VirtualMachine, has_kwargs: bool) -> FrameResult { + let kwargs = if has_kwargs { + let kw_dict: PyDictRef = self.pop_value().downcast().map_err(|_| { + // TODO: check collections.abc.Mapping + vm.new_type_error("Kwargs must be a dict.".to_owned()) + })?; + let mut kwargs = IndexMap::new(); + for (key, value) in kw_dict.into_iter() { + let key = key + .payload_if_subclass::(vm) + .ok_or_else(|| vm.new_type_error("keywords must be strings".to_owned()))?; + kwargs.insert(key.borrow_value().to_owned(), value); + } + kwargs + } else { + IndexMap::new() + }; + let args = self.pop_value(); + let args = vm.extract_elements(&args)?; + let args = FuncArgs { args, kwargs }; + let func_ref = self.pop_value(); let value = vm.invoke(&func_ref, args)?; self.push_value(value); @@ -1343,12 +1345,8 @@ impl ExecutingFrame<'_> { } } - fn execute_unpack_ex( - &mut self, - vm: &VirtualMachine, - before: usize, - after: usize, - ) -> FrameResult { + fn execute_unpack_ex(&mut self, vm: &VirtualMachine, before: u8, after: u8) -> FrameResult { + let (before, after) = (before as usize, after as usize); let value = self.pop_value(); let elements = vm.extract_elements::(&value)?; let min_expected = before + after; @@ -1486,47 +1484,50 @@ impl ExecutingFrame<'_> { } #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn execute_binop( + fn execute_binop(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult { + let b_ref = &self.pop_value(); + let a_ref = &self.pop_value(); + let value = match op { + bytecode::BinaryOperator::Subtract => vm._sub(a_ref, b_ref), + bytecode::BinaryOperator::Add => vm._add(a_ref, b_ref), + bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref), + bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref), + bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref), + bytecode::BinaryOperator::Divide => vm._truediv(a_ref, b_ref), + bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref), + bytecode::BinaryOperator::Modulo => vm._mod(a_ref, b_ref), + bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref), + bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref), + bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref), + bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref), + bytecode::BinaryOperator::And => vm._and(a_ref, b_ref), + }?; + + self.push_value(value); + Ok(None) + } + fn execute_binop_inplace( &mut self, vm: &VirtualMachine, - op: &bytecode::BinaryOperator, - inplace: bool, + op: bytecode::BinaryOperator, ) -> FrameResult { let b_ref = &self.pop_value(); let a_ref = &self.pop_value(); - let value = if inplace { - match *op { - bytecode::BinaryOperator::Subtract => vm._isub(a_ref, b_ref), - bytecode::BinaryOperator::Add => vm._iadd(a_ref, b_ref), - bytecode::BinaryOperator::Multiply => vm._imul(a_ref, b_ref), - bytecode::BinaryOperator::MatrixMultiply => vm._imatmul(a_ref, b_ref), - bytecode::BinaryOperator::Power => vm._ipow(a_ref, b_ref), - bytecode::BinaryOperator::Divide => vm._itruediv(a_ref, b_ref), - bytecode::BinaryOperator::FloorDivide => vm._ifloordiv(a_ref, b_ref), - bytecode::BinaryOperator::Modulo => vm._imod(a_ref, b_ref), - bytecode::BinaryOperator::Lshift => vm._ilshift(a_ref, b_ref), - bytecode::BinaryOperator::Rshift => vm._irshift(a_ref, b_ref), - bytecode::BinaryOperator::Xor => vm._ixor(a_ref, b_ref), - bytecode::BinaryOperator::Or => vm._ior(a_ref, b_ref), - bytecode::BinaryOperator::And => vm._iand(a_ref, b_ref), - }? - } else { - match *op { - bytecode::BinaryOperator::Subtract => vm._sub(a_ref, b_ref), - bytecode::BinaryOperator::Add => vm._add(a_ref, b_ref), - bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref), - bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref), - bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref), - bytecode::BinaryOperator::Divide => vm._truediv(a_ref, b_ref), - bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref), - bytecode::BinaryOperator::Modulo => vm._mod(a_ref, b_ref), - bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref), - bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref), - bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref), - bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref), - bytecode::BinaryOperator::And => vm._and(a_ref, b_ref), - }? - }; + let value = match op { + bytecode::BinaryOperator::Subtract => vm._isub(a_ref, b_ref), + bytecode::BinaryOperator::Add => vm._iadd(a_ref, b_ref), + bytecode::BinaryOperator::Multiply => vm._imul(a_ref, b_ref), + bytecode::BinaryOperator::MatrixMultiply => vm._imatmul(a_ref, b_ref), + bytecode::BinaryOperator::Power => vm._ipow(a_ref, b_ref), + bytecode::BinaryOperator::Divide => vm._itruediv(a_ref, b_ref), + bytecode::BinaryOperator::FloorDivide => vm._ifloordiv(a_ref, b_ref), + bytecode::BinaryOperator::Modulo => vm._imod(a_ref, b_ref), + bytecode::BinaryOperator::Lshift => vm._ilshift(a_ref, b_ref), + bytecode::BinaryOperator::Rshift => vm._irshift(a_ref, b_ref), + bytecode::BinaryOperator::Xor => vm._ixor(a_ref, b_ref), + bytecode::BinaryOperator::Or => vm._ior(a_ref, b_ref), + bytecode::BinaryOperator::And => vm._iand(a_ref, b_ref), + }?; self.push_value(value); Ok(None) diff --git a/vm/src/import.rs b/vm/src/import.rs index 550de9ca4..b4d238ce2 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -36,7 +36,7 @@ pub(crate) fn init_importlib( let install_external = vm.get_attribute(importlib, "_install_external_importers")?; vm.invoke(&install_external, ())?; // Set pyc magic number to commit hash. Should be changed when bytecode will be more stable. - let importlib_external = vm.import("_frozen_importlib_external", &[], 0)?; + let importlib_external = vm.import("_frozen_importlib_external", None, 0)?; let mut magic = get_git_revision().into_bytes(); magic.truncate(4); if magic.len() != 4 { @@ -44,7 +44,7 @@ pub(crate) fn init_importlib( } vm.set_attr(&importlib_external, "MAGIC_NUMBER", vm.ctx.new_bytes(magic))?; let zipimport_res = (|| -> PyResult<()> { - let zipimport = vm.import("zipimport", &[], 0)?; + let zipimport = vm.import("zipimport", None, 0)?; let zipimporter = vm.get_attribute(zipimport, "zipimporter")?; let path_hooks = vm.get_attribute(vm.sys_module.clone(), "path_hooks")?; let path_hooks = list::PyListRef::try_from_object(vm, path_hooks)?; diff --git a/vm/src/pyobject.rs b/vm/src/pyobject.rs index 15f1484ec..1d3b64c3c 100644 --- a/vm/src/pyobject.rs +++ b/vm/src/pyobject.rs @@ -763,6 +763,7 @@ pub trait TryIntoRef { } impl TryIntoRef for PyRef { + #[inline] fn try_into_ref(self, _vm: &VirtualMachine) -> PyResult> { Ok(self) } diff --git a/vm/src/pyobjectrc.rs b/vm/src/pyobjectrc.rs index 3467a5840..e4badd23e 100644 --- a/vm/src/pyobjectrc.rs +++ b/vm/src/pyobjectrc.rs @@ -317,7 +317,7 @@ impl Drop for PyObjectRef { Ok(v) => println!("{}", v.to_string()), Err(_) => println!("{}", del_method.class().name), } - let tb_module = vm.import("traceback", &[], 0).unwrap(); + let tb_module = vm.import("traceback", None, 0).unwrap(); // TODO: set exc traceback let print_stack = vm.get_attribute(tb_module, "print_stack").unwrap(); vm.invoke(&print_stack, ()).unwrap(); diff --git a/vm/src/vm.rs b/vm/src/vm.rs index b6e2f97db..b61728589 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -22,7 +22,7 @@ use crate::builtins::object; use crate::builtins::pybool; use crate::builtins::pystr::{PyStr, PyStrRef}; use crate::builtins::pytype::PyTypeRef; -use crate::builtins::tuple::PyTuple; +use crate::builtins::tuple::{PyTuple, PyTupleTyped}; use crate::common::{hash::HashSecret, lock::PyMutex, rc::PyRc}; #[cfg(feature = "rustpython-compiler")] use crate::compile::{self, CompileError, CompileErrorType, CompileOpts}; @@ -511,7 +511,7 @@ impl VirtualMachine { pub fn try_class(&self, module: &str, class: &str) -> PyResult { let class = self - .get_attribute(self.import(module, &[], 0)?, class)? + .get_attribute(self.import(module, None, 0)?, class)? .downcast() .expect("not a class"); Ok(class) @@ -519,7 +519,7 @@ impl VirtualMachine { pub fn class(&self, module: &str, class: &str) -> PyTypeRef { let module = self - .import(module, &[], 0) + .import(module, None, 0) .unwrap_or_else(|_| panic!("unable to import {}", module)); let class = self .get_attribute(module.clone(), class) @@ -844,16 +844,35 @@ impl VirtualMachine { }) } - pub fn import(&self, module: &str, from_list: &[PyStrRef], level: usize) -> PyResult { + #[inline] + pub fn import( + &self, + module: impl TryIntoRef, + from_list: Option>, + level: usize, + ) -> PyResult { + self._import_inner(module.try_into_ref(self)?, from_list, level) + } + + fn _import_inner( + &self, + module: PyStrRef, + from_list: Option>, + level: usize, + ) -> PyResult { // if the import inputs seem weird, e.g a package import or something, rather than just // a straight `import ident` - let weird = module.contains('.') || level != 0 || !from_list.is_empty(); + let weird = module.borrow_value().contains('.') + || level != 0 + || from_list + .as_ref() + .map_or(false, |x| !x.borrow_value().is_empty()); let cached_module = if weird { None } else { let sys_modules = self.get_attribute(self.sys_module.clone(), "modules")?; - sys_modules.get_item(module, self).ok() + sys_modules.get_item(module.clone(), self).ok() }; match cached_module { @@ -871,7 +890,7 @@ impl VirtualMachine { let import_func = self .get_attribute(self.builtins.clone(), "__import__") .map_err(|_| { - self.new_import_error("__import__ not found".to_owned(), module) + self.new_import_error("__import__ not found".to_owned(), module.clone()) })?; let (locals, globals) = if let Some(frame) = self.current_frame() { @@ -879,9 +898,10 @@ impl VirtualMachine { } else { (None, None) }; - let from_list = self - .ctx - .new_tuple(from_list.iter().map(|x| x.as_object().clone()).collect()); + let from_list = match from_list { + Some(tup) => tup.into_pyobject(self), + None => self.ctx.new_tuple(vec![]), + }; self.invoke(&import_func, (module, globals, locals, from_list, level)) .map_err(|exc| import::remove_importlib_frames(self, &exc)) } @@ -1287,7 +1307,7 @@ impl VirtualMachine { encoding: Option, errors: Option, ) -> PyResult { - let codecsmodule = self.import("_codecs", &[], 0)?; + let codecsmodule = self.import("_codecs", None, 0)?; let func = self.get_attribute(codecsmodule, func)?; let mut args = vec![obj, encoding.into_pyobject(self)]; if let Some(errors) = errors { From f93934f0c6ad249738e66249a2489de3c6bfd34e Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Thu, 10 Dec 2020 13:36:45 -0600 Subject: [PATCH 03/10] Make Continue have the target it's continuing to --- bytecode/src/bytecode.rs | 9 ++++--- compiler/src/compile.rs | 52 ++++++++++++++++++++++------------------ vm/src/frame.rs | 23 ++++++++---------- 3 files changed, 45 insertions(+), 39 deletions(-) diff --git a/bytecode/src/bytecode.rs b/bytecode/src/bytecode.rs index 78d07df38..993e1a066 100644 --- a/bytecode/src/bytecode.rs +++ b/bytecode/src/bytecode.rs @@ -225,7 +225,9 @@ pub enum Instruction { }, Duplicate, GetIter, - Continue, + Continue { + target: Label, + }, Break, Jump { target: Label, @@ -598,7 +600,8 @@ impl CodeObject { | SetupExcept { handler: l } | SetupWith { end: l } | SetupAsyncWith { end: l } - | SetupLoop { end: l } => { + | SetupLoop { end: l } + | Continue { target: l } => { label_targets.insert(*l); } @@ -838,7 +841,7 @@ impl Instruction { Rotate { amount } => w!(Rotate, amount), Duplicate => w!(Duplicate), GetIter => w!(GetIter), - Continue => w!(Continue), + Continue { target } => w!(Continue, target), Break => w!(Break), Jump { target } => w!(Jump, target), JumpIfTrue { target } => w!(JumpIfTrue, target), diff --git a/compiler/src/compile.rs b/compiler/src/compile.rs index e953ec83a..dc0b05302 100644 --- a/compiler/src/compile.rs +++ b/compiler/src/compile.rs @@ -88,7 +88,8 @@ impl CodeInfo { | SetupExcept { handler: l } | SetupWith { end: l } | SetupAsyncWith { end: l } - | SetupLoop { end: l } => { + | SetupLoop { end: l } + | Continue { target: l } => { *l = label_map[l.0].expect("label never set"); } @@ -131,7 +132,7 @@ impl Default for CompileOpts { #[derive(Debug, Clone, Copy)] struct CompileContext { - in_loop: bool, + loop_data: Option<(Label, Label)>, in_class: bool, func: FunctionContext, } @@ -144,6 +145,9 @@ enum FunctionContext { } impl CompileContext { + fn in_loop(self) -> bool { + self.loop_data.is_some() + } fn in_func(self) -> bool { self.func != FunctionContext::NoFunction } @@ -236,7 +240,7 @@ impl Compiler { current_qualified_path: None, done_with_future_stmts: false, ctx: CompileContext { - in_loop: false, + loop_data: None, in_class: false, func: FunctionContext::NoFunction, }, @@ -737,19 +741,21 @@ impl Compiler { } } Break => { - if !self.ctx.in_loop { + if !self.ctx.in_loop() { return Err(self.error_loc(CompileErrorType::InvalidBreak, statement.location)); } self.emit(Instruction::Break); } - Continue => { - if !self.ctx.in_loop { + Continue => match self.ctx.loop_data { + Some((start, _)) => { + self.emit(Instruction::Continue { target: start }); + } + None => { return Err( self.error_loc(CompileErrorType::InvalidContinue, statement.location) ); } - self.emit(Instruction::Continue); - } + }, Return { value } => { if !self.ctx.in_func() { return Err(self.error_loc(CompileErrorType::InvalidReturn, statement.location)); @@ -1052,7 +1058,7 @@ impl Compiler { let prev_ctx = self.ctx; self.ctx = CompileContext { - in_loop: false, + loop_data: None, in_class: prev_ctx.in_class, func: if is_async { FunctionContext::AsyncFunction @@ -1258,7 +1264,7 @@ impl Compiler { self.ctx = CompileContext { func: FunctionContext::NoFunction, in_class: true, - in_loop: false, + loop_data: None, }; let qualified_name = self.create_qualified_name(name, ""); @@ -1399,10 +1405,10 @@ impl Compiler { self.compile_jump_if(test, false, else_label)?; - let was_in_loop = self.ctx.in_loop; - self.ctx.in_loop = true; + let was_in_loop = self.ctx.loop_data; + self.ctx.loop_data = Some((start_label, end_label)); self.compile_statements(body)?; - self.ctx.in_loop = was_in_loop; + self.ctx.loop_data = was_in_loop; self.emit(Instruction::Jump { target: start_label, }); @@ -1428,6 +1434,8 @@ impl Compiler { let else_label = self.new_label(); let end_label = self.new_label(); + self.emit(Instruction::SetupLoop { end: end_label }); + // The thing iterated: self.compile_expression(iter)?; @@ -1437,7 +1445,6 @@ impl Compiler { self.emit(Instruction::GetAIter); - self.emit(Instruction::SetupLoop { end: end_label }); self.set_label(start_label); self.emit(Instruction::SetupExcept { handler: check_asynciter_label, @@ -1459,26 +1466,25 @@ impl Compiler { self.emit(Instruction::JumpIfTrue { target: else_label }); self.emit(Instruction::Raise { argc: 0 }); - let was_in_loop = self.ctx.in_loop; - self.ctx.in_loop = true; + let was_in_loop = self.ctx.loop_data; + self.ctx.loop_data = Some((start_label, end_label)); self.set_label(body_label); self.compile_statements(body)?; - self.ctx.in_loop = was_in_loop; + self.ctx.loop_data = was_in_loop; } else { // Retrieve Iterator self.emit(Instruction::GetIter); - self.emit(Instruction::SetupLoop { end: end_label }); self.set_label(start_label); self.emit(Instruction::ForIter { target: else_label }); // Start of loop iteration, set targets: self.compile_store(target)?; - let was_in_loop = self.ctx.in_loop; - self.ctx.in_loop = true; + let was_in_loop = self.ctx.loop_data; + self.ctx.loop_data = Some((start_label, end_label)); self.compile_statements(body)?; - self.ctx.in_loop = was_in_loop; + self.ctx.loop_data = was_in_loop; } self.emit(Instruction::Jump { @@ -2009,7 +2015,7 @@ impl Compiler { Lambda { args, body } => { let prev_ctx = self.ctx; self.ctx = CompileContext { - in_loop: false, + loop_data: Option::None, in_class: prev_ctx.in_class, func: FunctionContext::Function, }; @@ -2182,7 +2188,7 @@ impl Compiler { let prev_ctx = self.ctx; self.ctx = CompileContext { - in_loop: false, + loop_data: None, in_class: prev_ctx.in_class, func: FunctionContext::Function, }; diff --git a/vm/src/frame.rs b/vm/src/frame.rs index ced95d808..328da1ebb 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -42,7 +42,6 @@ struct Block { #[derive(Clone, Debug)] enum BlockType { Loop { - start: bytecode::Label, end: bytecode::Label, }, TryExcept { @@ -77,7 +76,7 @@ enum UnwindReason { Break, /// We are unwinding blocks since we hit a continue statements. - Continue, + Continue { target: bytecode::Label }, } #[derive(Debug)] @@ -328,7 +327,7 @@ impl ExecutingFrame<'_> { loop { let idx = self.lasti.fetch_add(1, Ordering::Relaxed); let instr = &self.code.instructions[idx]; - let result = self.execute_instruction(instr, idx, vm); + let result = self.execute_instruction(instr, vm); match result { Ok(None) => continue, Ok(Some(value)) => { @@ -422,7 +421,6 @@ impl ExecutingFrame<'_> { fn execute_instruction( &mut self, instruction: &bytecode::Instruction, - current_idx: usize, vm: &VirtualMachine, ) -> FrameResult { vm.check_signals()?; @@ -671,10 +669,7 @@ impl ExecutingFrame<'_> { Ok(None) } bytecode::Instruction::SetupLoop { end } => { - self.push_block(BlockType::Loop { - start: bytecode::Label(current_idx + 1), - end: *end, - }); + self.push_block(BlockType::Loop { end: *end }); Ok(None) } bytecode::Instruction::SetupExcept { handler } => { @@ -871,7 +866,9 @@ impl ExecutingFrame<'_> { bytecode::Instruction::Raise { argc } => self.execute_raise(vm, *argc), bytecode::Instruction::Break => self.unwind_blocks(vm, UnwindReason::Break), - bytecode::Instruction::Continue => self.unwind_blocks(vm, UnwindReason::Continue), + bytecode::Instruction::Continue { target } => { + self.unwind_blocks(vm, UnwindReason::Continue { target: *target }) + } bytecode::Instruction::PrintExpr => { let expr = self.pop_value(); @@ -1040,14 +1037,14 @@ impl ExecutingFrame<'_> { // First unwind all existing blocks on the block stack: while let Some(block) = self.current_block() { match block.typ { - BlockType::Loop { start, end } => match &reason { + BlockType::Loop { end } => match &reason { UnwindReason::Break => { self.pop_block(); self.jump(end); return Ok(None); } - UnwindReason::Continue => { - self.jump(start); + UnwindReason::Continue { target } => { + self.jump(*target); return Ok(None); } _ => { @@ -1094,7 +1091,7 @@ impl ExecutingFrame<'_> { match reason { UnwindReason::Raising { exception } => Err(exception), UnwindReason::Returning { value } => Ok(Some(ExecutionResult::Return(value))), - UnwindReason::Break | UnwindReason::Continue => { + UnwindReason::Break | UnwindReason::Continue { .. } => { panic!("Internal error: break or continue must occur within a loop block.") } // UnwindReason::NoWorries => Ok(None), } From 60b2884afa28ab0167c0e9ad4ebd54fce2b1b85f Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Thu, 10 Dec 2020 14:08:59 -0600 Subject: [PATCH 04/10] Use lz4_flex instead of lz-fear --- Cargo.lock | 43 +++++----------------------------------- bytecode/Cargo.toml | 2 +- bytecode/src/bytecode.rs | 17 +++++++++------- 3 files changed, 16 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23237636a..9adaca599 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -755,26 +755,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" -[[package]] -name = "fehler" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5729fe49ba028cd550747b6e62cd3d841beccab5390aa398538c31a2d983635" -dependencies = [ - "fehler-macros", -] - -[[package]] -name = "fehler-macros" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb5acb1045ebbfa222e2c50679e392a71dd77030b78fb0189f2d9c5974400f9" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "fixedbitset" version = "0.2.0" @@ -1129,16 +1109,13 @@ dependencies = [ ] [[package]] -name = "lz-fear" -version = "0.1.1" +name = "lz4_flex" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06aad1ce45e4ccf7a8d7d43e0c3ad38dc5d2255174a5f29a3c39d961fbc6181d" +checksum = "2e7bea5a3a7bb3c040adc89eadadee33c3d39371b20dd980c952dd2642867b99" dependencies = [ - "bitflags", "byteorder", - "fehler", - "thiserror", - "twox-hash", + "quick-error", ] [[package]] @@ -1769,7 +1746,7 @@ dependencies = [ "bitflags", "bstr", "itertools", - "lz-fear", + "lz4_flex", "num-bigint", "num-complex", "serde", @@ -2395,16 +2372,6 @@ dependencies = [ "serde", ] -[[package]] -name = "twox-hash" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04f8ab788026715fa63b31960869617cba39117e520eb415b0139543e325ab59" -dependencies = [ - "cfg-if 0.1.10", - "static_assertions", -] - [[package]] name = "typenum" version = "1.12.0" diff --git a/bytecode/Cargo.toml b/bytecode/Cargo.toml index a1c431d4e..303f429d5 100644 --- a/bytecode/Cargo.toml +++ b/bytecode/Cargo.toml @@ -11,7 +11,7 @@ license = "MIT" [dependencies] bincode = "1.1" bitflags = "1.1" -lz-fear = "0.1" +lz4_flex = "0.4" num-bigint = { version = "0.3", features = ["serde"] } num-complex = { version = "0.3", features = ["serde"] } serde = { version = "1.0", features = ["derive"] } diff --git a/bytecode/src/bytecode.rs b/bytecode/src/bytecode.rs index 993e1a066..26f11f1d9 100644 --- a/bytecode/src/bytecode.rs +++ b/bytecode/src/bytecode.rs @@ -721,18 +721,21 @@ impl CodeObject { impl CodeObject { /// Load a code object from bytes pub fn from_bytes(data: &[u8]) -> Result> { - let reader = lz_fear::framed::LZ4FrameReader::new(data)?; - Ok(bincode::deserialize_from(reader.into_read())?) + // TODO: PR to lz4_flex to make it not panic + if data.len() < 4 { + return Err( + std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "bad bytecode").into(), + ); + } + let raw_bincode = lz4_flex::decompress_size_prepended(data)?; + let data = bincode::deserialize(&raw_bincode)?; + Ok(data) } /// Serialize this bytecode to bytes. pub fn to_bytes(&self) -> Vec { let data = bincode::serialize(&self).expect("CodeObject is not serializable"); - let mut out = Vec::new(); - lz_fear::framed::CompressionSettings::default() - .compress_with_size_unchecked(data.as_slice(), &mut out, data.len() as u64) - .unwrap(); - out + lz4_flex::compress_prepend_size(&data) } } From 057b5e8edae72595e002a8d982116be461c89bb0 Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Thu, 10 Dec 2020 14:43:42 -0600 Subject: [PATCH 05/10] Fix jit --- jit/src/instructions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jit/src/instructions.rs b/jit/src/instructions.rs index 9ff9228d4..fd85a6dc1 100644 --- a/jit/src/instructions.rs +++ b/jit/src/instructions.rs @@ -320,7 +320,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { _ => Err(JitCompileError::NotSupported), } } - Instruction::BinaryOperation { op, .. } => { + Instruction::BinaryOperation { op } | Instruction::BinaryOperationInplace { op } => { // the rhs is popped off first let b = self.stack.pop().ok_or(JitCompileError::BadBytecode)?; let a = self.stack.pop().ok_or(JitCompileError::BadBytecode)?; From 9c895c268aaaad803ce6d3bbf6070b25b9784ccb Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Thu, 10 Dec 2020 18:44:12 -0600 Subject: [PATCH 06/10] Add label_arg{,_mut} methods --- bytecode/src/bytecode.rs | 61 ++++++++++++++++++++++++++++------------ compiler/src/compile.rs | 31 ++++++-------------- 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/bytecode/src/bytecode.rs b/bytecode/src/bytecode.rs index 26f11f1d9..1ca643226 100644 --- a/bytecode/src/bytecode.rs +++ b/bytecode/src/bytecode.rs @@ -145,7 +145,7 @@ impl CodeFlags { #[derive(Serialize, Debug, Deserialize, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] #[repr(transparent)] // XXX: if you add a new instruction that stores a Label, make sure to add it in -// compile::CodeInfo::finalize_code and CodeObject::label_targets +// Instruction::label_arg{,_mut} pub struct Label(pub usize); impl fmt::Display for Label { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -589,23 +589,8 @@ impl CodeObject { pub fn label_targets(&self) -> BTreeSet