diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 7ec6f7192..3f53fe10e 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -5971,7 +5971,7 @@ impl Compiler { if self.ctx.func != FunctionContext::AsyncFunction { return Err(self.error(CodegenErrorType::InvalidAsyncFor)); } - emit!(self, Instruction::GetAIter); + emit!(self, Instruction::GetAiter); self.switch_to_block(for_block); @@ -5980,7 +5980,7 @@ impl Compiler { // SETUP_FINALLY to guard the __anext__ call emit!(self, PseudoInstruction::SetupFinally { delta: else_block }); - emit!(self, Instruction::GetANext); + emit!(self, Instruction::GetAnext); self.emit_load_const(ConstantData::None); end_async_for_target = self.compile_yield_from_sequence(true)?; // POP_BLOCK for SETUP_FINALLY - only GetANext/yield_from are protected @@ -9445,7 +9445,7 @@ impl Compiler { // Get iterator / turn item into an iterator if generator.is_async { - emit!(self, Instruction::GetAIter); + emit!(self, Instruction::GetAiter); } else { emit!(self, Instruction::GetIter); } @@ -9455,7 +9455,7 @@ impl Compiler { let mut end_async_for_target = BlockIdx::NULL; if generator.is_async { emit!(self, PseudoInstruction::SetupFinally { delta: after_block }); - emit!(self, Instruction::GetANext); + emit!(self, Instruction::GetAnext); self.push_fblock( FBlockType::AsyncComprehensionGenerator, loop_block, @@ -9565,7 +9565,7 @@ impl Compiler { // Get iterator / turn item into an iterator // Use is_async from the first generator, not has_an_async_gen which covers ALL generators if outermost.is_async { - emit!(self, Instruction::GetAIter); + emit!(self, Instruction::GetAiter); } else { emit!(self, Instruction::GetIter); }; @@ -9646,7 +9646,7 @@ impl Compiler { } } if has_async && generators[0].is_async { - emit!(self, Instruction::GetAIter); + emit!(self, Instruction::GetAiter); } else { emit!(self, Instruction::GetIter); } @@ -9822,7 +9822,7 @@ impl Compiler { if i > 0 { 
self.compile_for_iterable_expression(&generator.iter, generator.is_async)?; if generator.is_async { - emit!(self, Instruction::GetAIter); + emit!(self, Instruction::GetAiter); } else { emit!(self, Instruction::GetIter); } @@ -9833,7 +9833,7 @@ impl Compiler { let mut end_async_for_target = BlockIdx::NULL; if generator.is_async { emit!(self, PseudoInstruction::SetupFinally { delta: after_block }); - emit!(self, Instruction::GetANext); + emit!(self, Instruction::GetAnext); self.push_fblock( FBlockType::AsyncComprehensionGenerator, loop_block, @@ -18406,7 +18406,7 @@ async def f(items): "async dict comprehension should be inlined" ); assert!( - ops.iter().any(|op| matches!(op, Instruction::GetAIter)), + ops.iter().any(|op| matches!(op, Instruction::GetAiter)), "inlined async dict comprehension should keep GET_AITER in outer code, got ops={ops:?}" ); assert!( @@ -23486,7 +23486,7 @@ async def name_4(): let Some(get_aiter_pos) = name_4 .instructions .iter() - .position(|unit| matches!(unit.op, Instruction::GetAIter)) + .position(|unit| matches!(unit.op, Instruction::GetAiter)) else { panic!("missing GET_AITER in name_4"); }; diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 112e6553c..17764a992 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -12,9 +12,8 @@ use rustpython_compiler_core::{ bytecode::{ AnyInstruction, AnyOpcode, Arg, CO_FAST_CELL, CO_FAST_FREE, CO_FAST_HIDDEN, CO_FAST_LOCAL, CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, ExceptionTableEntry, - InstrDisplayContext, Instruction, InstructionMetadata, IntrinsicFunction1, Label, OpArg, - Opcode, PseudoInstruction, PseudoOpcode, PyCodeLocationInfoKind, encode_exception_table, - oparg, + InstrDisplayContext, Instruction, IntrinsicFunction1, Label, OpArg, Opcode, + PseudoInstruction, PseudoOpcode, PyCodeLocationInfoKind, encode_exception_table, oparg, }, varint::{write_signed_varint, write_varint}, }; @@ -3227,7 +3226,7 @@ impl CodeInfo { } 
AnyInstruction::Real( Instruction::FormatSimple - | Instruction::GetANext + | Instruction::GetAnext | Instruction::GetLen | Instruction::GetYieldFromIter | Instruction::ImportFrom { .. } @@ -6896,7 +6895,7 @@ impl CodeInfo { block .instructions .iter() - .any(|info| matches!(info.instr.real(), Some(Instruction::GetANext))) + .any(|info| matches!(info.instr.real(), Some(Instruction::GetAnext))) } fn block_has_return(block: &Block) -> bool { @@ -11624,7 +11623,7 @@ fn block_contains_suspension_point(block: &Block) -> bool { instr, Instruction::YieldValue { .. } | Instruction::GetAwaitable { .. } - | Instruction::GetANext + | Instruction::GetAnext | Instruction::EndAsyncFor ) }) diff --git a/crates/compiler-core/generate.py b/crates/compiler-core/generate.py new file mode 100644 index 000000000..ecb4652ea --- /dev/null +++ b/crates/compiler-core/generate.py @@ -0,0 +1,721 @@ +#!/usr/bin/env python +import collections +import dataclasses +import io +import os +import pathlib +import subprocess +import sys + +import tomllib + +CRATE_ROOT = pathlib.Path(__file__).parent +CONF_FILE = CRATE_ROOT / "opcode.toml" +OUT_FILE = CRATE_ROOT / "src" / "bytecode" / "instructions.rs" + +ROOT = CRATE_ROOT.parents[1] + +try: + CPYTHON_ROOT = pathlib.Path(os.environ["CPYTHON_ROOT"]).expanduser().resolve() +except KeyError: + raise ValueError("Missing environment variable 'CPYTHON_ROOT'") from None + +CPYTHON_TOOLS_LIB = CPYTHON_ROOT / "Tools" / "cases_generator" + +sys.path.append(CPYTHON_TOOLS_LIB.as_posix()) + +import analyzer +from generators_common import DEFAULT_INPUT +from stack import get_stack_effect + + +@dataclasses.dataclass(frozen=True, kw_only=True, slots=True) +class OpcodeGen: + name: str + instruction_enum: str + instructions: list + numeric_repr: str + metadata: dict[str, str] + analysis: analyzer.Analysis + + def gen(self) -> str: + methods = "\n\n".join( + getattr(self, attr).strip() + for attr in sorted(dir(self)) + if attr.startswith("fn_") + ) + + impls = "\n\n".join( + 
getattr(self, attr).strip() + for attr in sorted(dir(self)) + if attr.startswith("impl_") + ) + + variants = ",\n".join(instr.name for instr in self) + + return f""" + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + pub enum {self.name} {{ + {variants} + }} + + impl {self.name} {{ + {methods} + }} + + {impls} + """ + + @property + def fn_as_numeric(self) -> str: + arms = ",\n".join(f"Self::{instr.name} => {instr.opcode}" for instr in self) + return f""" + #[must_use] + pub const fn as_{self.numeric_repr}(self) -> {self.numeric_repr} {{ + match self {{ + {arms}, + }} + }} + """ + + @property + def fn_try_from_numeric(self) -> str: + arms = ",\n".join(f"{instr.opcode} => Self::{instr.name}" for instr in self) + return f""" + pub const fn try_from_{self.numeric_repr}( + value: {self.numeric_repr} + ) -> Result {{ + Ok(match value {{ + {arms}, + _ => return Err(MarshalError::InvalidBytecode), + }}) + }} + """ + + @property + def impl_try_from_numeric(self) -> str: + return f""" + impl TryFrom<{self.numeric_repr}> for {self.name} {{ + type Error = MarshalError; + + fn try_from(value: {self.numeric_repr}) -> Result {{ + Self::try_from_{self.numeric_repr}(value) + }} + }} + """ + + @property + def impl_into_numeric(self) -> str: + return f""" + impl From<{self.name}> for {self.numeric_repr} {{ + fn from(opcode: {self.name}) -> Self {{ + opcode.as_{self.numeric_repr}() + }} + }} + """ + + def build_has_attr_fn(self, fn_attr: str, prop_attr: str, doc_flag: str) -> str: + arms = "|".join( + f"Self::{instr.name}" + for instr in self + if getattr(instr.properties, prop_attr) + ) + + if arms: + inner = f"matches!(self, {arms})" + else: + inner = "false" + + return f""" + /// Does this opcode have '{doc_flag}' set. 
+ #[must_use] + pub const fn has_{fn_attr}(self) -> bool {{ + {inner} + }} + """ + + fn_has_arg = property( + lambda self: self.build_has_attr_fn("arg", "oparg", "HAS_ARG_FLAG") + ) + + fn_has_const = property( + lambda self: self.build_has_attr_fn("const", "uses_co_consts", "HAS_CONST_FLAG") + ) + + fn_has_name = property( + lambda self: self.build_has_attr_fn("name", "uses_co_names", "HAS_NAME_FLAG") + ) + + fn_has_jump = property( + lambda self: self.build_has_attr_fn("jump", "jumps", "HAS_JUMP_FLAG") + ) + + fn_has_free = property( + lambda self: self.build_has_attr_fn("free", "has_free", "HAS_FREE_FLAG") + ) + + fn_has_local = property( + lambda self: self.build_has_attr_fn("local", "uses_locals", "HAS_LOCAL_FLAG") + ) + + @property + def instrumented_mapping(self) -> dict[str, str]: + inames = {instr.name for instr in self if instr.name.startswith("Instrumented")} + names = {instr.name for instr in self} - inames + + res = {} + for iname in sorted(inames): + name = iname.removeprefix("Instrumented") + if name not in names: + continue + + res[name] = iname + + return res + + @property + def fn_to_base(self) -> str: + arms = ",\n".join( + f"Self::{iname} => Self::{name}" + for name, iname in self.instrumented_mapping.items() + ) + + arms = arms.strip() + if not arms: + inner = "None" + else: + inner = f""" + Some(match self {{ + {arms}, + _ => return None, + + }}) + """ + + return f""" + #[must_use] + pub const fn to_base(self) -> Option {{ + {inner} + }} + """ + + @property + def fn_to_instrumented(self) -> str: + arms = ",\n".join( + f"Self::{name} => Self::{iname}" + for name, iname in self.instrumented_mapping.items() + ) + + arms = arms.strip() + if not arms: + inner = "None" + else: + inner = f""" + Some(match self {{ + {arms}, + _ => return None, + + }}) + """ + + return f""" + #[must_use] + pub const fn to_instrumented(self) -> Option {{ + {inner} + }} + """ + + @property + def fn_deopt(self) -> str: + names = {instr.name for instr in self} + + deopts = 
collections.defaultdict(list) + for family in self.analysis.families.values(): + family_name = to_pascal_case(family.name) + if family_name not in names: + continue + + for member in family.members: + if member.name == family_name: + continue + + deopts[family_name].append(member.name) + + arms = "" + for target, specialized in deopts.items(): + ops = "|".join(f"Self::{op}" for op in specialized) + arms += f"{ops} => Self::{target},\n" + + arms = arms.strip() + + if not arms: + inner = "None" + else: + inner = f""" + Some(match self {{ + {arms} + _ => return None, + + }}) + """ + + return f""" + #[must_use] + pub const fn deopt(self) -> Option {{ + {inner} + }} + """ + + @property + def fn_cache_entries(self) -> str: + arms = "" + for instr in self: + name = instr.name + if getattr(instr, "family", None) and (to_pascal_case(instr.family.name) != name):  # family.name is the raw CPython spelling; instr.name is PascalCase + continue + + if name.startswith("Instrumented"): + continue + + try: + size = instr.size + except AttributeError: + continue + + if size > 1: + arms += f"Self::{name} => {size - 1},\n" + + arms = arms.strip() + if not arms: + inner = "0" + else: + inner = f""" + match self.deoptimize() {{ + {arms} + _ => 0, + }} + """ + + return f""" + #[must_use] + pub const fn cache_entries(self) -> usize {{ + {inner} + }} + """ + + @property + def fn_stack_effect_info(self) -> str: + oparg_used = False + arms = "" + for instr in self: + name = instr.name + stack = get_stack_effect(instr) + + popped = (-stack.base_offset).to_c() + pushed = (stack.logical_sp - stack.base_offset).to_c() + + pushed_comment = "" + popped_comment = "" + + if stack_effect := self.metadata.get(name, {}).get("stack_effect"): + if npushed := stack_effect.get("pushed"): + pushed_comment = f"// TODO: Differs from CPython `{pushed}`" + pushed = npushed + + if npopped := stack_effect.get("popped"): + popped_comment = f"// TODO: Differs from CPython `{popped}`" + popped = npopped + + oparg_used = oparg_used or any("oparg" in expr for expr in (pushed, popped)) + + arms += f""" 
+ Self::{name} => ( + {pushed}, {pushed_comment} + {popped}, {popped_comment} + ), + """.strip() + + arms = arms.strip() + + oparg_arg = "_oparg" + oparg_cast = "" + if oparg_used: + oparg_arg = "oparg" + oparg_cast = f""" + // Reason for converting {oparg_arg} to i32 is because of expressions like `1 + (oparg -1)` + // that causes underflow errors. + let oparg = i32::try_from({oparg_arg}).expect("{oparg_arg} does not fit in an `i32`"); + """ + + return f""" + #[must_use] + pub fn stack_effect_info(&self, {oparg_arg}: u32) -> StackEffect {{ + {oparg_cast} + + let (pushed, popped) = match self {{ + {arms} + }}; + + debug_assert!(u32::try_from(pushed).is_ok()); + debug_assert!(u32::try_from(popped).is_ok()); + + StackEffect::new(pushed as u32, popped as u32) + }} + """ + + @property + def fn_as_instruction(self) -> str: + arms = "" + for instr in self: + name = instr.name + arms += f"Self::{name} => {self.instruction_enum}::{name}" + if oparg := self.metadata.get(name, {}).get("oparg"): + oname = oparg["name"] + arms += f" {{ {oname}: Arg::marker() }}" + + arms += ",\n" + + return f""" + /// Returns self as [`{self.instruction_enum}`]. + #[must_use] + pub const fn as_instruction(self) -> {self.instruction_enum} {{ + match self {{ + {arms} + }} + }} + """ + + @property + def impl_as_instruction(self) -> str: + return f""" + impl From<{self.name}> for {self.instruction_enum} {{ + fn from(opcode: {self.name}) -> Self {{ + opcode.as_instruction() + }} + }} + """ + + @property + def fn_stack_effect(self) -> str: + return """ + /// Stack effect of [`Self::stack_effect_info`]. 
+ #[must_use] + pub fn stack_effect(&self, oparg: u32) -> i32 { + self.stack_effect_info(oparg).effect() + } + """ + + def __iter__(self): + yield from self.instructions + + +@dataclasses.dataclass(frozen=True, kw_only=True, slots=True) +class InstructionGen: + name: str + opcode_enum: str + instructions: list + numeric_repr: str + metadata: dict[str, str] + + def gen(self) -> str: + methods = "\n\n".join( + getattr(self, attr).strip() + for attr in sorted(dir(self)) + if attr.startswith("fn_") + ) + + impls = "\n\n".join( + getattr(self, attr).strip() + for attr in sorted(dir(self)) + if attr.startswith("impl_") + ) + + variants = "" + for instr in self: + name = instr.name + variants += name + + if oparg := self.metadata.get(name, {}).get("oparg"): + oname, otype = oparg["name"], oparg["type"] + + variants += f"{{ {oname}: Arg<{otype}> }}" + + opcode = instr.opcode + variants += f" = {opcode},\n" + + return f""" + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + #[repr({self.numeric_repr})] // TODO: Remove this `#[repr(...)]` + pub enum {self.name} {{ + {variants} + }} + + impl {self.name} {{ + {methods} + }} + + {impls} + """ + + @property + def fn_as_opcode(self) -> str: + arms = "" + for instr in self: + name = instr.name + arms += f"Self::{name}" + if oparg := self.metadata.get(name, {}).get("oparg"): + arms += " { .. }" + + arms += f"=> {self.opcode_enum}::{name},\n" + + return f""" + /// Returns self as a [`{self.opcode_enum}`]. 
+ #[must_use] + pub const fn as_opcode(self) -> {self.opcode_enum} {{ + match self {{ + {arms} + }} + }} + """ + + @property + def impl_as_opcode(self) -> str: + return f""" + impl From<{self.name}> for {self.opcode_enum} {{ + fn from(instruction: {self.name}) -> Self {{ + instruction.as_opcode() + }} + }} + """ + + @property + def fn_as_numeric_repr(self) -> str: + return f""" + #[must_use] + pub const fn as_{self.numeric_repr}(self) -> {self.numeric_repr} {{ + self.as_opcode().as_{self.numeric_repr}() + }} + """ + + @property + def impl_as_numeric_repr(self) -> str: + return f""" + impl From<{self.name}> for {self.numeric_repr} {{ + fn from(instruction: {self.name}) -> Self {{ + instruction.as_{self.numeric_repr}() + }} + }} + """ + + @property + def fn_label_arg(self) -> str: + TARGET = "oparg::Label" + + arms = "" + for instr in self: + name = instr.name + if oparg := self.metadata.get(name, {}).get("oparg"): + oname, otype = oparg["name"], oparg["type"] + if otype != TARGET: + continue + + arms += f"Self::{name} {{ {oname} }} => *{oname},\n" + + arms = arms.strip() + + return f""" + #[must_use] + pub const fn label_arg(&self) -> Option> {{ + Some(match self {{ + {arms} + _ => return None, + }}) + }} + """ + + @property + def fn_to_base(self) -> str: + return f""" + #[must_use] + pub const fn to_base(self) -> Option {{ + if let Some(opcode) = self.as_opcode().to_base() {{ + Some(opcode.as_instruction()) + }} else {{ + None + }} + }} + """ + + @property + def fn_to_instrumented(self) -> str: + return f""" + #[must_use] + pub const fn to_instrumented(self) -> Option {{ + if let Some(opcode) = self.as_opcode().to_instrumented() {{ + Some(opcode.as_instruction()) + }} else {{ + None + }} + }} + """ + + @property + def fn_try_from_numeric(self) -> str: + return f""" + pub const fn try_from_{self.numeric_repr}( + value: {self.numeric_repr} + ) -> Result {{ + match {self.opcode_enum}::try_from_{self.numeric_repr}(value) {{ + Ok(opcode) => Ok(opcode.as_instruction()), 
+ Err(e) => Err(e), + }} + }} + """ + + @property + def impl_try_from_numeric(self) -> str: + return f""" + impl TryFrom<{self.numeric_repr}> for {self.name} {{ + type Error = MarshalError; + + fn try_from(value: {self.numeric_repr}) -> Result {{ + Self::try_from_{self.numeric_repr}(value) + }} + }} + """ + + @property + def fn_stack_effect(self) -> str: + return """ + /// Stack effect of [`Self::stack_effect_info`]. + #[must_use] + pub fn stack_effect(&self, oparg: u32) -> i32 { + self.as_opcode().stack_effect(oparg) + } + """ + + @property + def fn_cache_entries(self) -> str: + return f""" + #[must_use] + pub const fn cache_entries(self) -> usize {{ + self.as_opcode().cache_entries() + }} + """ + + @property + def fn_deopt(self) -> str: + return f""" + #[must_use] + pub const fn deopt(self) -> Option {{ + if let Some(opcode) = self.as_opcode().deopt() {{ + Some(opcode.as_instruction()) + }} else {{ + None + }} + }} + """ + + @property + def fn_stack_effect_info(self) -> str: + return f""" + #[must_use] + pub fn stack_effect_info(&self, oparg: u32) -> StackEffect {{ + self.as_opcode().stack_effect_info(oparg) + }} + """ + + def __iter__(self): + yield from self.instructions + + +def to_pascal_case(s: str) -> str: + return s.title().replace("_", "") + + +def get_analysis() -> analyzer.Analysis: + analysis = analyzer.analyze_files([DEFAULT_INPUT]) + + # We don't differentiate between real and pseudos yet + analysis.instructions |= analysis.pseudos + return analysis + + +def rustfmt(code: str) -> str: + return subprocess.check_output(["rustfmt", "--emit=stdout"], input=code, text=True) + + +def main(): + CONF = tomllib.loads(CONF_FILE.read_text()) + + analysis = get_analysis() + + outfile = io.StringIO() + for opcode_enum, conf in CONF.items(): + metadata = conf["opcodes"] + numeric_repr = conf["numeric_repr"] + instruction_enum = conf["instruction_enum"] + + opcode_range = conf["range"] + lower, upper = map(int, (opcode_range["min"], opcode_range["max"])) + bounds = 
range(lower, upper + 1) + + instructions = sorted( + ( + instr + for instr in analysis.instructions.values() + if instr.opcode in bounds + ), + key=lambda x: x.opcode, + ) + + for instr in instructions: + instr.name = to_pascal_case(instr.name) + + opcode_code = OpcodeGen( + name=opcode_enum, + instruction_enum=instruction_enum, + instructions=instructions, + numeric_repr=numeric_repr, + metadata=metadata, + analysis=analysis, + ).gen() + + outfile.write(opcode_code) + + instruction_code = InstructionGen( + name=instruction_enum, + opcode_enum=opcode_enum, + instructions=instructions, + numeric_repr=numeric_repr, + metadata=metadata, + ).gen() + + outfile.write(instruction_code) + + generated = outfile.getvalue() + + script_path = pathlib.Path(__file__).resolve().relative_to(ROOT).as_posix() + + output = rustfmt( + f""" +// This file is generated by {script_path} +// Do not edit! + +use crate::{{ + bytecode::{{ + instruction::{{Arg, StackEffect}}, + oparg, + }}, + marshal::MarshalError, +}}; + +{generated} + """ + ) + + OUT_FILE.write_text(output) + + +if __name__ == "__main__": + main() diff --git a/crates/compiler-core/opcode.toml b/crates/compiler-core/opcode.toml new file mode 100644 index 000000000..6ee0cb865 --- /dev/null +++ b/crates/compiler-core/opcode.toml @@ -0,0 +1,270 @@ +[Opcode] +instruction_enum = "Instruction" +numeric_repr = "u8" +range = { min = 0, max = 255 } + +[Opcode.opcodes.BinaryOp] +oparg = { name = "op", type = "oparg::BinaryOperator" } + +[Opcode.opcodes.BuildInterpolation] +oparg = { name = "format", type = "u32" } + +[Opcode.opcodes.BuildList] +oparg = { name = "count", type = "u32" } + +[Opcode.opcodes.BuildMap] +oparg = { name = "count", type = "u32" } + +[Opcode.opcodes.BuildSet] +oparg = { name = "count", type = "u32" } + +[Opcode.opcodes.BuildSlice] +oparg = { name = "argc", type = "oparg::BuildSliceArgCount" } + +[Opcode.opcodes.BuildString] +oparg = { name = "count", type = "u32" } + +[Opcode.opcodes.BuildTuple] +oparg = { name 
= "count", type = "u32" } + +[Opcode.opcodes.Call] +oparg = { name = "argc", type = "u32" } + +[Opcode.opcodes.CallIntrinsic1] +oparg = { name = "func", type = "oparg::IntrinsicFunction1" } + +[Opcode.opcodes.CallIntrinsic2] +oparg = { name = "func", type = "oparg::IntrinsicFunction2" } + +[Opcode.opcodes.CallKw] +oparg = { name = "argc", type = "u32" } + +[Opcode.opcodes.CompareOp] +oparg = { name = "opname", type = "oparg::ComparisonOperator" } + +[Opcode.opcodes.ContainsOp] +oparg = { name = "invert", type = "oparg::Invert" } + +[Opcode.opcodes.ConvertValue] +oparg = { name = "oparg", type = "oparg::ConvertValueOparg" } + +[Opcode.opcodes.Copy] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.CopyFreeVars] +oparg = { name = "n", type = "u32" } + +[Opcode.opcodes.DeleteAttr] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.DeleteDeref] +oparg = { name = "i", type = "oparg::VarNum" } + +[Opcode.opcodes.DeleteFast] +oparg = { name = "var_num", type = "oparg::VarNum" } + +[Opcode.opcodes.DeleteGlobal] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.DeleteName] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.DictMerge] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.DictUpdate] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.ForIter] +oparg = { name = "delta", type = "oparg::Label" } + +[Opcode.opcodes.GetAwaitable] +oparg = { name = "r#where", type = "u32" } + +[Opcode.opcodes.ImportFrom] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.ImportName] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.IsOp] +oparg = { name = "invert", type = "oparg::Invert" } + +[Opcode.opcodes.JumpBackward] +oparg = { name = "delta", type = "oparg::Label" } + +[Opcode.opcodes.JumpBackwardNoInterrupt] +oparg = { name = "delta", type = "oparg::Label" } + +[Opcode.opcodes.JumpForward] +oparg = { name = "delta", type = "oparg::Label" } + 
+[Opcode.opcodes.ListAppend] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.ListExtend] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.LoadAttr] +oparg = { name = "namei", type = "oparg::LoadAttr" } + +[Opcode.opcodes.LoadCommonConstant] +oparg = { name = "idx", type = "oparg::CommonConstant" } + +[Opcode.opcodes.LoadConst] +oparg = { name = "consti", type = "oparg::ConstIdx" } + +[Opcode.opcodes.LoadDeref] +oparg = { name = "i", type = "oparg::VarNum" } + +[Opcode.opcodes.LoadFast] +oparg = { name = "var_num", type = "oparg::VarNum" } + +[Opcode.opcodes.LoadFastAndClear] +oparg = { name = "var_num", type = "oparg::VarNum" } + +[Opcode.opcodes.LoadFastBorrow] +oparg = { name = "var_num", type = "oparg::VarNum" } + +[Opcode.opcodes.LoadFastBorrowLoadFastBorrow] +oparg = { name = "var_nums", type = "oparg::VarNums" } + +[Opcode.opcodes.LoadFastCheck] +oparg = { name = "var_num", type = "oparg::VarNum" } + +[Opcode.opcodes.LoadFastLoadFast] +oparg = { name = "var_nums", type = "oparg::VarNums" } + +[Opcode.opcodes.LoadFromDictOrDeref] +oparg = { name = "i", type = "oparg::VarNum" } + +[Opcode.opcodes.LoadFromDictOrGlobals] +oparg = { name = "i", type = "oparg::NameIdx" } + +[Opcode.opcodes.LoadGlobal] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.LoadName] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.LoadSmallInt] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.LoadSpecial] +oparg = { name = "method", type = "oparg::SpecialMethod" } + +[Opcode.opcodes.LoadSuperAttr] +oparg = { name = "namei", type = "oparg::LoadSuperAttr" } + +[Opcode.opcodes.MakeCell] +oparg = { name = "i", type = "oparg::VarNum" } + +[Opcode.opcodes.MapAdd] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.MatchClass] +oparg = { name = "count", type = "u32" } + +[Opcode.opcodes.PopJumpIfFalse] +oparg = { name = "delta", type = "oparg::Label" } + +[Opcode.opcodes.PopJumpIfNone] +oparg = { name = "delta", type = 
"oparg::Label" } + +[Opcode.opcodes.PopJumpIfNotNone] +oparg = { name = "delta", type = "oparg::Label" } + +[Opcode.opcodes.PopJumpIfTrue] +oparg = { name = "delta", type = "oparg::Label" } + +[Opcode.opcodes.RaiseVarargs] +oparg = { name = "argc", type = "oparg::RaiseKind" } + +[Opcode.opcodes.Reraise] +oparg = { name = "depth", type = "u32" } + +[Opcode.opcodes.Send] +oparg = { name = "delta", type = "oparg::Label" } + +[Opcode.opcodes.SetAdd] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.SetFunctionAttribute] +oparg = { name = "flag", type = "oparg::MakeFunctionFlag" } + +[Opcode.opcodes.SetUpdate] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.StoreAttr] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.StoreDeref] +oparg = { name = "i", type = "oparg::VarNum" } + +[Opcode.opcodes.StoreFast] +oparg = { name = "var_num", type = "oparg::VarNum" } + +[Opcode.opcodes.StoreFastLoadFast] +oparg = { name = "var_nums", type = "oparg::VarNums" } + +[Opcode.opcodes.StoreFastStoreFast] +oparg = { name = "var_nums", type = "oparg::VarNums" } + +[Opcode.opcodes.StoreGlobal] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.StoreName] +oparg = { name = "namei", type = "oparg::NameIdx" } + +[Opcode.opcodes.Swap] +oparg = { name = "i", type = "u32" } + +[Opcode.opcodes.UnpackEx] +oparg = { name = "counts", type = "oparg::UnpackExArgs" } + +[Opcode.opcodes.UnpackSequence] +oparg = { name = "count", type = "u32" } + +[Opcode.opcodes.WithExceptStart] +stack_effect = { pushed = "7", popped = "6" } + +[Opcode.opcodes.YieldValue] +oparg = { name = "arg", type = "u32" } + +[Opcode.opcodes.Resume] +oparg = { name = "context", type = "oparg::ResumeContext" } + +[PseudoOpcode] +instruction_enum = "PseudoInstruction" +numeric_repr = "u16" +range = { min = 256, max = 65535 } + +[PseudoOpcode.opcodes.Jump] +oparg = { name = "delta", type = "oparg::Label" } + +[PseudoOpcode.opcodes.JumpIfFalse] +oparg = { name = "delta", type 
= "oparg::Label" } + +[PseudoOpcode.opcodes.JumpIfTrue] +oparg = { name = "delta", type = "oparg::Label" } + +[PseudoOpcode.opcodes.JumpNoInterrupt] +oparg = { name = "delta", type = "oparg::Label" } + +[PseudoOpcode.opcodes.LoadClosure] +oparg = { name = "i", type = "oparg::NameIdx" } + +[PseudoOpcode.opcodes.SetupCleanup] +oparg = { name = "delta", type = "oparg::Label" } +stack_effect = { pushed = "0" } + +[PseudoOpcode.opcodes.SetupFinally] +oparg = { name = "delta", type = "oparg::Label" } +stack_effect = { pushed = "0" } + +[PseudoOpcode.opcodes.SetupWith] +oparg = { name = "delta", type = "oparg::Label" } +stack_effect = { pushed = "0" } + +[PseudoOpcode.opcodes.StoreFastMaybeNull] +oparg = { name = "var_num", type = "oparg::NameIdx" } diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 99278a28d..220de4878 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -20,10 +20,8 @@ use num_complex::Complex64; use rustpython_wtf8::{Wtf8, Wtf8Buf}; pub use crate::bytecode::{ - instruction::{ - AnyInstruction, AnyOpcode, Arg, Instruction, InstructionMetadata, Opcode, - PseudoInstruction, PseudoOpcode, StackEffect, - }, + instruction::{AnyInstruction, AnyOpcode, Arg, StackEffect}, + instructions::{Instruction, Opcode, PseudoInstruction, PseudoOpcode}, oparg::{ BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, ConvertValueOparg, IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, LoadSuperAttr, @@ -33,6 +31,7 @@ pub use crate::bytecode::{ }; mod instruction; +mod instructions; pub mod oparg; /// Exception table entry for zero-cost exception handling diff --git a/crates/compiler-core/src/bytecode/instruction.rs b/crates/compiler-core/src/bytecode/instruction.rs index 218696015..0a08aa115 100644 --- a/crates/compiler-core/src/bytecode/instruction.rs +++ b/crates/compiler-core/src/bytecode/instruction.rs @@ -1,701 +1,10 @@ use core::{fmt, 
marker::PhantomData}; -use crate::{ - bytecode::oparg::{ - self, BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, - ConvertValueOparg, IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, - LoadSuperAttr, MakeFunctionFlag, NameIdx, OpArg, OpArgByte, OpArgType, RaiseKind, - SpecialMethod, UnpackExArgs, - }, - marshal::MarshalError, -}; +use crate::marshal::MarshalError; -macro_rules! define_opcodes { - ( - #[repr($typ:ident)] - $opcode_vis:vis enum $opcode_name:ident; - - $(#[$instr_meta:meta])* - $instr_vis:vis enum $instr_name:ident { - $( - $(#[$op_meta:meta])* - $op_name:ident $({ $arg_name:ident: Arg<$arg_type:ty> $(,)? })? = ($op_id:expr, $op_display:literal) - ),* $(,)? - } - ) => { - #[derive(Clone, Copy, Debug)] - $opcode_vis enum $opcode_name { - $($op_name),* - } - - impl $opcode_name { - #[doc = concat!("Converts this opcode to [`", stringify!($instr_name), "`].")] - #[must_use] - $opcode_vis const fn as_instruction(&self) -> $instr_name { - match self { - $( - Self::$op_name => $instr_name::$op_name $({ $arg_name: Arg::marker() })?, - )* - } - } - - /// Gets the CPython name representation. 
- #[must_use] - $opcode_vis const fn name(&self) -> &str { - match self { - $(Self::$op_name => $op_display,)* - } - } - } - - impl From<$opcode_name> for $instr_name { - fn from(opcode: $opcode_name) -> Self { - opcode.as_instruction() - } - } - - - impl TryFrom<$typ> for $opcode_name { - type Error = $crate::marshal::MarshalError; - - fn try_from(value: $typ) -> Result { - match value { - $($op_id => Ok(Self::$op_name),)* - _ => Err(Self::Error::InvalidBytecode), - } - } - } - - impl From<$opcode_name> for $typ { - fn from(opcode: $opcode_name) -> Self { - match opcode { - $($opcode_name::$op_name => $op_id,)* - } - } - } - - impl ::core::fmt::Display for $opcode_name { - fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { - self.name().fmt(f) - } - } - - #[derive(Clone, Copy, Debug)] - #[repr($typ)] // TODO: Remove this repr - $instr_vis enum $instr_name { - $( - $(#[$op_meta])* - $op_name $({ $arg_name: Arg<$arg_type> })? = $op_id // TODO: Don't assign value - ),* - } - - impl $instr_name { - #[doc = concat!("Get the corresponding [`", stringify!($opcode_name), "`].")] - #[must_use] - $instr_vis const fn opcode(&self) -> $opcode_name { - match self { - $( - Self::$op_name $({ $arg_name: _ })? 
=> $opcode_name::$op_name, - )* - } - } - - } - - impl From<$instr_name> for $opcode_name { - fn from(instr: $instr_name) -> Self { - instr.opcode() - } - } - - impl TryFrom<$typ> for $instr_name { - type Error = $crate::marshal::MarshalError; - - fn try_from(value: $typ) -> Result { - $opcode_name::try_from(value).map(Into::into) - } - } - - impl From<$instr_name> for $typ { - fn from(instr: $instr_name) -> Self { - instr.opcode().into() - } - } - }; -} - -define_opcodes!( - #[repr(u8)] - pub enum Opcode; - - pub enum Instruction { - Cache = (0, "CACHE"), - BinarySlice = (1, "BINARY_SLICE"), - BuildTemplate = (2, "BUILD_TEMPLATE"), - BinaryOpInplaceAddUnicode = (3, "BINARY_OP_INPLACE_ADD_UNICODE"), - CallFunctionEx = (4, "CALL_FUNCTION_EX"), - CheckEgMatch = (5, "CHECK_EG_MATCH"), - CheckExcMatch = (6, "CHECK_EXC_MATCH"), - CleanupThrow = (7, "CLEANUP_THROW"), - DeleteSubscr = (8, "DELETE_SUBSCR"), - EndFor = (9, "END_FOR"), - EndSend = (10, "END_SEND"), - ExitInitCheck = (11, "EXIT_INIT_CHECK"), - FormatSimple = (12, "FORMAT_SIMPLE"), - FormatWithSpec = (13, "FORMAT_WITH_SPEC"), - GetAIter = (14, "GET_AITER"), - GetANext = (15, "GET_ANEXT"), - GetIter = (16, "GET_ITER"), - Reserved = (17, "RESERVED"), - GetLen = (18, "GET_LEN"), - GetYieldFromIter = (19, "GET_YIELD_FROM_ITER"), - InterpreterExit = (20, "INTERPRETER_EXIT"), - LoadBuildClass = (21, "LOAD_BUILD_CLASS"), - LoadLocals = (22, "LOAD_LOCALS"), - MakeFunction = (23, "MAKE_FUNCTION"), - MatchKeys = (24, "MATCH_KEYS"), - MatchMapping = (25, "MATCH_MAPPING"), - MatchSequence = (26, "MATCH_SEQUENCE"), - Nop = (27, "NOP"), - NotTaken = (28, "NOT_TAKEN"), - PopExcept = (29, "POP_EXCEPT"), - PopIter = (30, "POP_ITER"), - PopTop = (31, "POP_TOP"), - PushExcInfo = (32, "PUSH_EXC_INFO"), - PushNull = (33, "PUSH_NULL"), - ReturnGenerator = (34, "RETURN_GENERATOR"), - ReturnValue = (35, "RETURN_VALUE"), - SetupAnnotations = (36, "SETUP_ANNOTATIONS"), - StoreSlice = (37, "STORE_SLICE"), - StoreSubscr = (38, 
"STORE_SUBSCR"), - ToBool = (39, "TO_BOOL"), - UnaryInvert = (40, "UNARY_INVERT"), - UnaryNegative = (41, "UNARY_NEGATIVE"), - UnaryNot = (42, "UNARY_NOT"), - WithExceptStart = (43, "WITH_EXCEPT_START"), - BinaryOp { - op: Arg, - } = (44, "BINARY_OP"), - BuildInterpolation { - format: Arg, - } = (45, "BUILD_INTERPOLATION"), - BuildList { - count: Arg, - } = (46, "BUILD_LIST"), - BuildMap { - count: Arg, - } = (47, "BUILD_MAP"), - BuildSet { - count: Arg, - } = (48, "BUILD_SET"), - BuildSlice { - argc: Arg, - } = (49, "BUILD_SLICE"), - BuildString { - count: Arg, - } = (50, "BUILD_STRING"), - BuildTuple { - count: Arg, - } = (51, "BUILD_TUPLE"), - Call { - argc: Arg, - } = (52, "CALL"), - CallIntrinsic1 { - func: Arg, - } = (53, "CALL_INTRINSIC_1"), - CallIntrinsic2 { - func: Arg, - } = (54, "CALL_INTRINSIC_2"), - CallKw { - argc: Arg, - } = (55, "CALL_KW"), - CompareOp { - opname: Arg, - } = (56, "COMPARE_OP"), - ContainsOp { - invert: Arg, - } = (57, "CONTAINS_OP"), - ConvertValue { - oparg: Arg, - } = (58, "CONVERT_VALUE"), - Copy { - i: Arg, - } = (59, "COPY"), - CopyFreeVars { - n: Arg, - } = (60, "COPY_FREE_VARS"), - DeleteAttr { - namei: Arg, - } = (61, "DELETE_ATTR"), - DeleteDeref { - i: Arg, - } = (62, "DELETE_DEREF"), - DeleteFast { - var_num: Arg, - } = (63, "DELETE_FAST"), - DeleteGlobal { - namei: Arg, - } = (64, "DELETE_GLOBAL"), - DeleteName { - namei: Arg, - } = (65, "DELETE_NAME"), - DictMerge { - i: Arg, - } = (66, "DICT_MERGE"), - DictUpdate { - i: Arg, - } = (67, "DICT_UPDATE"), - EndAsyncFor = (68, "END_ASYNC_FOR"), - ExtendedArg = (69, "EXTENDED_ARG"), - ForIter { - delta: Arg