Autogen instructions & opcodes (#7797)

This commit is contained in:
Shahar Naveh
2026-05-10 16:13:38 +03:00
committed by GitHub
parent 108461f637
commit 320355f633
12 changed files with 4055 additions and 1249 deletions

View File

@@ -5971,7 +5971,7 @@ impl Compiler {
if self.ctx.func != FunctionContext::AsyncFunction {
return Err(self.error(CodegenErrorType::InvalidAsyncFor));
}
emit!(self, Instruction::GetAIter);
emit!(self, Instruction::GetAiter);
self.switch_to_block(for_block);
@@ -5980,7 +5980,7 @@ impl Compiler {
// SETUP_FINALLY to guard the __anext__ call
emit!(self, PseudoInstruction::SetupFinally { delta: else_block });
emit!(self, Instruction::GetANext);
emit!(self, Instruction::GetAnext);
self.emit_load_const(ConstantData::None);
end_async_for_target = self.compile_yield_from_sequence(true)?;
// POP_BLOCK for SETUP_FINALLY - only GetAnext/yield_from are protected
@@ -9445,7 +9445,7 @@ impl Compiler {
// Get iterator / turn item into an iterator
if generator.is_async {
emit!(self, Instruction::GetAIter);
emit!(self, Instruction::GetAiter);
} else {
emit!(self, Instruction::GetIter);
}
@@ -9455,7 +9455,7 @@ impl Compiler {
let mut end_async_for_target = BlockIdx::NULL;
if generator.is_async {
emit!(self, PseudoInstruction::SetupFinally { delta: after_block });
emit!(self, Instruction::GetANext);
emit!(self, Instruction::GetAnext);
self.push_fblock(
FBlockType::AsyncComprehensionGenerator,
loop_block,
@@ -9565,7 +9565,7 @@ impl Compiler {
// Get iterator / turn item into an iterator
// Use is_async from the first generator, not has_an_async_gen which covers ALL generators
if outermost.is_async {
emit!(self, Instruction::GetAIter);
emit!(self, Instruction::GetAiter);
} else {
emit!(self, Instruction::GetIter);
};
@@ -9646,7 +9646,7 @@ impl Compiler {
}
}
if has_async && generators[0].is_async {
emit!(self, Instruction::GetAIter);
emit!(self, Instruction::GetAiter);
} else {
emit!(self, Instruction::GetIter);
}
@@ -9822,7 +9822,7 @@ impl Compiler {
if i > 0 {
self.compile_for_iterable_expression(&generator.iter, generator.is_async)?;
if generator.is_async {
emit!(self, Instruction::GetAIter);
emit!(self, Instruction::GetAiter);
} else {
emit!(self, Instruction::GetIter);
}
@@ -9833,7 +9833,7 @@ impl Compiler {
let mut end_async_for_target = BlockIdx::NULL;
if generator.is_async {
emit!(self, PseudoInstruction::SetupFinally { delta: after_block });
emit!(self, Instruction::GetANext);
emit!(self, Instruction::GetAnext);
self.push_fblock(
FBlockType::AsyncComprehensionGenerator,
loop_block,
@@ -18406,7 +18406,7 @@ async def f(items):
"async dict comprehension should be inlined"
);
assert!(
ops.iter().any(|op| matches!(op, Instruction::GetAIter)),
ops.iter().any(|op| matches!(op, Instruction::GetAiter)),
"inlined async dict comprehension should keep GET_AITER in outer code, got ops={ops:?}"
);
assert!(
@@ -23486,7 +23486,7 @@ async def name_4():
let Some(get_aiter_pos) = name_4
.instructions
.iter()
.position(|unit| matches!(unit.op, Instruction::GetAIter))
.position(|unit| matches!(unit.op, Instruction::GetAiter))
else {
panic!("missing GET_AITER in name_4");
};

View File

@@ -12,9 +12,8 @@ use rustpython_compiler_core::{
bytecode::{
AnyInstruction, AnyOpcode, Arg, CO_FAST_CELL, CO_FAST_FREE, CO_FAST_HIDDEN, CO_FAST_LOCAL,
CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, ExceptionTableEntry,
InstrDisplayContext, Instruction, InstructionMetadata, IntrinsicFunction1, Label, OpArg,
Opcode, PseudoInstruction, PseudoOpcode, PyCodeLocationInfoKind, encode_exception_table,
oparg,
InstrDisplayContext, Instruction, IntrinsicFunction1, Label, OpArg, Opcode,
PseudoInstruction, PseudoOpcode, PyCodeLocationInfoKind, encode_exception_table, oparg,
},
varint::{write_signed_varint, write_varint},
};
@@ -3227,7 +3226,7 @@ impl CodeInfo {
}
AnyInstruction::Real(
Instruction::FormatSimple
| Instruction::GetANext
| Instruction::GetAnext
| Instruction::GetLen
| Instruction::GetYieldFromIter
| Instruction::ImportFrom { .. }
@@ -6896,7 +6895,7 @@ impl CodeInfo {
block
.instructions
.iter()
.any(|info| matches!(info.instr.real(), Some(Instruction::GetANext)))
.any(|info| matches!(info.instr.real(), Some(Instruction::GetAnext)))
}
fn block_has_return(block: &Block) -> bool {
@@ -11624,7 +11623,7 @@ fn block_contains_suspension_point(block: &Block) -> bool {
instr,
Instruction::YieldValue { .. }
| Instruction::GetAwaitable { .. }
| Instruction::GetANext
| Instruction::GetAnext
| Instruction::EndAsyncFor
)
})

View File

@@ -0,0 +1,721 @@
#!/usr/bin/env python
import collections
import dataclasses
import io
import os
import pathlib
import subprocess
import sys
import tomllib
# Directory that contains this script. The path is resolved so that ROOT below
# is absolute and symlink-free, which is what the
# `Path(__file__).resolve().relative_to(ROOT)` call in main() requires.
CRATE_ROOT = pathlib.Path(__file__).resolve().parent
# TOML file describing each opcode enum to generate.
CONF_FILE = CRATE_ROOT / "opcode.toml"
# Rust source file that main() overwrites with the generated code.
OUT_FILE = CRATE_ROOT / "src" / "bytecode" / "instructions.rs"
# Two directory levels above CRATE_ROOT; used to print this script's path
# relative to it in the generated file header.
ROOT = CRATE_ROOT.parents[1]

try:
    CPYTHON_ROOT = pathlib.Path(os.environ["CPYTHON_ROOT"]).expanduser().resolve()
except KeyError:
    # `from None` drops the chained KeyError: the message below already names
    # the missing variable, so the extra traceback is only noise.
    raise ValueError("Missing environment variable 'CPYTHON_ROOT'") from None

# The modules imported right after this block (`analyzer`, `generators_common`,
# `stack`) are looked up in this directory of the CPython checkout.
CPYTHON_TOOLS_LIB = CPYTHON_ROOT / "Tools" / "cases_generator"
sys.path.append(CPYTHON_TOOLS_LIB.as_posix())
import analyzer
from generators_common import DEFAULT_INPUT
from stack import get_stack_effect
@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class OpcodeGen:
    """Builds the Rust source text for one field-less opcode enum.

    `gen()` assembles the enum declaration, one inherent `impl` block made of
    every attribute whose name starts with ``fn_``, and then every attribute
    whose name starts with ``impl_``. Attributes are discovered with
    ``dir(self)`` and emitted in sorted-name order, so adding a new ``fn_*`` or
    ``impl_*`` attribute is enough to have it included in the output.
    """

    # Name of the Rust opcode enum to generate.
    name: str
    # Name of the companion Rust instruction enum (target of `as_instruction`).
    instruction_enum: str
    # Instruction objects to emit; this class reads `.name`, `.opcode`,
    # `.properties` and, where present, `.family` and `.size`.
    instructions: list
    # Rust integer type used for the numeric conversions (interpolated into
    # method names such as `as_<numeric_repr>`).
    numeric_repr: str
    # Per-opcode overrides keyed by opcode name; the optional "oparg" and
    # "stack_effect" entries are themselves string-to-string tables.
    metadata: dict[str, dict[str, dict[str, str]]]
    # Analysis object whose `.families` mapping drives `fn_deopt`.
    analysis: analyzer.Analysis

    def gen(self) -> str:
        """Return the Rust text for the enum, its methods and its trait impls."""
        methods = "\n\n".join(
            getattr(self, attr).strip()
            for attr in sorted(dir(self))
            if attr.startswith("fn_")
        )
        impls = "\n\n".join(
            getattr(self, attr).strip()
            for attr in sorted(dir(self))
            if attr.startswith("impl_")
        )
        variants = ",\n".join(instr.name for instr in self)
        return f"""
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum {self.name} {{
{variants}
}}
impl {self.name} {{
{methods}
}}
{impls}
"""

    @property
    def fn_as_numeric(self) -> str:
        """Rust `as_<numeric_repr>`: maps every variant to its opcode number."""
        arms = ",\n".join(f"Self::{instr.name} => {instr.opcode}" for instr in self)
        return f"""
#[must_use]
pub const fn as_{self.numeric_repr}(self) -> {self.numeric_repr} {{
match self {{
{arms},
}}
}}
"""

    @property
    def fn_try_from_numeric(self) -> str:
        """Rust `try_from_<numeric_repr>`: number to variant, or `InvalidBytecode`."""
        arms = ",\n".join(f"{instr.opcode} => Self::{instr.name}" for instr in self)
        return f"""
pub const fn try_from_{self.numeric_repr}(
value: {self.numeric_repr}
) -> Result<Self, MarshalError> {{
Ok(match value {{
{arms},
_ => return Err(MarshalError::InvalidBytecode),
}})
}}
"""

    @property
    def impl_try_from_numeric(self) -> str:
        """Rust `TryFrom<numeric_repr>` impl that forwards to `try_from_<numeric_repr>`."""
        return f"""
impl TryFrom<{self.numeric_repr}> for {self.name} {{
type Error = MarshalError;
fn try_from(value: {self.numeric_repr}) -> Result<Self, Self::Error> {{
Self::try_from_{self.numeric_repr}(value)
}}
}}
"""

    @property
    def impl_into_numeric(self) -> str:
        """Rust `From<enum> for numeric_repr` impl that forwards to `as_<numeric_repr>`."""
        return f"""
impl From<{self.name}> for {self.numeric_repr} {{
fn from(opcode: {self.name}) -> Self {{
opcode.as_{self.numeric_repr}()
}}
}}
"""

    def build_has_attr_fn(self, fn_attr: str, prop_attr: str, doc_flag: str) -> str:
        """Return the Rust text of a `has_<fn_attr>` predicate.

        The predicate matches every instruction whose
        ``instr.properties.<prop_attr>`` is truthy. When no instruction
        qualifies, the body is the literal ``false``. `doc_flag` only appears
        in the generated doc comment.
        """
        arms = "|".join(
            f"Self::{instr.name}"
            for instr in self
            if getattr(instr.properties, prop_attr)
        )
        if arms:
            inner = f"matches!(self, {arms})"
        else:
            inner = "false"
        return f"""
/// Does this opcode have '{doc_flag}' set.
#[must_use]
pub const fn has_{fn_attr}(self) -> bool {{
{inner}
}}
"""

    # The `has_*` predicates below are exposed as properties (rather than
    # methods) so that `gen()` picks them up through its `fn_` prefix scan.
    fn_has_arg = property(
        lambda self: self.build_has_attr_fn("arg", "oparg", "HAS_ARG_FLAG")
    )
    fn_has_const = property(
        lambda self: self.build_has_attr_fn("const", "uses_co_consts", "HAS_CONST_FLAG")
    )
    fn_has_name = property(
        lambda self: self.build_has_attr_fn("name", "uses_co_names", "HAS_NAME_FLAG")
    )
    fn_has_jump = property(
        lambda self: self.build_has_attr_fn("jump", "jumps", "HAS_JUMP_FLAG")
    )
    fn_has_free = property(
        lambda self: self.build_has_attr_fn("free", "has_free", "HAS_FREE_FLAG")
    )
    fn_has_local = property(
        lambda self: self.build_has_attr_fn("local", "uses_locals", "HAS_LOCAL_FLAG")
    )

    @property
    def instrumented_mapping(self) -> dict[str, str]:
        """Map each base opcode name to its ``Instrumented``-prefixed counterpart.

        Only pairs where both names are present in `instructions` are included.
        Entries are inserted in sorted order of the instrumented name.
        """
        inames = {instr.name for instr in self if instr.name.startswith("Instrumented")}
        names = {instr.name for instr in self} - inames
        res = {}
        for iname in sorted(inames):
            name = iname.removeprefix("Instrumented")
            # Instrumented opcodes without a same-named base opcode are skipped.
            if name not in names:
                continue
            res[name] = iname
        return res

    @property
    def fn_to_base(self) -> str:
        """Rust `to_base`: instrumented variant to base variant, else `None`."""
        arms = ",\n".join(
            f"Self::{iname} => Self::{name}"
            for name, iname in self.instrumented_mapping.items()
        )
        arms = arms.strip()
        if not arms:
            # No instrumented pairs: the generated function always returns None.
            inner = "None"
        else:
            inner = f"""
Some(match self {{
{arms},
_ => return None,
}})
"""
        return f"""
#[must_use]
pub const fn to_base(self) -> Option<Self> {{
{inner}
}}
"""

    @property
    def fn_to_instrumented(self) -> str:
        """Rust `to_instrumented`: base variant to instrumented variant, else `None`."""
        arms = ",\n".join(
            f"Self::{name} => Self::{iname}"
            for name, iname in self.instrumented_mapping.items()
        )
        arms = arms.strip()
        if not arms:
            # No instrumented pairs: the generated function always returns None.
            inner = "None"
        else:
            inner = f"""
Some(match self {{
{arms},
_ => return None,
}})
"""
        return f"""
#[must_use]
pub const fn to_instrumented(self) -> Option<Self> {{
{inner}
}}
"""

    @property
    def fn_deopt(self) -> str:
        """Rust `deopt`: family member variant to family-named variant, else `None`.

        Families come from ``analysis.families``; a family is used only when
        the PascalCase form of its name is one of this enum's instructions.
        """
        names = {instr.name for instr in self}
        deopts = collections.defaultdict(list)
        for family in self.analysis.families.values():
            family_name = to_pascal_case(family.name)
            if family_name not in names:
                continue
            for member in family.members:
                # A member carrying the family's own name is never mapped to itself.
                if member.name == family_name:
                    continue
                deopts[family_name].append(member.name)
        arms = ""
        for target, specialized in deopts.items():
            ops = "|".join(f"Self::{op}" for op in specialized)
            arms += f"{ops} => Self::{target},\n"
        arms = arms.strip()
        if not arms:
            inner = "None"
        else:
            inner = f"""
Some(match self {{
{arms}
_ => return None,
}})
"""
        return f"""
#[must_use]
pub const fn deopt(self) -> Option<Self> {{
{inner}
}}
"""

    @property
    def fn_cache_entries(self) -> str:
        """Rust `cache_entries`: emits ``size - 1`` for opcodes with ``size > 1``, else 0.

        Instructions are skipped when they have a family with a different
        name, when their name starts with ``Instrumented``, or when they have
        no ``size`` attribute.
        """
        arms = ""
        for instr in self:
            name = instr.name
            # NOTE(review): `fn_deopt` runs `family.name` through
            # `to_pascal_case` before comparing it with instruction names, while
            # this comparison uses it raw. If family names are not already
            # PascalCase, this test is true for every instruction that has a
            # family - confirm that is intended.
            if getattr(instr, "family", None) and (instr.family.name != name):
                continue
            if name.startswith("Instrumented"):
                continue
            try:
                size = instr.size
            except AttributeError:
                continue
            if size > 1:
                arms += f"Self::{name} => {size - 1},\n"
        arms = arms.strip()
        if not arms:
            inner = "0"
        else:
            # NOTE(review): the generated code calls `self.deoptimize()`, which
            # this generator does not emit (it emits `deopt`, returning an
            # Option). Presumably `deoptimize` is hand-written elsewhere - verify.
            inner = f"""
match self.deoptimize() {{
{arms}
_ => 0,
}}
"""
        return f"""
#[must_use]
pub const fn cache_entries(self) -> usize {{
{inner}
}}
"""

    @property
    def fn_stack_effect_info(self) -> str:
        """Rust `stack_effect_info`: per-opcode ``(pushed, popped)`` as a `StackEffect`.

        Expressions are derived from ``get_stack_effect(instr)``. A
        ``stack_effect`` entry in `metadata` may override either side, in which
        case the derived expression is kept as a TODO comment in the generated
        code.
        """
        oparg_used = False
        arms = ""
        for instr in self:
            name = instr.name
            stack = get_stack_effect(instr)
            popped = (-stack.base_offset).to_c()
            pushed = (stack.logical_sp - stack.base_offset).to_c()
            pushed_comment = ""
            popped_comment = ""
            if stack_effect := self.metadata.get(name, {}).get("stack_effect"):
                if npushed := stack_effect.get("pushed"):
                    pushed_comment = f"// TODO: Differs from CPython `{pushed}`"
                    pushed = npushed
                if npopped := stack_effect.get("popped"):
                    popped_comment = f"// TODO: Differs from CPython `{popped}`"
                    popped = npopped
            # Track whether any arm mentions `oparg`, so the generated parameter
            # can be named `_oparg` when it would otherwise be unused.
            oparg_used = oparg_used or any("oparg" in expr for expr in (pushed, popped))
            arms += f"""
Self::{name} => (
{pushed}, {pushed_comment}
{popped}, {popped_comment}
),
""".strip()
        arms = arms.strip()
        oparg_arg = "_oparg"
        oparg_cast = ""
        if oparg_used:
            oparg_arg = "oparg"
            oparg_cast = f"""
// Reason for converting {oparg_arg} to i32 is because of expressions like `1 + (oparg -1)`
// that causes underflow errors.
let oparg = i32::try_from({oparg_arg}).expect("{oparg_arg} does not fit in an `i32`");
"""
        return f"""
#[must_use]
pub fn stack_effect_info(&self, {oparg_arg}: u32) -> StackEffect {{
{oparg_cast}
let (pushed, popped) = match self {{
{arms}
}};
debug_assert!(u32::try_from(pushed).is_ok());
debug_assert!(u32::try_from(popped).is_ok());
StackEffect::new(pushed as u32, popped as u32)
}}
"""

    @property
    def fn_as_instruction(self) -> str:
        """Rust `as_instruction`: opcode variant to the same-named instruction variant.

        Variants with an ``oparg`` entry in `metadata` are built with that
        field set to ``Arg::marker()``.
        """
        arms = ""
        for instr in self:
            name = instr.name
            arms += f"Self::{name} => {self.instruction_enum}::{name}"
            if oparg := self.metadata.get(name, {}).get("oparg"):
                oname = oparg["name"]
                arms += f" {{ {oname}: Arg::marker() }}"
            arms += ",\n"
        return f"""
/// Returns self as [`{self.instruction_enum}`].
#[must_use]
pub const fn as_instruction(self) -> {self.instruction_enum} {{
match self {{
{arms}
}}
}}
"""

    @property
    def impl_as_instruction(self) -> str:
        """Rust `From<opcode enum> for instruction enum` impl via `as_instruction`."""
        return f"""
impl From<{self.name}> for {self.instruction_enum} {{
fn from(opcode: {self.name}) -> Self {{
opcode.as_instruction()
}}
}}
"""

    @property
    def fn_stack_effect(self) -> str:
        """Rust `stack_effect`: returns ``stack_effect_info(oparg).effect()``."""
        return """
/// Stack effect of [`Self::stack_effect_info`].
#[must_use]
pub fn stack_effect(&self, oparg: u32) -> i32 {
self.stack_effect_info(oparg).effect()
}
"""

    def __iter__(self):
        """Iterate over `instructions` in their stored order."""
        yield from self.instructions
@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class InstructionGen:
    """Builds the Rust source text for one instruction enum.

    Unlike the opcode enum, variants may carry an ``Arg<...>`` field as
    described by the ``oparg`` entry in `metadata`. Most generated methods
    convert to the opcode enum with `as_opcode()` and delegate to it. As in
    `OpcodeGen`, `gen()` collects every ``fn_*`` and ``impl_*`` attribute in
    sorted-name order.
    """

    # Name of the Rust instruction enum to generate.
    name: str
    # Name of the companion Rust opcode enum that most methods delegate to.
    opcode_enum: str
    # Instruction objects to emit; this class reads `.name` and `.opcode`.
    instructions: list
    # Rust integer type used for `#[repr(...)]` and the numeric conversions.
    numeric_repr: str
    # Per-opcode overrides keyed by opcode name; only the optional "oparg"
    # table (with "name" and "type" keys) is read by this class.
    metadata: dict[str, dict[str, dict[str, str]]]

    def gen(self) -> str:
        """Return the Rust text for the enum, its methods and its trait impls."""
        methods = "\n\n".join(
            getattr(self, attr).strip()
            for attr in sorted(dir(self))
            if attr.startswith("fn_")
        )
        impls = "\n\n".join(
            getattr(self, attr).strip()
            for attr in sorted(dir(self))
            if attr.startswith("impl_")
        )
        variants = ""
        for instr in self:
            name = instr.name
            variants += name
            # Variants with an oparg get a single named `Arg<type>` field.
            if oparg := self.metadata.get(name, {}).get("oparg"):
                oname, otype = oparg["name"], oparg["type"]
                variants += f"{{ {oname}: Arg<{otype}> }}"
            opcode = instr.opcode
            # Each variant is given an explicit discriminant equal to its opcode.
            variants += f" = {opcode},\n"
        return f"""
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr({self.numeric_repr})] // TODO: Remove this `#[repr(...)]`
pub enum {self.name} {{
{variants}
}}
impl {self.name} {{
{methods}
}}
{impls}
"""

    @property
    def fn_as_opcode(self) -> str:
        """Rust `as_opcode`: instruction variant to the same-named opcode variant."""
        arms = ""
        for instr in self:
            name = instr.name
            arms += f"Self::{name}"
            # Struct-like variants need `{ .. }` to ignore their field.
            if oparg := self.metadata.get(name, {}).get("oparg"):
                arms += " { .. }"
            arms += f"=> {self.opcode_enum}::{name},\n"
        return f"""
/// Returns self as a [`{self.opcode_enum}`].
#[must_use]
pub const fn as_opcode(self) -> {self.opcode_enum} {{
match self {{
{arms}
}}
}}
"""

    @property
    def impl_as_opcode(self) -> str:
        """Rust `From<instruction enum> for opcode enum` impl via `as_opcode`."""
        return f"""
impl From<{self.name}> for {self.opcode_enum} {{
fn from(instruction: {self.name}) -> Self {{
instruction.as_opcode()
}}
}}
"""

    @property
    def fn_as_numeric_repr(self) -> str:
        """Rust `as_<numeric_repr>`: delegates to the opcode enum's conversion."""
        return f"""
#[must_use]
pub const fn as_{self.numeric_repr}(self) -> {self.numeric_repr} {{
self.as_opcode().as_{self.numeric_repr}()
}}
"""

    @property
    def impl_as_numeric_repr(self) -> str:
        """Rust `From<instruction enum> for numeric_repr` impl via `as_<numeric_repr>`."""
        return f"""
impl From<{self.name}> for {self.numeric_repr} {{
fn from(instruction: {self.name}) -> Self {{
instruction.as_{self.numeric_repr}()
}}
}}
"""

    @property
    def fn_label_arg(self) -> str:
        """Rust `label_arg`: the field of variants typed ``oparg::Label``, else `None`."""
        TARGET = "oparg::Label"
        arms = ""
        for instr in self:
            name = instr.name
            if oparg := self.metadata.get(name, {}).get("oparg"):
                oname, otype = oparg["name"], oparg["type"]
                # Only variants whose oparg type is exactly TARGET get an arm.
                if otype != TARGET:
                    continue
                arms += f"Self::{name} {{ {oname} }} => *{oname},\n"
        arms = arms.strip()
        return f"""
#[must_use]
pub const fn label_arg(&self) -> Option<Arg<{TARGET}>> {{
Some(match self {{
{arms}
_ => return None,
}})
}}
"""

    @property
    def fn_to_base(self) -> str:
        """Rust `to_base`: delegates to the opcode enum and converts the result back."""
        return f"""
#[must_use]
pub const fn to_base(self) -> Option<Self> {{
if let Some(opcode) = self.as_opcode().to_base() {{
Some(opcode.as_instruction())
}} else {{
None
}}
}}
"""

    @property
    def fn_to_instrumented(self) -> str:
        """Rust `to_instrumented`: delegates to the opcode enum and converts back."""
        return f"""
#[must_use]
pub const fn to_instrumented(self) -> Option<Self> {{
if let Some(opcode) = self.as_opcode().to_instrumented() {{
Some(opcode.as_instruction())
}} else {{
None
}}
}}
"""

    @property
    def fn_try_from_numeric(self) -> str:
        """Rust `try_from_<numeric_repr>`: parses via the opcode enum, then converts."""
        return f"""
pub const fn try_from_{self.numeric_repr}(
value: {self.numeric_repr}
) -> Result<Self, MarshalError> {{
match {self.opcode_enum}::try_from_{self.numeric_repr}(value) {{
Ok(opcode) => Ok(opcode.as_instruction()),
Err(e) => Err(e),
}}
}}
"""

    @property
    def impl_try_from_numeric(self) -> str:
        """Rust `TryFrom<numeric_repr>` impl that forwards to `try_from_<numeric_repr>`."""
        return f"""
impl TryFrom<{self.numeric_repr}> for {self.name} {{
type Error = MarshalError;
fn try_from(value: {self.numeric_repr}) -> Result<Self, Self::Error> {{
Self::try_from_{self.numeric_repr}(value)
}}
}}
"""

    @property
    def fn_stack_effect(self) -> str:
        """Rust `stack_effect`: delegates to the opcode enum."""
        return """
/// Stack effect of [`Self::stack_effect_info`].
#[must_use]
pub fn stack_effect(&self, oparg: u32) -> i32 {
self.as_opcode().stack_effect(oparg)
}
"""

    @property
    def fn_cache_entries(self) -> str:
        """Rust `cache_entries`: delegates to the opcode enum."""
        return f"""
#[must_use]
pub const fn cache_entries(self) -> usize {{
self.as_opcode().cache_entries()
}}
"""

    @property
    def fn_deopt(self) -> str:
        """Rust `deopt`: delegates to the opcode enum and converts the result back."""
        return f"""
#[must_use]
pub const fn deopt(self) -> Option<Self> {{
if let Some(opcode) = self.as_opcode().deopt() {{
Some(opcode.as_instruction())
}} else {{
None
}}
}}
"""

    @property
    def fn_stack_effect_info(self) -> str:
        """Rust `stack_effect_info`: delegates to the opcode enum."""
        return f"""
#[must_use]
pub fn stack_effect_info(&self, oparg: u32) -> StackEffect {{
self.as_opcode().stack_effect_info(oparg)
}}
"""

    def __iter__(self):
        """Iterate over `instructions` in their stored order."""
        yield from self.instructions
def to_pascal_case(s: str) -> str:
    """Return `s` title-cased by `str.title` with every underscore removed.

    For example ``"GET_AITER"`` becomes ``"GetAiter"``. Because `str.title`
    starts a new word after any non-letter, a letter following a digit is
    capitalised as well (``"foo2bar"`` becomes ``"Foo2Bar"``).
    """
    titled = s.title()
    # Splitting on "_" and re-joining with nothing drops every underscore.
    return "".join(titled.split("_"))
def get_analysis() -> analyzer.Analysis:
    """Analyze `DEFAULT_INPUT` and fold the pseudo instructions into `instructions`.

    Returns the analysis object with `pseudos` merged in place into its
    `instructions` mapping.
    """
    result = analyzer.analyze_files([DEFAULT_INPUT])
    # Real and pseudo instructions are not differentiated yet, so callers get
    # them all from a single mapping.
    result.instructions |= result.pseudos
    return result
def rustfmt(code: str) -> str:
    """Pipe `code` through ``rustfmt --emit=stdout`` and return what it prints.

    Raises `subprocess.CalledProcessError` when rustfmt exits with a non-zero
    status.
    """
    completed = subprocess.run(
        ["rustfmt", "--emit=stdout"],
        input=code,
        text=True,
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout
def main():
    """Generate the Rust opcode and instruction enums and write them to `OUT_FILE`.

    For every top-level table in `CONF_FILE`, the instructions whose opcode
    lies in the configured inclusive range are selected, sorted by opcode,
    renamed to PascalCase, and rendered with `OpcodeGen` then `InstructionGen`.
    The concatenated result is prefixed with a header, formatted with
    `rustfmt`, and written out.
    """
    config = tomllib.loads(CONF_FILE.read_text())
    analysis = get_analysis()
    buffer = io.StringIO()

    for opcode_enum, section in config.items():
        metadata = section["opcodes"]
        numeric_repr = section["numeric_repr"]
        instruction_enum = section["instruction_enum"]
        limits = section["range"]
        raw_min, raw_max = limits["min"], limits["max"]
        lower, upper = int(raw_min), int(raw_max)
        # The configured range is inclusive on both ends.
        bounds = range(lower, upper + 1)

        instructions = [
            instr for instr in analysis.instructions.values() if instr.opcode in bounds
        ]
        instructions.sort(key=lambda instr: instr.opcode)

        # Names are rewritten in place on the shared instruction objects, so
        # both generators below see the PascalCase form.
        for instr in instructions:
            instr.name = to_pascal_case(instr.name)

        opcode_gen = OpcodeGen(
            name=opcode_enum,
            instruction_enum=instruction_enum,
            instructions=instructions,
            numeric_repr=numeric_repr,
            metadata=metadata,
            analysis=analysis,
        )
        buffer.write(opcode_gen.gen())

        instruction_gen = InstructionGen(
            name=instruction_enum,
            opcode_enum=opcode_enum,
            instructions=instructions,
            numeric_repr=numeric_repr,
            metadata=metadata,
        )
        buffer.write(instruction_gen.gen())

    generated = buffer.getvalue()
    # Path of this script relative to ROOT, shown in the generated header.
    script_path = pathlib.Path(__file__).resolve().relative_to(ROOT).as_posix()
    unformatted = f"""
// This file is generated by {script_path}
// Do not edit!
use crate::{{
bytecode::{{
instruction::{{Arg, StackEffect}},
oparg,
}},
marshal::MarshalError,
}};
{generated}
"""
    OUT_FILE.write_text(rustfmt(unformatted))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,270 @@
[Opcode]
instruction_enum = "Instruction"
numeric_repr = "u8"
range = { min = 0, max = 255 }
[Opcode.opcodes.BinaryOp]
oparg = { name = "op", type = "oparg::BinaryOperator" }
[Opcode.opcodes.BuildInterpolation]
oparg = { name = "format", type = "u32" }
[Opcode.opcodes.BuildList]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildMap]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildSet]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildSlice]
oparg = { name = "argc", type = "oparg::BuildSliceArgCount" }
[Opcode.opcodes.BuildString]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildTuple]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.Call]
oparg = { name = "argc", type = "u32" }
[Opcode.opcodes.CallIntrinsic1]
oparg = { name = "func", type = "oparg::IntrinsicFunction1" }
[Opcode.opcodes.CallIntrinsic2]
oparg = { name = "func", type = "oparg::IntrinsicFunction2" }
[Opcode.opcodes.CallKw]
oparg = { name = "argc", type = "u32" }
[Opcode.opcodes.CompareOp]
oparg = { name = "opname", type = "oparg::ComparisonOperator" }
[Opcode.opcodes.ContainsOp]
oparg = { name = "invert", type = "oparg::Invert" }
[Opcode.opcodes.ConvertValue]
oparg = { name = "oparg", type = "oparg::ConvertValueOparg" }
[Opcode.opcodes.Copy]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.CopyFreeVars]
oparg = { name = "n", type = "u32" }
[Opcode.opcodes.DeleteAttr]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.DeleteDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.DeleteFast]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.DeleteGlobal]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.DeleteName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.DictMerge]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.DictUpdate]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.ForIter]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.GetAwaitable]
oparg = { name = "r#where", type = "u32" }
[Opcode.opcodes.ImportFrom]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.ImportName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.IsOp]
oparg = { name = "invert", type = "oparg::Invert" }
[Opcode.opcodes.JumpBackward]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.JumpBackwardNoInterrupt]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.JumpForward]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.ListAppend]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.ListExtend]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.LoadAttr]
oparg = { name = "namei", type = "oparg::LoadAttr" }
[Opcode.opcodes.LoadCommonConstant]
oparg = { name = "idx", type = "oparg::CommonConstant" }
[Opcode.opcodes.LoadConst]
oparg = { name = "consti", type = "oparg::ConstIdx" }
[Opcode.opcodes.LoadDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFast]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastAndClear]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastBorrow]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastBorrowLoadFastBorrow]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.LoadFastCheck]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastLoadFast]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.LoadFromDictOrDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFromDictOrGlobals]
oparg = { name = "i", type = "oparg::NameIdx" }
[Opcode.opcodes.LoadGlobal]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.LoadName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.LoadSmallInt]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.LoadSpecial]
oparg = { name = "method", type = "oparg::SpecialMethod" }
[Opcode.opcodes.LoadSuperAttr]
oparg = { name = "namei", type = "oparg::LoadSuperAttr" }
[Opcode.opcodes.MakeCell]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.MapAdd]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.MatchClass]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.PopJumpIfFalse]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.PopJumpIfNone]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.PopJumpIfNotNone]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.PopJumpIfTrue]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.RaiseVarargs]
oparg = { name = "argc", type = "oparg::RaiseKind" }
[Opcode.opcodes.Reraise]
oparg = { name = "depth", type = "u32" }
[Opcode.opcodes.Send]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.SetAdd]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.SetFunctionAttribute]
oparg = { name = "flag", type = "oparg::MakeFunctionFlag" }
[Opcode.opcodes.SetUpdate]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.StoreAttr]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.StoreDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.StoreFast]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.StoreFastLoadFast]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.StoreFastStoreFast]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.StoreGlobal]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.StoreName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.Swap]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.UnpackEx]
oparg = { name = "counts", type = "oparg::UnpackExArgs" }
[Opcode.opcodes.UnpackSequence]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.WithExceptStart]
stack_effect = { pushed = "7", popped = "6" }
[Opcode.opcodes.YieldValue]
oparg = { name = "arg", type = "u32" }
[Opcode.opcodes.Resume]
oparg = { name = "context", type = "oparg::ResumeContext" }
[PseudoOpcode]
instruction_enum = "PseudoInstruction"
numeric_repr = "u16"
range = { min = 256, max = 65535 }
[PseudoOpcode.opcodes.Jump]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.JumpIfFalse]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.JumpIfTrue]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.JumpNoInterrupt]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.LoadClosure]
oparg = { name = "i", type = "oparg::NameIdx" }
[PseudoOpcode.opcodes.SetupCleanup]
oparg = { name = "delta", type = "oparg::Label" }
stack_effect = { pushed = "0" }
[PseudoOpcode.opcodes.SetupFinally]
oparg = { name = "delta", type = "oparg::Label" }
stack_effect = { pushed = "0" }
[PseudoOpcode.opcodes.SetupWith]
oparg = { name = "delta", type = "oparg::Label" }
stack_effect = { pushed = "0" }
[PseudoOpcode.opcodes.StoreFastMaybeNull]
oparg = { name = "var_num", type = "oparg::NameIdx" }

View File

@@ -20,10 +20,8 @@ use num_complex::Complex64;
use rustpython_wtf8::{Wtf8, Wtf8Buf};
pub use crate::bytecode::{
instruction::{
AnyInstruction, AnyOpcode, Arg, Instruction, InstructionMetadata, Opcode,
PseudoInstruction, PseudoOpcode, StackEffect,
},
instruction::{AnyInstruction, AnyOpcode, Arg, StackEffect},
instructions::{Instruction, Opcode, PseudoInstruction, PseudoOpcode},
oparg::{
BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, ConvertValueOparg,
IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, LoadSuperAttr,
@@ -33,6 +31,7 @@ pub use crate::bytecode::{
};
mod instruction;
mod instructions;
pub mod oparg;
/// Exception table entry for zero-cost exception handling

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
use core::fmt;
use crate::{
bytecode::{CodeUnit, instruction::Instruction},
bytecode::{CodeUnit, instructions::Instruction},
marshal::MarshalError,
};

View File

@@ -5,7 +5,7 @@ mod _opcode {
use crate::vm::{
AsObject, PyObjectRef, PyResult, VirtualMachine,
builtins::{PyInt, PyIntRef},
bytecode::{AnyInstruction, AnyOpcode, InstructionMetadata, Opcode, PseudoOpcode, oparg},
bytecode::{AnyOpcode, oparg},
};
fn try_from_i32(raw: i32) -> Result<AnyOpcode, ()> {
@@ -15,9 +15,6 @@ mod _opcode {
.map_err(|_| ())
}
// https://github.com/python/cpython/blob/v3.14.2/Include/opcode_ids.h#L252
const HAVE_ARGUMENT: i32 = 43;
// prepare specialization
#[pyattr]
const ENABLE_SPECIALIZATION: i8 = 1;
@@ -74,7 +71,7 @@ mod _opcode {
.opcode
.try_to_primitive::<u16>(vm)
.and_then(|v| {
AnyInstruction::try_from(v)
AnyOpcode::try_from(v)
.map_err(|_| vm.new_exception_empty(vm.ctx.exceptions.value_error.to_owned()))
})
.map_err(|_| vm.new_value_error("invalid opcode or oparg"))?;
@@ -102,108 +99,49 @@ mod _opcode {
#[pyfunction]
fn has_arg(opcode: i32) -> bool {
try_from_i32(opcode).is_ok_and(|_| opcode > HAVE_ARGUMENT)
try_from_i32(opcode).map(|op| op.has_arg()).unwrap_or(false)
}
#[pyfunction]
fn has_const(opcode: i32) -> bool {
matches!(try_from_i32(opcode), Ok(AnyOpcode::Real(Opcode::LoadConst)))
try_from_i32(opcode)
.map(|op| op.has_const())
.unwrap_or(false)
}
#[pyfunction]
fn has_name(opcode: i32) -> bool {
matches!(
try_from_i32(opcode),
Ok(AnyOpcode::Real(
Opcode::DeleteAttr
| Opcode::DeleteGlobal
| Opcode::DeleteName
| Opcode::ImportFrom
| Opcode::ImportName
| Opcode::LoadAttr
| Opcode::LoadFromDictOrGlobals
| Opcode::LoadGlobal
| Opcode::LoadName
| Opcode::LoadSuperAttr
| Opcode::StoreAttr
| Opcode::StoreGlobal
| Opcode::StoreName
| Opcode::InstrumentedLoadSuperAttr
))
)
try_from_i32(opcode)
.map(|op| op.has_name())
.unwrap_or(false)
}
#[pyfunction]
fn has_jump(opcode: i32) -> bool {
matches!(
try_from_i32(opcode),
Ok(AnyOpcode::Real(
Opcode::EndAsyncFor
| Opcode::ForIter
| Opcode::JumpBackward
| Opcode::JumpBackwardNoInterrupt
| Opcode::JumpForward
| Opcode::PopJumpIfFalse
| Opcode::PopJumpIfNone
| Opcode::PopJumpIfNotNone
| Opcode::PopJumpIfTrue
| Opcode::Send
| Opcode::InstrumentedForIter
| Opcode::InstrumentedEndAsyncFor
) | AnyOpcode::Pseudo(
PseudoOpcode::Jump
| PseudoOpcode::JumpIfFalse
| PseudoOpcode::JumpIfTrue
| PseudoOpcode::JumpNoInterrupt
))
)
try_from_i32(opcode)
.map(|op| op.has_jump())
.unwrap_or(false)
}
#[pyfunction]
fn has_free(opcode: i32) -> bool {
matches!(
try_from_i32(opcode),
Ok(AnyOpcode::Real(
Opcode::DeleteDeref
| Opcode::LoadFromDictOrDeref
| Opcode::MakeCell
| Opcode::StoreDeref
))
)
try_from_i32(opcode)
.map(|op| op.has_free())
.unwrap_or(false)
}
#[pyfunction]
fn has_local(opcode: i32) -> bool {
matches!(
try_from_i32(opcode),
Ok(AnyOpcode::Real(
Opcode::DeleteFast
| Opcode::LoadDeref
| Opcode::LoadFast
| Opcode::LoadFastAndClear
| Opcode::LoadFastBorrow
| Opcode::LoadFastBorrowLoadFastBorrow
| Opcode::LoadFastCheck
| Opcode::LoadFastLoadFast
| Opcode::StoreFast
| Opcode::StoreFastLoadFast
| Opcode::StoreFastStoreFast
) | AnyOpcode::Pseudo(PseudoOpcode::LoadClosure | PseudoOpcode::StoreFastMaybeNull))
)
try_from_i32(opcode)
.map(|op| op.has_local())
.unwrap_or(false)
}
#[pyfunction]
fn has_exc(opcode: i32) -> bool {
// No instructions have exception info in RustPython
// (exception handling is done via exception table)
// This is for compatibility with CPython
matches!(
try_from_i32(opcode),
Ok(AnyOpcode::Pseudo(
PseudoOpcode::SetupCleanup | PseudoOpcode::SetupFinally | PseudoOpcode::SetupWith
))
)
try_from_i32(opcode)
.map(|op| op.is_block_push())
.unwrap_or(false)
}
#[pyfunction]

View File

@@ -11,9 +11,7 @@ use crate::{
types::Representable,
};
use num_traits::Zero;
use rustpython_compiler_core::bytecode::{
self, Constant, Instruction, InstructionMetadata, StackEffect,
};
use rustpython_compiler_core::bytecode::{self, Constant, Instruction, StackEffect};
use stack_analysis::*;
/// Stack state analysis for safe line-number jumps.
@@ -237,7 +235,7 @@ pub(crate) mod stack_analysis {
}
}
}
Instruction::GetIter | Instruction::GetAIter => {
Instruction::GetIter | Instruction::GetAiter => {
next_stack = push_value(pop_value(next_stack), Kind::Iterator as i64);
if next_i < stacks.len() {
stacks[next_i] = next_stack;

View File

@@ -2548,14 +2548,14 @@ impl ExecutingFrame<'_> {
Ok(None)
}
Instruction::GetAIter => {
Instruction::GetAiter => {
let aiterable = self.pop_value();
let aiter = vm.call_special_method(&aiterable, identifier!(vm, __aiter__), ())?;
self.push_value(aiter);
Ok(None)
}
Instruction::GetANext => {
#[cfg(debug_assertions)] // remove when GetANext is fully implemented
Instruction::GetAnext => {
#[cfg(debug_assertions)] // remove when GetAnext is fully implemented
let orig_stack_len = self.localsplus.stack_len();
let aiter = self.top_value();

View File

@@ -11,7 +11,7 @@ import typing
ROOT = pathlib.Path(__file__).parents[1]
BYTECODE_FILE = (
ROOT / "crates" / "compiler-core" / "src" / "bytecode" / "instruction.rs"
ROOT / "crates" / "compiler-core" / "src" / "bytecode" / "instructions.rs"
)
OPCODE_METADATA_FILE = ROOT / "Lib" / "_opcode_metadata.py"
@@ -33,7 +33,6 @@ def to_snake_case(s: str) -> str:
class Opcode(typing.NamedTuple):
rust_name: str
cpython_name: str
id: int
have_oparg: bool
@@ -41,6 +40,10 @@ class Opcode(typing.NamedTuple):
def is_instrumented(self):
return self.cpython_name.startswith("INSTRUMENTED_")
@property
def cpython_name(self):
return to_snake_case(self.rust_name)
@classmethod
def from_str(cls, text: str):
# Split on commas that are followed by a newline + an uppercase letter (new entry)
@@ -51,13 +54,8 @@ class Opcode(typing.NamedTuple):
continue
have_oparg = "Arg<" in entry # Hacky but works
rust_name = re.match(r"(\w+)", entry).group(1)
id_num, cpython_name = re.search(r'\((\d+),\s*"([^"]+)"\)', entry).groups()
yield cls(
rust_name=rust_name,
cpython_name=cpython_name,
id=int(id_num),
have_oparg=have_oparg,
)
id_num = re.findall(r"= (\d+)", entry)[0]
yield cls(rust_name=rust_name, id=int(id_num), have_oparg=have_oparg)
def __lt__(self, other: typing.Self) -> bool:
sprio, oprio = (
@@ -109,7 +107,7 @@ def build_deopts(text: str) -> dict[str, list[str]]:
match_body = raw_body[block_start:block_end]
arm_pattern = re.compile(
r"((?:Self::\w+\s*\|\s*)*Self::\w+)\s*=>\s*(?:\{\s*)?Opcode::(\w+)", re.DOTALL
r"((?:Self::\w+\s*\|\s*)*Self::\w+)\s*=>\s*(?:\{\s*)?Self::(\w+)", re.DOTALL
)
variants_pattern = re.compile(r"Self::(\w+)")