Autogen opcodes metadata (#7983)

This commit is contained in:
Shahar Naveh
2026-05-27 10:44:58 +03:00
committed by GitHub
parent 1cb24c5ebb
commit f95b7468f7
19 changed files with 2333 additions and 4094 deletions

19
.gitattributes vendored
View File

@@ -58,13 +58,14 @@ Lib/venv/scripts/posix/* text eol=lf
#
[attr]generated linguist-generated=true diff=generated
Lib/_opcode_metadata.py generated
Lib/keyword.py generated
Lib/idlelib/help.html generated
Lib/test/certdata/*.pem generated
Lib/test/certdata/*.0 generated
Lib/test/levenshtein_examples.json generated
Lib/test/test_stable_abi_ctypes.py generated
Lib/token.py generated
Lib/_opcode_metadata.py generated
Lib/keyword.py generated
Lib/idlelib/help.html generated
Lib/test/certdata/*.pem generated
Lib/test/certdata/*.0 generated
Lib/test/levenshtein_examples.json generated
Lib/test/test_stable_abi_ctypes.py generated
Lib/token.py generated
crates/compiler-core/src/bytecode/opcode_metadata.rs generated
.github/workflows/*.lock.yml linguist-generated=true merge=ours
.github/workflows/*.lock.yml linguist-generated=true merge=ours

View File

@@ -519,6 +519,8 @@ jobs:
security-events: write # for zizmor
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
@@ -544,12 +546,35 @@ jobs:
package-manager-cache: false
node-version: "24"
- name: prek
- name: install prek
id: prek
uses: j178/prek-action@bdca6f102f98e2b4c7029491a53dfd366469e33d # v2.0.4
with:
cache: false
show-verbose-logs: false
install-only: true
- name: prek run
run: prek run --show-diff-on-failure --color=always --all-files
- name: Get target CPython version
id: cpython-version
run: |
version=$(cat .python-version)
echo "version=${version}" >> "$GITHUB_OUTPUT"
- name: Clone CPython
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
repository: python/cpython
path: cpython
ref: "v${{ steps.cpython-version.outputs.version }}"
persist-credentials: false
- name: prek run (manual stage)
run: prek run --show-diff-on-failure --color=always --all-files --hook-stage manual
env:
CPYTHON_ROOT: ${{ github.workspace }}/cpython
- name: save prek cache
if: ${{ github.ref == 'refs/heads/main' }} # only save on main

View File

@@ -40,14 +40,27 @@ repos:
types: [rust]
priority: 0
- id: generate-opcode-metadata
name: generate opcode metadata
entry: python scripts/generate_opcode_metadata.py
files: '^(crates/compiler-core/src/bytecode/instruction\.rs|scripts/generate_opcode_metadata\.py)$'
- id: generate-rs-opcode-metadata
  name: generate rust opcode metadata
  entry: python tools/opcode_metadata/generate_rs_opcode_metadata.py
  # `files` is a regular expression, not a glob: `/*` would mean "zero or more
  # slashes" and never match a file inside the directory, so use `/.*`.
  files: '^(crates/compiler-core/src/bytecode/instruction\.rs|tools/opcode_metadata/.*)$'
  pass_filenames: false
  language: system
  require_serial: true
  priority: 1 # so rustfmt runs first
  stages:
    - manual
- id: generate-py-opcode-metadata
  name: generate python opcode metadata
  entry: python tools/opcode_metadata/generate_py_opcode_metadata.py
  # `files` is a regular expression, not a glob: `/*` would mean "zero or more
  # slashes" and never match a file inside the directory, so use `/.*`.
  files: '^(crates/compiler-core/src/bytecode/instruction\.rs|tools/opcode_metadata/.*)$'
  pass_filenames: false
  language: system
  require_serial: true
  priority: 1 # so rustfmt runs first
  stages:
    - manual
- repo: https://github.com/streetsidesoftware/cspell-cli
rev: v10.0.0

2
Lib/_opcode_metadata.py generated vendored
View File

@@ -1,4 +1,4 @@
# This file is generated by scripts/generate_opcode_metadata.py
# This file is generated by tools/opcode_metadata/generate_py_opcode_metadata.py
# for RustPython bytecode format (CPython 3.14 compatible opcode numbers).
# Do not edit!

View File

@@ -1,721 +0,0 @@
#!/usr/bin/env python
import collections
import dataclasses
import io
import os
import pathlib
import subprocess
import sys
import tomllib
CRATE_ROOT = pathlib.Path(__file__).parent
CONF_FILE = CRATE_ROOT / "opcode.toml"
OUT_FILE = CRATE_ROOT / "src" / "bytecode" / "instructions.rs"
ROOT = CRATE_ROOT.parents[1]
# Locate the CPython checkout whose Tools/cases_generator package is imported below.
try:
    CPYTHON_ROOT = pathlib.Path(os.environ["CPYTHON_ROOT"]).expanduser().resolve()
except KeyError:
    # The KeyError carries no extra information; suppress it so the traceback
    # shows only the actionable message.
    raise ValueError("Missing environment variable 'CPYTHON_ROOT'") from None
CPYTHON_TOOLS_LIB = CPYTHON_ROOT / "Tools" / "cases_generator"
sys.path.append(CPYTHON_TOOLS_LIB.as_posix())
import analyzer
from generators_common import DEFAULT_INPUT
from stack import get_stack_effect
@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class OpcodeGen:
    """Render the Rust source of one opcode enum and its helper methods.

    Every property whose name starts with ``fn_`` produces one method placed
    inside the enum's inherent ``impl`` block; every property whose name
    starts with ``impl_`` produces a standalone trait ``impl``. ``gen``
    discovers both groups by name (in sorted order), so adding a property is
    enough to extend the generated code.

    The returned text is not formatted here; the caller is expected to run it
    through a formatter, so indentation inside the templates is irrelevant.
    """

    # Name of the generated opcode enum.
    name: str
    # Name of the companion instruction enum the opcodes convert into.
    instruction_enum: str
    # Instruction objects; this class reads their ``name``, ``opcode`` and
    # ``properties`` attributes (and ``family`` / ``size`` when present).
    instructions: list
    # Rust integer type used for the numeric form, e.g. ``u8``.
    numeric_repr: str
    # Per-opcode tables keyed by opcode name; the ``oparg`` and
    # ``stack_effect`` sub-tables are consulted.
    metadata: dict[str, dict]
    # Result of the CPython cases-generator analysis; only ``families`` is read.
    analysis: analyzer.Analysis

    def gen(self) -> str:
        """Return the enum definition, its inherent ``impl`` and trait impls."""
        attrs = sorted(dir(self))
        methods = "\n\n".join(
            getattr(self, attr).strip() for attr in attrs if attr.startswith("fn_")
        )
        impls = "\n\n".join(
            getattr(self, attr).strip() for attr in attrs if attr.startswith("impl_")
        )
        variants = ",\n".join(instr.name for instr in self)
        return f"""
        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
        pub enum {self.name} {{
        {variants}
        }}
        impl {self.name} {{
        {methods}
        }}
        {impls}
        """

    @property
    def fn_as_numeric(self) -> str:
        """Method mapping each variant to its numeric opcode."""
        arms = ",\n".join(f"Self::{instr.name} => {instr.opcode}" for instr in self)
        return f"""
        #[must_use]
        pub const fn as_{self.numeric_repr}(self) -> {self.numeric_repr} {{
        match self {{
        {arms},
        }}
        }}
        """

    @property
    def fn_try_from_numeric(self) -> str:
        """Method mapping a numeric opcode back to a variant, or an error."""
        arms = ",\n".join(f"{instr.opcode} => Self::{instr.name}" for instr in self)
        return f"""
        pub const fn try_from_{self.numeric_repr}(
        value: {self.numeric_repr}
        ) -> Result<Self, MarshalError> {{
        Ok(match value {{
        {arms},
        _ => return Err(MarshalError::InvalidBytecode),
        }})
        }}
        """

    @property
    def impl_try_from_numeric(self) -> str:
        """``TryFrom<numeric>`` impl delegating to ``try_from_<numeric>``."""
        return f"""
        impl TryFrom<{self.numeric_repr}> for {self.name} {{
        type Error = MarshalError;
        fn try_from(value: {self.numeric_repr}) -> Result<Self, Self::Error> {{
        Self::try_from_{self.numeric_repr}(value)
        }}
        }}
        """

    @property
    def impl_into_numeric(self) -> str:
        """``From<enum> for numeric`` impl delegating to ``as_<numeric>``."""
        return f"""
        impl From<{self.name}> for {self.numeric_repr} {{
        fn from(opcode: {self.name}) -> Self {{
        opcode.as_{self.numeric_repr}()
        }}
        }}
        """

    def build_has_attr_fn(self, fn_attr: str, prop_attr: str, doc_flag: str) -> str:
        """Build a ``has_<fn_attr>`` predicate method.

        The predicate matches every instruction whose ``properties.<prop_attr>``
        is truthy; when no instruction qualifies it is the constant ``false``.
        ``doc_flag`` only appears in the generated doc comment.
        """
        arms = "|".join(
            f"Self::{instr.name}"
            for instr in self
            if getattr(instr.properties, prop_attr)
        )
        inner = f"matches!(self, {arms})" if arms else "false"
        return f"""
        /// Does this opcode have '{doc_flag}' set.
        #[must_use]
        pub const fn has_{fn_attr}(self) -> bool {{
        {inner}
        }}
        """

    fn_has_arg = property(
        lambda self: self.build_has_attr_fn("arg", "oparg", "HAS_ARG_FLAG")
    )
    fn_has_const = property(
        lambda self: self.build_has_attr_fn("const", "uses_co_consts", "HAS_CONST_FLAG")
    )
    fn_has_name = property(
        lambda self: self.build_has_attr_fn("name", "uses_co_names", "HAS_NAME_FLAG")
    )
    fn_has_jump = property(
        lambda self: self.build_has_attr_fn("jump", "jumps", "HAS_JUMP_FLAG")
    )
    fn_has_free = property(
        lambda self: self.build_has_attr_fn("free", "has_free", "HAS_FREE_FLAG")
    )
    fn_has_local = property(
        lambda self: self.build_has_attr_fn("local", "uses_locals", "HAS_LOCAL_FLAG")
    )

    @property
    def instrumented_mapping(self) -> dict[str, str]:
        """Map each base name to its ``Instrumented``-prefixed counterpart.

        Instrumented names whose unprefixed form is not itself an instruction
        are left out.
        """
        inames = {instr.name for instr in self if instr.name.startswith("Instrumented")}
        names = {instr.name for instr in self} - inames
        res = {}
        for iname in sorted(inames):
            name = iname.removeprefix("Instrumented")
            if name in names:
                res[name] = iname
        return res

    @property
    def fn_to_base(self) -> str:
        """Method mapping an instrumented variant to its base variant."""
        arms = ",\n".join(
            f"Self::{iname} => Self::{name}"
            for name, iname in self.instrumented_mapping.items()
        ).strip()
        if not arms:
            inner = "None"
        else:
            inner = f"""
            Some(match self {{
            {arms},
            _ => return None,
            }})
            """
        return f"""
        #[must_use]
        pub const fn to_base(self) -> Option<Self> {{
        {inner}
        }}
        """

    @property
    def fn_to_instrumented(self) -> str:
        """Method mapping a base variant to its instrumented variant."""
        arms = ",\n".join(
            f"Self::{name} => Self::{iname}"
            for name, iname in self.instrumented_mapping.items()
        ).strip()
        if not arms:
            inner = "None"
        else:
            inner = f"""
            Some(match self {{
            {arms},
            _ => return None,
            }})
            """
        return f"""
        #[must_use]
        pub const fn to_instrumented(self) -> Option<Self> {{
        {inner}
        }}
        """

    @property
    def fn_deopt(self) -> str:
        """Method mapping each family member back to the family's head."""
        names = {instr.name for instr in self}
        deopts = collections.defaultdict(list)
        for family in self.analysis.families.values():
            family_name = to_pascal_case(family.name)
            if family_name not in names:
                continue
            for member in family.members:
                if member.name == family_name:
                    continue
                deopts[family_name].append(member.name)
        arms = ""
        for target, specialized in deopts.items():
            ops = "|".join(f"Self::{op}" for op in specialized)
            arms += f"{ops} => Self::{target},\n"
        arms = arms.strip()
        if not arms:
            inner = "None"
        else:
            inner = f"""
            Some(match self {{
            {arms}
            _ => return None,
            }})
            """
        return f"""
        #[must_use]
        pub const fn deopt(self) -> Option<Self> {{
        {inner}
        }}
        """

    @property
    def fn_cache_entries(self) -> str:
        """Method returning ``size - 1`` for every base opcode larger than 1.

        The generated code matches on ``self.deoptimize()``, so only family
        heads and instructions without a family need an arm.
        """
        arms = ""
        for instr in self:
            name = instr.name
            family = getattr(instr, "family", None)
            # ``family.name`` must be converted the same way ``fn_deopt`` does
            # before it is compared with the instruction name; comparing the
            # unconverted family name would skip the family heads as well.
            if family and to_pascal_case(family.name) != name:
                continue
            if name.startswith("Instrumented"):
                continue
            try:
                size = instr.size
            except AttributeError:
                # Not every instruction object exposes ``size``.
                continue
            if size > 1:
                arms += f"Self::{name} => {size - 1},\n"
        arms = arms.strip()
        if not arms:
            inner = "0"
        else:
            inner = f"""
            match self.deoptimize() {{
            {arms}
            _ => 0,
            }}
            """
        return f"""
        #[must_use]
        pub const fn cache_entries(self) -> usize {{
        {inner}
        }}
        """

    @property
    def fn_stack_effect_info(self) -> str:
        """Method returning the (pushed, popped) stack effect per opcode.

        Values come from ``get_stack_effect`` unless ``metadata`` supplies a
        ``stack_effect`` override, in which case the computed value is kept in
        a trailing TODO comment.
        """
        oparg_used = False
        arms = ""
        for instr in self:
            name = instr.name
            stack = get_stack_effect(instr)
            popped = (-stack.base_offset).to_c()
            pushed = (stack.logical_sp - stack.base_offset).to_c()
            pushed_comment = ""
            popped_comment = ""
            if stack_effect := self.metadata.get(name, {}).get("stack_effect"):
                if npushed := stack_effect.get("pushed"):
                    pushed_comment = f"// TODO: Differs from CPython `{pushed}`"
                    pushed = npushed
                if npopped := stack_effect.get("popped"):
                    popped_comment = f"// TODO: Differs from CPython `{popped}`"
                    popped = npopped
            oparg_used = oparg_used or any("oparg" in expr for expr in (pushed, popped))
            arms += f"""
            Self::{name} => (
            {pushed}, {pushed_comment}
            {popped}, {popped_comment}
            ),
            """.strip()
        arms = arms.strip()
        # The parameter is named ``_oparg`` when no arm refers to it.
        oparg_arg = "_oparg"
        oparg_cast = ""
        if oparg_used:
            oparg_arg = "oparg"
            oparg_cast = f"""
            // Reason for converting {oparg_arg} to i32 is because of expressions like `1 + (oparg -1)`
            // that causes underflow errors.
            let oparg = i32::try_from({oparg_arg}).expect("{oparg_arg} does not fit in an `i32`");
            """
        return f"""
        #[must_use]
        pub fn stack_effect_info(&self, {oparg_arg}: u32) -> StackEffect {{
        {oparg_cast}
        let (pushed, popped) = match self {{
        {arms}
        }};
        debug_assert!(u32::try_from(pushed).is_ok());
        debug_assert!(u32::try_from(popped).is_ok());
        StackEffect::new(pushed as u32, popped as u32)
        }}
        """

    @property
    def fn_as_instruction(self) -> str:
        """Method converting each opcode into its instruction variant.

        Variants with an ``oparg`` entry in ``metadata`` are built with an
        ``Arg::marker()`` field.
        """
        arms = ""
        for instr in self:
            name = instr.name
            arms += f"Self::{name} => {self.instruction_enum}::{name}"
            if oparg := self.metadata.get(name, {}).get("oparg"):
                oname = oparg["name"]
                arms += f" {{ {oname}: Arg::marker() }}"
            arms += ",\n"
        return f"""
        /// Returns self as [`{self.instruction_enum}`].
        #[must_use]
        pub const fn as_instruction(self) -> {self.instruction_enum} {{
        match self {{
        {arms}
        }}
        }}
        """

    @property
    def impl_as_instruction(self) -> str:
        """``From<opcode> for instruction`` impl delegating to ``as_instruction``."""
        return f"""
        impl From<{self.name}> for {self.instruction_enum} {{
        fn from(opcode: {self.name}) -> Self {{
        opcode.as_instruction()
        }}
        }}
        """

    @property
    def fn_stack_effect(self) -> str:
        """Method returning the net effect of ``stack_effect_info``."""
        return """
        /// Stack effect of [`Self::stack_effect_info`].
        #[must_use]
        pub fn stack_effect(&self, oparg: u32) -> i32 {
        self.stack_effect_info(oparg).effect()
        }
        """

    def __iter__(self):
        """Iterate over the instructions in the order they were supplied."""
        yield from self.instructions
@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class InstructionGen:
name: str
opcode_enum: str
instructions: list
numeric_repr: str
metadata: dict[str, str]
def gen(self) -> str:
methods = "\n\n".join(
getattr(self, attr).strip()
for attr in sorted(dir(self))
if attr.startswith("fn_")
)
impls = "\n\n".join(
getattr(self, attr).strip()
for attr in sorted(dir(self))
if attr.startswith("impl_")
)
variants = ""
for instr in self:
name = instr.name
variants += name
if oparg := self.metadata.get(name, {}).get("oparg"):
oname, otype = oparg["name"], oparg["type"]
variants += f"{{ {oname}: Arg<{otype}> }}"
opcode = instr.opcode
variants += f" = {opcode},\n"
return f"""
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr({self.numeric_repr})] // TODO: Remove this `#[repr(...)]`
pub enum {self.name} {{
{variants}
}}
impl {self.name} {{
{methods}
}}
{impls}
"""
@property
def fn_as_opcode(self) -> str:
arms = ""
for instr in self:
name = instr.name
arms += f"Self::{name}"
if oparg := self.metadata.get(name, {}).get("oparg"):
arms += " { .. }"
arms += f"=> {self.opcode_enum}::{name},\n"
return f"""
/// Returns self as a [`{self.opcode_enum}`].
#[must_use]
pub const fn as_opcode(self) -> {self.opcode_enum} {{
match self {{
{arms}
}}
}}
"""
@property
def impl_as_opcode(self) -> str:
return f"""
impl From<{self.name}> for {self.opcode_enum} {{
fn from(instruction: {self.name}) -> Self {{
instruction.as_opcode()
}}
}}
"""
@property
def fn_as_numeric_repr(self) -> str:
return f"""
#[must_use]
pub const fn as_{self.numeric_repr}(self) -> {self.numeric_repr} {{
self.as_opcode().as_{self.numeric_repr}()
}}
"""
@property
def impl_as_numeric_repr(self) -> str:
return f"""
impl From<{self.name}> for {self.numeric_repr} {{
fn from(instruction: {self.name}) -> Self {{
instruction.as_{self.numeric_repr}()
}}
}}
"""
@property
def fn_label_arg(self) -> str:
TARGET = "oparg::Label"
arms = ""
for instr in self:
name = instr.name
if oparg := self.metadata.get(name, {}).get("oparg"):
oname, otype = oparg["name"], oparg["type"]
if otype != TARGET:
continue
arms += f"Self::{name} {{ {oname} }} => *{oname},\n"
arms = arms.strip()
return f"""
#[must_use]
pub const fn label_arg(&self) -> Option<Arg<{TARGET}>> {{
Some(match self {{
{arms}
_ => return None,
}})
}}
"""
@property
def fn_to_base(self) -> str:
return f"""
#[must_use]
pub const fn to_base(self) -> Option<Self> {{
if let Some(opcode) = self.as_opcode().to_base() {{
Some(opcode.as_instruction())
}} else {{
None
}}
}}
"""
@property
def fn_to_instrumented(self) -> str:
return f"""
#[must_use]
pub const fn to_instrumented(self) -> Option<Self> {{
if let Some(opcode) = self.as_opcode().to_instrumented() {{
Some(opcode.as_instruction())
}} else {{
None
}}
}}
"""
@property
def fn_try_from_numeric(self) -> str:
return f"""
pub const fn try_from_{self.numeric_repr}(
value: {self.numeric_repr}
) -> Result<Self, MarshalError> {{
match {self.opcode_enum}::try_from_{self.numeric_repr}(value) {{
Ok(opcode) => Ok(opcode.as_instruction()),
Err(e) => Err(e),
}}
}}
"""
@property
def impl_try_from_numeric(self) -> str:
return f"""
impl TryFrom<{self.numeric_repr}> for {self.name} {{
type Error = MarshalError;
fn try_from(value: {self.numeric_repr}) -> Result<Self, Self::Error> {{
Self::try_from_{self.numeric_repr}(value)
}}
}}
"""
@property
def fn_stack_effect(self) -> str:
return """
/// Stack effect of [`Self::stack_effect_info`].
#[must_use]
pub fn stack_effect(&self, oparg: u32) -> i32 {
self.as_opcode().stack_effect(oparg)
}
"""
@property
def fn_cache_entries(self) -> str:
return f"""
#[must_use]
pub const fn cache_entries(self) -> usize {{
self.as_opcode().cache_entries()
}}
"""
@property
def fn_deopt(self) -> str:
return f"""
#[must_use]
pub const fn deopt(self) -> Option<Self> {{
if let Some(opcode) = self.as_opcode().deopt() {{
Some(opcode.as_instruction())
}} else {{
None
}}
}}
"""
@property
def fn_stack_effect_info(self) -> str:
return f"""
#[must_use]
pub fn stack_effect_info(&self, oparg: u32) -> StackEffect {{
self.as_opcode().stack_effect_info(oparg)
}}
"""
def __iter__(self):
yield from self.instructions
def to_pascal_case(s: str) -> str:
    """Title-case *s* and drop its underscores (``LOAD_FAST`` -> ``LoadFast``)."""
    titled = s.title()
    return "".join(titled.split("_"))
def get_analysis() -> analyzer.Analysis:
    """Analyze ``DEFAULT_INPUT`` and fold the pseudo instructions into ``instructions``."""
    result = analyzer.analyze_files([DEFAULT_INPUT])
    # Real and pseudo instructions are not differentiated yet, so merge the
    # pseudos into the same mapping.
    result.instructions |= result.pseudos
    return result
def rustfmt(code: str) -> str:
    """Pipe *code* through ``rustfmt --emit=stdout`` and return what it prints.

    Raises ``subprocess.CalledProcessError`` when ``rustfmt`` exits non-zero.
    """
    completed = subprocess.run(
        ["rustfmt", "--emit=stdout"],
        input=code,
        text=True,
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout
def main():
    """Generate the opcode and instruction enums and write them to ``OUT_FILE``.

    For every top-level table in ``CONF_FILE`` the instructions whose opcode
    lies in the configured ``range`` are selected, sorted by opcode, renamed
    to PascalCase and rendered through ``OpcodeGen`` and ``InstructionGen``.
    The combined text is formatted with ``rustfmt`` before it is written.
    """
    # TOML documents are UTF-8; parse from a binary handle so the result does
    # not depend on the locale's default text encoding.
    with CONF_FILE.open("rb") as conf_file:
        conf_by_enum = tomllib.load(conf_file)
    analysis = get_analysis()
    outfile = io.StringIO()
    for opcode_enum, conf in conf_by_enum.items():
        metadata = conf["opcodes"]
        numeric_repr = conf["numeric_repr"]
        instruction_enum = conf["instruction_enum"]
        opcode_range = conf["range"]
        lower, upper = map(int, (opcode_range["min"], opcode_range["max"]))
        # ``max`` is inclusive in the configuration.
        bounds = range(lower, upper + 1)
        instructions = sorted(
            (
                instr
                for instr in analysis.instructions.values()
                if instr.opcode in bounds
            ),
            key=lambda x: x.opcode,
        )
        # The instruction objects are renamed in place, so every later use of
        # ``instr.name`` sees the PascalCase form.
        for instr in instructions:
            instr.name = to_pascal_case(instr.name)
        opcode_code = OpcodeGen(
            name=opcode_enum,
            instruction_enum=instruction_enum,
            instructions=instructions,
            numeric_repr=numeric_repr,
            metadata=metadata,
            analysis=analysis,
        ).gen()
        outfile.write(opcode_code)
        instruction_code = InstructionGen(
            name=instruction_enum,
            opcode_enum=opcode_enum,
            instructions=instructions,
            numeric_repr=numeric_repr,
            metadata=metadata,
        ).gen()
        outfile.write(instruction_code)
    generated = outfile.getvalue()
    script_path = pathlib.Path(__file__).resolve().relative_to(ROOT).as_posix()
    output = rustfmt(
        f"""
        // This file is generated by {script_path}
        // Do not edit!
        use crate::{{
        bytecode::{{
        instruction::{{Arg, StackEffect}},
        oparg,
        }},
        marshal::MarshalError,
        }};
        {generated}
        """
    )
    # Write with an explicit encoding so the generated file is identical on
    # every platform.
    OUT_FILE.write_text(output, encoding="utf-8")
if __name__ == "__main__":
main()

View File

@@ -1,270 +0,0 @@
[Opcode]
instruction_enum = "Instruction"
numeric_repr = "u8"
range = { min = 0, max = 255 }
[Opcode.opcodes.BinaryOp]
oparg = { name = "op", type = "oparg::BinaryOperator" }
[Opcode.opcodes.BuildInterpolation]
oparg = { name = "format", type = "u32" }
[Opcode.opcodes.BuildList]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildMap]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildSet]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildSlice]
oparg = { name = "argc", type = "oparg::BuildSliceArgCount" }
[Opcode.opcodes.BuildString]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.BuildTuple]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.Call]
oparg = { name = "argc", type = "u32" }
[Opcode.opcodes.CallIntrinsic1]
oparg = { name = "func", type = "oparg::IntrinsicFunction1" }
[Opcode.opcodes.CallIntrinsic2]
oparg = { name = "func", type = "oparg::IntrinsicFunction2" }
[Opcode.opcodes.CallKw]
oparg = { name = "argc", type = "u32" }
[Opcode.opcodes.CompareOp]
oparg = { name = "opname", type = "oparg::ComparisonOperator" }
[Opcode.opcodes.ContainsOp]
oparg = { name = "invert", type = "oparg::Invert" }
[Opcode.opcodes.ConvertValue]
oparg = { name = "oparg", type = "oparg::ConvertValueOparg" }
[Opcode.opcodes.Copy]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.CopyFreeVars]
oparg = { name = "n", type = "u32" }
[Opcode.opcodes.DeleteAttr]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.DeleteDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.DeleteFast]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.DeleteGlobal]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.DeleteName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.DictMerge]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.DictUpdate]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.ForIter]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.GetAwaitable]
oparg = { name = "r#where", type = "u32" }
[Opcode.opcodes.ImportFrom]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.ImportName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.IsOp]
oparg = { name = "invert", type = "oparg::Invert" }
[Opcode.opcodes.JumpBackward]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.JumpBackwardNoInterrupt]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.JumpForward]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.ListAppend]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.ListExtend]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.LoadAttr]
oparg = { name = "namei", type = "oparg::LoadAttr" }
[Opcode.opcodes.LoadCommonConstant]
oparg = { name = "idx", type = "oparg::CommonConstant" }
[Opcode.opcodes.LoadConst]
oparg = { name = "consti", type = "oparg::ConstIdx" }
[Opcode.opcodes.LoadDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFast]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastAndClear]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastBorrow]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastBorrowLoadFastBorrow]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.LoadFastCheck]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFastLoadFast]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.LoadFromDictOrDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.LoadFromDictOrGlobals]
oparg = { name = "i", type = "oparg::NameIdx" }
[Opcode.opcodes.LoadGlobal]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.LoadName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.LoadSmallInt]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.LoadSpecial]
oparg = { name = "method", type = "oparg::SpecialMethod" }
[Opcode.opcodes.LoadSuperAttr]
oparg = { name = "namei", type = "oparg::LoadSuperAttr" }
[Opcode.opcodes.MakeCell]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.MapAdd]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.MatchClass]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.PopJumpIfFalse]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.PopJumpIfNone]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.PopJumpIfNotNone]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.PopJumpIfTrue]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.RaiseVarargs]
oparg = { name = "argc", type = "oparg::RaiseKind" }
[Opcode.opcodes.Reraise]
oparg = { name = "depth", type = "u32" }
[Opcode.opcodes.Send]
oparg = { name = "delta", type = "oparg::Label" }
[Opcode.opcodes.SetAdd]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.SetFunctionAttribute]
oparg = { name = "flag", type = "oparg::MakeFunctionFlag" }
[Opcode.opcodes.SetUpdate]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.StoreAttr]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.StoreDeref]
oparg = { name = "i", type = "oparg::VarNum" }
[Opcode.opcodes.StoreFast]
oparg = { name = "var_num", type = "oparg::VarNum" }
[Opcode.opcodes.StoreFastLoadFast]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.StoreFastStoreFast]
oparg = { name = "var_nums", type = "oparg::VarNums" }
[Opcode.opcodes.StoreGlobal]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.StoreName]
oparg = { name = "namei", type = "oparg::NameIdx" }
[Opcode.opcodes.Swap]
oparg = { name = "i", type = "u32" }
[Opcode.opcodes.UnpackEx]
oparg = { name = "counts", type = "oparg::UnpackExArgs" }
[Opcode.opcodes.UnpackSequence]
oparg = { name = "count", type = "u32" }
[Opcode.opcodes.WithExceptStart]
stack_effect = { pushed = "7", popped = "6" }
[Opcode.opcodes.YieldValue]
oparg = { name = "arg", type = "u32" }
[Opcode.opcodes.Resume]
oparg = { name = "context", type = "oparg::ResumeContext" }
[PseudoOpcode]
instruction_enum = "PseudoInstruction"
numeric_repr = "u16"
range = { min = 256, max = 65535 }
[PseudoOpcode.opcodes.Jump]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.JumpIfFalse]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.JumpIfTrue]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.JumpNoInterrupt]
oparg = { name = "delta", type = "oparg::Label" }
[PseudoOpcode.opcodes.LoadClosure]
oparg = { name = "i", type = "oparg::NameIdx" }
[PseudoOpcode.opcodes.SetupCleanup]
oparg = { name = "delta", type = "oparg::Label" }
stack_effect = { pushed = "0" }
[PseudoOpcode.opcodes.SetupFinally]
oparg = { name = "delta", type = "oparg::Label" }
stack_effect = { pushed = "0" }
[PseudoOpcode.opcodes.SetupWith]
oparg = { name = "delta", type = "oparg::Label" }
stack_effect = { pushed = "0" }
[PseudoOpcode.opcodes.StoreFastMaybeNull]
oparg = { name = "var_num", type = "oparg::NameIdx" }

View File

@@ -20,8 +20,10 @@ use num_complex::Complex64;
use rustpython_wtf8::{Wtf8, Wtf8Buf};
pub use crate::bytecode::{
instruction::{AnyInstruction, AnyOpcode, Arg, StackEffect},
instructions::{Instruction, Opcode, PseudoInstruction, PseudoOpcode},
instruction::{
AnyInstruction, AnyOpcode, Arg, Instruction, Opcode, PseudoInstruction, PseudoOpcode,
StackEffect,
},
oparg::{
BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, ConvertValueOparg,
IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, LoadSuperAttr,
@@ -31,7 +33,8 @@ pub use crate::bytecode::{
};
mod instruction;
mod instructions;
mod opcode_metadata;
pub mod oparg;
/// Exception table entry for zero-cost exception handling

View File

@@ -2,32 +2,669 @@ use core::{fmt, marker::PhantomData};
use crate::marshal::MarshalError;
use super::{Instruction, OpArg, OpArgByte, OpArgType, Opcode, PseudoInstruction, PseudoOpcode};
use super::{OpArg, OpArgByte, OpArgType, oparg};
impl Opcode {
/// Map a specialized or instrumented opcode back to its adaptive (base) variant.
#[must_use]
pub const fn deoptimize(self) -> Self {
match self.deopt() {
Some(v) => v,
None => {
// Instrumented opcodes map back to their base
match self.to_base() {
Some(v) => v,
None => self,
macro_rules! define_opcodes {
(
#[repr($typ:ident)]
$opcode_vis:vis enum $opcode_name:ident;
$(#[$instr_meta:meta])*
$instr_vis:vis enum $instr_name:ident {
$(
$(#[$op_meta:meta])*
$op_name:ident $({ $arg_name:ident: Arg<$arg_type:ty> $(,)? })? = $op_id:expr
),* $(,)?
}
) => {
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
$opcode_vis enum $opcode_name {
$($op_name),*
}
impl $opcode_name {
#[doc = concat!("Converts this opcode to [`", stringify!($instr_name), "`].")]
#[must_use]
$opcode_vis const fn as_instruction(&self) -> $instr_name {
match self {
$(
Self::$op_name => $instr_name::$op_name $({ $arg_name: Arg::marker() })?,
)*
}
}
/// Map a specialized or instrumented opcode back to its adaptive (base) variant.
#[must_use]
$opcode_vis const fn deoptimize(self) -> Self {
match self.deopt() {
Some(v) => v,
None => {
// Instrumented opcodes map back to their base
match self.to_base() {
Some(v) => v,
None => self,
}
}
}
}
// NOTE: Keep private. Will be exposed under `try_from_u8/try_from_u16`.
pub(super) const fn try_from_numeric(value: $typ) -> Result<Self, $crate::marshal::MarshalError> {
match value {
$($op_id => Ok(Self::$op_name),)*
_ => Err($crate::marshal::MarshalError::InvalidBytecode),
}
}
// NOTE: Keep private. Will be exposed under `as_u8/as_u16`.
#[must_use]
pub(super) const fn as_numeric(self) -> $typ {
match self {
$(Self::$op_name => $op_id,)*
}
}
/// Stack effect of [`Self::stack_effect_info`].
#[must_use]
$opcode_vis fn stack_effect(&self, oparg: u32) -> i32 {
self.stack_effect_info(oparg).effect()
}
}
}
/// Returns `true` if this is any instrumented opcode
/// (regular INSTRUMENTED_*, INSTRUMENTED_LINE, or INSTRUMENTED_INSTRUCTION).
#[must_use]
pub const fn is_instrumented(self) -> bool {
self.to_base().is_some()
|| matches!(self, Self::InstrumentedLine | Self::InstrumentedInstruction)
}
impl From<$opcode_name> for $instr_name {
fn from(opcode: $opcode_name) -> Self {
opcode.as_instruction()
}
}
impl TryFrom<$typ> for $opcode_name {
type Error = $crate::marshal::MarshalError;
fn try_from(value: $typ) -> Result<Self, Self::Error> {
Self::try_from_numeric(value)
}
}
impl From<$opcode_name> for $typ {
fn from(opcode: $opcode_name) -> Self {
opcode.as_numeric()
}
}
#[derive(Clone, Copy, Debug)]
#[repr($typ)] // TODO: Remove this repr
$instr_vis enum $instr_name {
$(
$(#[$op_meta])*
$op_name $({ $arg_name: Arg<$arg_type> })? = $op_id // TODO: Don't assign value
),*
}
impl $instr_name {
#[doc = concat!("Get the corresponding [`", stringify!($opcode_name), "`].")]
#[must_use]
$instr_vis const fn as_opcode(&self) -> $opcode_name {
match self {
$(
Self::$op_name $({ $arg_name: _ })? => $opcode_name::$op_name,
)*
}
}
#[must_use]
$instr_vis const fn label_arg(&self) -> Option<Arg<oparg::Label>> {
//define_opcodes!(@label_arm Self::$op_name $({ $arg_name } : $arg_type)?)
define_opcodes!(@match self, Self, [$($op_name $({ $arg_name : $arg_type })?),*])
}
#[must_use]
pub const fn to_base(self) -> Option<Self> {
if let Some(op) = self.as_opcode().to_base() {
Some(op.as_instruction())
} else {
None
}
}
#[must_use]
pub const fn to_instrumented(self) -> Option<Self> {
if let Some(op) = self.as_opcode().to_instrumented() {
Some(op.as_instruction())
} else {
None
}
}
/// Returns `true` if this is any instrumented opcode.
#[must_use]
$instr_vis const fn is_instrumented(&self) -> bool {
self.as_opcode().is_instrumented()
}
#[must_use]
$instr_vis const fn is_unconditional_jump(&self) -> bool {
self.as_opcode().is_unconditional_jump()
}
#[must_use]
$instr_vis const fn is_block_push(&self) -> bool {
self.as_opcode().is_block_push()
}
#[must_use]
$instr_vis const fn is_scope_exit(&self) -> bool {
self.as_opcode().is_scope_exit()
}
#[must_use]
$instr_vis const fn cache_entries(&self) -> usize{
self.as_opcode().cache_entries()
}
/// Map a specialized or instrumented opcode back to its adaptive (base) variant.
#[must_use]
$instr_vis const fn deoptimize(&self) -> Self {
self.as_opcode().deoptimize().as_instruction()
}
#[must_use]
$instr_vis fn stack_effect_jump(&self, oparg: u32) -> i32 {
self.as_opcode().stack_effect_jump(oparg)
}
#[must_use]
$instr_vis fn stack_effect_info(&self, oparg: u32) -> StackEffect {
self.as_opcode().stack_effect_info(oparg)
}
#[must_use]
$instr_vis fn stack_effect(&self, oparg: u32) -> i32 {
self.as_opcode().stack_effect(oparg)
}
}
impl From<$instr_name> for $opcode_name {
fn from(instr: $instr_name) -> Self {
instr.as_opcode()
}
}
impl TryFrom<$typ> for $instr_name {
type Error = $crate::marshal::MarshalError;
fn try_from(value: $typ) -> Result<Self, Self::Error> {
$opcode_name::try_from_numeric(value).map(Into::into)
}
}
impl From<$instr_name> for $typ {
fn from(instr: $instr_name) -> Self {
instr.as_opcode().into()
}
}
};
// Base case: empty list
(@match $self:expr, $name:ident, []) => {
None
};
// Label field variant (with trailing variants)
(@match $self:expr, $name:ident, [$variant:ident { $field:ident : Label } , $($rest:tt)*]) => {
match $self {
$name::$variant { $field } => Some(*$field),
other => define_opcodes!(@match other, $name, [$($rest)*]),
}
};
// Label field variant (last in list)
(@match $self:expr, $name:ident, [$variant:ident { $field:ident : Label }]) => {
match $self {
$name::$variant { $field } => Some(*$field),
other => define_opcodes!(@match other, $name, []),
}
};
// Non-Label field variant (with trailing variants)
(@match $self:expr, $name:ident, [$variant:ident { $field:ident : $type:ty } , $($rest:tt)*]) => {
match $self {
$name::$variant { .. } => None,
other => define_opcodes!(@match other, $name, [$($rest)*]),
}
};
// Non-Label field variant (last in list)
(@match $self:expr, $name:ident, [$variant:ident { $field:ident : $type:ty }]) => {
match $self {
$name::$variant { .. } => None,
_ => define_opcodes!(@match _, $name, []),
}
};
// Unit variant (with trailing variants)
(@match $self:expr, $name:ident, [$variant:ident , $($rest:tt)*]) => {
match $self {
$name::$variant => None,
other => define_opcodes!(@match other, $name, [$($rest)*]),
}
};
// Unit variant (last in list)
(@match $self:expr, $name:ident, [$variant:ident]) => {
match $self {
$name::$variant => None,
_ => define_opcodes!(@match _, $name, []),
}
};
}
// Real opcode table passed to `define_opcodes!`: names the `#[repr(u8)]`
// enum `Opcode` and the enum `Instruction`, and pins every variant to an
// explicit numeric value. Variants written with a `{ field: Arg<T> }` body
// declare a typed operand; the others are declared without one.
// The numeric values are part of the bytecode format, so do not renumber.
define_opcodes!(
#[repr(u8)]
pub enum Opcode;
pub enum Instruction {
// 0..=43: variants declared without an operand field.
Cache = 0,
BinarySlice = 1,
BuildTemplate = 2,
BinaryOpInplaceAddUnicode = 3,
CallFunctionEx = 4,
CheckEgMatch = 5,
CheckExcMatch = 6,
CleanupThrow = 7,
DeleteSubscr = 8,
EndFor = 9,
EndSend = 10,
ExitInitCheck = 11,
FormatSimple = 12,
FormatWithSpec = 13,
GetAiter = 14,
GetAnext = 15,
GetIter = 16,
Reserved = 17,
GetLen = 18,
GetYieldFromIter = 19,
InterpreterExit = 20,
LoadBuildClass = 21,
LoadLocals = 22,
MakeFunction = 23,
MatchKeys = 24,
MatchMapping = 25,
MatchSequence = 26,
Nop = 27,
NotTaken = 28,
PopExcept = 29,
PopIter = 30,
PopTop = 31,
PushExcInfo = 32,
PushNull = 33,
ReturnGenerator = 34,
ReturnValue = 35,
SetupAnnotations = 36,
StoreSlice = 37,
StoreSubscr = 38,
ToBool = 39,
UnaryInvert = 40,
UnaryNegative = 41,
UnaryNot = 42,
WithExceptStart = 43,
// 44..=120: variants that declare an `Arg<...>` operand, except
// `EndAsyncFor` (68) and `ExtendedArg` (69), which declare no field.
BinaryOp {
op: Arg<oparg::BinaryOperator>,
} = 44,
BuildInterpolation {
format: Arg<u32>,
} = 45,
BuildList {
count: Arg<u32>,
} = 46,
BuildMap {
count: Arg<u32>,
} = 47,
BuildSet {
count: Arg<u32>,
} = 48,
BuildSlice {
argc: Arg<oparg::BuildSliceArgCount>,
} = 49,
BuildString {
count: Arg<u32>,
} = 50,
BuildTuple {
count: Arg<u32>,
} = 51,
Call {
argc: Arg<u32>,
} = 52,
CallIntrinsic1 {
func: Arg<oparg::IntrinsicFunction1>,
} = 53,
CallIntrinsic2 {
func: Arg<oparg::IntrinsicFunction2>,
} = 54,
CallKw {
argc: Arg<u32>,
} = 55,
CompareOp {
opname: Arg<oparg::ComparisonOperator>,
} = 56,
ContainsOp {
invert: Arg<oparg::Invert>,
} = 57,
ConvertValue {
oparg: Arg<oparg::ConvertValueOparg>,
} = 58,
Copy {
i: Arg<u32>,
} = 59,
CopyFreeVars {
n: Arg<u32>,
} = 60,
DeleteAttr {
namei: Arg<oparg::NameIdx>,
} = 61,
DeleteDeref {
i: Arg<oparg::VarNum>,
} = 62,
DeleteFast {
var_num: Arg<oparg::VarNum>,
} = 63,
DeleteGlobal {
namei: Arg<oparg::NameIdx>,
} = 64,
DeleteName {
namei: Arg<oparg::NameIdx>,
} = 65,
DictMerge {
i: Arg<u32>,
} = 66,
DictUpdate {
i: Arg<u32>,
} = 67,
EndAsyncFor = 68,
ExtendedArg = 69,
ForIter {
delta: Arg<oparg::Label>,
} = 70,
GetAwaitable {
r#where: Arg<u32>,
} = 71,
ImportFrom {
namei: Arg<oparg::NameIdx>,
} = 72,
ImportName {
namei: Arg<oparg::NameIdx>,
} = 73,
IsOp {
invert: Arg<oparg::Invert>,
} = 74,
JumpBackward {
delta: Arg<oparg::Label>,
} = 75,
JumpBackwardNoInterrupt {
delta: Arg<oparg::Label>,
} = 76,
JumpForward {
delta: Arg<oparg::Label>,
} = 77,
ListAppend {
i: Arg<u32>,
} = 78,
ListExtend {
i: Arg<u32>,
} = 79,
LoadAttr {
namei: Arg<oparg::LoadAttr>,
} = 80,
LoadCommonConstant {
idx: Arg<oparg::CommonConstant>,
} = 81,
LoadConst {
consti: Arg<oparg::ConstIdx>,
} = 82,
LoadDeref {
i: Arg<oparg::VarNum>,
} = 83,
LoadFast {
var_num: Arg<oparg::VarNum>,
} = 84,
LoadFastAndClear {
var_num: Arg<oparg::VarNum>,
} = 85,
LoadFastBorrow {
var_num: Arg<oparg::VarNum>,
} = 86,
LoadFastBorrowLoadFastBorrow {
var_nums: Arg<oparg::VarNums>,
} = 87,
LoadFastCheck {
var_num: Arg<oparg::VarNum>,
} = 88,
LoadFastLoadFast {
var_nums: Arg<oparg::VarNums>,
} = 89,
LoadFromDictOrDeref {
i: Arg<oparg::VarNum>,
} = 90,
LoadFromDictOrGlobals {
i: Arg<oparg::NameIdx>,
} = 91,
LoadGlobal {
namei: Arg<oparg::NameIdx>,
} = 92,
LoadName {
namei: Arg<oparg::NameIdx>,
} = 93,
LoadSmallInt {
i: Arg<u32>,
} = 94,
LoadSpecial {
method: Arg<oparg::SpecialMethod>,
} = 95,
LoadSuperAttr {
namei: Arg<oparg::LoadSuperAttr>,
} = 96,
MakeCell {
i: Arg<oparg::VarNum>,
} = 97,
MapAdd {
i: Arg<u32>,
} = 98,
MatchClass {
count: Arg<u32>,
} = 99,
PopJumpIfFalse {
delta: Arg<oparg::Label>,
} = 100,
PopJumpIfNone {
delta: Arg<oparg::Label>,
} = 101,
PopJumpIfNotNone {
delta: Arg<oparg::Label>,
} = 102,
PopJumpIfTrue {
delta: Arg<oparg::Label>,
} = 103,
RaiseVarargs {
argc: Arg<oparg::RaiseKind>,
} = 104,
Reraise {
depth: Arg<u32>,
} = 105,
Send {
delta: Arg<oparg::Label>,
} = 106,
SetAdd {
i: Arg<u32>,
} = 107,
SetFunctionAttribute {
flag: Arg<oparg::MakeFunctionFlag>,
} = 108,
SetUpdate {
i: Arg<u32>,
} = 109,
StoreAttr {
namei: Arg<oparg::NameIdx>,
} = 110,
StoreDeref {
i: Arg<oparg::VarNum>,
} = 111,
StoreFast {
var_num: Arg<oparg::VarNum>,
} = 112,
StoreFastLoadFast {
var_nums: Arg<oparg::VarNums>,
} = 113,
StoreFastStoreFast {
var_nums: Arg<oparg::VarNums>,
} = 114,
StoreGlobal {
namei: Arg<oparg::NameIdx>,
} = 115,
StoreName {
namei: Arg<oparg::NameIdx>,
} = 116,
Swap {
i: Arg<u32>,
} = 117,
UnpackEx {
counts: Arg<oparg::UnpackExArgs>,
} = 118,
UnpackSequence {
count: Arg<u32>,
} = 119,
YieldValue {
arg: Arg<u32>,
} = 120,
// 121..=127 are not assigned; `Resume` sits alone at 128.
Resume {
context: Arg<oparg::ResumeContext>,
} = 128,
// 129..=211: field-less variants. NOTE(review): the names look like
// specialized forms of the base opcodes above — confirm against the
// generated `deopt` table before relying on that.
BinaryOpAddFloat = 129,
BinaryOpAddInt = 130,
BinaryOpAddUnicode = 131,
BinaryOpExtend = 132,
BinaryOpMultiplyFloat = 133,
BinaryOpMultiplyInt = 134,
BinaryOpSubscrDict = 135,
BinaryOpSubscrGetitem = 136,
BinaryOpSubscrListInt = 137,
BinaryOpSubscrListSlice = 138,
BinaryOpSubscrStrInt = 139,
BinaryOpSubscrTupleInt = 140,
BinaryOpSubtractFloat = 141,
BinaryOpSubtractInt = 142,
CallAllocAndEnterInit = 143,
CallBoundMethodExactArgs = 144,
CallBoundMethodGeneral = 145,
CallBuiltinClass = 146,
CallBuiltinFast = 147,
CallBuiltinFastWithKeywords = 148,
CallBuiltinO = 149,
CallIsinstance = 150,
CallKwBoundMethod = 151,
CallKwNonPy = 152,
CallKwPy = 153,
CallLen = 154,
CallListAppend = 155,
CallMethodDescriptorFast = 156,
CallMethodDescriptorFastWithKeywords = 157,
CallMethodDescriptorNoargs = 158,
CallMethodDescriptorO = 159,
CallNonPyGeneral = 160,
CallPyExactArgs = 161,
CallPyGeneral = 162,
CallStr1 = 163,
CallTuple1 = 164,
CallType1 = 165,
CompareOpFloat = 166,
CompareOpInt = 167,
CompareOpStr = 168,
ContainsOpDict = 169,
ContainsOpSet = 170,
ForIterGen = 171,
ForIterList = 172,
ForIterRange = 173,
ForIterTuple = 174,
JumpBackwardJit = 175,
JumpBackwardNoJit = 176,
LoadAttrClass = 177,
LoadAttrClassWithMetaclassCheck = 178,
LoadAttrGetattributeOverridden = 179,
LoadAttrInstanceValue = 180,
LoadAttrMethodLazyDict = 181,
LoadAttrMethodNoDict = 182,
LoadAttrMethodWithValues = 183,
LoadAttrModule = 184,
LoadAttrNondescriptorNoDict = 185,
LoadAttrNondescriptorWithValues = 186,
LoadAttrProperty = 187,
LoadAttrSlot = 188,
LoadAttrWithHint = 189,
LoadConstImmortal = 190,
LoadConstMortal = 191,
LoadGlobalBuiltin = 192,
LoadGlobalModule = 193,
LoadSuperAttrAttr = 194,
LoadSuperAttrMethod = 195,
ResumeCheck = 196,
SendGen = 197,
StoreAttrInstanceValue = 198,
StoreAttrSlot = 199,
StoreAttrWithHint = 200,
StoreSubscrDict = 201,
StoreSubscrListInt = 202,
ToBoolAlwaysTrue = 203,
ToBoolBool = 204,
ToBoolInt = 205,
ToBoolList = 206,
ToBoolNone = 207,
ToBoolStr = 208,
UnpackSequenceList = 209,
UnpackSequenceTuple = 210,
UnpackSequenceTwoTuple = 211,
// 212..=233 are not assigned. 234..=254: the `Instrumented*` variants.
InstrumentedEndFor = 234,
InstrumentedPopIter = 235,
InstrumentedEndSend = 236,
InstrumentedForIter = 237,
InstrumentedInstruction = 238,
InstrumentedJumpForward = 239,
InstrumentedNotTaken = 240,
InstrumentedPopJumpIfTrue = 241,
InstrumentedPopJumpIfFalse = 242,
InstrumentedPopJumpIfNone = 243,
InstrumentedPopJumpIfNotNone = 244,
InstrumentedResume = 245,
InstrumentedReturnValue = 246,
InstrumentedYieldValue = 247,
InstrumentedEndAsyncFor = 248,
InstrumentedLoadSuperAttr = 249,
InstrumentedCall = 250,
InstrumentedCallKw = 251,
InstrumentedCallFunctionEx = 252,
InstrumentedJumpBackward = 253,
InstrumentedLine = 254,
// 255 is the largest value the `u8` repr can hold.
EnterExecutor = 255,
}
);
// Pseudo opcode table passed to `define_opcodes!`: names the `#[repr(u16)]`
// enum `PseudoOpcode` and the enum `PseudoInstruction`.
// Numbering starts at 256, directly above the largest real opcode value
// (255), which is why a `u16` repr is needed here.
define_opcodes!(
#[repr(u16)]
pub enum PseudoOpcode;
pub enum PseudoInstruction {
AnnotationsPlaceholder = 256,
// Jump-style variants: each declares a `delta` label operand.
Jump { delta: Arg<oparg::Label> } = 257,
JumpIfFalse { delta: Arg<oparg::Label> } = 258,
JumpIfTrue { delta: Arg<oparg::Label> } = 259,
JumpNoInterrupt { delta: Arg<oparg::Label> } = 260,
LoadClosure { i: Arg<oparg::NameIdx> } = 261,
PopBlock = 262,
// `Setup*` variants: each declares a `delta` label operand.
SetupCleanup { delta: Arg<oparg::Label> } = 263,
SetupFinally { delta: Arg<oparg::Label> } = 264,
SetupWith { delta: Arg<oparg::Label> } = 265,
// NOTE(review): the operand is typed `NameIdx` although the field is named
// `var_num` (real `*Fast` opcodes use `VarNum`) — confirm this is intended.
StoreFastMaybeNull { var_num: Arg<oparg::NameIdx> } = 266,
}
);
impl Opcode {
#[must_use]
pub const fn is_unconditional_jump(&self) -> bool {
matches!(
@@ -59,11 +696,6 @@ impl Opcode {
}
impl PseudoOpcode {
#[must_use]
pub const fn is_instrumented(&self) -> bool {
false
}
#[must_use]
pub const fn is_block_push(&self) -> bool {
matches!(
@@ -72,6 +704,16 @@ impl PseudoOpcode {
)
}
#[must_use]
pub const fn is_scope_exit(&self) -> bool {
false
}
#[must_use]
pub const fn is_unconditional_jump(&self) -> bool {
matches!(self, Self::Jump | Self::JumpNoInterrupt)
}
/// Handler entry effect for SETUP_* pseudo ops.
///
/// Fallthrough effect is 0 (NOPs), but when the branch is taken the
@@ -89,70 +731,6 @@ impl PseudoOpcode {
}
}
impl Instruction {
    /// Returns `true` if this is any instrumented opcode
    /// (regular INSTRUMENTED_*, INSTRUMENTED_LINE, or INSTRUMENTED_INSTRUCTION).
    #[must_use]
    pub const fn is_instrumented(self) -> bool {
        self.as_opcode().is_instrumented()
    }

    /// Returns whether the underlying opcode reports itself as an
    /// unconditional jump.
    #[must_use]
    pub const fn is_unconditional_jump(&self) -> bool {
        self.as_opcode().is_unconditional_jump()
    }

    /// Returns whether the underlying opcode reports itself as a block push.
    #[must_use]
    pub const fn is_block_push(&self) -> bool {
        self.as_opcode().is_block_push()
    }

    /// Returns whether the underlying opcode reports itself as a scope exit.
    #[must_use]
    pub const fn is_scope_exit(&self) -> bool {
        self.as_opcode().is_scope_exit()
    }

    /// Map a specialized or instrumented opcode back to its adaptive (base) variant.
    #[must_use]
    pub const fn deoptimize(self) -> Self {
        self.as_opcode().deoptimize().as_instruction()
    }

    /// Net stack effect of this instruction when its branch is taken.
    ///
    /// Delegates to the opcode's own `stack_effect_jump`, exactly like
    /// `PseudoInstruction::stack_effect_jump` does. Forwarding to the plain
    /// `stack_effect` instead would report the fallthrough effect and ignore
    /// any opcode whose branch and fallthrough effects differ.
    #[must_use]
    pub fn stack_effect_jump(&self, oparg: u32) -> i32 {
        self.as_opcode().stack_effect_jump(oparg)
    }
}
impl PseudoInstruction {
    /// Reports whether this pseudo instruction pushes a block, i.e. whether
    /// it is one of:
    /// - [`PseudoInstruction::SetupCleanup`]
    /// - [`PseudoInstruction::SetupFinally`]
    /// - [`PseudoInstruction::SetupWith`]
    #[must_use]
    pub const fn is_block_push(&self) -> bool {
        let opcode = self.as_opcode();
        opcode.is_block_push()
    }

    /// Reports whether this pseudo instruction is `Jump` or
    /// `JumpNoInterrupt`.
    #[must_use]
    pub const fn is_unconditional_jump(&self) -> bool {
        let opcode = self.as_opcode();
        matches!(opcode, PseudoOpcode::JumpNoInterrupt | PseudoOpcode::Jump)
    }

    /// Always `false`: no pseudo instruction is treated as a scope exit.
    #[must_use]
    pub const fn is_scope_exit(&self) -> bool {
        false
    }

    /// Stack effect when the branch is taken, as computed by the opcode's
    /// own `stack_effect_jump`.
    #[must_use]
    pub fn stack_effect_jump(&self, oparg: u32) -> i32 {
        let opcode = self.as_opcode();
        opcode.stack_effect_jump(oparg)
    }
}
macro_rules! either_real_pseudo {
// Const
(
@@ -191,28 +769,28 @@ pub enum AnyInstruction {
impl AnyInstruction {
either_real_pseudo!(
#[must_use]
pub const fn is_unconditional_jump(&self) -> bool
#[must_use]
pub const fn is_unconditional_jump(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn is_scope_exit(&self) -> bool
#[must_use]
pub const fn is_scope_exit(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub fn stack_effect(&self, oparg: u32) -> i32
#[must_use]
pub fn stack_effect(&self, oparg: u32) -> i32
);
either_real_pseudo!(
#[must_use]
pub fn stack_effect_jump(&self, oparg: u32) -> i32
#[must_use]
pub fn stack_effect_jump(&self, oparg: u32) -> i32
);
either_real_pseudo!(
#[must_use]
pub fn stack_effect_info(&self, oparg: u32) -> StackEffect
#[must_use]
pub fn stack_effect_info(&self, oparg: u32) -> StackEffect
);
}
@@ -340,7 +918,7 @@ impl AnyInstruction {
}
}
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AnyOpcode {
Real(Opcode),
Pseudo(PseudoOpcode),
@@ -428,53 +1006,53 @@ impl AnyOpcode {
}
either_real_pseudo!(
#[must_use]
pub const fn has_arg(&self) -> bool
#[must_use]
pub const fn has_arg(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn has_jump(&self) -> bool
#[must_use]
pub const fn has_jump(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn has_free(&self) -> bool
#[must_use]
pub const fn has_free(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn has_local(&self) -> bool
#[must_use]
pub const fn has_local(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn has_name(&self) -> bool
#[must_use]
pub const fn has_name(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn has_const(&self) -> bool
#[must_use]
pub const fn has_const(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn is_instrumented(&self) -> bool
#[must_use]
pub const fn is_instrumented(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub const fn is_block_push(&self) -> bool
#[must_use]
pub const fn is_block_push(&self) -> bool
);
either_real_pseudo!(
#[must_use]
pub fn stack_effect_jump(&self, oparg: u32) -> i32
#[must_use]
pub fn stack_effect_jump(&self, oparg: u32) -> i32
);
either_real_pseudo!(
#[must_use]
pub fn stack_effect(&self, oparg: u32) -> i32
#[must_use]
pub fn stack_effect(&self, oparg: u32) -> i32
);
#[must_use]

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
use core::fmt;
use crate::{
bytecode::{CodeUnit, instructions::Instruction},
bytecode::{CodeUnit, Instruction},
marshal::MarshalError,
};

View File

@@ -0,0 +1,809 @@
// This file is generated by tools/opcode_metadata/generate_rs_opcode_metadata.py
// Do not edit!
use crate::{bytecode::instruction::StackEffect, marshal::MarshalError};
impl super::Opcode {
/// Returns [`Self`] as [`u8`].
///
/// Thin alias for `as_numeric`, named after the concrete integer type.
#[must_use]
pub const fn as_u8(self) -> u8 {
self.as_numeric()
}
/// Returns the cache entry count recorded for this opcode.
///
/// The lookup is performed on `self.deoptimize()`, so the count is the one
/// listed for the opcode that `deoptimize` returns. Opcodes without an entry
/// below yield `0`.
#[must_use]
pub const fn cache_entries(self) -> usize {
    match self.deoptimize() {
        Self::LoadAttr => 9,
        Self::BinaryOp => 5,
        Self::LoadGlobal | Self::StoreAttr => 4,
        Self::Call | Self::CallKw | Self::ToBool => 3,
        Self::CompareOp
        | Self::ContainsOp
        | Self::ForIter
        | Self::JumpBackward
        | Self::LoadSuperAttr
        | Self::PopJumpIfFalse
        | Self::PopJumpIfNone
        | Self::PopJumpIfNotNone
        | Self::PopJumpIfTrue
        | Self::Send
        | Self::StoreSubscr
        | Self::UnpackSequence => 1,
        _ => 0,
    }
}
/// Looks up the base opcode that this opcode is mapped back to.
///
/// Returns `Some(base)` for every variant listed below and `None` for all
/// other opcodes (including the base opcodes themselves).
#[must_use]
pub const fn deopt(self) -> Option<Self> {
    match self {
        Self::BinaryOpAddFloat
        | Self::BinaryOpAddInt
        | Self::BinaryOpAddUnicode
        | Self::BinaryOpExtend
        | Self::BinaryOpInplaceAddUnicode
        | Self::BinaryOpMultiplyFloat
        | Self::BinaryOpMultiplyInt
        | Self::BinaryOpSubscrDict
        | Self::BinaryOpSubscrGetitem
        | Self::BinaryOpSubscrListInt
        | Self::BinaryOpSubscrListSlice
        | Self::BinaryOpSubscrStrInt
        | Self::BinaryOpSubscrTupleInt
        | Self::BinaryOpSubtractFloat
        | Self::BinaryOpSubtractInt => Some(Self::BinaryOp),

        Self::CallAllocAndEnterInit
        | Self::CallBoundMethodExactArgs
        | Self::CallBoundMethodGeneral
        | Self::CallBuiltinClass
        | Self::CallBuiltinFast
        | Self::CallBuiltinFastWithKeywords
        | Self::CallBuiltinO
        | Self::CallIsinstance
        | Self::CallLen
        | Self::CallListAppend
        | Self::CallMethodDescriptorFast
        | Self::CallMethodDescriptorFastWithKeywords
        | Self::CallMethodDescriptorNoargs
        | Self::CallMethodDescriptorO
        | Self::CallNonPyGeneral
        | Self::CallPyExactArgs
        | Self::CallPyGeneral
        | Self::CallStr1
        | Self::CallTuple1
        | Self::CallType1 => Some(Self::Call),

        Self::CallKwBoundMethod | Self::CallKwNonPy | Self::CallKwPy => Some(Self::CallKw),

        Self::CompareOpFloat | Self::CompareOpInt | Self::CompareOpStr => {
            Some(Self::CompareOp)
        }

        Self::ContainsOpDict | Self::ContainsOpSet => Some(Self::ContainsOp),

        Self::ForIterGen | Self::ForIterList | Self::ForIterRange | Self::ForIterTuple => {
            Some(Self::ForIter)
        }

        Self::JumpBackwardJit | Self::JumpBackwardNoJit => Some(Self::JumpBackward),

        Self::LoadAttrClass
        | Self::LoadAttrClassWithMetaclassCheck
        | Self::LoadAttrGetattributeOverridden
        | Self::LoadAttrInstanceValue
        | Self::LoadAttrMethodLazyDict
        | Self::LoadAttrMethodNoDict
        | Self::LoadAttrMethodWithValues
        | Self::LoadAttrModule
        | Self::LoadAttrNondescriptorNoDict
        | Self::LoadAttrNondescriptorWithValues
        | Self::LoadAttrProperty
        | Self::LoadAttrSlot
        | Self::LoadAttrWithHint => Some(Self::LoadAttr),

        Self::LoadConstImmortal | Self::LoadConstMortal => Some(Self::LoadConst),

        Self::LoadGlobalBuiltin | Self::LoadGlobalModule => Some(Self::LoadGlobal),

        Self::LoadSuperAttrAttr | Self::LoadSuperAttrMethod => Some(Self::LoadSuperAttr),

        Self::ResumeCheck => Some(Self::Resume),

        Self::SendGen => Some(Self::Send),

        Self::StoreAttrInstanceValue | Self::StoreAttrSlot | Self::StoreAttrWithHint => {
            Some(Self::StoreAttr)
        }

        Self::StoreSubscrDict | Self::StoreSubscrListInt => Some(Self::StoreSubscr),

        Self::ToBoolAlwaysTrue
        | Self::ToBoolBool
        | Self::ToBoolInt
        | Self::ToBoolList
        | Self::ToBoolNone
        | Self::ToBoolStr => Some(Self::ToBool),

        Self::UnpackSequenceList | Self::UnpackSequenceTuple | Self::UnpackSequenceTwoTuple => {
            Some(Self::UnpackSequence)
        }

        _ => None,
    }
}
/// Does this opcode have 'HAS_ARG_FLAG' set.
///
/// Returns `true` for exactly the variants listed below and `false` for
/// every other opcode. The list is emitted by the metadata generator; change
/// the generator rather than this list.
#[must_use]
pub const fn has_arg(self) -> bool {
matches!(
self,
Self::BinaryOp
| Self::BuildInterpolation
| Self::BuildList
| Self::BuildMap
| Self::BuildSet
| Self::BuildSlice
| Self::BuildString
| Self::BuildTuple
| Self::Call
| Self::CallIntrinsic1
| Self::CallIntrinsic2
| Self::CallKw
| Self::CompareOp
| Self::ContainsOp
| Self::ConvertValue
| Self::Copy
| Self::CopyFreeVars
| Self::DeleteAttr
| Self::DeleteDeref
| Self::DeleteFast
| Self::DeleteGlobal
| Self::DeleteName
| Self::DictMerge
| Self::DictUpdate
| Self::EndAsyncFor
| Self::ExtendedArg
| Self::ForIter
| Self::GetAwaitable
| Self::ImportFrom
| Self::ImportName
| Self::IsOp
| Self::JumpBackward
| Self::JumpBackwardNoInterrupt
| Self::JumpForward
| Self::ListAppend
| Self::ListExtend
| Self::LoadAttr
| Self::LoadCommonConstant
| Self::LoadConst
| Self::LoadDeref
| Self::LoadFast
| Self::LoadFastAndClear
| Self::LoadFastBorrow
| Self::LoadFastBorrowLoadFastBorrow
| Self::LoadFastCheck
| Self::LoadFastLoadFast
| Self::LoadFromDictOrDeref
| Self::LoadFromDictOrGlobals
| Self::LoadGlobal
| Self::LoadName
| Self::LoadSmallInt
| Self::LoadSpecial
| Self::LoadSuperAttr
| Self::MakeCell
| Self::MapAdd
| Self::MatchClass
| Self::PopJumpIfFalse
| Self::PopJumpIfNone
| Self::PopJumpIfNotNone
| Self::PopJumpIfTrue
| Self::RaiseVarargs
| Self::Reraise
| Self::Send
| Self::SetAdd
| Self::SetFunctionAttribute
| Self::SetUpdate
| Self::StoreAttr
| Self::StoreDeref
| Self::StoreFast
| Self::StoreFastLoadFast
| Self::StoreFastStoreFast
| Self::StoreGlobal
| Self::StoreName
| Self::Swap
| Self::UnpackEx
| Self::UnpackSequence
| Self::YieldValue
| Self::Resume
| Self::CallAllocAndEnterInit
| Self::CallBoundMethodExactArgs
| Self::CallBoundMethodGeneral
| Self::CallBuiltinClass
| Self::CallBuiltinFast
| Self::CallBuiltinFastWithKeywords
| Self::CallBuiltinO
| Self::CallIsinstance
| Self::CallKwBoundMethod
| Self::CallKwNonPy
| Self::CallKwPy
| Self::CallListAppend
| Self::CallMethodDescriptorFast
| Self::CallMethodDescriptorFastWithKeywords
| Self::CallMethodDescriptorNoargs
| Self::CallMethodDescriptorO
| Self::CallNonPyGeneral
| Self::CallPyExactArgs
| Self::CallPyGeneral
| Self::CallStr1
| Self::CallTuple1
| Self::CallType1
| Self::CompareOpFloat
| Self::CompareOpInt
| Self::CompareOpStr
| Self::ContainsOpDict
| Self::ContainsOpSet
| Self::ForIterGen
| Self::ForIterList
| Self::ForIterRange
| Self::ForIterTuple
| Self::JumpBackwardJit
| Self::JumpBackwardNoJit
| Self::LoadAttrClass
| Self::LoadAttrClassWithMetaclassCheck
| Self::LoadAttrGetattributeOverridden
| Self::LoadAttrInstanceValue
| Self::LoadAttrMethodLazyDict
| Self::LoadAttrMethodNoDict
| Self::LoadAttrMethodWithValues
| Self::LoadAttrModule
| Self::LoadAttrNondescriptorNoDict
| Self::LoadAttrNondescriptorWithValues
| Self::LoadAttrProperty
| Self::LoadAttrSlot
| Self::LoadAttrWithHint
| Self::LoadConstImmortal
| Self::LoadConstMortal
| Self::LoadGlobalBuiltin
| Self::LoadGlobalModule
| Self::LoadSuperAttrAttr
| Self::LoadSuperAttrMethod
| Self::SendGen
| Self::StoreAttrWithHint
| Self::UnpackSequenceList
| Self::UnpackSequenceTuple
| Self::UnpackSequenceTwoTuple
| Self::InstrumentedForIter
| Self::InstrumentedJumpForward
| Self::InstrumentedPopJumpIfTrue
| Self::InstrumentedPopJumpIfFalse
| Self::InstrumentedPopJumpIfNone
| Self::InstrumentedPopJumpIfNotNone
| Self::InstrumentedResume
| Self::InstrumentedYieldValue
| Self::InstrumentedEndAsyncFor
| Self::InstrumentedLoadSuperAttr
| Self::InstrumentedCall
| Self::InstrumentedCallKw
| Self::InstrumentedJumpBackward
| Self::EnterExecutor
)
}
/// Does this opcode have 'HAS_CONST_FLAG' set.
///
/// `true` only for `LoadConst` and its `LoadConstImmortal` /
/// `LoadConstMortal` variants.
#[must_use]
pub const fn has_const(self) -> bool {
matches!(
self,
Self::LoadConst | Self::LoadConstImmortal | Self::LoadConstMortal
)
}
/// Does this opcode have 'HAS_FREE_FLAG' set.
///
/// `true` only for the four variants listed below.
/// NOTE(review): `LoadDeref` is not listed here although the other `*Deref`
/// opcodes are — confirm this matches the generator's source metadata.
#[must_use]
pub const fn has_free(self) -> bool {
matches!(
self,
Self::DeleteDeref | Self::LoadFromDictOrDeref | Self::MakeCell | Self::StoreDeref
)
}
/// Does this opcode have 'HAS_JUMP_FLAG' set.
///
/// Returns `true` for exactly the variants listed below and `false` for
/// every other opcode.
#[must_use]
pub const fn has_jump(self) -> bool {
matches!(
self,
Self::EndAsyncFor
| Self::ForIter
| Self::JumpBackward
| Self::JumpBackwardNoInterrupt
| Self::JumpForward
| Self::PopJumpIfFalse
| Self::PopJumpIfNone
| Self::PopJumpIfNotNone
| Self::PopJumpIfTrue
| Self::Send
| Self::ForIterList
| Self::ForIterRange
| Self::ForIterTuple
| Self::JumpBackwardJit
| Self::JumpBackwardNoJit
| Self::InstrumentedForIter
| Self::InstrumentedEndAsyncFor
)
}
/// Does this opcode have 'HAS_LOCAL_FLAG' set.
///
/// Returns `true` for exactly the variants listed below and `false` for
/// every other opcode.
/// NOTE(review): `LoadDeref` is listed here rather than in `has_free` —
/// confirm this matches the generator's source metadata.
#[must_use]
pub const fn has_local(self) -> bool {
matches!(
self,
Self::BinaryOpInplaceAddUnicode
| Self::DeleteFast
| Self::LoadDeref
| Self::LoadFast
| Self::LoadFastAndClear
| Self::LoadFastBorrow
| Self::LoadFastBorrowLoadFastBorrow
| Self::LoadFastCheck
| Self::LoadFastLoadFast
| Self::StoreFast
| Self::StoreFastLoadFast
| Self::StoreFastStoreFast
)
}
/// Does this opcode have 'HAS_NAME_FLAG' set.
///
/// Returns `true` for exactly the variants listed below and `false` for
/// every other opcode.
#[must_use]
pub const fn has_name(self) -> bool {
matches!(
self,
Self::DeleteAttr
| Self::DeleteGlobal
| Self::DeleteName
| Self::ImportFrom
| Self::ImportName
| Self::LoadAttr
| Self::LoadFromDictOrGlobals
| Self::LoadGlobal
| Self::LoadName
| Self::LoadSuperAttr
| Self::StoreAttr
| Self::StoreGlobal
| Self::StoreName
| Self::LoadAttrGetattributeOverridden
| Self::LoadAttrWithHint
| Self::LoadSuperAttrAttr
| Self::LoadSuperAttrMethod
| Self::StoreAttrWithHint
| Self::InstrumentedLoadSuperAttr
)
}
/// Returns `true` if this opcode is one of the `Instrumented*` variants
/// listed below (this includes `InstrumentedLine` and
/// `InstrumentedInstruction`), and `false` otherwise.
#[must_use]
pub const fn is_instrumented(self) -> bool {
matches!(
self,
Self::InstrumentedEndFor
| Self::InstrumentedPopIter
| Self::InstrumentedEndSend
| Self::InstrumentedForIter
| Self::InstrumentedInstruction
| Self::InstrumentedJumpForward
| Self::InstrumentedNotTaken
| Self::InstrumentedPopJumpIfTrue
| Self::InstrumentedPopJumpIfFalse
| Self::InstrumentedPopJumpIfNone
| Self::InstrumentedPopJumpIfNotNone
| Self::InstrumentedResume
| Self::InstrumentedReturnValue
| Self::InstrumentedYieldValue
| Self::InstrumentedEndAsyncFor
| Self::InstrumentedLoadSuperAttr
| Self::InstrumentedCall
| Self::InstrumentedCallKw
| Self::InstrumentedCallFunctionEx
| Self::InstrumentedJumpBackward
| Self::InstrumentedLine
)
}
/// Computes how many stack values this opcode pushes and pops for `oparg`.
///
/// Every arm yields a `(pushed, popped)` pair; the pair is handed to
/// `StackEffect::new` in that order. Arms that depend on the operand compute
/// the counts from `oparg` after it has been converted to `i32`.
///
/// # Panics
///
/// Panics if `oparg` does not fit in an `i32`. In builds with debug
/// assertions it also panics if a computed count is negative.
#[must_use]
pub fn stack_effect_info(&self, oparg: u32) -> StackEffect {
// Reason for converting oparg to i32 is because of expressions like `1 + (oparg -1)`
// that causes underflow errors.
let oparg = i32::try_from(oparg).expect("oparg does not fit in an `i32`");
let (pushed, popped) = match self {
Self::Cache => (0, 0),
Self::BinarySlice => (1, 3),
Self::BuildTemplate => (1, 2),
Self::BinaryOpInplaceAddUnicode => (0, 2),
Self::CallFunctionEx => (1, 4),
Self::CheckEgMatch => (2, 2),
Self::CheckExcMatch => (2, 2),
Self::CleanupThrow => (2, 3),
Self::DeleteSubscr => (0, 2),
Self::EndFor => (0, 1),
Self::EndSend => (1, 2),
Self::ExitInitCheck => (0, 1),
Self::FormatSimple => (1, 1),
Self::FormatWithSpec => (1, 2),
Self::GetAiter => (1, 1),
Self::GetAnext => (2, 1),
Self::GetIter => (1, 1),
Self::Reserved => (0, 0),
Self::GetLen => (2, 1),
Self::GetYieldFromIter => (1, 1),
Self::InterpreterExit => (0, 1),
Self::LoadBuildClass => (1, 0),
Self::LoadLocals => (1, 0),
Self::MakeFunction => (1, 1),
Self::MatchKeys => (3, 2),
Self::MatchMapping => (2, 1),
Self::MatchSequence => (2, 1),
Self::Nop => (0, 0),
Self::NotTaken => (0, 0),
Self::PopExcept => (0, 1),
Self::PopIter => (0, 1),
Self::PopTop => (0, 1),
Self::PushExcInfo => (2, 1),
Self::PushNull => (1, 0),
Self::ReturnGenerator => (1, 0),
Self::ReturnValue => (1, 1),
Self::SetupAnnotations => (0, 0),
Self::StoreSlice => (0, 4),
Self::StoreSubscr => (0, 3),
Self::ToBool => (1, 1),
Self::UnaryInvert => (1, 1),
Self::UnaryNegative => (1, 1),
Self::UnaryNot => (1, 1),
Self::WithExceptStart => (
7, // TODO: Differs from CPython `6`
6, // TODO: Differs from CPython `5`
),
Self::BinaryOp => (1, 2),
Self::BuildInterpolation => (1, 2 + (oparg & 1)),
Self::BuildList => (1, oparg),
Self::BuildMap => (1, oparg * 2),
Self::BuildSet => (1, oparg),
Self::BuildSlice => (1, oparg),
Self::BuildString => (1, oparg),
Self::BuildTuple => (1, oparg),
Self::Call => (1, 2 + oparg),
Self::CallIntrinsic1 => (1, 1),
Self::CallIntrinsic2 => (1, 2),
Self::CallKw => (1, 3 + oparg),
Self::CompareOp => (1, 2),
Self::ContainsOp => (1, 2),
Self::ConvertValue => (1, 1),
Self::Copy => (2 + (oparg - 1), 1 + (oparg - 1)),
Self::CopyFreeVars => (0, 0),
Self::DeleteAttr => (0, 1),
Self::DeleteDeref => (0, 0),
Self::DeleteFast => (0, 0),
Self::DeleteGlobal => (0, 0),
Self::DeleteName => (0, 0),
Self::DictMerge => (4 + (oparg - 1), 5 + (oparg - 1)),
Self::DictUpdate => (1 + (oparg - 1), 2 + (oparg - 1)),
Self::EndAsyncFor => (0, 2),
Self::ExtendedArg => (0, 0),
Self::ForIter => (2, 1),
Self::GetAwaitable => (1, 1),
Self::ImportFrom => (2, 1),
Self::ImportName => (1, 2),
Self::IsOp => (1, 2),
Self::JumpBackward => (0, 0),
Self::JumpBackwardNoInterrupt => (0, 0),
Self::JumpForward => (0, 0),
Self::ListAppend => (1 + (oparg - 1), 2 + (oparg - 1)),
Self::ListExtend => (1 + (oparg - 1), 2 + (oparg - 1)),
Self::LoadAttr => (1 + (oparg & 1), 1),
Self::LoadCommonConstant => (1, 0),
Self::LoadConst => (1, 0),
Self::LoadDeref => (1, 0),
Self::LoadFast => (1, 0),
Self::LoadFastAndClear => (1, 0),
Self::LoadFastBorrow => (1, 0),
Self::LoadFastBorrowLoadFastBorrow => (2, 0),
Self::LoadFastCheck => (1, 0),
Self::LoadFastLoadFast => (2, 0),
Self::LoadFromDictOrDeref => (1, 1),
Self::LoadFromDictOrGlobals => (1, 1),
Self::LoadGlobal => (1 + (oparg & 1), 0),
Self::LoadName => (1, 0),
Self::LoadSmallInt => (1, 0),
Self::LoadSpecial => (2, 1),
Self::LoadSuperAttr => (1 + (oparg & 1), 3),
Self::MakeCell => (0, 0),
Self::MapAdd => (1 + (oparg - 1), 3 + (oparg - 1)),
Self::MatchClass => (1, 3),
Self::PopJumpIfFalse => (0, 1),
Self::PopJumpIfNone => (0, 1),
Self::PopJumpIfNotNone => (0, 1),
Self::PopJumpIfTrue => (0, 1),
Self::RaiseVarargs => (0, oparg),
Self::Reraise => (oparg, 1 + oparg),
Self::Send => (2, 2),
Self::SetAdd => (1 + (oparg - 1), 2 + (oparg - 1)),
Self::SetFunctionAttribute => (1, 2),
Self::SetUpdate => (1 + (oparg - 1), 2 + (oparg - 1)),
Self::StoreAttr => (0, 2),
Self::StoreDeref => (0, 1),
Self::StoreFast => (0, 1),
Self::StoreFastLoadFast => (1, 1),
Self::StoreFastStoreFast => (0, 2),
Self::StoreGlobal => (0, 1),
Self::StoreName => (0, 1),
Self::Swap => (2 + (oparg - 2), 2 + (oparg - 2)),
Self::UnpackEx => (1 + (oparg & 0xFF) + (oparg >> 8), 1),
Self::UnpackSequence => (oparg, 1),
Self::YieldValue => (1, 1),
Self::Resume => (0, 0),
Self::BinaryOpAddFloat => (1, 2),
Self::BinaryOpAddInt => (1, 2),
Self::BinaryOpAddUnicode => (1, 2),
Self::BinaryOpExtend => (1, 2),
Self::BinaryOpMultiplyFloat => (1, 2),
Self::BinaryOpMultiplyInt => (1, 2),
Self::BinaryOpSubscrDict => (1, 2),
Self::BinaryOpSubscrGetitem => (0, 2),
Self::BinaryOpSubscrListInt => (1, 2),
Self::BinaryOpSubscrListSlice => (1, 2),
Self::BinaryOpSubscrStrInt => (1, 2),
Self::BinaryOpSubscrTupleInt => (1, 2),
Self::BinaryOpSubtractFloat => (1, 2),
Self::BinaryOpSubtractInt => (1, 2),
Self::CallAllocAndEnterInit => (0, 2 + oparg),
Self::CallBoundMethodExactArgs => (0, 2 + oparg),
Self::CallBoundMethodGeneral => (0, 2 + oparg),
Self::CallBuiltinClass => (1, 2 + oparg),
Self::CallBuiltinFast => (1, 2 + oparg),
Self::CallBuiltinFastWithKeywords => (1, 2 + oparg),
Self::CallBuiltinO => (1, 2 + oparg),
Self::CallIsinstance => (1, 2 + oparg),
Self::CallKwBoundMethod => (0, 3 + oparg),
Self::CallKwNonPy => (1, 3 + oparg),
Self::CallKwPy => (0, 3 + oparg),
Self::CallLen => (1, 3),
Self::CallListAppend => (0, 3),
Self::CallMethodDescriptorFast => (1, 2 + oparg),
Self::CallMethodDescriptorFastWithKeywords => (1, 2 + oparg),
Self::CallMethodDescriptorNoargs => (1, 2 + oparg),
Self::CallMethodDescriptorO => (1, 2 + oparg),
Self::CallNonPyGeneral => (1, 2 + oparg),
Self::CallPyExactArgs => (0, 2 + oparg),
Self::CallPyGeneral => (0, 2 + oparg),
Self::CallStr1 => (1, 3),
Self::CallTuple1 => (1, 3),
Self::CallType1 => (1, 3),
Self::CompareOpFloat => (1, 2),
Self::CompareOpInt => (1, 2),
Self::CompareOpStr => (1, 2),
Self::ContainsOpDict => (1, 2),
Self::ContainsOpSet => (1, 2),
Self::ForIterGen => (1, 1),
Self::ForIterList => (2, 1),
Self::ForIterRange => (2, 1),
Self::ForIterTuple => (2, 1),
Self::JumpBackwardJit => (0, 0),
Self::JumpBackwardNoJit => (0, 0),
Self::LoadAttrClass => (1 + (oparg & 1), 1),
Self::LoadAttrClassWithMetaclassCheck => (1 + (oparg & 1), 1),
Self::LoadAttrGetattributeOverridden => (1, 1),
Self::LoadAttrInstanceValue => (1 + (oparg & 1), 1),
Self::LoadAttrMethodLazyDict => (2, 1),
Self::LoadAttrMethodNoDict => (2, 1),
Self::LoadAttrMethodWithValues => (2, 1),
Self::LoadAttrModule => (1 + (oparg & 1), 1),
Self::LoadAttrNondescriptorNoDict => (1, 1),
Self::LoadAttrNondescriptorWithValues => (1, 1),
Self::LoadAttrProperty => (0, 1),
Self::LoadAttrSlot => (1 + (oparg & 1), 1),
Self::LoadAttrWithHint => (1 + (oparg & 1), 1),
Self::LoadConstImmortal => (1, 0),
Self::LoadConstMortal => (1, 0),
Self::LoadGlobalBuiltin => (1 + (oparg & 1), 0),
Self::LoadGlobalModule => (1 + (oparg & 1), 0),
Self::LoadSuperAttrAttr => (1, 3),
Self::LoadSuperAttrMethod => (2, 3),
Self::ResumeCheck => (0, 0),
Self::SendGen => (1, 2),
Self::StoreAttrInstanceValue => (0, 2),
Self::StoreAttrSlot => (0, 2),
Self::StoreAttrWithHint => (0, 2),
Self::StoreSubscrDict => (0, 3),
Self::StoreSubscrListInt => (0, 3),
Self::ToBoolAlwaysTrue => (1, 1),
Self::ToBoolBool => (1, 1),
Self::ToBoolInt => (1, 1),
Self::ToBoolList => (1, 1),
Self::ToBoolNone => (1, 1),
Self::ToBoolStr => (1, 1),
Self::UnpackSequenceList => (oparg, 1),
Self::UnpackSequenceTuple => (oparg, 1),
Self::UnpackSequenceTwoTuple => (2, 1),
Self::InstrumentedEndFor => (1, 2),
Self::InstrumentedPopIter => (0, 1),
Self::InstrumentedEndSend => (1, 2),
Self::InstrumentedForIter => (2, 1),
Self::InstrumentedInstruction => (0, 0),
Self::InstrumentedJumpForward => (0, 0),
Self::InstrumentedNotTaken => (0, 0),
Self::InstrumentedPopJumpIfTrue => (0, 1),
Self::InstrumentedPopJumpIfFalse => (0, 1),
Self::InstrumentedPopJumpIfNone => (0, 1),
Self::InstrumentedPopJumpIfNotNone => (0, 1),
Self::InstrumentedResume => (0, 0),
Self::InstrumentedReturnValue => (1, 1),
Self::InstrumentedYieldValue => (1, 1),
Self::InstrumentedEndAsyncFor => (0, 2),
Self::InstrumentedLoadSuperAttr => (1 + (oparg & 1), 3),
Self::InstrumentedCall => (1, 2 + oparg),
Self::InstrumentedCallKw => (1, 3 + oparg),
Self::InstrumentedCallFunctionEx => (1, 4),
Self::InstrumentedJumpBackward => (0, 0),
Self::InstrumentedLine => (0, 0),
Self::EnterExecutor => (0, 0),
};
// Both counts are computed as `i32`; check they are non-negative before
// the `as u32` casts below, which would otherwise wrap silently.
debug_assert!(u32::try_from(pushed).is_ok());
debug_assert!(u32::try_from(popped).is_ok());
StackEffect::new(pushed as u32, popped as u32)
}
#[must_use]
pub const fn to_base(self) -> Option<Self> {
Some(match self {
Self::InstrumentedCall => Self::Call,
Self::InstrumentedCallFunctionEx => Self::CallFunctionEx,
Self::InstrumentedCallKw => Self::CallKw,
Self::InstrumentedEndAsyncFor => Self::EndAsyncFor,
Self::InstrumentedEndFor => Self::EndFor,
Self::InstrumentedEndSend => Self::EndSend,
Self::InstrumentedForIter => Self::ForIter,
Self::InstrumentedJumpBackward => Self::JumpBackward,
Self::InstrumentedJumpForward => Self::JumpForward,
Self::InstrumentedLoadSuperAttr => Self::LoadSuperAttr,
Self::InstrumentedNotTaken => Self::NotTaken,
Self::InstrumentedPopIter => Self::PopIter,
Self::InstrumentedPopJumpIfFalse => Self::PopJumpIfFalse,
Self::InstrumentedPopJumpIfNone => Self::PopJumpIfNone,
Self::InstrumentedPopJumpIfNotNone => Self::PopJumpIfNotNone,
Self::InstrumentedPopJumpIfTrue => Self::PopJumpIfTrue,
Self::InstrumentedResume => Self::Resume,
Self::InstrumentedReturnValue => Self::ReturnValue,
Self::InstrumentedYieldValue => Self::YieldValue,
_ => return None,
})
}
#[must_use]
pub const fn to_instrumented(self) -> Option<Self> {
Some(match self {
Self::Call => Self::InstrumentedCall,
Self::CallFunctionEx => Self::InstrumentedCallFunctionEx,
Self::CallKw => Self::InstrumentedCallKw,
Self::EndAsyncFor => Self::InstrumentedEndAsyncFor,
Self::EndFor => Self::InstrumentedEndFor,
Self::EndSend => Self::InstrumentedEndSend,
Self::ForIter => Self::InstrumentedForIter,
Self::JumpBackward => Self::InstrumentedJumpBackward,
Self::JumpForward => Self::InstrumentedJumpForward,
Self::LoadSuperAttr => Self::InstrumentedLoadSuperAttr,
Self::NotTaken => Self::InstrumentedNotTaken,
Self::PopIter => Self::InstrumentedPopIter,
Self::PopJumpIfFalse => Self::InstrumentedPopJumpIfFalse,
Self::PopJumpIfNone => Self::InstrumentedPopJumpIfNone,
Self::PopJumpIfNotNone => Self::InstrumentedPopJumpIfNotNone,
Self::PopJumpIfTrue => Self::InstrumentedPopJumpIfTrue,
Self::Resume => Self::InstrumentedResume,
Self::ReturnValue => Self::InstrumentedReturnValue,
Self::YieldValue => Self::InstrumentedYieldValue,
_ => return None,
})
}
/// Converts a raw `u8` into an opcode by delegating to `try_from_numeric`.
///
/// # Errors
///
/// Returns the `MarshalError` produced by `try_from_numeric` when it
/// rejects `value`.
pub const fn try_from_u8(value: u8) -> Result<Self, MarshalError> {
Self::try_from_numeric(value)
}
}
impl super::PseudoOpcode {
    /// Returns [`Self`] as [`u16`].
    #[must_use]
    pub const fn as_u16(self) -> u16 {
        self.as_numeric()
    }

    /// Number of cache entries following the opcode; always `0` for pseudo opcodes.
    #[must_use]
    pub const fn cache_entries(self) -> usize {
        0
    }

    /// Base opcode a specialized opcode deoptimizes to; always `None` here.
    #[must_use]
    pub const fn deopt(self) -> Option<Self> {
        None
    }

    /// Does this opcode have 'HAS_ARG_FLAG' set.
    #[must_use]
    pub const fn has_arg(self) -> bool {
        matches!(
            self,
            Self::Jump
                | Self::JumpIfFalse
                | Self::JumpIfTrue
                | Self::JumpNoInterrupt
                | Self::LoadClosure
                | Self::StoreFastMaybeNull
        )
    }

    /// Does this opcode have 'HAS_CONST_FLAG' set.
    #[must_use]
    pub const fn has_const(self) -> bool {
        false
    }

    /// Does this opcode have 'HAS_FREE_FLAG' set.
    #[must_use]
    pub const fn has_free(self) -> bool {
        false
    }

    /// Does this opcode have 'HAS_JUMP_FLAG' set.
    #[must_use]
    pub const fn has_jump(self) -> bool {
        matches!(
            self,
            Self::Jump | Self::JumpIfFalse | Self::JumpIfTrue | Self::JumpNoInterrupt
        )
    }

    /// Does this opcode have 'HAS_LOCAL_FLAG' set.
    #[must_use]
    pub const fn has_local(self) -> bool {
        matches!(self, Self::LoadClosure | Self::StoreFastMaybeNull)
    }

    /// Does this opcode have 'HAS_NAME_FLAG' set.
    #[must_use]
    pub const fn has_name(self) -> bool {
        false
    }

    /// Whether this is an instrumented opcode; always `false` for pseudo opcodes.
    #[must_use]
    pub const fn is_instrumented(self) -> bool {
        false
    }

    /// Returns how many stack items this opcode pushes and pops.
    ///
    /// The operand is ignored: every pseudo opcode has a fixed effect.
    #[must_use]
    pub fn stack_effect_info(&self, _oparg: u32) -> StackEffect {
        // Spelled out as `i32`, which is what the untyped literals default to.
        let (pushed, popped): (i32, i32) = match self {
            Self::AnnotationsPlaceholder
            | Self::Jump
            | Self::JumpNoInterrupt
            | Self::PopBlock => (0, 0),
            Self::JumpIfFalse | Self::JumpIfTrue => (1, 1),
            Self::LoadClosure => (1, 0),
            // TODO: `pushed` differs from CPython, which reports `2`.
            Self::SetupCleanup => (0, 0),
            // TODO: `pushed` differs from CPython, which reports `1`.
            Self::SetupFinally | Self::SetupWith => (0, 0),
            Self::StoreFastMaybeNull => (0, 1),
        };
        debug_assert!(u32::try_from(pushed).is_ok());
        debug_assert!(u32::try_from(popped).is_ok());
        StackEffect::new(pushed as u32, popped as u32)
    }

    /// Base opcode of an instrumented opcode; always `None` here.
    #[must_use]
    pub const fn to_base(self) -> Option<Self> {
        None
    }

    /// Instrumented counterpart of this opcode; always `None` here.
    #[must_use]
    pub const fn to_instrumented(self) -> Option<Self> {
        None
    }

    /// Converts a raw [`u16`] into a pseudo opcode via `Self::try_from_numeric`.
    pub const fn try_from_u16(value: u16) -> Result<Self, MarshalError> {
        Self::try_from_numeric(value)
    }
}

View File

@@ -1,4 +1,5 @@
#![no_std]
#![recursion_limit = "256"] // Needed for `define_opcodes!` macro
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-compiler-core/")]

View File

@@ -1,184 +0,0 @@
"""
Generate Lib/_opcode_metadata.py for RustPython bytecode.
This file generates opcode metadata that is compatible with CPython 3.14.
"""
import itertools
import pathlib
import re
import typing
ROOT = pathlib.Path(__file__).parents[1]
BYTECODE_FILE = (
ROOT / "crates" / "compiler-core" / "src" / "bytecode" / "instructions.rs"
)
OPCODE_METADATA_FILE = ROOT / "Lib" / "_opcode_metadata.py"
# Opcodes that need to come first, regardless of their opcode ID.
PRIORITY_OPMAP = {
"CACHE",
"RESERVED",
"RESUME",
"INSTRUMENTED_LINE",
"ENTER_EXECUTOR",
}
def to_snake_case(s: str) -> str:
    """Convert a PascalCase name such as ``LoadAttr`` to ``LOAD_ATTR``."""
    # Start a new word at every capital that follows a lowercase letter or digit.
    separated = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", s)
    # Split a trailing run of digits off the non-digit character before it.
    numbered = re.sub(r"(\D)(\d+)$", r"\1_\2", separated)
    return numbered.upper()
class Opcode(typing.NamedTuple):
rust_name: str
id: int
have_oparg: bool
@property
def is_instrumented(self):
return self.cpython_name.startswith("INSTRUMENTED_")
@property
def cpython_name(self):
return to_snake_case(self.rust_name)
@classmethod
def from_str(cls, text: str):
# Split on commas that are followed by a newline + an uppercase letter (new entry)
entries = re.split(r",\s*\n\s*(?=[A-Z])", text)
for entry in entries:
entry = entry.strip()
if not entry:
continue
have_oparg = "Arg<" in entry # Hacky but works
rust_name = re.match(r"(\w+)", entry).group(1)
id_num = re.findall(r"= (\d+)", entry)[0]
yield cls(rust_name=rust_name, id=int(id_num), have_oparg=have_oparg)
def __lt__(self, other: typing.Self) -> bool:
sprio, oprio = (
opcode.cpython_name not in PRIORITY_OPMAP for opcode in (self, other)
)
return (sprio, self.id) < (oprio, other.id)
def extract_enum_body(text: str, name: str) -> str:
# Find the start of the enum block
start_match = re.search(rf"enum\s+{name}\s*\{{", text)
if not start_match:
return None
# Manually track brace depth from that point
depth = 0
start = start_match.end() - 1 # position of opening '{'
for i, ch in enumerate(text[start:], start):
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
# Return only the inner content (excluding outer braces)
return text[start + 1 : i]
def build_deopts(text: str) -> dict[str, list[str]]:
    """Map each deopt target to the variants that deoptimize to it.

    The mapping is read from the ``match self`` block of the Rust
    ``fn deopt(self)`` method found in ``text``. Keys and values are converted
    to UPPER_SNAKE_CASE.

    Raises
    ------
    ValueError
        If no ``match self`` follows the ``fn deopt(self)`` signature.
    """
    after_signature = re.search(r"fn deopt\(self\)(.*)", text, re.DOTALL).group(1)
    match_pos = after_signature.find("match self")
    if match_pos < 0:
        raise ValueError("Could not detect a match statement in deopt method")

    # Walk the braces to find where the match block's body begins and ends.
    open_braces = 0
    body_start = None
    body_end = None
    for pos, char in enumerate(after_signature[match_pos:], match_pos):
        if char == "{":
            open_braces += 1
            if body_start is None:
                body_start = pos + 1
        elif char == "}":
            open_braces -= 1
            if open_braces == 0:
                body_end = pos
                break
    match_body = after_signature[body_start:body_end]

    # One arm: `Self::A | Self::B => Self::C`, optionally `=> { Self::C`.
    arm_re = re.compile(
        r"((?:Self::\w+\s*\|\s*)*Self::\w+)\s*=>\s*(?:\{\s*)?Self::(\w+)", re.DOTALL
    )
    variant_re = re.compile(r"Self::(\w+)")
    return {
        to_snake_case(arm.group(2)): [
            to_snake_case(variant) for variant in variant_re.findall(arm.group(1))
        ]
        for arm in arm_re.finditer(match_body)
    }
# Script body: parse the Rust instruction source and write the metadata module.
contents = BYTECODE_FILE.read_text(encoding="utf-8")
deopts = build_deopts(contents)
# Real and pseudo instructions are parsed together from one combined body.
enum_body = "\n".join(
    extract_enum_body(contents, enum_name)
    for enum_name in ("Instruction", "PseudoInstruction")
)
opcodes = list(Opcode.from_str(enum_body))
# One less than the lowest id among opcodes that take an argument.
have_oparg = min(opcode.id for opcode in opcodes if opcode.have_oparg) - 1
min_instrumented = min(opcode.id for opcode in opcodes if opcode.is_instrumented)
# Generate the output file
output = """# This file is generated by scripts/generate_opcode_metadata.py
# for RustPython bytecode format (CPython 3.14 compatible opcode numbers).
# Do not edit!
"""
# _specializations: deopt target -> list of variants that deoptimize to it.
output += "\n_specializations = {\n"
for key, lst in deopts.items():
    output += f' "{key}": [\n'
    for item in lst:
        output += f' "{item}",\n'
    output += " ],\n"
output += "}\n"
# Every name that appears as a specialized variant of some target.
specialized = set(itertools.chain.from_iterable(deopts.values()))
# _specialized_opmap: only the specialized opcodes, sorted by name.
output += "\n_specialized_opmap = {\n"
for opcode in sorted(opcodes, key=lambda op: op.cpython_name):
    cpython_name = opcode.cpython_name
    if cpython_name not in specialized:
        continue
    output += f" '{cpython_name}': {opcode.id},\n"
output += "}\n"
# opmap: everything else, in the order defined by Opcode.__lt__.
output += "\nopmap = {\n"
for opcode in sorted(opcodes):
    cpython_name = opcode.cpython_name
    if cpython_name in specialized:
        continue
    output += f" '{cpython_name}': {opcode.id},\n"
output += "}\n"
output += f"""
HAVE_ARGUMENT = {have_oparg}
MIN_INSTRUMENTED_OPCODE = {min_instrumented}
"""
OPCODE_METADATA_FILE.write_text(output, encoding="utf-8")

View File

@@ -0,0 +1,11 @@
[WithExceptStart]
stack_effect = { pushed = "7", popped = "6" }
[SetupCleanup]
stack_effect = { pushed = "0" }
[SetupFinally]
stack_effect = { pushed = "0" }
[SetupWith]
stack_effect = { pushed = "0" }

View File

@@ -0,0 +1,27 @@
import os
import pathlib
import sys
# Locate the CPython checkout through the CPYTHON_ROOT environment variable;
# a missing variable is reported as a ValueError.
try:
    CPYTHON_ROOT = pathlib.Path(os.environ["CPYTHON_ROOT"]).expanduser().resolve()
except KeyError:
    raise ValueError("Missing environment variable 'CPYTHON_ROOT'")
CPYTHON_TOOLS_LIB = CPYTHON_ROOT / "Tools" / "cases_generator"
# Put the cases_generator directory on sys.path (once) so that the
# `analyzer`, `stack` and `generators_common` imports in this module resolve.
if (path := CPYTHON_TOOLS_LIB.as_posix()) not in sys.path:
    sys.path.append(path)
from analyzer import SKIP_PROPERTIES, Analysis, Family, Properties, analyze_files
from stack import get_stack_effect
def get_analysis() -> Analysis:
    """Analyze CPython's default input and fold pseudo instructions in.

    Returns the ``Analysis`` produced by ``analyze_files`` with every entry of
    ``pseudos`` merged into ``instructions``. Each call runs the analysis again.
    """
    from generators_common import DEFAULT_INPUT as cases_input

    result = analyze_files([cases_input])
    # Our separation of real and pseudo opcodes is done at the enum definition,
    # so a single mapping holding both is enough here.
    result.instructions |= result.pseudos
    return result

View File

@@ -0,0 +1,107 @@
"""
Generate Lib/_opcode_metadata.py for RustPython bytecode.
This file generates opcode metadata that is compatible with CPython 3.14.
"""
import functools
import io
import itertools
import operator
import pathlib
import typing
from opcodes import OpcodeInfo
from utils import DEFAULT_INPUT, ROOT, get_conf, to_pascal_case, to_upper_snake_case
OUT_FILE = ROOT / "Lib/_opcode_metadata.py"
# Opcodes that need to come first, regardless of their opcode ID.
PRIORITY_OPMAP = {
"CACHE",
"RESERVED",
"RESUME",
"INSTRUMENTED_LINE",
"ENTER_EXECUTOR",
}
INDENT = " " * 4
INDENT2 = INDENT * 2
def main():
    """Generate ``Lib/_opcode_metadata.py`` from the Rust opcode definitions.

    Reads ``DEFAULT_INPUT``, parses every opcode enum in it and writes
    ``_specializations``, ``_specialized_opmap``, ``opmap``, ``HAVE_ARGUMENT``
    and ``MIN_INSTRUMENTED_OPCODE`` to ``OUT_FILE``.
    """
    override_conf = get_conf()
    # Read and write as UTF-8 explicitly (as get_conf does) so the result does
    # not depend on the platform's locale encoding.
    inp = DEFAULT_INPUT.read_text(encoding="utf-8")
    infos = tuple(OpcodeInfo.iter_infos(inp, override_conf))
    opcodes = tuple(itertools.chain.from_iterable(info.opcodes for info in infos))

    script_path = pathlib.Path(__file__).resolve().relative_to(ROOT).as_posix()

    out = io.StringIO()
    out.write(
        f"""
# This file is generated by {script_path}
# for RustPython bytecode format (CPython 3.14 compatible opcode numbers).
# Do not edit!
""".lstrip()
    )

    # _specializations
    out.write("\n")
    out.write("_specializations = {\n")
    # Merge the per-enum tables; the `{}` seed keeps reduce() from raising
    # when no enum was found.
    deopts = functools.reduce(operator.ior, (info.deopts for info in infos), {})
    for family, members in deopts.items():
        out.write(f'{INDENT}"{to_upper_snake_case(family)}": [\n')
        for item in map(to_upper_snake_case, members):
            out.write(f'{INDENT2}"{item}",\n')
        out.write(f"{INDENT}],\n")
    out.write("}\n")

    # _specialized_opmap
    out.write("\n")
    out.write("_specialized_opmap = {\n")
    # The deopt tables hold Rust (PascalCase) names, so match on rust_name.
    specialized = set(itertools.chain.from_iterable(deopts.values()))
    for opcode in sorted(opcodes, key=operator.attrgetter("cpython_name")):
        if opcode.rust_name not in specialized:
            continue
        out.write(f"{INDENT}'{opcode.cpython_name}': {opcode.id},\n")
    out.write("}\n")

    # opmap
    out.write("\n")
    out.write("opmap = {\n")

    def opmap_order(op):
        # False sorts before True: PRIORITY_OPMAP members first, then by id.
        return (op.cpython_name not in PRIORITY_OPMAP, op.id)

    for opcode in sorted(opcodes, key=opmap_order):
        if opcode.rust_name in specialized:
            continue
        out.write(f"{INDENT}'{opcode.cpython_name}': {opcode.id},\n")
    out.write("}\n")

    # min
    out.write("\n")
    # One less than the lowest id among opcodes that take an argument.
    have_argument = min(opcode.id for opcode in opcodes if opcode.have_argument) - 1
    out.write(f"HAVE_ARGUMENT = {have_argument}\n")
    min_instrumented = min(opcode.id for opcode in opcodes if opcode.is_instrumented)
    out.write(f"MIN_INSTRUMENTED_OPCODE = {min_instrumented}\n")

    # write output
    generated = out.getvalue()
    OUT_FILE.write_text(generated, encoding="utf-8")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,364 @@
#!/usr/bin/env python
from __future__ import annotations
import collections
import dataclasses
import io
import os
import pathlib
import subprocess
import sys
import typing
import tomllib
from cpython import Analysis, get_analysis, get_stack_effect
from opcodes import OpcodeInfo
from utils import DEFAULT_INPUT, ROOT, get_conf, to_pascal_case
OUT_FILE = ROOT / "crates/compiler-core/src/bytecode/opcode_metadata.rs"
@dataclasses.dataclass(frozen=True, slots=True)
class OpcodeGen:
info: OpcodeDef
@property
def fn_as_info_size(self) -> str:
return f"""
/// Returns [`Self`] as [`{self.size}`].
#[must_use]
pub const fn as_{self.size}(self) -> {self.size} {{
self.as_numeric()
}}
"""
@property
def fn_try_from_numeric(self) -> str:
return f"""
pub const fn try_from_{self.size}(
value: {self.size},
) -> Result<Self, MarshalError> {{
Self::try_from_numeric(value)
}}
"""
@property
def fn_has_arg(self) -> str:
return self.gen_fn_has_attr("has_arg", "oparg", "HAS_ARG_FLAG")
@property
def fn_has_const(self) -> str:
return self.gen_fn_has_attr("has_const", "uses_co_consts", "HAS_CONST_FLAG")
@property
def fn_has_name(self) -> str:
return self.gen_fn_has_attr("has_name", "uses_co_names", "HAS_NAME_FLAG")
@property
def fn_has_jump(self) -> str:
return self.gen_fn_has_attr("has_jump", "jumps", "HAS_JUMP_FLAG")
@property
def fn_has_free(self) -> str:
return self.gen_fn_has_attr("has_free", "has_free", "HAS_FREE_FLAG")
@property
def fn_has_local(self) -> str:
return self.gen_fn_has_attr("has_local", "uses_locals", "HAS_LOCAL_FLAG")
@property
def fn_is_instrumented(self) -> str:
arms = "|".join(
f"Self::{opcode.rust_name}" for opcode in self if opcode.is_instrumented
)
arms = arms.strip()
if arms:
inner = f"matches!(self, {arms})"
else:
inner = "false"
return f"""
#[must_use]
pub const fn is_instrumented(self) -> bool {{
{inner}
}}
"""
@property
def fn_to_base(self) -> str:
arms = ",\n".join(
f"Self::{iname} => Self::{name}"
for name, iname in self.instrumented_mapping.items()
)
arms = arms.strip()
if not arms:
inner = "None"
else:
inner = f"""
Some(match self {{
{arms},
_ => return None,
}})
"""
return f"""
#[must_use]
pub const fn to_base(self) -> Option<Self> {{
{inner}
}}
"""
@property
def fn_to_instrumented(self) -> str:
arms = ",\n".join(
f"Self::{name} => Self::{iname}"
for name, iname in self.instrumented_mapping.items()
)
arms = arms.strip()
if not arms:
inner = "None"
else:
inner = f"""
Some(match self {{
{arms},
_ => return None,
}})
"""
return f"""
#[must_use]
pub const fn to_instrumented(self) -> Option<Self> {{
{inner}
}}
"""
@property
def fn_deopt(self) -> str:
arms = ""
for target, specialized in self.info.deopts.items():
ops = "|".join(f"Self::{op}" for op in specialized)
arms += f"{ops} => Self::{target},\n"
arms = arms.strip()
if not arms:
inner = "None"
else:
inner = f"""
Some(match self {{
{arms}
_ => return None,
}})
"""
return f"""
#[must_use]
pub const fn deopt(self) -> Option<Self> {{
{inner}
}}
"""
@property
def fn_cache_entries(self) -> str:
arms = ""
for opcode in self:
name = opcode.rust_name
if opcode.is_instrumented:
continue
if getattr(opcode, "family", None) and (opcode.family.name != name):
continue
try:
size = opcode.cache_entry
except AttributeError:
continue
if size > 1:
arms += f"Self::{name} => {size - 1},\n"
arms = arms.strip()
if not arms:
inner = "0"
else:
inner = f"""
match self.deoptimize() {{
{arms}
_ => 0,
}}
"""
return f"""
#[must_use]
pub const fn cache_entries(self) -> usize {{
{inner}
}}
"""
@property
def fn_stack_effect_info(self) -> str:
oparg_used = False
arms = ""
for opcode in self:
name = opcode.rust_name
popped = opcode.stack_effect_popped
pushed = opcode.stack_effect_pushed
pushed_comment = ""
popped_comment = ""
if popped != opcode.cpy_popped:
popped_comment = f"// TODO: Differs from CPython `{opcode.cpy_popped}`"
if pushed != opcode.cpy_pushed:
pushed_comment = f"// TODO: Differs from CPython `{opcode.cpy_pushed}`"
oparg_used = oparg_used or any("oparg" in expr for expr in (pushed, popped))
arms += f"""
Self::{name} => (
{pushed}, {pushed_comment}
{popped}, {popped_comment}
),
""".strip()
arms = arms.strip()
oparg_arg = "_oparg"
oparg_cast = ""
if oparg_used:
oparg_arg = "oparg"
oparg_cast = f"""
// Reason for converting {oparg_arg} to i32 is because of expressions like `1 + (oparg -1)`
// that causes underflow errors.
let oparg = i32::try_from({oparg_arg}).expect("{oparg_arg} does not fit in an `i32`");
"""
return f"""
#[must_use]
pub fn stack_effect_info(&self, {oparg_arg}: u32) -> StackEffect {{
{oparg_cast}
let (pushed, popped) = match self {{
{arms}
}};
debug_assert!(u32::try_from(pushed).is_ok());
debug_assert!(u32::try_from(popped).is_ok());
StackEffect::new(pushed as u32, popped as u32)
}}
"""
def gen(self) -> str:
methods = "\n\n".join(
getattr(self, attr).strip()
for attr in sorted(dir(self))
if attr.startswith("fn_")
)
impls = "\n\n".join(
getattr(self, attr).strip()
for attr in sorted(dir(self))
if attr.startswith("impl_")
)
return f"""
impl super::{self.info.enum_name} {{
{methods}
}}
{impls}
"""
def gen_fn_has_attr(self, fn_name: str, properties_attr: str, doc_flag: str) -> str:
arms = "|".join(
f"Self::{opcode.rust_name}"
for opcode in self
if getattr(opcode.properties, properties_attr)
)
if arms:
inner = f"matches!(self, {arms})"
else:
inner = "false"
return f"""
/// Does this opcode have '{doc_flag}' set.
#[must_use]
pub const fn {fn_name}(self) -> bool {{
{inner}
}}
"""
@property
def instrumented_mapping(self) -> dict[str, str]:
names, inames = set(), set()
for opcode in self:
name = opcode.rust_name
if opcode.is_instrumented:
inames.add(name)
else:
names.add(name)
res = {}
for iname in sorted(inames):
name = iname.removeprefix("Instrumented")
if name not in names:
continue
res[name] = iname
return res
@property
def size(self) -> str:
return self.info.size
def __iter__(self):
yield from self.info.opcodes
def rustfmt(code: str) -> str:
    """Format Rust source by piping it through the ``rustfmt`` executable.

    Raises ``subprocess.CalledProcessError`` when ``rustfmt`` exits non-zero.
    """
    completed = subprocess.run(
        ["rustfmt", "--emit=stdout"],
        input=code,
        text=True,
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout
def main():
    """Generate ``OUT_FILE`` (the Rust opcode metadata) from ``DEFAULT_INPUT``.

    Renders one ``impl`` block per opcode enum found in the input, formats the
    result with ``rustfmt`` and writes it out.
    """
    override_conf = get_conf()
    # Read and write as UTF-8 explicitly (as get_conf does) so the result does
    # not depend on the platform's locale encoding.
    inp = DEFAULT_INPUT.read_text(encoding="utf-8")
    opcode_infos = OpcodeInfo.iter_infos(inp, override_conf)

    generated = "".join(OpcodeGen(info).gen() for info in opcode_infos)

    script_path = pathlib.Path(__file__).resolve().relative_to(ROOT).as_posix()
    output = rustfmt(
        f"""
// This file is generated by {script_path}
// Do not edit!
use crate::{{
bytecode::instruction::StackEffect,
marshal::MarshalError,
}};
{generated}
"""
    )
    OUT_FILE.write_text(output, encoding="utf-8")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,159 @@
from __future__ import annotations
import collections
import dataclasses
import re
import typing
import warnings
import utils
from cpython import SKIP_PROPERTIES, Family, Properties, get_analysis, get_stack_effect
from utils import SKIP_OVERRIDE, Override, OverrideConfs, StackEffect, to_pascal_case
if typing.TYPE_CHECKING:
from collections.abc import Iterable
@dataclasses.dataclass(frozen=True, slots=True)
class OpcodeInfo:
enum_name: str
size: str
opcodes: tuple[Opcode, ...]
@property
def deopts(self) -> dict[str, list[str]]:
analysis = get_analysis()
names = {opcode.rust_name for opcode in self}
res = collections.defaultdict(list)
for family in analysis.families.values():
family_name = to_pascal_case(family.name)
if family_name not in names:
continue
for member in family.members:
member_name = to_pascal_case(member.name)
if member.name == family_name:
continue
res[family_name].append(member_name)
return dict(res)
def __iter__(self):
yield from self.opcodes
@classmethod
def iter_infos(
cls, text: str, override_confs: OverrideConfs
) -> Iterable[typing.Self]:
for block_match in re.finditer(
r"define_opcodes!\s*\((.+?)\);", text, re.DOTALL
):
block = block_match.group(1).strip()
size = re.search(r"#\[repr\((\w+)\)\]", block).group(1)
enum_name = re.search(
r"#\[repr\(\w+\)\]\s*pub\s+enum\s+(\w+)\s*;", block
).group(1)
second_enum_match = re.search(r"pub\s+enum\s+(\w+)\s*\{", block, re.DOTALL)
entries = utils.extract_enum_body(block, second_enum_match.end() - 1)
opcodes = tuple(sorted(iter_opcodes(entries, override_confs)))
yield cls(enum_name, size, opcodes)
def iter_opcodes(text: str, override_confs: OverrideConfs) -> Iterable[Opcode]:
    """Yield an ``Opcode`` for every entry of a Rust enum body.

    Each opcode is enriched with properties, family, cache size and stack
    effect when the CPython analysis knows an instruction of the same name.
    A warning is issued for an opcode that has neither CPython metadata nor a
    non-default override.

    Parameters
    ----------
    text : str
        Body of the Rust enum (without the outer braces).
    override_confs : OverrideConfs
        Overrides keyed by Rust opcode name.
    """
    analysis = get_analysis()
    # Split on commas that are followed by a newline + an uppercase letter (new entry)
    for raw_entry in re.split(r",\s*\n\s*(?=[A-Z])", text):
        entry = raw_entry.strip()
        if not entry:
            continue

        parsed = Opcode.from_str(entry)
        name = parsed.rust_name
        override = override_confs.get(name, SKIP_OVERRIDE)
        instr = analysis.instructions.get(parsed.cpython_name)

        extra = {}
        if instr:
            extra["properties"] = instr.properties
            extra["family"] = getattr(instr, "family", None)
            extra["cache_entry"] = getattr(instr, "size", -1)
            stack = get_stack_effect(instr)
            extra["stack_effect"] = StackEffect(
                popped=(-stack.base_offset).to_c(),
                pushed=(stack.logical_sp - stack.base_offset).to_c(),
            )
        elif override == SKIP_OVERRIDE:
            warnings.warn(
                f"Could not get instruction metadata for {name}"
                " from CPython or override conf"
            )

        yield dataclasses.replace(parsed, override=override, **extra)
@dataclasses.dataclass(frozen=True, slots=True)
class Opcode:
    """A single opcode: what the Rust source declares plus CPython metadata."""

    # Variant name as written in the Rust enum (PascalCase).
    rust_name: str
    # Numeric discriminant of the variant.
    id: int
    # True when the Rust entry text contains `Arg<`.
    have_argument: bool = False
    # Instruction size taken from CPython's analysis, when available.
    cache_entry: int = 0
    # Stack effect computed from CPython, or None when CPython lacks the opcode.
    stack_effect: StackEffect | None = None
    properties: Properties = dataclasses.field(default_factory=lambda: SKIP_PROPERTIES)
    family: Family | None = None
    # Per-opcode override loaded from the configuration file.
    override: Override = dataclasses.field(default_factory=Override)

    @property
    def is_instrumented(self) -> bool:
        """Whether the opcode is instrumented; an explicit override wins."""
        forced = self.override.is_instrumented
        if forced is None:
            return self.cpython_name.startswith("INSTRUMENTED_")
        return forced

    @property
    def cpython_name(self):
        """The Rust name converted to CPython's UPPER_SNAKE_CASE."""
        return utils.to_upper_snake_case(self.rust_name)

    @property
    def cpy_popped(self) -> str | None:
        """CPython's ``popped`` expression, or None without a stack effect."""
        return getattr(self.stack_effect, "popped", None)

    @property
    def cpy_pushed(self) -> str | None:
        """CPython's ``pushed`` expression, or None without a stack effect."""
        return getattr(self.stack_effect, "pushed", None)

    @property
    def stack_effect_popped(self) -> str:
        """Effective ``popped`` expression: the override, else CPython's value.

        Raises ``ValueError`` when neither source provides one.
        """
        from_override = self.override.stack_effect.popped
        from_cpython = self.cpy_popped
        if from_override is None and from_cpython is None:
            raise ValueError(f"{self.rust_name} is missing popped stack_effect")
        return from_override or from_cpython

    @property
    def stack_effect_pushed(self) -> str:
        """Effective ``pushed`` expression: the override, else CPython's value.

        Raises ``ValueError`` when neither source provides one.
        """
        from_override = self.override.stack_effect.pushed
        from_cpython = self.cpy_pushed
        if from_override is None and from_cpython is None:
            raise ValueError(f"{self.rust_name} is missing pushed stack_effect")
        return from_override or from_cpython

    @classmethod
    def from_str(cls, entry: str) -> typing.Self:
        """Parse one enum entry into an ``Opcode`` holding only Rust-side data."""
        variant = re.match(r"(\w+)", entry).group(1)
        discriminant = int(re.findall(r"= (\d+)", entry)[0])
        return cls(variant, discriminant, have_argument="Arg<" in entry)

    def __lt__(self, other: typing.Self) -> bool:
        """Order opcodes by ascending id."""
        return self.id < other.id

View File

@@ -0,0 +1,96 @@
import dataclasses
import pathlib
import re
import sys
import tomllib
ROOT = pathlib.Path(__file__).parents[2].resolve()
DEFAULT_INPUT = ROOT / "crates/compiler-core/src/bytecode/instruction.rs"
DEFAULT_CONF = pathlib.Path(__file__).parent / "conf.toml"
@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class StackEffect:
    """Pushed/popped counts of an opcode, each kept as an expression string."""

    # Expression for the number of pushed items; None when unspecified.
    pushed: str | None = None
    # Expression for the number of popped items; None when unspecified.
    popped: str | None = None
@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class Override:
    """Per-opcode settings built by ``get_conf`` from the TOML configuration."""

    # Explicit instrumented flag; None means no override was given.
    is_instrumented: bool | None = None
    # Stack effect override; fields left as None are not overridden.
    stack_effect: StackEffect = dataclasses.field(default_factory=StackEffect)
type OverrideConfs = dict[str, Override]
SKIP_STACK_EFFECT = StackEffect()
SKIP_OVERRIDE = Override()
def get_conf(path: pathlib.Path = DEFAULT_CONF) -> OverrideConfs:
    """Load the per-opcode overrides from a TOML file.

    Parameters
    ----------
    path : pathlib.Path
        TOML file with one table per opcode.

    Returns
    -------
    OverrideConfs
        One ``Override`` per table, keyed by table name. A table without a
        ``stack_effect`` entry gets an empty ``StackEffect``.
    """
    tables = tomllib.loads(path.read_text(encoding="utf-8"))
    return {
        name: Override(
            **{
                **table,
                "stack_effect": StackEffect(**table.get("stack_effect", {})),
            }
        )
        for name, table in tables.items()
    }
def to_pascal_case(s: str) -> str:
    """Convert an UPPER_SNAKE_CASE name such as ``LOAD_ATTR`` to ``LoadAttr``."""
    # title() capitalizes each word; then the underscores between them go.
    return "".join(s.title().split("_"))
def to_upper_snake_case(s: str) -> str:
    """
    Convert a PascalCase string to UPPER_SNAKE_CASE.

    Parameters
    ----------
    s : str
        Pascal cased string to convert.

    Returns
    -------
    str
        Uppercased snake case string.

    Examples
    --------
    >>> to_upper_snake_case("LoadAttr")
    'LOAD_ATTR'
    >>> to_upper_snake_case("CallIntrinsic1")
    'CALL_INTRINSIC_1'
    """
    # Start a new word at every capital that follows a lowercase letter or digit.
    separated = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", s)
    # Split a trailing run of digits off the non-digit character before it.
    numbered = re.sub(r"(\D)(\d+)$", r"\1_\2", separated)
    return numbered.upper()
def extract_enum_body(text: str, start: int) -> str:
    """
    Return the contents of the brace-delimited block opening at ``start``.

    Parameters
    ----------
    text : str
        Rust source code containing the enum body.
    start : int
        Index of the opening ``{`` of the enum body.

    Returns
    -------
    str
        Text between the opening brace and its matching closing brace,
        stripped of surrounding whitespace.

    Raises
    ------
    ValueError
        If the opening brace is never closed.
    """
    assert text[start] == "{"

    depth = 0
    # Only braces change the nesting depth, so visit just those.
    for brace in re.finditer(r"[{}]", text[start:]):
        depth += 1 if brace.group() == "{" else -1
        if depth == 0:
            # Match offsets are relative to `start`; exclude the outer braces.
            return text[start + 1 : start + brace.start()].strip()

    raise ValueError("Could not find end to enum body")