Files
RustPython/scripts/generate_opcode_metadata.py
Shahar Naveh 9a5de28b79 Align _opcode_metadata.py to 3.14.3 (#7456)
* Align `_opcode_metadata.py` to 3.14.3

* Unmark passing test

* Ensure python 3.14 runs on CI

* Update banner

* Fix `test__opcode.py`

* Adjust generate script

* Fix docs
2026-03-18 22:09:00 +09:00

178 lines
4.8 KiB
Python

"""
Generate Lib/_opcode_metadata.py for RustPython bytecode.
This script generates opcode metadata that is compatible with CPython 3.14.
"""
import itertools
import pathlib
import re
import typing
# Repository root: two levels up from this file (the parent of the directory
# that contains this script).
ROOT = pathlib.Path(__file__).parents[1]

# Rust source file that is parsed for the instruction definitions.
BYTECODE_FILE = ROOT.joinpath(
    "crates", "compiler-core", "src", "bytecode", "instruction.rs"
)

# Python module that this script writes.
OPCODE_METADATA_FILE = ROOT.joinpath("Lib", "_opcode_metadata.py")

# Opcodes that need to come first, regardless of their opcode ID.
PRIORITY_OPMAP = {
    "CACHE",
    "RESERVED",
    "RESUME",
    "INSTRUMENTED_LINE",
    "ENTER_EXECUTOR",
}
def to_pascal_case(s: str) -> str:
    """
    Convert a CamelCase name into upper-case words joined by underscores.

    Despite the function name, the result is fully upper-cased, e.g.
    ``LoadFast`` becomes ``LOAD_FAST``. A run of digits at the very end of
    the name is split off as its own word, e.g. ``CallIntrinsic1`` becomes
    ``CALL_INTRINSIC_1``.
    """
    # Insert "_" before every capital that follows a lowercase letter or digit.
    with_word_breaks = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", s)
    # Insert "_" between a non-digit and the trailing run of digits, if any.
    with_number_break = re.sub(r"(\D)(\d+)$", r"\1_\2", with_word_breaks)
    return with_number_break.upper()
class Opcode(typing.NamedTuple):
rust_name: str
id: int
have_oparg: bool
@property
def cpython_name(self) -> str:
return to_pascal_case(self.rust_name)
@property
def is_instrumented(self):
return self.cpython_name.startswith("INSTRUMENTED_")
@classmethod
def from_str(cls, body: str):
raw_variants = re.split(r"(\d+),", body.strip())
raw_variants.remove("")
for raw_name, raw_id in itertools.batched(raw_variants, 2, strict=True):
have_oparg = "Arg<" in raw_name # Hacky but works
name = re.findall(r"\b[A-Z][A-Za-z]*\d*\b(?=\s*[\({=])", raw_name)[0]
yield cls(rust_name=name.strip(), id=int(raw_id), have_oparg=have_oparg)
def __lt__(self, other: typing.Self) -> bool:
sprio, oprio = (
opcode.cpython_name not in PRIORITY_OPMAP for opcode in (self, other)
)
return (sprio, self.id) < (oprio, other.id)
def extract_enum_body(contents: str, enum_name: str) -> str:
    """
    Return the body of ``pub enum <enum_name>`` from *contents*, minus comments.

    The body runs from the opening brace up to the first line that begins
    with ``}``. Lines that are entirely ``//`` comments are dropped, trailing
    ``//`` comments are cut off, and every remaining line is stripped.

    Raises ``ValueError`` when the enum cannot be found.
    """
    pattern = f"pub enum {enum_name} " + r"\{(.+?)\n\}"
    found = re.search(pattern, contents, re.DOTALL)
    if found is None:
        raise ValueError(f"Could not find {enum_name} enum")

    kept = []
    for raw_line in found.group(1).splitlines():
        if raw_line.strip().startswith("//"):
            continue  # Ignore comment lines
        # Remove any comment. i.e. "foo // some comment"
        code, _, _ = raw_line.partition("//")
        kept.append(code.strip())
    return "\n".join(kept)
def build_deopts(contents: str) -> dict[str, list[str]]:
    """
    Map each base opcode to the specialized opcodes that deopt to it.

    The mapping is scraped from the match arms of the Rust ``deopt`` method
    in *contents*. Within an arm, the last ``Self::<Name>`` reference is taken
    as the base instruction and every preceding one as a specialization of
    it. All names are converted with ``to_pascal_case``.

    Raises ``ValueError`` when the ``deopt`` function cannot be found, or when
    a non-empty arm contains no ``Self::`` reference.
    """
    found = re.search(r"fn deopt\(self\) -> Option<Self>(.*)", contents, re.DOTALL)
    if found is None:
        raise ValueError("Could not find deopt function")

    kept_lines = []
    for line in map(str.strip, found.group(1).splitlines()):
        if line.startswith(("//", "Some(match")):
            continue  # Skip comments or start of match
        if line.startswith("_ =>"):
            break  # Take until reaching fallback
        kept_lines.append(line)
    body = "\n".join(kept_lines).removeprefix("{")

    # Split the body into arms on every "," or closing "}" seen at brace
    # depth 0, so commas inside a variant's "{ ... }" do not end the arm.
    depth = 0
    arms = []
    buf = []
    for char in body:
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1

        if depth == 0 and char in ("}", ","):
            arms.append("".join(buf))
            buf = []
        else:
            buf.append(char)
    # last arm
    arms.append("".join(buf))

    deopts = {}
    # Strip every arm, including the last one, so leftover whitespace (e.g.
    # a blank line before the fallback) is not mistaken for an arm.
    for arm in map(str.strip, arms):
        if not arm:
            continue
        names = [to_pascal_case(name) for name in re.findall(r"Self::(\w*)\b", arm)]
        if not names:
            raise ValueError(f"Could not find any instruction in deopt arm: {arm!r}")
        *specialized, deopt = names
        deopts[deopt] = specialized
    return deopts
# Parse the Rust source once; everything below is derived from it.
contents = BYTECODE_FILE.read_text(encoding="utf-8")
deopts = build_deopts(contents)
enum_body = "\n".join(
    extract_enum_body(contents, enum_name)
    for enum_name in ("Instruction", "PseudoInstruction")
)
opcodes = list(Opcode.from_str(enum_body))

have_oparg = min(op.id for op in opcodes if op.have_oparg) - 1
min_instrumented = min(op.id for op in opcodes if op.is_instrumented)

# Every name that appears as a specialization of some base opcode.
specialized = set(itertools.chain.from_iterable(deopts.values()))

# Generate the output file: collect the pieces and join them once at the end.
chunks = [
    """# This file is generated by scripts/generate_opcode_metadata.py
# for RustPython bytecode format (CPython 3.14 compatible opcode numbers).
# Do not edit!
""",
    "\n_specializations = {\n",
]
for base_name, specialized_names in deopts.items():
    chunks.append(f' "{base_name}": [\n')
    chunks.extend(f' "{name}",\n' for name in specialized_names)
    chunks.append(" ],\n")
chunks.append("}\n")

# Specialized opcodes only, ordered alphabetically by name.
chunks.append("\n_specialized_opmap = {\n")
chunks.extend(
    f" '{op.cpython_name}': {op.id},\n"
    for op in sorted(opcodes, key=lambda op: op.cpython_name)
    if op.cpython_name in specialized
)
chunks.append("}\n")

# Everything else, ordered by Opcode's own comparison.
chunks.append("\nopmap = {\n")
chunks.extend(
    f" '{op.cpython_name}': {op.id},\n"
    for op in sorted(opcodes)
    if op.cpython_name not in specialized
)
chunks.append("}\n")

chunks.append(f"""
HAVE_ARGUMENT = {have_oparg}
MIN_INSTRUMENTED_OPCODE = {min_instrumented}
""")

output = "".join(chunks)
OPCODE_METADATA_FILE.write_text(output, encoding="utf-8")