mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
187 lines
5.1 KiB
Python
187 lines
5.1 KiB
Python
"""
|
|
Generate Lib/_opcode_metadata.py for RustPython bytecode.
|
|
|
|
This script generates opcode metadata that is compatible with CPython 3.14.
|
|
"""
|
|
|
|
import itertools
|
|
import pathlib
|
|
import re
|
|
import typing
|
|
|
|
# Parent of the directory that holds this script.
ROOT = pathlib.Path(__file__).parents[1]
# Rust source that is read as the input of the generator.
BYTECODE_FILE = ROOT.joinpath(
    "crates", "compiler-core", "src", "bytecode", "instruction.rs"
)
# Python module that the generator writes.
OPCODE_METADATA_FILE = ROOT.joinpath("Lib", "_opcode_metadata.py")
|
|
|
|
|
|
# Opcodes that need to come first when opcodes are sorted, regardless of
# their opcode ID.
PRIORITY_OPMAP = {
    "CACHE",
    "ENTER_EXECUTOR",
    "INSTRUMENTED_LINE",
    "RESERVED",
    "RESUME",
}
|
|
|
|
|
|
def to_snake_case(s: str) -> str:
    """Convert a CamelCase identifier to UPPER_SNAKE_CASE.

    An underscore is inserted before every uppercase letter that follows a
    lowercase letter or a digit, and before a trailing run of digits that
    follows a non-digit. The result is upper-cased.
    """
    # Step 1: break between words ("LoadFast" -> "Load_Fast").
    with_word_breaks = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", s)
    # Step 2: detach a numeric suffix ("Intrinsic1" -> "Intrinsic_1").
    with_digit_break = re.sub(r"(\D)(\d+)$", r"\1_\2", with_word_breaks)
    return with_digit_break.upper()
|
|
|
|
|
|
class Opcode(typing.NamedTuple):
|
|
rust_name: str
|
|
cpython_name: str
|
|
id: int
|
|
have_oparg: bool
|
|
|
|
@property
|
|
def is_instrumented(self):
|
|
return self.cpython_name.startswith("INSTRUMENTED_")
|
|
|
|
@classmethod
|
|
def from_str(cls, text: str):
|
|
# Split on commas that are followed by a newline + an uppercase letter (new entry)
|
|
entries = re.split(r",\s*\n\s*(?=[A-Z])", text)
|
|
for entry in entries:
|
|
entry = entry.strip()
|
|
if not entry:
|
|
continue
|
|
have_oparg = "Arg<" in entry # Hacky but works
|
|
rust_name = re.match(r"(\w+)", entry).group(1)
|
|
id_num, cpython_name = re.search(r'\((\d+),\s*"([^"]+)"\)', entry).groups()
|
|
yield cls(
|
|
rust_name=rust_name,
|
|
cpython_name=cpython_name,
|
|
id=int(id_num),
|
|
have_oparg=have_oparg,
|
|
)
|
|
|
|
def __lt__(self, other: typing.Self) -> bool:
|
|
sprio, oprio = (
|
|
opcode.cpython_name not in PRIORITY_OPMAP for opcode in (self, other)
|
|
)
|
|
return (sprio, self.id) < (oprio, other.id)
|
|
|
|
|
|
def extract_enum_body(text: str, name: str) -> str:
|
|
# Find the start of the enum block
|
|
start_match = re.search(rf"enum\s+{name}\s*\{{", text)
|
|
if not start_match:
|
|
return None
|
|
|
|
# Manually track brace depth from that point
|
|
depth = 0
|
|
start = start_match.end() - 1 # position of opening '{'
|
|
for i, ch in enumerate(text[start:], start):
|
|
if ch == "{":
|
|
depth += 1
|
|
elif ch == "}":
|
|
depth -= 1
|
|
if depth == 0:
|
|
# Return only the inner content (excluding outer braces)
|
|
return text[start + 1 : i]
|
|
|
|
|
|
def build_deopts(text: str) -> dict[str, list[str]]:
    """Map each deopt target opcode to the variants that deopt to it.

    The function looks for ``fn deopt(self)`` in *text*, takes the first
    ``match self { ... }`` after it and reads every arm of the form
    ``Self::A | Self::B => Opcode::C`` (optionally with ``{`` after ``=>``).
    All names are converted with ``to_snake_case``.

    Args:
        text: Rust source text containing the ``deopt`` method.

    Returns:
        ``{target: [variant, ...]}`` in the order the arms appear. Arms that
        share a target are merged into one list instead of the later arm
        replacing the earlier one.

    Raises:
        ValueError: If ``fn deopt(self)`` or the ``match self`` statement
            cannot be found.
    """
    deopt_fn = re.search(r"fn deopt\(self\)(.*)", text, re.DOTALL)
    if deopt_fn is None:
        raise ValueError("Could not find a deopt method")
    raw_body = deopt_fn.group(1)

    match_start = raw_body.find("match self")
    if match_start == -1:
        raise ValueError("Could not detect a match statement in deopt method")

    # Locate the braces of the match block by counting nesting depth.
    brace_depth = 0
    block_start = None
    block_end = None
    for i, ch in enumerate(raw_body[match_start:], match_start):
        if ch == "{":
            brace_depth += 1
            if block_start is None:
                block_start = i + 1
        elif ch == "}":
            brace_depth -= 1
            if brace_depth == 0:
                block_end = i
                break

    # A bound that was never found stays None, in which case the slice
    # simply extends to that end of raw_body.
    match_body = raw_body[block_start:block_end]

    arm_pattern = re.compile(
        r"((?:Self::\w+\s*\|\s*)*Self::\w+)\s*=>\s*(?:\{\s*)?Opcode::(\w+)", re.DOTALL
    )
    variants_pattern = re.compile(r"Self::(\w+)")

    deopts: dict[str, list[str]] = {}
    for hit in arm_pattern.finditer(match_body):
        key = to_snake_case(hit.group(2))
        variants = variants_pattern.findall(hit.group(1))
        # Extend rather than assign so that a second arm with the same
        # target does not discard the variants collected from the first.
        deopts.setdefault(key, []).extend(
            to_snake_case(variant) for variant in variants
        )

    return deopts
|
|
|
|
|
|
# --- Parse the Rust source -------------------------------------------------
contents = BYTECODE_FILE.read_text(encoding="utf-8")
deopts = build_deopts(contents)

# Both enums are parsed as one body; their entries end up in a single list.
enum_body = "\n".join(
    extract_enum_body(contents, enum_name)
    for enum_name in ("Instruction", "PseudoInstruction")
)
opcodes = list(Opcode.from_str(enum_body))

# One less than the smallest opcode ID that takes an argument.
have_oparg = min(op.id for op in opcodes if op.have_oparg) - 1
# Smallest opcode ID whose CPython name is an INSTRUMENTED_* name.
min_instrumented = min(op.id for op in opcodes if op.is_instrumented)

# --- Generate the output file ----------------------------------------------
# NOTE(review): the leading whitespace inside the emitted literals below is
# kept exactly as found in this copy of the script; confirm it against the
# committed Lib/_opcode_metadata.py before regenerating.
chunks = [
    """# This file is generated by scripts/generate_opcode_metadata.py
# for RustPython bytecode format (CPython 3.14 compatible opcode numbers).
# Do not edit!
"""
]

# _specializations: deopt target -> list of opcodes that deopt to it.
chunks.append("\n_specializations = {\n")
for key, family in deopts.items():
    chunks.append(f' "{key}": [\n')
    chunks.extend(f' "{item}",\n' for item in family)
    chunks.append(" ],\n")
chunks.append("}\n")

# Every name that appears in any deopt list.
specialized = set(itertools.chain.from_iterable(deopts.values()))

# _specialized_opmap: only the names in `specialized`, ordered by name.
chunks.append("\n_specialized_opmap = {\n")
for opcode in sorted(opcodes, key=lambda op: op.cpython_name):
    cpython_name = opcode.cpython_name
    if cpython_name in specialized:
        chunks.append(f" '{cpython_name}': {opcode.id},\n")
chunks.append("}\n")

# opmap: everything else, ordered by Opcode.__lt__.
chunks.append("\nopmap = {\n")
for opcode in sorted(opcodes):
    cpython_name = opcode.cpython_name
    if cpython_name not in specialized:
        chunks.append(f" '{cpython_name}': {opcode.id},\n")
chunks.append("}\n")

chunks.append(
    f"""
HAVE_ARGUMENT = {have_oparg}
MIN_INSTRUMENTED_OPCODE = {min_instrumented}
"""
)

output = "".join(chunks)
OPCODE_METADATA_FILE.write_text(output, encoding="utf-8")
|