""" Generate Lib/_opcode_metadata.py for RustPython bytecode. This file generates opcode metadata that is compatible with CPython 3.13. """ import itertools import pathlib import re import typing ROOT = pathlib.Path(__file__).parents[1] BYTECODE_FILE = ( ROOT / "crates" / "compiler-core" / "src" / "bytecode" / "instructions.rs" ) OPCODE_METADATA_FILE = ROOT / "Lib" / "_opcode_metadata.py" # Opcodes that needs to be first, regardless of their opcode ID. PRIORITY_OPMAP = { "CACHE", "RESERVED", "RESUME", "INSTRUMENTED_LINE", "ENTER_EXECUTOR", } def to_snake_case(s: str) -> str: res = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", s) return re.sub(r"(\D)(\d+)$", r"\1_\2", res).upper() class Opcode(typing.NamedTuple): rust_name: str id: int have_oparg: bool @property def is_instrumented(self): return self.cpython_name.startswith("INSTRUMENTED_") @property def cpython_name(self): return to_snake_case(self.rust_name) @classmethod def from_str(cls, text: str): # Split on commas that are followed by a newline + an uppercase letter (new entry) entries = re.split(r",\s*\n\s*(?=[A-Z])", text) for entry in entries: entry = entry.strip() if not entry: continue have_oparg = "Arg<" in entry # Hacky but works rust_name = re.match(r"(\w+)", entry).group(1) id_num = re.findall(r"= (\d+)", entry)[0] yield cls(rust_name=rust_name, id=int(id_num), have_oparg=have_oparg) def __lt__(self, other: typing.Self) -> bool: sprio, oprio = ( opcode.cpython_name not in PRIORITY_OPMAP for opcode in (self, other) ) return (sprio, self.id) < (oprio, other.id) def extract_enum_body(text: str, name: str) -> str: # Find the start of the enum block start_match = re.search(rf"enum\s+{name}\s*\{{", text) if not start_match: return None # Manually track brace depth from that point depth = 0 start = start_match.end() - 1 # position of opening '{' for i, ch in enumerate(text[start:], start): if ch == "{": depth += 1 elif ch == "}": depth -= 1 if depth == 0: # Return only the inner content (excluding outer braces) return text[start + 1 : i] def build_deopts(text: str) -> dict[str, list[str]]: raw_body = re.search(r"fn deopt\(self\)(.*)", text, re.DOTALL).group(1) match_start = raw_body.find("match self") if match_start == -1: raise ValueError("Could not detect a match statement in deopt method") brace_depth = 0 block_start = None block_end = None for i, ch in enumerate(raw_body[match_start:], match_start): if ch == "{": brace_depth += 1 if block_start is None: block_start = i + 1 elif ch == "}": brace_depth -= 1 if brace_depth == 0: block_end = i break match_body = raw_body[block_start:block_end] arm_pattern = re.compile( r"((?:Self::\w+\s*\|\s*)*Self::\w+)\s*=>\s*(?:\{\s*)?Self::(\w+)", re.DOTALL ) variants_pattern = re.compile(r"Self::(\w+)") deopts = {} for hit in arm_pattern.finditer(match_body): raw_variants = hit.group(1) opcode = hit.group(2) variants = variants_pattern.findall(raw_variants) key = to_snake_case(opcode) value = [to_snake_case(variant) for variant in variants] deopts[key] = value return deopts contents = BYTECODE_FILE.read_text(encoding="utf-8") deopts = build_deopts(contents) enum_body = "\n".join( extract_enum_body(contents, enum_name) for enum_name in ("Instruction", "PseudoInstruction") ) opcodes = list(Opcode.from_str(enum_body)) have_oparg = min(opcode.id for opcode in opcodes if opcode.have_oparg) - 1 min_instrumented = min(opcode.id for opcode in opcodes if opcode.is_instrumented) # Generate the output file output = """# This file is generated by scripts/generate_opcode_metadata.py # for RustPython bytecode format (CPython 3.14 compatible opcode numbers). # Do not edit! """ output += "\n_specializations = {\n" for key, lst in deopts.items(): output += f' "{key}": [\n' for item in lst: output += f' "{item}",\n' output += " ],\n" output += "}\n" specialized = set(itertools.chain.from_iterable(deopts.values())) output += "\n_specialized_opmap = {\n" for opcode in sorted(opcodes, key=lambda op: op.cpython_name): cpython_name = opcode.cpython_name if cpython_name not in specialized: continue output += f" '{cpython_name}': {opcode.id},\n" output += "}\n" output += "\nopmap = {\n" for opcode in sorted(opcodes): cpython_name = opcode.cpython_name if cpython_name in specialized: continue output += f" '{cpython_name}': {opcode.id},\n" output += "}\n" output += f""" HAVE_ARGUMENT = {have_oparg} MIN_INSTRUMENTED_OPCODE = {min_instrumented} """ OPCODE_METADATA_FILE.write_text(output, encoding="utf-8")