mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
* Align CFG cleanup bytecode with CPython * Bytecode parity: fblock unwind, fstring join, folding, scope - compile.rs: unwind_fblock_stack returns whether a finally ran so return-statement emission can adjust location handling; restructure try/except/finally cleanup to preserve or drop boundary NOPs based on whether the body falls through; rework f-string lowering with count/join helpers; remove the per-collection-type heuristic for AST-level folding and defer to flowgraph passes; add several folding helpers and a ComprehensionLoopControl enum. - ir.rs: re-run unary/binop folding around tuple folding, add reorder_conditional_scope_exit_and_jump_back_blocks and several block classification helpers, add MAX_STR_SIZE, change is_exit_without_lineno to take the block list. - symboltable.rs: in analyze_cells, remove names owned as cells in function-like scopes from the parent's free set; mark lambda scope type explicitly. * Refine CFG scope-exit backedge ordering
674 lines
22 KiB
Python
674 lines
22 KiB
Python
#!/usr/bin/env python3
"""Compare bytecode between CPython and RustPython.

Compiles all Python files under Lib/ with both interpreters and reports
differences in the generated bytecode instructions.

Usage:
    python scripts/compare_bytecode.py
    python scripts/compare_bytecode.py --detail
    python scripts/compare_bytecode.py --filter "asyncio/*.py"
    python scripts/compare_bytecode.py --summary-json report.json
"""
|
|
|
|
import argparse
|
|
import fnmatch
|
|
import json
|
|
import os
|
|
import random
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from collections import defaultdict
|
|
|
|
# Directory containing this script; the dis_dump.py helper is looked up here.
SCRIPT_DIR, _ = os.path.split(os.path.abspath(__file__))
# Parent of the script directory; used as the helpers' working directory.
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
# Helper script executed under each interpreter to dump bytecode.
DIS_DUMP = os.path.join(SCRIPT_DIR, "dis_dump.py")
# Where the text report goes when -o/--output is not given.
DEFAULT_REPORT = os.path.join(PROJECT_ROOT, "compare_bytecode.report")
# Seconds to wait for one helper process before it is killed.
DUMP_TIMEOUT = 10 * 60
|
|
|
|
|
|
def find_rustpython():
    """Return the path of the RustPython binary to compare against.

    When the ``RUSTPYTHON`` environment variable is set, its value is returned
    unchanged (it is not checked for existence), except that a path ending in
    ``target/debug/rustpython`` is rejected. Otherwise the default release
    build under ``PROJECT_ROOT`` is returned if it is an executable file.

    Returns:
        The binary path, or ``None`` when no override is set and the default
        release binary is missing or not executable.

    Raises:
        ValueError: ``RUSTPYTHON`` points at a debug build.
    """
    override = os.environ.get("RUSTPYTHON")
    if override is None:
        default_bin = os.path.join(PROJECT_ROOT, "target", "release", "rustpython")
        usable = os.path.isfile(default_bin) and os.access(default_bin, os.X_OK)
        return default_bin if usable else None

    # Normalise first so "./target//debug/rustpython" is caught as well.
    forbidden_suffix = os.path.join("target", "debug", "rustpython")
    if os.path.normpath(override).endswith(forbidden_suffix):
        raise ValueError(
            "RUSTPYTHON must point to a release binary, not target/debug/rustpython"
        )
    return override
|
|
|
|
|
|
def collect_targets(lib_dir, pattern=None):
    """Walk *lib_dir* and list the ``.py`` files to compare.

    ``__pycache__`` and dot-prefixed directories are not descended into.
    Directories and files are visited in sorted order, so the result is
    deterministic.

    Args:
        lib_dir: Root directory to scan.
        pattern: Optional ``fnmatch`` glob applied to each path relative to
            *lib_dir*; a falsy value keeps every file.

    Returns:
        A list of ``(relative_path, full_path)`` tuples.
    """
    found = []
    for current_dir, subdirs, filenames in os.walk(lib_dir):
        # Assigning through the slice prunes os.walk's own traversal list.
        subdirs[:] = sorted(
            name
            for name in subdirs
            if not (name == "__pycache__" or name.startswith("."))
        )
        for filename in sorted(filenames):
            if filename.endswith(".py"):
                absolute = os.path.join(current_dir, filename)
                relative = os.path.relpath(absolute, lib_dir)
                if not pattern or fnmatch.fnmatch(relative, pattern):
                    found.append((relative, absolute))
    return found
|
|
|
|
|
|
def _start_one(interpreter, targets, base_dir):
    """Launch one dis_dump.py helper process for *targets*.

    Two named temporary files are created: one listing the full path of every
    target (one per line) and one, left empty, that the helper is told to
    write to via ``--output``. The helper runs with ``PROJECT_ROOT`` as its
    working directory; ``RUSTPYTHONPATH`` is set to *base_dir* whenever
    *interpreter* is not the Python running this script.

    Args:
        interpreter: Executable used to run the helper.
        targets: Sequence of ``(relative_path, full_path)`` tuples.
        base_dir: Directory passed to the helper as ``--base-dir``.

    Returns:
        A job dict with keys ``proc``, ``files_file``, ``output_file``,
        ``targets``, ``interpreter`` and ``base_dir``. The caller is
        responsible for deleting the two temporary files.

    Raises:
        Exception: Anything raised during setup is re-raised after the
            temporary files created so far have been removed.
    """
    child_env = os.environ.copy()
    if interpreter != sys.executable:
        child_env["RUSTPYTHONPATH"] = base_dir

    list_handle = None
    result_handle = None
    try:
        list_handle = tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            delete=False,
            prefix="compare-bytecode-files-",
        )
        result_handle = tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            delete=False,
            prefix="compare-bytecode-output-",
        )
        list_handle.writelines(path + "\n" for _, path in targets)
        # Close both before the child starts so it can open them by name.
        list_handle.close()
        result_handle.close()

        helper = subprocess.Popen(
            [
                interpreter,
                DIS_DUMP,
                "--base-dir",
                base_dir,
                "--files-from",
                list_handle.name,
                "--output",
                result_handle.name,
                "--progress",
                "10",
            ],
            stdout=subprocess.PIPE,
            stderr=None,  # inherit stderr so progress dots appear on terminal
            env=child_env,
            cwd=PROJECT_ROOT,
        )
        return dict(
            proc=helper,
            files_file=list_handle.name,
            output_file=result_handle.name,
            targets=targets,
            interpreter=interpreter,
            base_dir=base_dir,
        )
    except Exception:
        # Setup failed: nobody else knows about the temp files, remove them.
        for handle in (list_handle, result_handle):
            if handle is not None:
                try:
                    handle.close()
                finally:
                    if os.path.exists(handle.name):
                        os.unlink(handle.name)
        raise
|
|
|
|
|
|
def _load_dump_output(output_file):
    """Read and parse the JSON file written by a dump helper.

    Args:
        output_file: Path of the helper's output file.

    Returns:
        The decoded JSON value; ``{}`` when the file holds only whitespace;
        ``None`` (after printing a message to stderr) when the file cannot be
        read or is not valid JSON.
    """
    try:
        with open(output_file, encoding="utf-8") as stream:
            payload = stream.read().strip()
    except OSError as exc:
        print(" Failed to read dump output: %s" % exc, file=sys.stderr)
        return None

    if payload == "":
        return {}

    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError as exc:
        print(" JSON parse error: %s" % exc, file=sys.stderr)
        parsed = None
    return parsed
|
|
|
|
|
|
def _run_sync_dump(interpreter, targets, base_dir, timeout=DUMP_TIMEOUT):
    """Run one dump helper to completion and return its parsed output.

    Args:
        interpreter: Executable used to run the helper.
        targets: Sequence of ``(relative_path, full_path)`` tuples.
        base_dir: Directory passed to the helper as ``--base-dir``.
        timeout: Seconds to wait before the helper is killed.

    Returns:
        The value parsed from the helper's output file; ``{}`` when that file
        is empty or the helper timed out; ``None`` when the output could not
        be read or parsed.
    """
    job = _start_one(interpreter, targets, base_dir)
    proc = job["proc"]
    stdout = b""
    data = {}
    timed_out = False
    try:
        try:
            stdout = proc.communicate(timeout=timeout)[0]
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.communicate()  # reap the killed child
            print(f" Timeout ({timeout}s)", file=sys.stderr)
            timed_out = True
        else:
            # Only parse output from a helper that actually finished; a
            # killed helper's file may be truncated and its result is
            # discarded anyway.
            data = _load_dump_output(job["output_file"])
    finally:
        # Runs on every exit path (including unexpected exceptions) so the
        # temporary files never outlive the job.
        for path in (job["files_file"], job["output_file"]):
            if os.path.exists(path):
                os.unlink(path)

    if timed_out:
        return {}
    if proc.returncode != 0:
        print(" Warning: exited with code %d" % proc.returncode, file=sys.stderr)
    stray = stdout.decode(errors="replace").strip()
    if stray:
        print(" Warning: unexpected stdout from dump helper", file=sys.stderr)
    return data
|
|
|
|
|
|
def _rerun_missing_targets(interpreter, targets, base_dir):
    """Dump each target in its own helper process and collect the results.

    A target whose individual run fails (``None``) or yields nothing (empty
    result) is recorded with ``status`` ``"error"`` so it still shows up in
    the merged data. One warning per failure kind is printed to stderr,
    naming at most the first five affected files.

    Args:
        interpreter: Executable used to run the helper.
        targets: Sequence of ``(relative_path, full_path)`` tuples.
        base_dir: Directory passed to the helper as ``--base-dir``.

    Returns:
        Dict keyed by relative path, merged from all individual runs.
    """
    results = {}
    crashed = []
    blank = []
    for target in targets:
        rel = target[0]
        outcome = _run_sync_dump(interpreter, [target], base_dir)
        if outcome:
            results.update(outcome)
            continue
        if outcome is None:
            crashed.append(rel)
            reason = "dump helper failed while rerunning target"
        else:
            blank.append(rel)
            reason = "dump helper produced no data while rerunning target"
        results[rel] = {"status": "error", "error": reason}

    warnings = (
        (" Warning: rerun failed for %d file(s): %s", crashed),
        (" Warning: rerun produced no data for %d file(s): %s", blank),
    )
    for template, names in warnings:
        if names:
            print(template % (len(names), ", ".join(names[:5])), file=sys.stderr)
    return results
|
|
|
|
|
|
def _finish_one(job, timeout=DUMP_TIMEOUT):
    """Wait for a single dis_dump.py process and return parsed JSON.

    The job's temporary files are always removed. If the helper times out or
    its output cannot be loaded, every target of the job is re-run serially;
    if the output loads but lacks some targets, only those are re-run and
    merged in.

    Args:
        job: Job dict as returned by ``_start_one``.
        timeout: Seconds to wait before the helper is killed.

    Returns:
        Dict keyed by relative path with the dump result of each target.
    """
    proc = job["proc"]
    stdout = b""
    try:
        stdout = proc.communicate(timeout=timeout)[0]
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.communicate()  # reap the killed child
        print(
            f" Timeout ({timeout}s), retrying {len(job['targets'])} file(s) serially",
            file=sys.stderr,
        )
        data = None
    else:
        data = _load_dump_output(job["output_file"])
    finally:
        for path in (job["files_file"], job["output_file"]):
            if os.path.exists(path):
                os.unlink(path)

    if proc.returncode != 0:
        print(" Warning: exited with code %d" % proc.returncode, file=sys.stderr)

    stray = stdout.decode(errors="replace").strip()
    if stray:
        print(" Warning: unexpected stdout from dump helper", file=sys.stderr)

    if data is None:
        # Nothing usable from the batch run: redo every target one by one.
        return _rerun_missing_targets(
            job["interpreter"], job["targets"], job["base_dir"]
        )

    # Targets the helper was given but did not report on.
    missing = [target for target in job["targets"] if target[0] not in data]
    if missing:
        print(
            " Re-running %d missing file(s) serially" % len(missing),
            file=sys.stderr,
        )
        data.update(
            _rerun_missing_targets(job["interpreter"], missing, job["base_dir"])
        )
    return data
|
|
|
|
|
|
def start_dump(interpreter, targets, base_dir, num_workers=1):
    """Start dump helpers for *targets*, spread over up to *num_workers*.

    Targets are dealt round-robin into ``num_workers`` batches, one helper
    per batch. A single helper handles everything when ``num_workers`` is at
    most 1 or when there are no more targets than workers.

    Returns:
        A list of job dicts, one per helper process started.
    """
    if num_workers > 1 and len(targets) > num_workers:
        batches = [[] for _ in range(num_workers)]
        for position, target in enumerate(targets):
            batches[position % num_workers].append(target)
        return [
            _start_one(interpreter, batch, base_dir) for batch in batches if batch
        ]

    return [_start_one(interpreter, targets, base_dir)]
|
|
|
|
|
|
def finish_dump(procs):
    """Collect the results of every started job into a single dict.

    Jobs are waited on in the given order; when two jobs report the same key,
    the later job's entry replaces the earlier one.
    """
    return {
        relpath: entry
        for job in procs
        for relpath, entry in _finish_one(job).items()
    }
|
|
|
|
|
|
def _instruction_at(insts, index):
    """Return ``insts[index]``, or ``None`` when *index* is past the end."""
    return insts[index] if index < len(insts) else None


def _pair_children(cp_children, rp_children):
    """Pair child code objects of the two compilers by name and position.

    Children sharing a name are matched in order of appearance (first with
    first, second with second, ...). Names are visited once each: CPython's
    names in first-seen order, followed by names only RustPython produced.

    Yields:
        ``(name, cp_child, rp_child)`` tuples; the side that has no
        counterpart is ``None``.
    """
    cp_by_name = defaultdict(list)
    rp_by_name = defaultdict(list)
    for child in cp_children:
        cp_by_name[child["name"]].append(child)
    for child in rp_children:
        rp_by_name[child["name"]].append(child)

    # A dict keeps insertion order and drops duplicates, so a name shared by
    # several RustPython-only children is still visited exactly once.
    names = dict.fromkeys(child["name"] for child in cp_children)
    names.update(dict.fromkeys(child["name"] for child in rp_children))

    for name in names:
        cp_list = cp_by_name.get(name, [])
        rp_list = rp_by_name.get(name, [])
        for i in range(max(len(cp_list), len(rp_list))):
            yield name, _instruction_at(cp_list, i), _instruction_at(rp_list, i)


def compare_insts(cp_insts, rp_insts):
    """Compare two instruction lists. Returns list of (index, cp, rp) diffs.

    Instructions are compared slot by slot; when one list is shorter, its
    missing slots compare as ``None``.
    """
    diffs = []
    for i in range(max(len(cp_insts), len(rp_insts))):
        cp = _instruction_at(cp_insts, i)
        rp = _instruction_at(rp_insts, i)
        if cp != rp:
            diffs.append((i, cp, rp))
    return diffs


def compare_code(cp_code, rp_code, path=""):
    """Recursively compare code objects. Returns [(code_path, diffs)].

    Args:
        cp_code: CPython code-object dict with a ``"name"`` key and optional
            ``"insts"`` and ``"children"`` lists.
        rp_code: The RustPython counterpart, same layout.
        path: ``/``-joined names of the enclosing code objects.

    Returns:
        One ``(code_path, diffs)`` entry per code object that differs, where
        ``diffs`` is the output of ``compare_insts``. A child present on one
        side only is reported with a single ``(-1, ...)`` marker entry.
    """
    results = []
    own_name = cp_code["name"]
    full = (path + "/" + own_name) if path else own_name

    diffs = compare_insts(cp_code.get("insts", []), rp_code.get("insts", []))
    if diffs:
        results.append((full, diffs))

    pairs = _pair_children(cp_code.get("children", []), rp_code.get("children", []))
    for child_name, cp_child, rp_child in pairs:
        if cp_child is not None and rp_child is not None:
            results.extend(compare_code(cp_child, rp_child, full))
        elif cp_child is not None:
            results.append((full + "/" + child_name, [(-1, "extra in CPython", None)]))
        else:
            results.append((full + "/" + child_name, [(-1, None, "extra in RustPython")]))

    return results


def compare_code_summary(cp_code, rp_code):
    """Recursively compare code objects and return summary counts.

    Returns:
        ``(diff_code_objects, diff_instructions)``: the number of code
        objects that differ and the total number of mismatched instruction
        slots. A child present on one side only adds one to each count.
    """
    diff_code_objects = 0
    diff_instructions = compare_insts_count(
        cp_code.get("insts", []), rp_code.get("insts", [])
    )
    if diff_instructions:
        diff_code_objects += 1

    pairs = _pair_children(cp_code.get("children", []), rp_code.get("children", []))
    for _, cp_child, rp_child in pairs:
        if cp_child is not None and rp_child is not None:
            child_objects, child_insts = compare_code_summary(cp_child, rp_child)
            diff_code_objects += child_objects
            diff_instructions += child_insts
        else:
            diff_code_objects += 1
            diff_instructions += 1

    return diff_code_objects, diff_instructions


def compare_insts_count(cp_insts, rp_insts):
    """Count mismatched instruction slots without storing the full diff."""
    return sum(
        1
        for i in range(max(len(cp_insts), len(rp_insts)))
        if _instruction_at(cp_insts, i) != _instruction_at(rp_insts, i)
    )
|
|
|
|
|
|
def main():
    """Command-line entry point: dump, compare and report bytecode differences.

    Steps:
      1. Parse arguments; locate the RustPython binary and dis_dump.py.
      2. Collect the target files, optionally reduced to a random sample.
      3. Dump bytecode with both interpreters (helpers run in parallel).
      4. Compare the dumps per file, write the text report and, when
         requested, a JSON summary.

    Always terminates through ``sys.exit``: status 0 only when no file
    differs and RustPython failed on none, status 1 otherwise (including
    setup errors such as a missing binary or no matching files).
    """
    parser = argparse.ArgumentParser(description="Compare CPython/RustPython bytecode")
    parser.add_argument(
        "--detail", action="store_true", help="Show per-file instruction differences"
    )
    parser.add_argument("--filter", default=None, help="Glob pattern to filter files")
    parser.add_argument(
        "--max-diffs",
        type=int,
        default=5,
        help="Max diffs shown per code object (default: 5)",
    )
    parser.add_argument(
        "--summary-json", default=None, help="Write summary as JSON to file"
    )
    parser.add_argument(
        "--sample",
        type=int,
        default=None,
        help="Compare a random sample of N matching files",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Random seed used with --sample",
    )
    parser.add_argument(
        "--list-limit",
        type=int,
        default=10,
        help="Max differing files to print in non-detail mode (default: 10)",
    )
    parser.add_argument(
        "--lib-dir",
        default=os.path.join(PROJECT_ROOT, "Lib"),
        help="Library directory to compare",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=None,
        help="Number of parallel workers per interpreter (default: cpu_count)",
    )
    parser.add_argument(
        "-o",
        "--output",
        default=DEFAULT_REPORT,
        help="Report output file (default: compare_bytecode.report)",
    )
    args = parser.parse_args()

    def log(*a, **kw):
        """Print a progress/diagnostic message to stderr."""
        print(*a, file=sys.stderr, **kw)

    def diff_rank(item):
        """Sort key ranking a per-file summary by how much it differs."""
        return (item["diff_instructions"], item["diff_code_objects"], item["path"])

    # Phase 0: locate the tools
    try:
        rp_bin = find_rustpython()
    except ValueError as exc:
        print("Error: %s" % exc, file=sys.stderr)
        sys.exit(1)
    if not rp_bin:
        print("Error: RustPython binary not found.", file=sys.stderr)
        print(" Build with: cargo build --release", file=sys.stderr)
        print(" Or set RUSTPYTHON=/path/to/binary", file=sys.stderr)
        sys.exit(1)
    if not os.path.isfile(DIS_DUMP):
        print("Error: disassembler helper not found: %s" % DIS_DUMP, file=sys.stderr)
        print(
            " Expected scripts/dis_dump.py from origin/bytecode-parity",
            file=sys.stderr,
        )
        sys.exit(1)

    targets = collect_targets(args.lib_dir, args.filter)
    sample_seed = None
    if args.sample is not None:
        if args.sample < 1:
            print("Error: --sample must be >= 1", file=sys.stderr)
            sys.exit(1)
        # Without --seed pick a fresh seed, but remember it so the report can
        # state how to reproduce the same sample.
        sample_seed = (
            args.seed
            if args.seed is not None
            else random.SystemRandom().randrange(2**32)
        )
        rng = random.Random(sample_seed)
        sample_size = min(args.sample, len(targets))
        targets = sorted(rng.sample(targets, sample_size), key=lambda item: item[0])
    if not targets:
        print("Error: no Python files matched", file=sys.stderr)
        sys.exit(1)

    report_path = args.output

    log("Report: %s" % os.path.relpath(report_path, PROJECT_ROOT))
    log("Targets: %d file(s)" % len(targets))
    num_workers = args.jobs if args.jobs else os.cpu_count() or 4
    log("Workers: %d per interpreter" % num_workers)
    sys.stderr.write("Dumping bytecode ")
    sys.stderr.flush()

    # Phase 1: Dump. Start both interpreters' helpers before waiting on
    # either so they run concurrently.
    cp_procs = start_dump(sys.executable, targets, args.lib_dir, num_workers)
    rp_procs = start_dump(rp_bin, targets, args.lib_dir, num_workers)
    cp_data = finish_dump(cp_procs)
    rp_data = finish_dump(rp_procs)
    sys.stderr.write("\n")
    if not cp_data:
        log("Error: CPython dump produced no data")
        sys.exit(1)

    # Phase 2: Compare
    all_files = sorted(set(cp_data) | set(rp_data))

    match = 0
    differ = 0
    cp_err = 0
    rp_err = 0
    both_err = 0
    rp_miss = 0

    diff_files = []  # (path, compare_code() output); filled in --detail mode
    rp_error_files = []  # (path, error message)
    diff_summaries = []  # per-file count dicts; filled in non-detail mode
    need_detailed_diffs = args.detail

    for fp in all_files:
        cp = cp_data.get(fp)
        rp = rp_data.get(fp)

        if rp is None:
            rp_miss += 1
            continue

        cp_ok = cp and cp.get("status") == "ok"
        rp_ok = rp and rp.get("status") == "ok"

        if not cp_ok and not rp_ok:
            both_err += 1
        elif not cp_ok:
            cp_err += 1
        elif not rp_ok:
            rp_err += 1
            rp_error_files.append((fp, rp.get("error", "?")))
        elif need_detailed_diffs:
            code_diffs = compare_code(cp["code"], rp["code"])
            if code_diffs:
                differ += 1
                diff_files.append((fp, code_diffs))
            else:
                match += 1
        else:
            diff_code_objects, diff_instructions = compare_code_summary(
                cp["code"], rp["code"]
            )
            if diff_code_objects:
                differ += 1
                diff_summaries.append(
                    {
                        "path": fp,
                        "diff_code_objects": diff_code_objects,
                        "diff_instructions": diff_instructions,
                    }
                )
            else:
                match += 1

    total = match + differ + cp_err + rp_err + both_err + rp_miss

    def pct(n):
        """Format *n* as a percentage of all compared files."""
        return "%.1f%%" % (100.0 * n / total) if total else "0%"

    # Most-different files first; shared by the text report and the JSON.
    ranked_summaries = sorted(diff_summaries, key=diff_rank, reverse=True)

    # Phase 3: Write report to file
    # Explicit utf-8: paths and error messages may contain non-ASCII text and
    # the platform default encoding is not guaranteed to handle it.
    with open(report_path, "w", encoding="utf-8") as out:

        def p(*a):
            """Print one line into the report file."""
            print(*a, file=out)

        p("CPython: %s (%s)" % (sys.executable, sys.version.split()[0]))
        p("RustPython: %s" % rp_bin)
        p("Lib: %s" % args.lib_dir)
        if sample_seed is not None:
            p("Sample: %s file(s), seed=%s" % (len(targets), sample_seed))
        p()
        p("=" * 60)
        p(" Bytecode Comparison Report")
        p("=" * 60)
        p()
        p(" Total files: %6d" % total)
        p(" Match: %6d (%s)" % (match, pct(match)))
        p(" Differ: %6d (%s)" % (differ, pct(differ)))
        p(" RustPython error: %6d (%s)" % (rp_err, pct(rp_err)))
        p(" CPython error: %6d (%s)" % (cp_err, pct(cp_err)))
        p(" Both error: %6d (%s)" % (both_err, pct(both_err)))
        if rp_miss:
            p(" RustPython missing: %6d (%s)" % (rp_miss, pct(rp_miss)))
        p()

        if args.detail:
            if rp_error_files:
                p("-" * 60)
                p(" RustPython Compile Errors")
                p("-" * 60)
                for fp, err in rp_error_files[:50]:
                    p(" %s: %s" % (fp, err))
                if len(rp_error_files) > 50:
                    p(" ... and %d more" % (len(rp_error_files) - 50))
                p()

            if diff_files:
                # A negative --max-diffs would slice from the end; treat it
                # as "show none".
                max_shown = max(args.max_diffs, 0)
                p("-" * 60)
                p(" Bytecode Differences")
                p("-" * 60)
                for fp, code_diffs in diff_files:
                    p()
                    p(" %s:" % fp)
                    for code_path, diffs in code_diffs:
                        shown = min(len(diffs), max_shown)
                        p(" %s: %d difference(s)" % (code_path, len(diffs)))
                        for idx, cp_inst, rp_inst in diffs[:shown]:
                            if idx == -1:
                                # Marker entry: code object exists on one
                                # side only.
                                p(" %s" % (cp_inst or rp_inst))
                            else:
                                p(" [%3d] CPython: %s" % (idx, cp_inst))
                                p(" RustPython: %s" % rp_inst)
                        if len(diffs) > shown:
                            p(" ... and %d more" % (len(diffs) - shown))
                p()
        else:
            # With --summary-json the per-file list lives in the JSON file,
            # so it is left out of the text report.
            list_limit = 0 if args.summary_json else max(args.list_limit, 0)
            if diff_summaries and list_limit:
                shown = min(list_limit, len(diff_summaries))
                # Separate name: `total` must keep meaning "all files" for
                # the JSON summary written below.
                differing_total = len(diff_summaries)
                p(f"Top differing files ({shown} shown of {differing_total}):")
                for item in ranked_summaries[:list_limit]:
                    p(
                        " %s (%d code objects, %d instruction diffs)"
                        % (
                            item["path"],
                            item["diff_code_objects"],
                            item["diff_instructions"],
                        )
                    )
                p()
            p("Use --detail to see specific instruction differences.")
            p()

    # Summary JSON output
    if args.summary_json:
        summary = {
            "total": total,
            "sample": args.sample,
            "sample_seed": sample_seed,
            "match": match,
            "differ": differ,
            "rp_error": rp_err,
            "cp_error": cp_err,
            "both_error": both_err,
            "rp_missing": rp_miss,
            "match_pct": round(100.0 * match / total, 2) if total else 0,
            "diff_files": [fp for fp, _ in diff_files]
            if need_detailed_diffs
            else [item["path"] for item in diff_summaries],
            "top_diff_files": ranked_summaries[:20],
            "rp_error_files": [fp for fp, _ in rp_error_files],
        }
        with open(args.summary_json, "w", encoding="utf-8") as f:
            json.dump(summary, f, indent=2)
        log("Summary JSON: %s" % args.summary_json)

    log("Done: %d match, %d differ, %d errors" % (match, differ, rp_err))
    sys.exit(0 if differ == 0 and rp_err == 0 else 1)
|
|
|
|
|
|
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|