Bytecode parity - exception (#7557)

* Fix exception handling: except* chaining, finally cleanup, RERAISE
  - Align except* bytecode chaining
  - Fix exception state model and finally handler cleanup
  - Fix RERAISE to only pop exception, preserve values below
* Port IR optimization passes from flowgraph.c
  - BUILD_TUPLE n + UNPACK_SEQUENCE n elimination
  - Dead store elimination within basic blocks
  - apply_static_swaps for SWAP reduction
* Add bytecode comparison and disassembly dump scripts
  - compare_bytecode.py: compare CPython vs RustPython bytecode output
  - dis_dump.py: extract disassembly in normalized JSON format
2026-06-02 19:39:49 +09:00 · 2026-04-10 12:51:48 +09:00
parent 7b5ac61026
commit a49ce5bf34
8 changed files with 1726 additions and 207 deletions
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -144,7 +144,6 @@ class TestTranforms(BytecodeTestCase):
            self.assertInBytecode(f, elem)
        self.check_lnotab(f)

-    @unittest.expectedFailure  # TODO: RUSTPYTHON
    def test_pack_unpack(self):
        for line, elem in (
            ('a, = a,', 'LOAD_CONST',),
@@ -158,7 +157,7 @@ class TestTranforms(BytecodeTestCase):
                self.assertNotInBytecode(code, 'UNPACK_SEQUENCE')
                self.check_lnotab(code)

-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AssertionError: 1 != 2
+    @unittest.expectedFailure  # TODO: RUSTPYTHON; LOAD_CONST count mismatch in long-tuple branch
    def test_constant_folding_tuples_of_constants(self):
        for line, elem in (
            ('a = 1,2,3', (1, 2, 3)),
@@ -873,7 +872,6 @@ class TestMarkingVariablesAsUnKnown(BytecodeTestCase):
        self.assertInBytecode(f, 'LOAD_FAST_CHECK')
        self.assertNotInBytecode(f, 'LOAD_FAST')

-    @unittest.expectedFailure  # TODO: RUSTPYTHON; RETURN_VALUE
    def test_load_fast_unknown_because_del(self):
        def f():
            x = 1
@@ -928,7 +926,6 @@ class TestMarkingVariablesAsUnKnown(BytecodeTestCase):
        self.assertInBytecode(f, 'LOAD_FAST_BORROW')
        self.assertNotInBytecode(f, 'LOAD_FAST_CHECK')

-    @unittest.expectedFailure  # TODO: RUSTPYTHON; L5 to L6 -> L6 [1] lasti
    def test_load_fast_unknown_after_error(self):
        def f():
            try:
@@ -940,7 +937,6 @@ class TestMarkingVariablesAsUnKnown(BytecodeTestCase):
        # Assert that it doesn't occur in the LOAD_FAST_CHECK branch.
        self.assertInBytecode(f, 'LOAD_FAST_CHECK')

-    @unittest.expectedFailure  # TODO: RUSTPYTHON; L5 to L6 -> L6 [1] lasti
    def test_load_fast_unknown_after_error_2(self):
        def f():
            try:
--- a/crates/codegen/src/compile.rs
+++ b/crates/codegen/src/compile.rs
@@ -3128,23 +3128,18 @@ impl Compiler {
                }
                self.compile_statements(finalbody)?;

-                // Pop FinallyEnd fblock BEFORE emitting RERAISE
-                // This ensures RERAISE routes to outer exception handler, not cleanup block
-                // Cleanup block is only for new exceptions raised during finally body execution
+                // RERAISE must be inside the cleanup handler's exception table
+                // range. When RERAISE re-raises the exception, the cleanup
+                // handler (COPY 3, POP_EXCEPT, RERAISE 1) runs POP_EXCEPT to
+                // restore exc_info before the exception reaches the outer handler.
+                emit!(self, Instruction::Reraise { depth: 0 });
+
+                // PopBlock after RERAISE (dead code, but marks the exception
+                // table range end so the cleanup covers RERAISE).
                if finally_cleanup_block.is_some() {
                    emit!(self, PseudoInstruction::PopBlock);
                    self.pop_fblock(FBlockType::FinallyEnd);
                }
-
-                // Restore prev_exc as current exception before RERAISE
-                // Stack: [prev_exc, exc] -> COPY 2 -> [prev_exc, exc, prev_exc]
-                // POP_EXCEPT pops prev_exc and sets exc_info->exc_value = prev_exc
-                // Stack after POP_EXCEPT: [prev_exc, exc]
-                emit!(self, Instruction::Copy { i: 2 });
-                emit!(self, Instruction::PopExcept);
-
-                // RERAISE 0: re-raise the original exception to outer handler
-                emit!(self, Instruction::Reraise { depth: 0 });
            }

            if let Some(cleanup) = finally_cleanup_block {
@@ -3170,6 +3165,7 @@ impl Compiler {
        emit!(self, PseudoInstruction::PopBlock);
        self.pop_fblock(FBlockType::TryExcept);
        emit!(self, PseudoInstruction::Jump { delta: else_block });
+        self.set_no_location();

        // except handlers:
        self.switch_to_block(handler_block);
@@ -3400,24 +3396,18 @@ impl Compiler {
            // Run finally body
            self.compile_statements(finalbody)?;

-            // Pop FinallyEnd fblock BEFORE emitting RERAISE
-            // This ensures RERAISE routes to outer exception handler, not cleanup block
-            // Cleanup block is only for new exceptions raised during finally body execution
+            // RERAISE must be inside the cleanup handler's exception table
+            // range. The cleanup handler (COPY 3, POP_EXCEPT, RERAISE 1)
+            // runs POP_EXCEPT to restore exc_info before re-raising to
+            // the outer handler.
+            emit!(self, Instruction::Reraise { depth: 0 });
+
+            // PopBlock after RERAISE (dead code, but marks the exception
+            // table range end so the cleanup covers RERAISE).
            if finally_cleanup_block.is_some() {
                emit!(self, PseudoInstruction::PopBlock);
                self.pop_fblock(FBlockType::FinallyEnd);
            }
-
-            // Restore prev_exc as current exception before RERAISE
-            // Stack: [lasti, prev_exc, exc] -> COPY 2 -> [lasti, prev_exc, exc, prev_exc]
-            // POP_EXCEPT pops prev_exc and sets exc_info->exc_value = prev_exc
-            // Stack after POP_EXCEPT: [lasti, prev_exc, exc]
-            emit!(self, Instruction::Copy { i: 2 });
-            emit!(self, Instruction::PopExcept);
-
-            // RERAISE 0: re-raise the original exception to outer handler
-            // Stack: [lasti, prev_exc, exc] - exception is on top
-            emit!(self, Instruction::Reraise { depth: 0 });
        }

        // finally cleanup block
@@ -3448,8 +3438,8 @@ impl Compiler {
    ) -> CompileResult<()> {
        let handler_block = self.new_block();
        let cleanup_block = self.new_block();
-        let orelse_block = self.new_block();
        let end_block = self.new_block();
+        let orelse_block = self.new_block();

        emit!(self, Instruction::Nop);
        emit!(
@@ -3598,7 +3588,9 @@ impl Compiler {

        self.switch_to_block(orelse_block);
        self.set_no_location();
-        self.compile_statements(orelse)?;
+        if !orelse.is_empty() {
+            self.compile_statements(orelse)?;
+        }
        emit!(
            self,
            PseudoInstruction::JumpNoInterrupt { delta: end_block }
@@ -3620,7 +3612,7 @@ impl Compiler {
        // Stack layout during handler processing: [prev_exc, orig, list, rest]
        let handler_block = self.new_block();
        let finally_block = self.new_block();
-        let else_block = self.new_block();
+        let cleanup_block = self.new_block();
        let end_block = self.new_block();
        let reraise_star_block = self.new_block();
        let reraise_block = self.new_block();
@@ -3630,6 +3622,12 @@ impl Compiler {
            None
        };
        let exit_block = self.new_block();
+        let continuation_block = end_block;
+        let else_block = if orelse.is_empty() && finalbody.is_empty() {
+            continuation_block
+        } else {
+            self.new_block()
+        };

        // Emit NOP at the try: line so LINE events fire for it
        emit!(self, Instruction::Nop);
@@ -3667,14 +3665,23 @@ impl Compiler {
        self.switch_to_block(handler_block);
        // Stack: [exc] (from exception table)

+        emit!(
+            self,
+            PseudoInstruction::SetupCleanup {
+                delta: cleanup_block
+            }
+        );
+
        // PUSH_EXC_INFO
        emit!(self, Instruction::PushExcInfo);
        // Stack: [prev_exc, exc]

        // Push EXCEPTION_GROUP_HANDLER fblock
-        let eg_dummy1 = self.new_block();
-        let eg_dummy2 = self.new_block();
-        self.push_fblock(FBlockType::ExceptionGroupHandler, eg_dummy1, eg_dummy2)?;
+        self.push_fblock(
+            FBlockType::ExceptionGroupHandler,
+            cleanup_block,
+            cleanup_block,
+        )?;

        // Initialize handler stack before the loop
        // BUILD_LIST 0 + COPY 2 to set up [prev_exc, orig, list, rest]
@@ -3695,17 +3702,24 @@ impl Compiler {
                    delta: reraise_star_block
                }
            );
+            self.set_no_location();
        }
        for (i, handler) in handlers.iter().enumerate() {
            let ast::ExceptHandler::ExceptHandler(ast::ExceptHandlerExceptHandler {
                type_,
                name,
                body,
+                range: handler_range,
                ..
            }) = handler;
+            let is_last_handler = i == n - 1;

            let no_match_block = self.new_block();
-            let next_block = self.new_block();
+            let next_handler_block = if is_last_handler {
+                reraise_star_block
+            } else {
+                self.new_block()
+            };

            // Compile exception type
            if let Some(exc_type) = type_ {
@@ -3762,7 +3776,7 @@ impl Compiler {
            );
            self.push_fblock_full(
                FBlockType::HandlerCleanup,
-                next_block,
+                next_handler_block,
                end_block,
                if let Some(alias) = name {
                    FBlockDatum::ExceptionName(alias.as_str().to_owned())
@@ -3775,6 +3789,7 @@ impl Compiler {
            self.compile_statements(body)?;

            // Handler body completed normally
+            self.set_no_location();
            emit!(self, PseudoInstruction::PopBlock);
            self.pop_fblock(FBlockType::HandlerCleanup);

@@ -3785,8 +3800,15 @@ impl Compiler {
                self.compile_name(alias.as_str(), NameUsage::Delete)?;
            }

-            // Jump to next handler
-            emit!(self, PseudoInstruction::Jump { delta: next_block });
+            if is_last_handler {
+                emit!(self, Instruction::ListAppend { i: 1 });
+            }
+            emit!(
+                self,
+                PseudoInstruction::Jump {
+                    delta: next_handler_block
+                }
+            );

            // Handler raised an exception (cleanup_end label)
            self.switch_to_block(handler_except_block);
@@ -3794,6 +3816,7 @@ impl Compiler {
            // (lasti is pushed because push_lasti=true in HANDLER_CLEANUP fblock)

            // Cleanup name binding
+            self.set_no_location();
            if let Some(alias) = name {
                self.emit_load_const(ConstantData::None);
                self.store_name(alias.as_str())?;
@@ -3812,36 +3835,43 @@ impl Compiler {
            emit!(self, Instruction::PopTop);
            // Stack: [prev_exc, orig, list, new_rest]

-            // JUMP except_with_error
-            // We directly JUMP to next_block since no_match_block falls through to it
-            emit!(self, PseudoInstruction::Jump { delta: next_block });
-
-            // No match - pop match (None)
-            self.switch_to_block(no_match_block);
-            emit!(self, Instruction::PopTop); // pop match (None)
-            // Stack: [prev_exc, orig, list, new_rest]
-            // Falls through to next_block
-
-            // except_with_error label
-            // All paths merge here at next_block
-            self.switch_to_block(next_block);
-            // Stack: [prev_exc, orig, list, rest]
-
-            // After last handler, append rest to list
-            if i == n - 1 {
-                // Stack: [prev_exc, orig, list, rest]
-                // ADDOP_I(c, NO_LOCATION, LIST_APPEND, 1);
-                // PEEK(1) = stack[len-1] after pop
-                // RustPython nth_value(i) = stack[len-i-1] after pop
-                // For LIST_APPEND 1: stack[len-1] = stack[len-i-1] -> i = 0
+            if is_last_handler {
                emit!(self, Instruction::ListAppend { i: 1 });
-                // Stack: [prev_exc, orig, list]
                emit!(
                    self,
                    PseudoInstruction::Jump {
                        delta: reraise_star_block
                    }
                );
+            } else {
+                emit!(
+                    self,
+                    PseudoInstruction::Jump {
+                        delta: next_handler_block
+                    }
+                );
+            }
+
+            if is_last_handler {
+                self.switch_to_block(no_match_block);
+                self.set_source_range(*handler_range);
+                emit!(self, Instruction::PopTop); // pop match (None)
+                // Stack: [prev_exc, orig, list, new_rest]
+
+                self.set_no_location();
+                emit!(self, Instruction::ListAppend { i: 1 });
+                emit!(
+                    self,
+                    PseudoInstruction::Jump {
+                        delta: reraise_star_block
+                    }
+                );
+            } else {
+                self.switch_to_block(no_match_block);
+                self.set_source_range(*handler_range);
+                emit!(self, Instruction::PopTop); // pop match (None)
+                // Stack: [prev_exc, orig, list, new_rest]
+                self.switch_to_block(next_handler_block);
            }
        }

@@ -3851,6 +3881,7 @@ impl Compiler {
        // Reraise star block
        self.switch_to_block(reraise_star_block);
        // Stack: [prev_exc, orig, list]
+        self.set_no_location();

        // CALL_INTRINSIC_2 PREP_RERAISE_STAR
        // Takes 2 args (orig, list) and produces result
@@ -3880,7 +3911,7 @@ impl Compiler {
        emit!(self, Instruction::PopTop);
        // Stack: [prev_exc]

-        // POP_BLOCK - no-op for us with exception tables (fblocks handle this)
+        emit!(self, PseudoInstruction::PopBlock);
        // POP_EXCEPT - restore previous exception context
        emit!(self, Instruction::PopExcept);
        // Stack: []
@@ -3890,14 +3921,19 @@ impl Compiler {
            self.pop_fblock(FBlockType::FinallyTry);
        }

-        emit!(self, PseudoInstruction::Jump { delta: end_block });
+        emit!(
+            self,
+            PseudoInstruction::Jump {
+                delta: continuation_block
+            }
+        );

        // Reraise the result
        self.switch_to_block(reraise_block);
        // Stack: [prev_exc, result]
+        self.set_no_location();

-        // POP_BLOCK - no-op for us
-        // SWAP 2
+        emit!(self, PseudoInstruction::PopBlock);
        emit!(self, Instruction::Swap { i: 2 });
        // Stack: [result, prev_exc]

@@ -3908,6 +3944,12 @@ impl Compiler {
        // RERAISE 0
        emit!(self, Instruction::Reraise { depth: 0 });

+        self.switch_to_block(cleanup_block);
+        self.set_no_location();
+        emit!(self, Instruction::Copy { i: 3 });
+        emit!(self, Instruction::PopExcept);
+        emit!(self, Instruction::Reraise { depth: 1 });
+
        // try-else path
        // NOTE: When we reach here in compilation, the nothing-to-reraise path above
        // has already popped FinallyTry. But else_block is a different execution path
@@ -3927,19 +3969,26 @@ impl Compiler {
                FBlockDatum::FinallyBody(finalbody.to_vec()),
            )?;
        }
-        self.switch_to_block(else_block);
-        self.compile_statements(orelse)?;
+        if else_block != continuation_block {
+            self.switch_to_block(else_block);
+            self.compile_statements(orelse)?;

-        if !finalbody.is_empty() {
-            // Pop the FinallyTry fblock we just pushed for the else path
-            emit!(self, PseudoInstruction::PopBlock);
-            self.pop_fblock(FBlockType::FinallyTry);
+            if !finalbody.is_empty() {
+                // Pop the FinallyTry fblock we just pushed for the else path
+                emit!(self, PseudoInstruction::PopBlock);
+                self.pop_fblock(FBlockType::FinallyTry);
+            }
+
+            emit!(
+                self,
+                PseudoInstruction::Jump {
+                    delta: continuation_block
+                }
+            );
        }

-        emit!(self, PseudoInstruction::Jump { delta: end_block });
-
-        self.switch_to_block(end_block);
        if !finalbody.is_empty() {
+            self.switch_to_block(end_block);
            // Snapshot sub_tables before first finally compilation
            let sub_table_cursor = self.symbol_table_stack.last().map(|t| t.next_sub_table);

@@ -3970,8 +4019,6 @@ impl Compiler {
                self.pop_fblock(FBlockType::FinallyEnd);
            }

-            emit!(self, Instruction::Copy { i: 2 });
-            emit!(self, Instruction::PopExcept);
            emit!(self, Instruction::Reraise { depth: 0 });

            if let Some(cleanup) = finally_cleanup_block {
@@ -3982,7 +4029,11 @@ impl Compiler {
            }
        }

-        self.switch_to_block(exit_block);
+        self.switch_to_block(if finalbody.is_empty() {
+            end_block
+        } else {
+            exit_block
+        });

        Ok(())
    }
@@ -5683,6 +5734,7 @@ impl Compiler {
        }

        // Pop fblock before normal exit
+        self.set_source_range(with_range);
        emit!(self, PseudoInstruction::PopBlock);
        self.pop_fblock(if is_async {
            FBlockType::AsyncWith
@@ -5742,15 +5794,14 @@ impl Compiler {
            }
        );

-        emit!(self, PseudoInstruction::PopBlock);
-        self.pop_fblock(FBlockType::ExceptionHandler);
-
        emit!(self, Instruction::Reraise { depth: 2 });

        // ===== Suppress block =====
        // Stack: [..., exit_func, self_exit, lasti, prev_exc, exc, True]
        self.switch_to_block(suppress_block);
        emit!(self, Instruction::PopTop); // pop True
+        emit!(self, PseudoInstruction::PopBlock);
+        self.pop_fblock(FBlockType::ExceptionHandler);
        emit!(self, Instruction::PopExcept); // pop exc, restore prev_exc
        emit!(self, Instruction::PopTop); // pop lasti
        emit!(self, Instruction::PopTop); // pop self_exit
@@ -8455,30 +8506,42 @@ impl Compiler {
        // Regular call: func → PUSH_NULL → args → CALL
        if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = &func {
            // Check for super() method call optimization
-            if !uses_ex_call
-                && let Some(super_type) = self.can_optimize_super_call(value, attr.as_str())
-            {
+            if let Some(super_type) = self.can_optimize_super_call(value, attr.as_str()) {
                // super().method() or super(cls, self).method() optimization
-                // Stack: [global_super, class, self] → LOAD_SUPER_METHOD → [method, self]
+                // CALL path: [global_super, class, self] → LOAD_SUPER_METHOD → [method, self]
+                // CALL_FUNCTION_EX path: [global_super, class, self] → LOAD_SUPER_ATTR → [attr]
                // Set source range to the super() call for LOAD_GLOBAL/LOAD_DEREF/etc.
                let super_range = value.range();
                self.set_source_range(super_range);
                self.load_args_for_super(&super_type)?;
                self.set_source_range(super_range);
                let idx = self.name(attr.as_str());
-                match super_type {
-                    SuperCallType::TwoArg { .. } => {
-                        self.emit_load_super_method(idx);
+                if uses_ex_call {
+                    match super_type {
+                        SuperCallType::TwoArg { .. } => {
+                            self.emit_load_super_attr(idx);
+                        }
+                        SuperCallType::ZeroArg => {
+                            self.emit_load_zero_super_attr(idx);
+                        }
                    }
-                    SuperCallType::ZeroArg => {
-                        self.emit_load_zero_super_method(idx);
+                    emit!(self, Instruction::PushNull);
+                    self.codegen_call_helper(0, args, call_range)?;
+                } else {
+                    match super_type {
+                        SuperCallType::TwoArg { .. } => {
+                            self.emit_load_super_method(idx);
+                        }
+                        SuperCallType::ZeroArg => {
+                            self.emit_load_zero_super_method(idx);
+                        }
                    }
+                    // NOP for line tracking at .method( line
+                    self.set_source_range(attr.range());
+                    emit!(self, Instruction::Nop);
+                    // CALL at .method( line (not the full expression line)
+                    self.codegen_call_helper(0, args, attr.range())?;
                }
-                // NOP for line tracking at .method( line
-                self.set_source_range(attr.range());
-                emit!(self, Instruction::Nop);
-                // CALL at .method( line (not the full expression line)
-                self.codegen_call_helper(0, args, attr.range())?;
            } else {
                self.compile_expression(value)?;
                let idx = self.name(attr.as_str());
--- a/crates/codegen/src/ir.rs
+++ b/crates/codegen/src/ir.rs
@@ -209,6 +209,13 @@ impl CodeInfo {

        // DCE always runs (removes dead code after terminal instructions)
        self.dce();
+        // BUILD_TUPLE n + UNPACK_SEQUENCE n → NOP + SWAP (n=2,3) or NOP+NOP (n=1)
+        self.optimize_build_tuple_unpack();
+        // Dead store elimination for duplicate STORE_FAST targets
+        // (apply_static_swaps in CPython's flowgraph.c)
+        self.eliminate_dead_stores();
+        // apply_static_swaps: reorder stores to eliminate SWAPs
+        self.apply_static_swaps();
        // Peephole optimizer creates superinstructions matching CPython
        self.peephole_optimize();

@@ -233,6 +240,8 @@ impl CodeInfo {
        duplicate_end_returns(&mut self.blocks);
        self.dce(); // truncate after terminal in blocks that got return duplicated
        self.eliminate_unreachable_blocks(); // remove now-unreachable last block
+        remove_redundant_nops_and_jumps(&mut self.blocks);
+        self.add_checks_for_loads_of_uninitialized_variables();
        // optimize_load_fast: after normalize_jumps
        self.optimize_load_fast_borrow();
        self.optimize_load_global_push_null();
@@ -1326,6 +1335,229 @@ impl CodeInfo {
        }
    }

+    /// BUILD_TUPLE n + UNPACK_SEQUENCE n optimization.
+    ///
+    /// Ported from CPython flowgraph.c optimize_basic_block:
+    /// - n == 1: both become NOP (identity operation)
+    /// - n == 2 or 3: BUILD_TUPLE → NOP, UNPACK_SEQUENCE → SWAP
+    fn optimize_build_tuple_unpack(&mut self) {
+        for block in &mut self.blocks {
+            let instrs = &mut block.instructions;
+            let len = instrs.len();
+            for i in 0..len.saturating_sub(1) {
+                let Some(Instruction::BuildTuple { .. }) = instrs[i].instr.real() else {
+                    continue;
+                };
+                let n = u32::from(instrs[i].arg);
+                let Some(Instruction::UnpackSequence { .. }) = instrs[i + 1].instr.real() else {
+                    continue;
+                };
+                if u32::from(instrs[i + 1].arg) != n {
+                    continue;
+                }
+                match n {
+                    1 => {
+                        instrs[i].instr = AnyInstruction::Real(Instruction::Nop);
+                        instrs[i].arg = OpArg::new(0);
+                        instrs[i + 1].instr = AnyInstruction::Real(Instruction::Nop);
+                        instrs[i + 1].arg = OpArg::new(0);
+                    }
+                    2 | 3 => {
+                        instrs[i].instr = AnyInstruction::Real(Instruction::Nop);
+                        instrs[i].arg = OpArg::new(0);
+                        instrs[i + 1].instr =
+                            AnyInstruction::Real(Instruction::Swap { i: Arg::marker() });
+                        instrs[i + 1].arg = OpArg::new(n);
+                    }
+                    _ => {}
+                }
+            }
+        }
+    }
+
+    /// apply_static_swaps: eliminate SWAPs by reordering target stores/pops.
+    ///
+    /// Ported from CPython Python/flowgraph.c::apply_static_swaps.
+    /// For each SWAP N, find the 1st and N-th swappable instructions after
+    /// it. If both are STORE_FAST/POP_TOP and safe to swap, exchange them
+    /// in the bytecode and replace SWAP with NOP.
+    ///
+    /// Safety: abort if the two stores write the same variable, or if any
+    /// intervening store writes to either of those variables. Do not
+    /// cross line-number boundaries (user-visible name bindings).
+    fn apply_static_swaps(&mut self) {
+        /// Instruction classes that are safe to reorder around SWAP.
+        fn is_swappable(instr: &AnyInstruction) -> bool {
+            matches!(
+                instr,
+                AnyInstruction::Real(Instruction::StoreFast { .. } | Instruction::PopTop)
+            )
+        }
+
+        /// Variable index that a STORE_FAST writes to, or None.
+        fn stores_to(info: &InstructionInfo) -> Option<u32> {
+            match info.instr {
+                AnyInstruction::Real(Instruction::StoreFast { .. }) => Some(u32::from(info.arg)),
+                _ => None,
+            }
+        }
+
+        /// Next swappable index after `i` in `instrs`, skipping NOPs.
+        /// Returns None if a non-NOP non-swappable instruction blocks, or
+        /// if `lineno >= 0` and a different positive lineno is encountered.
+        fn next_swappable(instrs: &[InstructionInfo], mut i: usize, lineno: i32) -> Option<usize> {
+            loop {
+                i += 1;
+                if i >= instrs.len() {
+                    return None;
+                }
+                let info = &instrs[i];
+                let info_lineno = info.location.line.get() as i32;
+                if lineno >= 0 && info_lineno > 0 && info_lineno != lineno {
+                    return None;
+                }
+                if matches!(info.instr, AnyInstruction::Real(Instruction::Nop)) {
+                    continue;
+                }
+                if is_swappable(&info.instr) {
+                    return Some(i);
+                }
+                return None;
+            }
+        }
+
+        for block in &mut self.blocks {
+            let instrs = &mut block.instructions;
+            let len = instrs.len();
+            // Walk forward; for each SWAP attempt elimination.
+            let mut i = 0;
+            while i < len {
+                let swap_arg = match instrs[i].instr {
+                    AnyInstruction::Real(Instruction::Swap { .. }) => u32::from(instrs[i].arg),
+                    _ => {
+                        i += 1;
+                        continue;
+                    }
+                };
+                // SWAP oparg < 2 is a no-op; the compiler should not emit
+                // these, but be defensive.
+                if swap_arg < 2 {
+                    i += 1;
+                    continue;
+                }
+                // Find first swappable after SWAP (lineno = -1 initially).
+                let Some(j) = next_swappable(instrs, i, -1) else {
+                    i += 1;
+                    continue;
+                };
+                let lineno = instrs[j].location.line.get() as i32;
+                // Walk (swap_arg - 1) more swappable instructions, with
+                // lineno constraint.
+                let mut k = j;
+                let mut ok = true;
+                for _ in 1..swap_arg {
+                    match next_swappable(instrs, k, lineno) {
+                        Some(next) => k = next,
+                        None => {
+                            ok = false;
+                            break;
+                        }
+                    }
+                }
+                if !ok {
+                    i += 1;
+                    continue;
+                }
+                // Conflict check: if either j or k is a STORE_FAST, no
+                // intervening store may target the same variable, and
+                // they must not target the same variable themselves.
+                let store_j = stores_to(&instrs[j]);
+                let store_k = stores_to(&instrs[k]);
+                if store_j.is_some() || store_k.is_some() {
+                    if store_j == store_k {
+                        i += 1;
+                        continue;
+                    }
+                    let conflict = instrs[(j + 1)..k].iter().any(|info| {
+                        if let Some(store_idx) = stores_to(info) {
+                            Some(store_idx) == store_j || Some(store_idx) == store_k
+                        } else {
+                            false
+                        }
+                    });
+                    if conflict {
+                        i += 1;
+                        continue;
+                    }
+                }
+                // Safe to reorder. SWAP -> NOP, swap j and k.
+                instrs[i].instr = AnyInstruction::Real(Instruction::Nop);
+                instrs[i].arg = OpArg::new(0);
+                instrs.swap(j, k);
+                i += 1;
+            }
+        }
+    }
+
+    /// Eliminate dead stores in STORE_FAST runs that follow an unpack.
+    ///
+    /// A simplified counterpart of the duplicate-store handling in
+    /// apply_static_swaps (CPython flowgraph.c).
+    /// For each UNPACK_SEQUENCE / UNPACK_EX that is immediately followed
+    /// by a run of two or more consecutive STORE_FAST instructions (as
+    /// produced by tuple unpacking), look for variables that are stored
+    /// to more than once within that run.
+    /// UNPACK pushes values so stores execute left-to-right; the LAST
+    /// store to a variable carries the final value, so every earlier
+    /// store to that variable is dead and is replaced with POP_TOP.
+    fn eliminate_dead_stores(&mut self) {
+        for block in &mut self.blocks {
+            let instrs = &mut block.instructions;
+            let len = instrs.len();
+            let mut i = 0;
+            while i < len {
+                // Look for UNPACK_SEQUENCE or UNPACK_EX
+                let is_unpack = matches!(
+                    instrs[i].instr,
+                    AnyInstruction::Real(
+                        Instruction::UnpackSequence { .. } | Instruction::UnpackEx { .. }
+                    )
+                );
+                if !is_unpack {
+                    i += 1;
+                    continue;
+                }
+                // Scan the run of STORE_FAST right after the unpack
+                let run_start = i + 1;
+                let mut run_end = run_start;
+                while run_end < len
+                    && matches!(
+                        instrs[run_end].instr,
+                        AnyInstruction::Real(Instruction::StoreFast { .. })
+                    )
+                {
+                    run_end += 1;
+                }
+                if run_end - run_start >= 2 {
+                    // Pass 1: find the LAST occurrence of each variable
+                    let mut last_occurrence = std::collections::HashMap::new();
+                    for (j, instr) in instrs[run_start..run_end].iter().enumerate() {
+                        last_occurrence.insert(u32::from(instr.arg), j);
+                    }
+                    // Pass 2: non-last stores to the same variable are dead
+                    for (j, instr) in instrs[run_start..run_end].iter_mut().enumerate() {
+                        let idx = u32::from(instr.arg);
+                        if last_occurrence[&idx] != j {
+                            instr.instr = AnyInstruction::Real(Instruction::PopTop);
+                            instr.arg = OpArg::new(0);
+                        }
+                    }
+                }
+                i = run_end.max(i + 1);
+            }
+        }
+    }
+
    /// Peephole optimization: combine consecutive instructions into super-instructions
    fn peephole_optimize(&mut self) {
        for block in &mut self.blocks {
@@ -1346,34 +1578,49 @@ impl CodeInfo {
                    match (curr_instr, next_instr) {
                        // LoadFast + LoadFast -> LoadFastLoadFast (if both indices < 16)
                        (Instruction::LoadFast { .. }, Instruction::LoadFast { .. }) => {
-                            let idx1 = u32::from(curr.arg);
-                            let idx2 = u32::from(next.arg);
-                            if idx1 < 16 && idx2 < 16 {
-                                let packed = (idx1 << 4) | idx2;
-                                Some((
-                                    Instruction::LoadFastLoadFast {
-                                        var_nums: Arg::marker(),
-                                    },
-                                    OpArg::new(packed),
-                                ))
-                            } else {
+                            let line1 = curr.location.line.get() as i32;
+                            let line2 = next.location.line.get() as i32;
+                            if line1 > 0 && line2 > 0 && line1 != line2 {
                                None
+                            } else {
+                                let idx1 = u32::from(curr.arg);
+                                let idx2 = u32::from(next.arg);
+                                if idx1 < 16 && idx2 < 16 {
+                                    let packed = (idx1 << 4) | idx2;
+                                    Some((
+                                        Instruction::LoadFastLoadFast {
+                                            var_nums: Arg::marker(),
+                                        },
+                                        OpArg::new(packed),
+                                    ))
+                                } else {
+                                    None
+                                }
                            }
                        }
                        // StoreFast + StoreFast -> StoreFastStoreFast (if both indices < 16)
+                        // Pairs on different source lines are left unfused. Dead-store
+                        // removal is not done here: a separate pass rewrites repeated
+                        // stores after UNPACK_SEQUENCE/UNPACK_EX into POP_TOP.
                        (Instruction::StoreFast { .. }, Instruction::StoreFast { .. }) => {
-                            let idx1 = u32::from(curr.arg);
-                            let idx2 = u32::from(next.arg);
-                            if idx1 < 16 && idx2 < 16 {
-                                let packed = (idx1 << 4) | idx2;
-                                Some((
-                                    Instruction::StoreFastStoreFast {
-                                        var_nums: Arg::marker(),
-                                    },
-                                    OpArg::new(packed),
-                                ))
-                            } else {
+                            let line1 = curr.location.line.get() as i32;
+                            let line2 = next.location.line.get() as i32;
+                            if line1 > 0 && line2 > 0 && line1 != line2 {
                                None
+                            } else {
+                                let idx1 = u32::from(curr.arg);
+                                let idx2 = u32::from(next.arg);
+                                if idx1 < 16 && idx2 < 16 {
+                                    let packed = (idx1 << 4) | idx2;
+                                    Some((
+                                        Instruction::StoreFastStoreFast {
+                                            var_nums: Arg::marker(),
+                                        },
+                                        OpArg::new(packed),
+                                    ))
+                                } else {
+                                    None
+                                }
                            }
                        }
                        // Note: StoreFast + LoadFast → StoreFastLoadFast is done in a
@@ -1699,6 +1946,183 @@ impl CodeInfo {
        }
    }

+    fn add_checks_for_loads_of_uninitialized_variables(&mut self) {
+        let nlocals = self.metadata.varnames.len();
+        if nlocals == 0 {
+            return;
+        }
+
+        let mut nparams = self.metadata.argcount as usize + self.metadata.kwonlyargcount as usize;
+        if self.flags.contains(CodeFlags::VARARGS) {
+            nparams += 1;
+        }
+        if self.flags.contains(CodeFlags::VARKEYWORDS) {
+            nparams += 1;
+        }
+        nparams = nparams.min(nlocals);
+
+        let mut in_masks: Vec<Option<Vec<bool>>> = vec![None; self.blocks.len()];
+        let mut start_mask = vec![false; nlocals];
+        for slot in start_mask.iter_mut().skip(nparams) {
+            *slot = true;
+        }
+        in_masks[0] = Some(start_mask);
+
+        let mut worklist = vec![BlockIdx(0)];
+        while let Some(block_idx) = worklist.pop() {
+            let idx = block_idx.idx();
+            let Some(mut unsafe_mask) = in_masks[idx].clone() else {
+                continue;
+            };
+
+            let old_instructions = self.blocks[idx].instructions.clone();
+            let mut new_instructions = Vec::with_capacity(old_instructions.len());
+            let mut changed = false;
+
+            for info in old_instructions {
+                let mut info = info;
+                if let Some(eh) = info.except_handler {
+                    let target = next_nonempty_block(&self.blocks, eh.handler_block);
+                    if target != BlockIdx::NULL
+                        && merge_unsafe_mask(&mut in_masks[target.idx()], &unsafe_mask)
+                    {
+                        worklist.push(target);
+                    }
+                }
+                match info.instr.real() {
+                    Some(Instruction::DeleteFast { var_num }) => {
+                        let var_idx = usize::from(var_num.get(info.arg));
+                        if var_idx < nlocals {
+                            unsafe_mask[var_idx] = true;
+                        }
+                        new_instructions.push(info);
+                    }
+                    Some(Instruction::LoadFastAndClear { var_num }) => {
+                        let var_idx = usize::from(var_num.get(info.arg));
+                        if var_idx < nlocals {
+                            unsafe_mask[var_idx] = true;
+                        }
+                        new_instructions.push(info);
+                    }
+                    Some(Instruction::StoreFast { var_num }) => {
+                        let var_idx = usize::from(var_num.get(info.arg));
+                        if var_idx < nlocals {
+                            unsafe_mask[var_idx] = false;
+                        }
+                        new_instructions.push(info);
+                    }
+                    Some(Instruction::StoreFastStoreFast { var_nums }) => {
+                        let packed = var_nums.get(info.arg);
+                        let (idx1, idx2) = packed.indexes();
+                        let idx1 = usize::from(idx1);
+                        let idx2 = usize::from(idx2);
+                        if idx1 < nlocals {
+                            unsafe_mask[idx1] = false;
+                        }
+                        if idx2 < nlocals {
+                            unsafe_mask[idx2] = false;
+                        }
+                        new_instructions.push(info);
+                    }
+                    Some(Instruction::LoadFastCheck { var_num }) => {
+                        let var_idx = usize::from(var_num.get(info.arg));
+                        if var_idx < nlocals {
+                            unsafe_mask[var_idx] = false;
+                        }
+                        new_instructions.push(info);
+                    }
+                    Some(Instruction::LoadFast { var_num }) => {
+                        let var_idx = usize::from(var_num.get(info.arg));
+                        if var_idx < nlocals && unsafe_mask[var_idx] {
+                            info.instr = Instruction::LoadFastCheck {
+                                var_num: Arg::marker(),
+                            }
+                            .into();
+                            changed = true;
+                        }
+                        if var_idx < nlocals {
+                            unsafe_mask[var_idx] = false;
+                        }
+                        new_instructions.push(info);
+                    }
+                    Some(Instruction::LoadFastLoadFast { var_nums }) => {
+                        let packed = var_nums.get(info.arg);
+                        let (idx1, idx2) = packed.indexes();
+                        let idx1 = usize::from(idx1);
+                        let idx2 = usize::from(idx2);
+                        let needs_check_1 = idx1 < nlocals && unsafe_mask[idx1];
+                        let needs_check_2 = idx2 < nlocals && unsafe_mask[idx2];
+                        if needs_check_1 || needs_check_2 {
+                            let mut first = info;
+                            first.instr = if needs_check_1 {
+                                Instruction::LoadFastCheck {
+                                    var_num: Arg::marker(),
+                                }
+                            } else {
+                                Instruction::LoadFast {
+                                    var_num: Arg::marker(),
+                                }
+                            }
+                            .into();
+                            first.arg = OpArg::new(idx1 as u32);
+
+                            let mut second = info;
+                            second.instr = if needs_check_2 {
+                                Instruction::LoadFastCheck {
+                                    var_num: Arg::marker(),
+                                }
+                            } else {
+                                Instruction::LoadFast {
+                                    var_num: Arg::marker(),
+                                }
+                            }
+                            .into();
+                            second.arg = OpArg::new(idx2 as u32);
+
+                            new_instructions.push(first);
+                            new_instructions.push(second);
+                            changed = true;
+                        } else {
+                            new_instructions.push(info);
+                        }
+                        if idx1 < nlocals {
+                            unsafe_mask[idx1] = false;
+                        }
+                        if idx2 < nlocals {
+                            unsafe_mask[idx2] = false;
+                        }
+                    }
+                    _ => new_instructions.push(info),
+                }
+            }
+
+            if changed {
+                self.blocks[idx].instructions = new_instructions;
+            }
+
+            let block = &self.blocks[idx];
+            if block_has_fallthrough(block) {
+                let next = next_nonempty_block(&self.blocks, block.next);
+                if next != BlockIdx::NULL
+                    && merge_unsafe_mask(&mut in_masks[next.idx()], &unsafe_mask)
+                {
+                    worklist.push(next);
+                }
+            }
+
+            if let Some(last) = block.instructions.last()
+                && is_jump_instruction(last)
+            {
+                let target = next_nonempty_block(&self.blocks, last.target);
+                if target != BlockIdx::NULL
+                    && merge_unsafe_mask(&mut in_masks[target.idx()], &unsafe_mask)
+                {
+                    worklist.push(target);
+                }
+            }
+        }
+    }
+
    fn max_stackdepth(&mut self) -> crate::InternalResult<u32> {
        let mut maxdepth = 0u32;
        let mut stack = Vec::with_capacity(self.blocks.len());
@@ -2164,16 +2588,6 @@ fn push_cold_blocks_to_end(blocks: &mut Vec<Block>) {

    for (cold_idx, warm_next) in fixups {
        let jump_block_idx = BlockIdx(blocks.len() as u32);
-        let loc = blocks[cold_idx.idx()]
-            .instructions
-            .last()
-            .map(|i| i.location)
-            .unwrap_or_default();
-        let end_loc = blocks[cold_idx.idx()]
-            .instructions
-            .last()
-            .map(|i| i.end_location)
-            .unwrap_or_default();
        let mut jump_block = Block {
            cold: true,
            ..Block::default()
@@ -2185,10 +2599,10 @@ fn push_cold_blocks_to_end(blocks: &mut Vec<Block>) {
            .into(),
            arg: OpArg::new(0),
            target: warm_next,
-            location: loc,
-            end_location: end_loc,
+            location: SourceLocation::default(),
+            end_location: SourceLocation::default(),
            except_handler: None,
-            lineno_override: None,
+            lineno_override: Some(-1),
            cache_entries: 0,
        });
        jump_block.next = blocks[cold_idx.idx()].next;
@@ -2623,6 +3037,126 @@ fn inline_small_or_no_lineno_blocks(blocks: &mut [Block]) {
    }
 }

+fn remove_redundant_nops_in_blocks(blocks: &mut [Block]) -> usize {
+    let mut changes = 0;
+    let mut block_order = Vec::new();
+    let mut current = BlockIdx(0);
+    while current != BlockIdx::NULL {
+        block_order.push(current);
+        current = blocks[current.idx()].next;
+    }
+
+    for block_idx in block_order {
+        let bi = block_idx.idx();
+        let mut src_instructions = core::mem::take(&mut blocks[bi].instructions);
+        let mut kept = Vec::with_capacity(src_instructions.len());
+        let mut prev_lineno = -1i32;
+
+        for src in 0..src_instructions.len() {
+            let instr = src_instructions[src];
+            let lineno = instruction_lineno(&instr);
+            let mut remove = false;
+
+            if matches!(instr.instr.real(), Some(Instruction::Nop)) {
+                if lineno < 0 || prev_lineno == lineno {
+                    remove = true;
+                } else if src < src_instructions.len() - 1 {
+                    let next_lineno = instruction_lineno(&src_instructions[src + 1]);
+                    if next_lineno == lineno {
+                        remove = true;
+                    } else if next_lineno < 0 {
+                        src_instructions[src + 1].lineno_override = Some(lineno);
+                        remove = true;
+                    }
+                } else {
+                    let next = next_nonempty_block(blocks, blocks[bi].next);
+                    if next != BlockIdx::NULL {
+                        let mut next_lineno = None;
+                        for next_instr in &blocks[next.idx()].instructions {
+                            let line = instruction_lineno(next_instr);
+                            if matches!(next_instr.instr.real(), Some(Instruction::Nop)) && line < 0
+                            {
+                                continue;
+                            }
+                            next_lineno = Some(line);
+                            break;
+                        }
+                        if next_lineno.is_some_and(|line| line == lineno) {
+                            remove = true;
+                        }
+                    }
+                }
+            }
+
+            if remove {
+                changes += 1;
+            } else {
+                kept.push(instr);
+                prev_lineno = lineno;
+            }
+        }
+
+        blocks[bi].instructions = kept;
+    }
+
+    changes
+}
+
+fn remove_redundant_jumps_in_blocks(blocks: &mut [Block]) -> usize {
+    let mut changes = 0;
+    let mut current = BlockIdx(0);
+    while current != BlockIdx::NULL {
+        let idx = current.idx();
+        let next = next_nonempty_block(blocks, blocks[idx].next);
+        let jump_target = blocks[idx]
+            .instructions
+            .last()
+            .filter(|ins| ins.instr.is_unconditional_jump() && ins.target != BlockIdx::NULL)
+            .map(|ins| ins.target);
+        if next != BlockIdx::NULL
+            && let Some(target) = jump_target
+            && next_nonempty_block(blocks, target) == next
+            && let Some(last_instr) = blocks[idx].instructions.last_mut()
+        {
+            last_instr.instr = Instruction::Nop.into();
+            last_instr.arg = OpArg::new(0);
+            last_instr.target = BlockIdx::NULL;
+            changes += 1;
+        }
+        current = blocks[idx].next;
+    }
+    changes
+}
+
+fn remove_redundant_nops_and_jumps(blocks: &mut [Block]) {
+    loop {
+        let removed_nops = remove_redundant_nops_in_blocks(blocks);
+        let removed_jumps = remove_redundant_jumps_in_blocks(blocks);
+        if removed_nops + removed_jumps == 0 {
+            break;
+        }
+    }
+}
+
+fn merge_unsafe_mask(slot: &mut Option<Vec<bool>>, incoming: &[bool]) -> bool {
+    match slot {
+        Some(existing) => {
+            let mut changed = false;
+            for (dst, src) in existing.iter_mut().zip(incoming.iter().copied()) {
+                if src && !*dst {
+                    *dst = true;
+                    changed = true;
+                }
+            }
+            changed
+        }
+        None => {
+            *slot = Some(incoming.to_vec());
+            true
+        }
+    }
+}
+
 /// Follow chain of empty blocks to find first non-empty block.
 fn next_nonempty_block(blocks: &[Block], mut idx: BlockIdx) -> BlockIdx {
    while idx != BlockIdx::NULL
--- a/crates/codegen/src/snapshots/rustpython_codegencompiletests__nested_double_async_with.snap
+++ b/crates/codegen/src/snapshots/rustpython_codegencompiletests__nested_double_async_with.snap
@@ -1,5 +1,6 @@
 ---
 source: crates/codegen/src/compile.rs
+assertion_line: 10960
 expression: "compile_exec(\"\\\nasync def test():\n    for stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n        with self.subTest(type=type(stop_exc)):\n            try:\n                async with egg():\n                    raise stop_exc\n            except Exception as ex:\n                self.assertIs(ex, stop_exc)\n            else:\n                self.fail(f'{stop_exc} was suppressed')\n\")"
 ---
  1           0 RESUME               (0)
@@ -199,8 +200,8 @@ expression: "compile_exec(\"\\\nasync def test():\n    for stop_exc in (StopIter
                183 CACHE
                184 CACHE
                185 CACHE
-                186 CACHE
-            >>  187 CACHE
+            >>  186 CACHE
+                187 CACHE
                188 CACHE
                189 CACHE
                190 CACHE
@@ -214,41 +215,40 @@ expression: "compile_exec(\"\\\nasync def test():\n    for stop_exc in (StopIter
                198 CACHE
                199 CACHE
                200 POP_TOP
-                201 NOP

-  3             202 LOAD_CONST           (None)
-                203 LOAD_CONST           (None)
-            >>  204 LOAD_CONST           (None)
-                205 CALL                 (3)
+  3             201 LOAD_CONST           (None)
+                202 LOAD_CONST           (None)
+            >>  203 LOAD_CONST           (None)
+                204 CALL                 (3)
+                205 CACHE
                206 CACHE
                207 CACHE
-                208 CACHE
-                209 POP_TOP
-                210 JUMP_BACKWARD        (187)
-                211 CACHE
-                212 PUSH_EXC_INFO
-                213 WITH_EXCEPT_START
-                214 TO_BOOL
+                208 POP_TOP
+                209 JUMP_BACKWARD        (186)
+                210 CACHE
+                211 PUSH_EXC_INFO
+                212 WITH_EXCEPT_START
+                213 TO_BOOL
+                214 CACHE
                215 CACHE
                216 CACHE
-                217 CACHE
-                218 POP_JUMP_IF_TRUE     (2)
-                219 CACHE
-                220 NOT_TAKEN
-                221 RERAISE              (2)
-                222 POP_TOP
-                223 POP_EXCEPT
+                217 POP_JUMP_IF_TRUE     (2)
+                218 CACHE
+                219 NOT_TAKEN
+                220 RERAISE              (2)
+                221 POP_TOP
+                222 POP_EXCEPT
+                223 POP_TOP
                224 POP_TOP
                225 POP_TOP
-                226 POP_TOP
-                227 JUMP_BACKWARD        (204)
-                228 CACHE
-                229 COPY                 (3)
-                230 POP_EXCEPT
-                231 RERAISE              (1)
+                226 JUMP_BACKWARD        (203)
+                227 CACHE
+                228 COPY                 (3)
+                229 POP_EXCEPT
+                230 RERAISE              (1)

-  2             232 CALL_INTRINSIC_1     (StopIterationError)
-                233 RERAISE              (1)
+  2             231 CALL_INTRINSIC_1     (StopIterationError)
+                232 RERAISE              (1)

              2 MAKE_FUNCTION
              3 STORE_NAME           (0, test)
--- a/crates/vm/src/frame.rs
+++ b/crates/vm/src/frame.rs
@@ -3398,23 +3398,20 @@ impl ExecutingFrame<'_> {
                self.push_value(vm.ctx.new_bool(result).into());
                Ok(None)
            }
-            Instruction::Reraise { depth } => {
+            Instruction::Reraise { depth: _ } => {
                // inst(RERAISE, (values[oparg], exc -- values[oparg]))
                //
-                // Stack layout: [values..., exc] where len(values) == oparg
-                // RERAISE pops exc and oparg additional values from the stack.
-                // values[0] is lasti used to set frame->instr_ptr for traceback.
-                // We skip the lasti update since RustPython's traceback is already correct.
-                let depth_val = depth.get(arg) as usize;
-
-                // Pop exception from TOS
+                // RERAISE pops only `exc` from TOS. The `values` below it
+                // (lasti and optional prev_exc) stay on the stack — the
+                // outer exception handler's exception-table unwind will
+                // pop them down to its configured stack depth.
+                //
+                // `oparg` encodes how many values are preserved below exc
+                // (1 for simple reraise, 2 for with-block reraise where
+                // values[0]=lasti). Runtime-wise we don't need oparg since
+                // the exception table handles stack layout.
                let exc = self.pop_value();

-                // Pop the depth values (lasti and possibly other items like prev_exc)
-                for _ in 0..depth_val {
-                    self.pop_value();
-                }
-
                if let Some(exc_ref) = exc.downcast_ref::<PyBaseException>() {
                    Err(exc_ref.to_owned())
                } else {
@@ -6769,7 +6766,6 @@ impl ExecutingFrame<'_> {
            }
            bytecode::RaiseKind::BareRaise => {
                // RAISE_VARARGS 0: bare `raise` gets exception from VM state
-                // This is the current exception set by PUSH_EXC_INFO
                vm.topmost_exception()
                    .ok_or_else(|| vm.new_runtime_error("No active exception to reraise"))?
            }
--- a/crates/vm/src/vm/mod.rs
+++ b/crates/vm/src/vm/mod.rs
@@ -108,7 +108,7 @@ pub struct VirtualMachine {
 }

 /// Non-owning frame pointer for the frames stack.
-/// The pointed-to frame is kept alive by the caller of with_frame_exc/resume_gen_frame.
+/// The pointed-to frame is kept alive by the caller of with_frame/resume_gen_frame.
 #[derive(Copy, Clone)]
 pub struct FramePtr(NonNull<Py<Frame>>);

@@ -124,11 +124,21 @@ impl FramePtr {
 // FrameRef is alive on the call stack. The Vec is always empty when the VM moves between threads.
 unsafe impl Send for FramePtr {}

-#[derive(Debug, Default)]
+#[derive(Debug)]
 struct ExceptionStack {
+    /// Stack of handled-exception slots, modelling the `_PyErr_StackItem` chain.
+    /// Bottom element is the thread's base slot; generator/coroutine resume
+    /// pushes an additional slot.  Normal frame calls do **not** push/pop.
    stack: Vec<Option<PyBaseExceptionRef>>,
 }

+impl Default for ExceptionStack {
+    fn default() -> Self {
+        // Thread's base `_PyErr_StackItem` – always present.
+        Self { stack: vec![None] }
+    }
+}
+
 /// Stop-the-world state for fork safety. Before `fork()`, the requester
 /// stops all other Python threads so they are not holding internal locks.
 #[cfg(all(unix, feature = "threading"))]
@@ -1554,17 +1564,7 @@ impl VirtualMachine {
        frame: FrameRef,
        f: F,
    ) -> PyResult<R> {
-        self.with_frame_impl(frame, None, true, f)
-    }
-
-    /// Like `with_frame` but allows specifying the initial exception state.
-    pub fn with_frame_exc<R, F: FnOnce(FrameRef) -> PyResult<R>>(
-        &self,
-        frame: FrameRef,
-        exc: Option<PyBaseExceptionRef>,
-        f: F,
-    ) -> PyResult<R> {
-        self.with_frame_impl(frame, exc, true, f)
+        self.with_frame_impl(frame, true, f)
    }

    pub(crate) fn with_frame_untraced<R, F: FnOnce(FrameRef) -> PyResult<R>>(
@@ -1572,13 +1572,12 @@ impl VirtualMachine {
        frame: FrameRef,
        f: F,
    ) -> PyResult<R> {
-        self.with_frame_impl(frame, None, false, f)
+        self.with_frame_impl(frame, false, f)
    }

    fn with_frame_impl<R, F: FnOnce(FrameRef) -> PyResult<R>>(
        &self,
        frame: FrameRef,
-        exc: Option<PyBaseExceptionRef>,
        traced: bool,
        f: F,
    ) -> PyResult<R> {
@@ -1597,19 +1596,22 @@ impl VirtualMachine {
                old_frame as *mut Frame,
                core::sync::atomic::Ordering::Relaxed,
            );
-            // Push exception context for frame isolation.
-            // For normal calls: None (clean slate).
-            // For generators: the saved exception from last yield.
-            self.push_exception(exc);
+            // Normal frame calls share the caller's exc_info slot so that
+            // callees can see the caller's handled exception via sys.exc_info().
+            // Save the current value to restore on exit — this prevents
+            // exc_info pollution from frames with unbalanced
+            // PUSH_EXC_INFO/POP_EXCEPT (e.g., exception escaping an except block
+            // whose cleanup entry is missing from the exception table).
+            let saved_exc = self.current_exception();
            let old_owner = frame.owner.swap(
                crate::frame::FrameOwner::Thread as i8,
                core::sync::atomic::Ordering::AcqRel,
            );

-            // Ensure cleanup on panic: restore owner, pop exception, frame chain, and frames Vec.
+            // Ensure cleanup on panic: restore owner, exc_info, frame chain, and frames Vec.
            scopeguard::defer! {
                frame.owner.store(old_owner, core::sync::atomic::Ordering::Release);
-                self.pop_exception();
+                self.set_exception(saved_exc);
                crate::vm::thread::set_current_frame(old_frame);
                self.frames.borrow_mut().pop();
                #[cfg(feature = "threading")]
@@ -1624,9 +1626,9 @@ impl VirtualMachine {
        })
    }

-    /// Lightweight frame execution for generator/coroutine resume.
-    /// Pushes to the thread frame stack and fires trace/profile events,
-    /// but skips the thread exception update for performance.
+    /// Frame execution for generator/coroutine resume.
+    /// Pushes a new exc_info slot (gi_exc_state) onto the chain,
+    /// linking the generator's saved handled-exception.
    pub fn resume_gen_frame<R, F: FnOnce(&Py<Frame>) -> PyResult<R>>(
        &self,
        frame: &FrameRef,
@@ -1649,20 +1651,20 @@ impl VirtualMachine {
            old_frame as *mut Frame,
            core::sync::atomic::Ordering::Relaxed,
        );
-        // Inline exception push without thread exception update
-        self.exceptions.borrow_mut().stack.push(exc);
+        // Push generator's exc_info slot onto the chain
+        // (gi_exc_state.previous_item = tstate->exc_info;
+        //  tstate->exc_info = &gi_exc_state;)
+        self.push_exception(exc);
        let old_owner = frame.owner.swap(
            crate::frame::FrameOwner::Thread as i8,
            core::sync::atomic::Ordering::AcqRel,
        );

-        // Ensure cleanup on panic: restore owner, pop exception, frame chain, frames Vec,
-        // and recursion depth.
+        // Ensure cleanup on panic: restore owner, pop exc_info slot, frame chain,
+        // frames Vec, and recursion depth.
        scopeguard::defer! {
            frame.owner.store(old_owner, core::sync::atomic::Ordering::Release);
-            self.exceptions.borrow_mut().stack
-                .pop()
-                .expect("pop_exception() without nested exc stack");
+            self.pop_exception();
            crate::vm::thread::set_current_frame(old_frame);
            self.frames.borrow_mut().pop();
            #[cfg(feature = "threading")]
@@ -2037,12 +2039,14 @@ impl VirtualMachine {
        }
    }

+    /// Push a new exc_info slot (for generator/coroutine resume).
    pub(crate) fn push_exception(&self, exc: Option<PyBaseExceptionRef>) {
        self.exceptions.borrow_mut().stack.push(exc);
        #[cfg(feature = "threading")]
        thread::update_thread_exception(self.topmost_exception());
    }

+    /// Pop the topmost exc_info slot (generator/coroutine yield/return).
    pub(crate) fn pop_exception(&self) -> Option<PyBaseExceptionRef> {
        let exc = self
            .exceptions
@@ -2059,6 +2063,7 @@ impl VirtualMachine {
        self.exceptions.borrow().stack.last().cloned().flatten()
    }

+    /// Set the current exc_info slot value (PUSH_EXC_INFO / POP_EXCEPT).
    pub(crate) fn set_exception(&self, exc: Option<PyBaseExceptionRef>) {
        // don't be holding the RefCell guard while __del__ is called
        let mut excs = self.exceptions.borrow_mut();
--- a/scripts/compare_bytecode.py
+++ b/scripts/compare_bytecode.py
@@ -0,0 +1,540 @@
+#!/usr/bin/env python3
+"""Compare bytecode between CPython and RustPython.
+
+Compiles all Python files under Lib/ with both interpreters and reports
+differences in the generated bytecode instructions.
+
+Usage:
+    python scripts/compare_bytecode.py
+    python scripts/compare_bytecode.py --detail
+    python scripts/compare_bytecode.py --filter "asyncio/*.py"
+    python scripts/compare_bytecode.py --summary-json report.json
+"""
+
+import argparse
+import fnmatch
+import json
+import os
+import random
+import subprocess
+import sys
+import tempfile
+from collections import defaultdict
+
+# Locations derived from this script's own absolute path.
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+# Parent directory of SCRIPT_DIR.
+PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
+# Helper script that each interpreter runs to dump its bytecode.
+DIS_DUMP = os.path.join(SCRIPT_DIR, "dis_dump.py")
+# Default value of the -o/--output option.
+DEFAULT_REPORT = os.path.join(PROJECT_ROOT, "compare_bytecode.report")
+
+
+def find_rustpython():
+    """Return the path of the RustPython binary to use, or None.
+
+    The ``RUSTPYTHON`` environment variable wins when it is set; its value is
+    returned as given. A value whose normalized form ends in
+    ``target/debug/rustpython`` is rejected with ``ValueError``. Without the
+    variable, the release binary under the project root is returned if it is
+    an executable file, otherwise None.
+    """
+    override = os.environ.get("RUSTPYTHON")
+    if override is not None:
+        debug_suffix = os.path.join("target", "debug", "rustpython")
+        if os.path.normpath(override).endswith(debug_suffix):
+            raise ValueError(
+                "RUSTPYTHON must point to a release binary, not target/debug/rustpython"
+            )
+        return override
+
+    candidate = os.path.join(PROJECT_ROOT, "target", "release", "rustpython")
+    usable = os.path.isfile(candidate) and os.access(candidate, os.X_OK)
+    return candidate if usable else None
+
+
+def collect_targets(lib_dir, pattern=None):
+    """List the ``.py`` files below ``lib_dir`` as (relative, full) path pairs.
+
+    ``__pycache__`` and dot-prefixed directories are not descended into.
+    Directories and files are visited in sorted order. When ``pattern`` is
+    given, only files whose path relative to ``lib_dir`` matches that
+    fnmatch-style glob are kept.
+    """
+    found = []
+    for dirpath, dirnames, filenames in os.walk(lib_dir):
+        # Assigning through the slice prunes the walk in place.
+        dirnames[:] = sorted(
+            name
+            for name in dirnames
+            if not (name == "__pycache__" or name.startswith("."))
+        )
+        for filename in sorted(filenames):
+            if not filename.endswith(".py"):
+                continue
+            absolute = os.path.join(dirpath, filename)
+            relative = os.path.relpath(absolute, lib_dir)
+            if not pattern or fnmatch.fnmatch(relative, pattern):
+                found.append((relative, absolute))
+    return found
+
+
+def _start_one(interpreter, targets, base_dir):
+    """Start a single dis_dump.py subprocess.
+
+    The full paths in ``targets`` (a sequence of (relative, full) pairs) are
+    written one per line to a temporary file under PROJECT_ROOT, which is
+    handed to dis_dump.py through ``--files-from``.
+
+    Returns ``(proc, files_file_name)``. The caller is responsible for
+    waiting on ``proc`` and deleting the temporary file. If anything fails
+    before the process is started, the temporary file is removed and the
+    exception propagates.
+    """
+    env = os.environ.copy()
+    if interpreter != sys.executable:
+        # Any interpreter other than the running one gets RUSTPYTHONPATH
+        # pointed at the library directory being compared.
+        env["RUSTPYTHONPATH"] = base_dir
+
+    files_file = tempfile.NamedTemporaryFile(
+        mode="w", encoding="utf-8", delete=False, dir=PROJECT_ROOT
+    )
+    try:
+        # The context manager closes the handle even when a write fails, so
+        # the cleanup below never tries to unlink a file that is still open.
+        with files_file:
+            files_file.writelines(path + "\n" for _, path in targets)
+        cmd = [
+            interpreter,
+            DIS_DUMP,
+            "--base-dir",
+            base_dir,
+            "--files-from",
+            files_file.name,
+            "--progress",
+            "10",
+        ]
+        proc = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=None,  # inherit stderr so progress dots appear on terminal
+            env=env,
+            cwd=PROJECT_ROOT,
+        )
+        return proc, files_file.name
+    except Exception:
+        os.unlink(files_file.name)
+        raise
+
+
+def _finish_one(proc, files_file):
+    """Collect one worker's stdout and decode it as JSON.
+
+    Waits up to 600 seconds for ``proc``. The temporary target list at
+    ``files_file`` is deleted whether or not the wait succeeds. Returns the
+    decoded JSON value, or an empty dict when the worker timed out, wrote
+    nothing, or wrote something that is not valid JSON.
+    """
+    try:
+        payload = proc.communicate(timeout=600)[0]
+    except subprocess.TimeoutExpired:
+        proc.kill()
+        proc.communicate()  # reap the killed process
+        print("  Timeout (600s)", file=sys.stderr)
+        return {}
+    finally:
+        # Runs on every path, including the timeout return above.
+        if os.path.exists(files_file):
+            os.unlink(files_file)
+
+    if proc.returncode != 0:
+        print("  Warning: exited with code %d" % proc.returncode, file=sys.stderr)
+
+    text = payload.decode(errors="replace").strip()
+    if text:
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError as err:
+            print("  JSON parse error: %s" % err, file=sys.stderr)
+    return {}
+
+
+def start_dump(interpreter, targets, base_dir, num_workers=1):
+    """Launch dis_dump.py workers for ``targets`` under ``interpreter``.
+
+    One worker handles everything when ``num_workers`` is at most 1 or there
+    are no more targets than workers. Otherwise the targets are dealt out
+    round-robin, one shard per worker. Returns the list of
+    ``(proc, files_file)`` pairs produced by ``_start_one``.
+    """
+    single_worker = num_workers <= 1 or len(targets) <= num_workers
+    if single_worker:
+        return [_start_one(interpreter, targets, base_dir)]
+
+    # Shard k receives targets k, k + num_workers, k + 2 * num_workers, ...
+    shards = [targets[k::num_workers] for k in range(num_workers)]
+    return [_start_one(interpreter, shard, base_dir) for shard in shards if shard]
+
+
+def finish_dump(procs):
+    """Wait for every worker in ``procs`` and merge their results.
+
+    ``procs`` is a list of ``(proc, files_file)`` pairs. The per-worker
+    dicts are combined into one; a later worker's entry replaces an earlier
+    one with the same key.
+    """
+    combined = {}
+    for worker, listing in procs:
+        partial = _finish_one(worker, listing)
+        combined.update(partial)
+    return combined
+
+
+def compare_insts(cp_insts, rp_insts):
+    """Return the positions at which two instruction lists disagree.
+
+    The shorter list is treated as padded with None. Each mismatch is
+    reported as ``(index, cpython_instruction, rustpython_instruction)``.
+    """
+    width = max(len(cp_insts), len(rp_insts))
+    cp_padded = list(cp_insts) + [None] * (width - len(cp_insts))
+    rp_padded = list(rp_insts) + [None] * (width - len(rp_insts))
+    return [
+        (index, left, right)
+        for index, (left, right) in enumerate(zip(cp_padded, rp_padded))
+        if left != right
+    ]
+
+
+def compare_code(cp_code, rp_code, path=""):
+    """Recursively compare code objects. Returns [(code_path, diffs)].
+
+    ``cp_code`` and ``rp_code`` are dumps with a "name" and optional "insts"
+    and "children" entries. ``path`` is the slash-separated path of the
+    enclosing code objects. Children are paired by name and, among children
+    sharing a name, by position. A child present on one side only is
+    reported once with index -1.
+    """
+    results = []
+    name = cp_code["name"]
+    full = (path + "/" + name) if path else name
+
+    diffs = compare_insts(cp_code.get("insts", []), rp_code.get("insts", []))
+    if diffs:
+        results.append((full, diffs))
+
+    cp_ch = cp_code.get("children", [])
+    rp_ch = rp_code.get("children", [])
+
+    cp_by_name = defaultdict(list)
+    rp_by_name = defaultdict(list)
+    for c in cp_ch:
+        cp_by_name[c["name"]].append(c)
+    for c in rp_ch:
+        rp_by_name[c["name"]].append(c)
+
+    # CPython names first, then RustPython-only names, each exactly once.
+    # Without de-duplicating the RustPython side, a name that occurs several
+    # times there (and never in CPython) would be visited once per
+    # occurrence and every extra child reported repeatedly.
+    child_names = list(
+        dict.fromkeys([c["name"] for c in cp_ch] + [c["name"] for c in rp_ch])
+    )
+
+    for child_name in child_names:
+        cp_list = cp_by_name.get(child_name, [])
+        rp_list = rp_by_name.get(child_name, [])
+        child_path = full + "/" + child_name
+        for i in range(max(len(cp_list), len(rp_list))):
+            if i < len(cp_list) and i < len(rp_list):
+                results.extend(compare_code(cp_list[i], rp_list[i], full))
+            elif i < len(cp_list):
+                results.append((child_path, [(-1, "extra in CPython", None)]))
+            else:
+                results.append((child_path, [(-1, None, "extra in RustPython")]))
+
+    return results
+
+
+def compare_code_summary(cp_code, rp_code):
+    """Recursively compare code objects and return summary counts.
+
+    Returns ``(diff_code_objects, diff_instructions)``: the number of code
+    objects with at least one difference and the number of mismatched
+    instruction slots. A child code object present on one side only adds one
+    to each count.
+    """
+    diff_code_objects = 0
+    diff_instructions = compare_insts_count(
+        cp_code.get("insts", []), rp_code.get("insts", [])
+    )
+    if diff_instructions:
+        diff_code_objects += 1
+
+    cp_ch = cp_code.get("children", [])
+    rp_ch = rp_code.get("children", [])
+    cp_by_name = defaultdict(list)
+    rp_by_name = defaultdict(list)
+    for child in cp_ch:
+        cp_by_name[child["name"]].append(child)
+    for child in rp_ch:
+        rp_by_name[child["name"]].append(child)
+
+    # CPython names first, then RustPython-only names, each exactly once.
+    # A RustPython-only name that occurs several times must not be visited
+    # once per occurrence, or its children are counted multiple times.
+    child_names = list(
+        dict.fromkeys(
+            [child["name"] for child in cp_ch] + [child["name"] for child in rp_ch]
+        )
+    )
+
+    for child_name in child_names:
+        cp_list = cp_by_name.get(child_name, [])
+        rp_list = rp_by_name.get(child_name, [])
+        for i in range(max(len(cp_list), len(rp_list))):
+            if i < len(cp_list) and i < len(rp_list):
+                child_objects, child_insts = compare_code_summary(
+                    cp_list[i], rp_list[i]
+                )
+                diff_code_objects += child_objects
+                diff_instructions += child_insts
+            else:
+                # Unpaired child: counts as one differing object and slot.
+                diff_code_objects += 1
+                diff_instructions += 1
+
+    return diff_code_objects, diff_instructions
+
+
+def compare_insts_count(cp_insts, rp_insts):
+    """Return how many instruction slots differ between the two lists.
+
+    Slots beyond the end of the shorter list compare as None, so no list of
+    differences needs to be built.
+    """
+
+    def slot(insts, index):
+        return insts[index] if index < len(insts) else None
+
+    longest = max(len(cp_insts), len(rp_insts))
+    return sum(
+        1 for index in range(longest) if slot(cp_insts, index) != slot(rp_insts, index)
+    )
+
+
+def main():
+    """Dump bytecode with both interpreters, compare it, and write a report.
+
+    The text report goes to the ``--output`` file, progress and the final
+    summary line go to stderr, and ``--summary-json`` optionally writes a
+    machine-readable summary. The process exits with status 0 only when no
+    file differs, RustPython reported no compile error, and no file is
+    missing from the RustPython output; otherwise the status is 1.
+    """
+    parser = argparse.ArgumentParser(description="Compare CPython/RustPython bytecode")
+    parser.add_argument(
+        "--detail", action="store_true", help="Show per-file instruction differences"
+    )
+    parser.add_argument("--filter", default=None, help="Glob pattern to filter files")
+    parser.add_argument(
+        "--max-diffs",
+        type=int,
+        default=5,
+        help="Max diffs shown per code object (default: 5)",
+    )
+    parser.add_argument(
+        "--summary-json", default=None, help="Write summary as JSON to file"
+    )
+    parser.add_argument(
+        "--sample",
+        type=int,
+        default=None,
+        help="Compare a random sample of N matching files",
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed used with --sample",
+    )
+    parser.add_argument(
+        "--list-limit",
+        type=int,
+        default=10,
+        help="Max differing files to print in non-detail mode (default: 10)",
+    )
+    parser.add_argument(
+        "--lib-dir",
+        default=os.path.join(PROJECT_ROOT, "Lib"),
+        help="Library directory to compare",
+    )
+    parser.add_argument(
+        "-j",
+        "--jobs",
+        type=int,
+        default=None,
+        help="Number of parallel workers per interpreter (default: cpu_count)",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        default=DEFAULT_REPORT,
+        help="Report output file (default: compare_bytecode.report)",
+    )
+    args = parser.parse_args()
+
+    try:
+        rp_bin = find_rustpython()
+    except ValueError as exc:
+        print("Error: %s" % exc, file=sys.stderr)
+        sys.exit(1)
+    if not rp_bin:
+        print("Error: RustPython binary not found.", file=sys.stderr)
+        print("  Build with: cargo build --release", file=sys.stderr)
+        print("  Or set RUSTPYTHON=/path/to/binary", file=sys.stderr)
+        sys.exit(1)
+    if not os.path.isfile(DIS_DUMP):
+        print("Error: disassembler helper not found: %s" % DIS_DUMP, file=sys.stderr)
+        print(
+            "  Expected scripts/dis_dump.py from origin/bytecode-parity",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    targets = collect_targets(args.lib_dir, args.filter)
+    sample_seed = None
+    if args.sample is not None:
+        if args.sample < 1:
+            print("Error: --sample must be >= 1", file=sys.stderr)
+            sys.exit(1)
+        # Without --seed, draw one and record it so the sample can be re-run.
+        sample_seed = (
+            args.seed
+            if args.seed is not None
+            else random.SystemRandom().randrange(2**32)
+        )
+        rng = random.Random(sample_seed)
+        sample_size = min(args.sample, len(targets))
+        targets = sorted(rng.sample(targets, sample_size), key=lambda item: item[0])
+    if not targets:
+        print("Error: no Python files matched", file=sys.stderr)
+        sys.exit(1)
+
+    report_path = args.output
+
+    def log(*a, **kw):
+        print(*a, file=sys.stderr, **kw)
+
+    log("Report: %s" % os.path.relpath(report_path, PROJECT_ROOT))
+    log("Targets: %d file(s)" % len(targets))
+    num_workers = args.jobs if args.jobs else os.cpu_count() or 4
+    log("Workers: %d per interpreter" % num_workers)
+    sys.stderr.write("Dumping bytecode ")
+    sys.stderr.flush()
+
+    # Phase 1: Dump. Both interpreters are started before either is awaited
+    # so their workers run concurrently.
+    cp_procs = start_dump(sys.executable, targets, args.lib_dir, num_workers)
+    rp_procs = start_dump(rp_bin, targets, args.lib_dir, num_workers)
+    cp_data = finish_dump(cp_procs)
+    rp_data = finish_dump(rp_procs)
+    sys.stderr.write("\n")
+    if not cp_data:
+        log("Error: CPython dump produced no data")
+        sys.exit(1)
+
+    # Phase 2: Compare
+    all_files = sorted(set(cp_data) | set(rp_data))
+
+    match = 0
+    differ = 0
+    cp_err = 0
+    rp_err = 0
+    both_err = 0
+    rp_miss = 0
+
+    diff_files = []
+    rp_error_files = []
+    diff_summaries = []
+    need_detailed_diffs = args.detail
+
+    for fp in all_files:
+        cp = cp_data.get(fp)
+        rp = rp_data.get(fp)
+
+        if rp is None:
+            rp_miss += 1
+            continue
+
+        cp_ok = cp and cp.get("status") == "ok"
+        rp_ok = rp and rp.get("status") == "ok"
+
+        if not cp_ok and not rp_ok:
+            both_err += 1
+        elif not cp_ok:
+            cp_err += 1
+        elif not rp_ok:
+            rp_err += 1
+            rp_error_files.append((fp, rp.get("error", "?")))
+        else:
+            if need_detailed_diffs:
+                code_diffs = compare_code(cp["code"], rp["code"])
+                if code_diffs:
+                    differ += 1
+                    diff_files.append((fp, code_diffs))
+                else:
+                    match += 1
+            else:
+                diff_code_objects, diff_instructions = compare_code_summary(
+                    cp["code"], rp["code"]
+                )
+                if diff_code_objects:
+                    differ += 1
+                    diff_summaries.append(
+                        {
+                            "path": fp,
+                            "diff_code_objects": diff_code_objects,
+                            "diff_instructions": diff_instructions,
+                        }
+                    )
+                else:
+                    match += 1
+
+    total = match + differ + cp_err + rp_err + both_err + rp_miss
+
+    def pct(n):
+        return "%.1f%%" % (100.0 * n / total) if total else "0%"
+
+    def rank_key(item):
+        # Sort key for summaries: most instruction diffs first when reversed.
+        return (item["diff_instructions"], item["diff_code_objects"], item["path"])
+
+    # Phase 3: Write report to file. The encoding is explicit because
+    # instruction reprs may contain non-ASCII text that the locale's default
+    # encoding cannot represent.
+    with open(report_path, "w", encoding="utf-8") as out:
+
+        def p(*a):
+            print(*a, file=out)
+
+        p("CPython:     %s (%s)" % (sys.executable, sys.version.split()[0]))
+        p("RustPython:  %s" % rp_bin)
+        p("Lib:         %s" % args.lib_dir)
+        if sample_seed is not None:
+            p("Sample:      %s file(s), seed=%s" % (len(targets), sample_seed))
+        p()
+        p("=" * 60)
+        p("  Bytecode Comparison Report")
+        p("=" * 60)
+        p()
+        p("  Total files:          %6d" % total)
+        p("  Match:                %6d  (%s)" % (match, pct(match)))
+        p("  Differ:               %6d  (%s)" % (differ, pct(differ)))
+        p("  RustPython error:     %6d  (%s)" % (rp_err, pct(rp_err)))
+        p("  CPython error:        %6d  (%s)" % (cp_err, pct(cp_err)))
+        p("  Both error:           %6d  (%s)" % (both_err, pct(both_err)))
+        if rp_miss:
+            p("  RustPython missing:   %6d  (%s)" % (rp_miss, pct(rp_miss)))
+        p()
+
+        if args.detail:
+            if rp_error_files:
+                p("-" * 60)
+                p("  RustPython Compile Errors")
+                p("-" * 60)
+                for fp, err in rp_error_files[:50]:
+                    p("  %s: %s" % (fp, err))
+                if len(rp_error_files) > 50:
+                    p("  ... and %d more" % (len(rp_error_files) - 50))
+                p()
+
+            if diff_files:
+                p("-" * 60)
+                p("  Bytecode Differences")
+                p("-" * 60)
+                for fp, code_diffs in diff_files:
+                    p()
+                    p("  %s:" % fp)
+                    for code_path, diffs in code_diffs:
+                        shown = min(len(diffs), args.max_diffs)
+                        p("    %s: %d difference(s)" % (code_path, len(diffs)))
+                        for idx, cp_inst, rp_inst in diffs[:shown]:
+                            if idx == -1:
+                                # Index -1 marks a code object missing on one side.
+                                p("      %s" % (cp_inst or rp_inst))
+                            else:
+                                p("      [%3d] CPython:     %s" % (idx, cp_inst))
+                                p("            RustPython:  %s" % rp_inst)
+                        if len(diffs) > shown:
+                            p("      ... and %d more" % (len(diffs) - shown))
+                p()
+        else:
+            list_limit = 0 if args.summary_json else max(args.list_limit, 0)
+            if diff_summaries and list_limit:
+                shown = min(list_limit, len(diff_summaries))
+                # Deliberately not named ``total``: that name holds the file
+                # count read by pct() and by the summary JSON below.
+                differing_count = len(diff_summaries)
+                p(f"Top differing files ({shown} shown of {differing_count}):")
+                top = sorted(diff_summaries, key=rank_key, reverse=True)[:list_limit]
+                for item in top:
+                    p(
+                        "  %s  (%d code objects, %d instruction diffs)"
+                        % (
+                            item["path"],
+                            item["diff_code_objects"],
+                            item["diff_instructions"],
+                        )
+                    )
+                p()
+                p("Use --detail to see specific instruction differences.")
+                p()
+
+    # Summary JSON output
+    if args.summary_json:
+        summary = {
+            "total": total,
+            "sample": args.sample,
+            "sample_seed": sample_seed,
+            "match": match,
+            "differ": differ,
+            "rp_error": rp_err,
+            "cp_error": cp_err,
+            "both_error": both_err,
+            "rp_missing": rp_miss,
+            "match_pct": round(100.0 * match / total, 2) if total else 0,
+            "diff_files": [fp for fp, _ in diff_files]
+            if need_detailed_diffs
+            else [item["path"] for item in diff_summaries],
+            "top_diff_files": sorted(diff_summaries, key=rank_key, reverse=True)[:20],
+            "rp_error_files": [fp for fp, _ in rp_error_files],
+        }
+        with open(args.summary_json, "w", encoding="utf-8") as f:
+            json.dump(summary, f, indent=2)
+        log("Summary JSON: %s" % args.summary_json)
+
+    log("Done: %d match, %d differ, %d errors" % (match, differ, rp_err))
+    if rp_miss:
+        # A file is missing only when a RustPython worker produced no usable
+        # output for it, so the comparison is incomplete and must not pass.
+        log("Warning: %d file(s) missing from RustPython output" % rp_miss)
+    sys.exit(0 if differ == 0 and rp_err == 0 and rp_miss == 0 else 1)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/dis_dump.py
+++ b/scripts/dis_dump.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+"""Dump normalized bytecode for Python source files as JSON.
+
+Designed to produce comparable output across different Python implementations.
+Normalizes away implementation-specific details (byte offsets, memory addresses)
+while preserving semantic instruction content.
+
+Usage:
+    python dis_dump.py Lib/
+    python dis_dump.py --base-dir Lib path/to/file.py
+"""
+
+import argparse
+import dis
+import json
+import os
+import re
+import sys
+import types
+
+# Non-semantic filler instructions to skip
+SKIP_OPS = frozenset({"CACHE", "PRECALL", "EXTENDED_ARG"})
+
+# Opname normalization: map variant instructions to their base form.
+# These variants differ only in optimization hints, not semantics.
+_OPNAME_NORMALIZE = {
+    "LOAD_FAST_BORROW": "LOAD_FAST",
+    "LOAD_FAST_BORROW_LOAD_FAST_BORROW": "LOAD_FAST_LOAD_FAST",
+    "LOAD_FAST_CHECK": "LOAD_FAST",
+    "JUMP_BACKWARD_NO_INTERRUPT": "JUMP_BACKWARD",
+    "POP_ITER": "POP_TOP",
+    # Superinstruction normalization: these get decomposed in _extract_instructions
+    "STORE_FAST_LOAD_FAST_BORROW": "STORE_FAST_LOAD_FAST",
+}
+
+# Jump instruction names (fallback when hasjrel/hasjabs is incomplete)
+_JUMP_OPNAMES = frozenset(
+    {
+        "JUMP",
+        "JUMP_FORWARD",
+        "JUMP_BACKWARD",
+        "JUMP_BACKWARD_NO_INTERRUPT",
+        "POP_JUMP_IF_TRUE",
+        "POP_JUMP_IF_FALSE",
+        "POP_JUMP_IF_NONE",
+        "POP_JUMP_IF_NOT_NONE",
+        "JUMP_IF_TRUE_OR_POP",
+        "JUMP_IF_FALSE_OR_POP",
+        "FOR_ITER",
+        "SEND",
+    }
+)
+
+# Cache for _jump_opcodes(); stays None until that function first runs.
+_JUMP_OPCODES = None
+
+
+def _jump_opcodes():
+    """Return the set of jump opcode numbers known to the ``dis`` module.
+
+    The set is the union of ``dis.hasjrel`` and ``dis.hasjabs`` (whichever
+    of them exist). It is built on the first call and then reused.
+    """
+    global _JUMP_OPCODES
+    if _JUMP_OPCODES is not None:
+        return _JUMP_OPCODES
+
+    opcodes = set()
+    for attr in ("hasjrel", "hasjabs"):
+        opcodes.update(getattr(dis, attr, ()))
+    _JUMP_OPCODES = opcodes
+    return opcodes
+
+
+def _is_jump(inst):
+    """Return True when ``inst`` is a jump, judged by opcode or by opname."""
+    if inst.opcode in _jump_opcodes():
+        return True
+    # The name list covers jumps that the dis opcode sets do not report.
+    return inst.opname in _JUMP_OPNAMES
+
+
+def _normalize_argrepr(argrepr):
+    """Remove interpreter- and run-specific detail from an argument repr.
+
+    Code-object reprs are reduced to ``<code object NAME>``. A
+    ``bool(...)`` wrapper is unwrapped. Elsewhere, `` at 0x...`` addresses
+    and the `` + NULL`` / `` + NULL|self`` suffixes are dropped, and
+    ``\\uXXXX`` / ``\\UXXXXXXXX`` escapes are replaced by the character they
+    name (surrogates and invalid code points are left as written).
+    """
+    code_prefix = "<code object "
+    if argrepr.startswith(code_prefix):
+        # Reprs seen across interpreters:
+        #   <code object foo at 0xADDR, file "x.py", line 1>  (CPython 3.14)
+        #   <code object foo at 0xADDR>                        (RustPython)
+        # Keep only what precedes the first address/file marker.
+        label = argrepr[len(code_prefix) :]
+        for marker in (" at 0x", ", file ", " file "):
+            label = label.split(marker, 1)[0]
+        return "<code object %s>" % label.rstrip(">").strip()
+
+    # COMPARE_OP on CPython 3.14: "bool(==)" -> "==", "bool(<)" -> "<"
+    wrapped = re.match(r"^bool\((.+)\)$", argrepr)
+    if wrapped:
+        return wrapped.group(1)
+
+    # In order: memory addresses, then the LOAD_ATTR/LOAD_SUPER_ATTR
+    # suffixes " + NULL|self" and " + NULL".
+    for pattern in (r" at 0x[0-9a-fA-F]+", r" \+ NULL\|self$", r" \+ NULL$"):
+        argrepr = re.sub(pattern, "", argrepr)
+
+    def _decode_escape(match):
+        literal = match.group(0)
+        try:
+            codepoint = int(match.group(1), 16)
+        except (ValueError, OverflowError):
+            return literal
+        if 0xD800 <= codepoint <= 0xDFFF:
+            # Surrogates stay escaped.
+            return literal
+        try:
+            return chr(codepoint)
+        except (ValueError, OverflowError):
+            return literal
+
+    for pattern in (r"\\u([0-9a-fA-F]{4})", r"\\U([0-9a-fA-F]{8})"):
+        argrepr = re.sub(pattern, _decode_escape, argrepr)
+    return argrepr
+
+
+# True when the interpreter running this script identifies itself as
+# "rustpython" through sys.implementation.name.
+_IS_RUSTPYTHON = (
+    hasattr(sys, "implementation") and sys.implementation.name == "rustpython"
+)
+
+# RustPython's ComparisonOperator enum values → operator strings
+_RP_CMP_OPS = {0: "<", 1: "<=", 2: "==", 3: "!=", 4: ">", 5: ">="}
+
+
+def _resolve_arg_fallback(code, opname, arg):
+    """Translate a raw integer argument into the name or constant it denotes.
+
+    Used when the dis module leaves argrepr empty (e.g., on RustPython).
+    ``arg`` is returned unchanged when it is not an int, when the opname is
+    not handled, when the index is out of range, or when the lookup raises.
+    """
+    if not isinstance(arg, int):
+        return arg
+
+    deref_ops = (
+        "LOAD_DEREF",
+        "STORE_DEREF",
+        "DELETE_DEREF",
+        "LOAD_CLOSURE",
+        "MAKE_CELL",
+    )
+    name_ops = (
+        "LOAD_NAME",
+        "STORE_NAME",
+        "DELETE_NAME",
+        "LOAD_GLOBAL",
+        "STORE_GLOBAL",
+        "DELETE_GLOBAL",
+        "LOAD_ATTR",
+        "STORE_ATTR",
+        "DELETE_ATTR",
+        "IMPORT_NAME",
+        "IMPORT_FROM",
+        "LOAD_FROM_DICT_OR_GLOBALS",
+    )
+
+    try:
+        if "FAST" in opname:
+            local_names = code.co_varnames
+            if 0 <= arg < len(local_names):
+                return local_names[arg]
+        elif opname == "LOAD_CONST":
+            constants = code.co_consts
+            if 0 <= arg < len(constants):
+                return _normalize_argrepr(repr(constants[arg]))
+        elif opname in deref_ops:
+            # arg indexes localsplus:
+            #   0..nlocals-1 = varnames (parameter cells reuse these slots)
+            #   nlocals..    = non-parameter cells, then freevars
+            local_names = code.co_varnames
+            local_count = len(local_names)
+            if arg < local_count:
+                return local_names[arg]
+            taken = set(local_names)
+            pool = [cell for cell in code.co_cellvars if cell not in taken]
+            pool.extend(code.co_freevars)
+            offset = arg - local_count
+            if 0 <= offset < len(pool):
+                return pool[offset]
+        elif opname in name_ops:
+            global_names = code.co_names
+            if 0 <= arg < len(global_names):
+                return global_names[arg]
+        elif opname == "LOAD_SUPER_ATTR":
+            # The name index is the argument shifted right by two bits.
+            global_names = code.co_names
+            shifted = arg >> 2
+            if 0 <= shifted < len(global_names):
+                return global_names[shifted]
+    except Exception:
+        # Best effort: any lookup failure falls back to the raw argument.
+        pass
+    return arg
+
+
+def _extract_instructions(code):
+    """Extract normalized instruction list from a code object.
+
+    - Filters out the instructions named in SKIP_OPS (CACHE, PRECALL,
+      EXTENDED_ARG)
+    - Splits superinstructions into their two constituent operations
+    - Converts jump targets from byte offsets to indices into the returned
+      list
+    - Resolves argument names via fallback when argrepr is missing
+    - Normalizes argument representations
+
+    Returns a list of ``[opname]`` or ``[opname, arg]`` entries. If the
+    code object cannot be disassembled, returns ``[["ERROR", message]]``.
+    """
+    try:
+        raw = list(dis.get_instructions(code))
+    except Exception as e:
+        return [["ERROR", str(e)]]
+
+    # Build filtered list and offset-to-index mapping
+    filtered = []
+    offset_to_idx = {}
+    for inst in raw:
+        if inst.opname in SKIP_OPS:
+            continue
+        offset_to_idx[inst.offset] = len(filtered)
+        filtered.append(inst)
+
+    # Map offsets that land on skipped slots to the next real instruction
+    for inst in raw:
+        if inst.offset not in offset_to_idx:
+            for fi, finst in enumerate(filtered):
+                if finst.offset >= inst.offset:
+                    offset_to_idx[inst.offset] = fi
+                    break
+
+    # Superinstruction decomposition: split into constituent parts
+    # so we compare individual operations regardless of combining.
+    _SUPER_DECOMPOSE = {
+        "STORE_FAST_LOAD_FAST": ("STORE_FAST", "LOAD_FAST"),
+        "STORE_FAST_STORE_FAST": ("STORE_FAST", "STORE_FAST"),
+        "LOAD_FAST_LOAD_FAST": ("LOAD_FAST", "LOAD_FAST"),
+    }
+
+    # Position of each filtered instruction in the emitted list. A
+    # superinstruction emits two entries, so positions after it shift by
+    # one. Jump targets are reported through this table so that they index
+    # the emitted list and stay equal whether or not an interpreter combined
+    # the preceding instructions.
+    emitted_pos = []
+    next_pos = 0
+    for inst in filtered:
+        emitted_pos.append(next_pos)
+        normalized = _OPNAME_NORMALIZE.get(inst.opname, inst.opname)
+        next_pos += 2 if normalized in _SUPER_DECOMPOSE else 1
+
+    result = []
+    for inst in filtered:
+        opname = _OPNAME_NORMALIZE.get(inst.opname, inst.opname)
+
+        # Decompose superinstructions into individual ops
+        if opname in _SUPER_DECOMPOSE:
+            op1, op2 = _SUPER_DECOMPOSE[opname]
+            if isinstance(inst.arg, int):
+                # The two operand indices are the high and low 4 bits.
+                idx1 = (inst.arg >> 4) & 0xF
+                idx2 = inst.arg & 0xF
+            else:
+                idx1, idx2 = 0, 0
+            name1 = _resolve_arg_fallback(code, op1, idx1)
+            name2 = _resolve_arg_fallback(code, op2, idx2)
+            result.append([op1, name1])
+            result.append([op2, name2])
+            continue
+
+        if _is_jump(inst) and isinstance(inst.argval, int):
+            target_idx = offset_to_idx.get(inst.argval)
+            # Detect unresolved argval (RustPython may not resolve jump targets):
+            # 1. argval not in offset_to_idx (not a valid byte offset)
+            # 2. argval == arg (raw arg returned as-is, not resolved to offset)
+            # 3. For backward jumps: argval should be < current offset
+            is_backward = "BACKWARD" in inst.opname
+            argval_is_raw = inst.argval == inst.arg and inst.arg is not None
+            if target_idx is None or argval_is_raw:
+                target_idx = None  # force recalculation
+                if is_backward:
+                    # Target = current_offset + INSTR_SIZE + cache
+                    #        - arg * INSTR_SIZE
+                    # Try different cache sizes (NOT_TAKEN=1 for JUMP_BACKWARD, 0 for NO_INTERRUPT)
+                    if "NO_INTERRUPT" in inst.opname:
+                        cache_order = (0, 1, 2)
+                    else:
+                        cache_order = (1, 0, 2, 3)
+                    for cache in cache_order:
+                        target_off = inst.offset + 2 + cache * 2 - inst.arg * 2
+                        if target_off >= 0 and target_off in offset_to_idx:
+                            target_idx = offset_to_idx[target_off]
+                            break
+                elif inst.arg is not None:
+                    # Forward jumps: compute target offset using cache entry count.
+                    # POP_JUMP_IF_* have 1 cache entry (NOT_TAKEN), others have 0.
+                    if "POP_JUMP_IF" in inst.opname:
+                        cache_order = (1, 0, 2)
+                    elif inst.opname == "FOR_ITER":
+                        cache_order = (0, 1, 2)
+                    elif inst.opname == "SEND":
+                        cache_order = (1, 0, 2)
+                    else:
+                        cache_order = (0, 1, 2)
+                    for extra in cache_order:
+                        target_off = inst.offset + 2 + extra * 2 + inst.arg * 2
+                        if target_off in offset_to_idx:
+                            target_idx = offset_to_idx[target_off]
+                            break
+            if target_idx is None:
+                # Could not be resolved: report the raw argval unchanged.
+                target = inst.argval
+            else:
+                target = emitted_pos[target_idx]
+            result.append([opname, "->%d" % target])
+        elif inst.opname == "COMPARE_OP":
+            # Normalize COMPARE_OP across interpreters (different encodings)
+            if _IS_RUSTPYTHON:
+                cmp_str = _RP_CMP_OPS.get(inst.arg, inst.argrepr)
+            else:
+                cmp_str = (
+                    _normalize_argrepr(inst.argrepr) if inst.argrepr else str(inst.arg)
+                )
+            result.append([opname, cmp_str])
+        elif inst.arg is not None and inst.argrepr:
+            # If argrepr is just a number, try to resolve it via fallback
+            # (RustPython may return raw index instead of variable name)
+            argrepr = inst.argrepr
+            if argrepr.isdigit() or (argrepr.startswith("-") and argrepr[1:].isdigit()):
+                resolved = _resolve_arg_fallback(code, opname, inst.arg)
+                if isinstance(resolved, str) and not resolved.isdigit():
+                    argrepr = resolved
+            result.append([opname, _normalize_argrepr(argrepr)])
+        elif inst.arg is not None:
+            resolved = _resolve_arg_fallback(code, opname, inst.arg)
+            result.append([opname, resolved])
+        else:
+            result.append([opname])
+
+    return result
+
+
+def _dump_code(code):
+    """Build the dump dict for ``code`` and, recursively, its nested code.
+
+    The result has a "name" (``co_qualname`` when available and non-empty,
+    else ``co_name``), the normalized "insts", and - only when the code
+    object has code-object constants - their dumps under "children".
+    """
+    qualname = getattr(code, "co_qualname", None)
+    label = qualname if qualname else code.co_name
+
+    nested = []
+    for const in code.co_consts:
+        if isinstance(const, types.CodeType):
+            nested.append(_dump_code(const))
+
+    dumped = {"name": label, "insts": _extract_instructions(code)}
+    if nested:
+        dumped["children"] = nested
+    return dumped
+
+
+def process_file(path):
+    """Compile the file at ``path`` and return its status record.
+
+    On success the record is ``{"status": "ok", "code": <dump>}``. Any
+    failure yields ``{"status": "error", "error": <message>}``; for a
+    SyntaxError the message also carries the line number.
+    """
+    try:
+        with open(path, "rb") as handle:
+            payload = handle.read()
+        dumped = _dump_code(compile(payload, path, "exec"))
+    except SyntaxError as err:
+        return {"status": "error", "error": "%s (line %s)" % (err.msg, err.lineno)}
+    except Exception as err:
+        # Reading and disassembly failures are reported, not raised, so one
+        # bad file does not abort the whole run.
+        return {"status": "error", "error": str(err)}
+    return {"status": "ok", "code": dumped}
+
+
+def main():
+    """Dump bytecode for the requested targets as one JSON object on stdout.
+
+    Targets come from the command line and, with ``--files-from``, from a
+    newline-separated list. Directories are walked for ``.py`` files
+    (skipping ``__pycache__`` and dot-prefixed directories); other targets
+    are processed only if they end in ``.py``. Result keys are paths
+    relative to ``--base-dir`` when given, otherwise relative to the
+    directory target or to the file's own directory.
+    """
+    parser = argparse.ArgumentParser(description="Dump normalized bytecode as JSON")
+    parser.add_argument(
+        "--base-dir",
+        default=None,
+        help="Base directory used to compute relative output paths",
+    )
+    parser.add_argument(
+        "--files-from",
+        default=None,
+        help="Read newline-separated target paths from this file",
+    )
+    parser.add_argument(
+        "targets", nargs="*", help="Python files or directories to process"
+    )
+    parser.add_argument(
+        "--progress",
+        type=int,
+        default=0,
+        help="Print a dot to stderr every N files processed",
+    )
+    args = parser.parse_args()
+
+    targets = list(args.targets)
+    if args.files_from:
+        with open(args.files_from, encoding="utf-8") as listing:
+            for line in listing:
+                entry = line.strip()
+                if entry:
+                    targets.append(entry)
+
+    results = {}
+    processed = 0
+
+    def record(fpath, rel_base):
+        # Store one file's dump under its relative path and tick progress.
+        nonlocal processed
+        relpath = os.path.relpath(fpath, rel_base)
+        results[relpath] = process_file(fpath)
+        processed += 1
+        if args.progress and processed % args.progress == 0:
+            sys.stderr.write(".")
+            sys.stderr.flush()
+
+    for target in targets:
+        if os.path.isdir(target):
+            rel_base = args.base_dir or target
+            for root, dirs, files in os.walk(target):
+                # Assigning through the slice prunes the walk in place.
+                dirs[:] = sorted(
+                    d for d in dirs if d != "__pycache__" and not d.startswith(".")
+                )
+                for fname in sorted(files):
+                    if fname.endswith(".py"):
+                        record(os.path.join(root, fname), rel_base)
+        elif target.endswith(".py"):
+            record(target, args.base_dir or os.path.dirname(target) or ".")
+
+    json.dump(results, sys.stdout, ensure_ascii=False, separators=(",", ":"))
+
+
+if __name__ == "__main__":
+    main()