Merge pull request #7440 from youknowone/pycode

Upgrade test_code and fix code bugs
This commit is contained in:
Jeong, YunWon
2026-03-17 20:53:42 +09:00
committed by GitHub
10 changed files with 1281 additions and 72 deletions

323
Lib/test/_code_definitions.py vendored Normal file
View File

@@ -0,0 +1,323 @@
def simple_script():
assert True
def complex_script():
obj = 'a string'
pickle = __import__('pickle')
def spam_minimal():
pass
spam_minimal()
data = pickle.dumps(obj)
res = pickle.loads(data)
assert res == obj, (res, obj)
def script_with_globals():
obj1, obj2 = spam(42)
assert obj1 == 42
assert obj2 is None
def script_with_explicit_empty_return():
return None
def script_with_return():
return True
def spam_minimal():
# no arg defaults or kwarg defaults
# no annotations
# no local vars
# no free vars
# no globals
# no builtins
# no attr access (names)
# no code
return
def spam_with_builtins():
x = 42
values = (42,)
checks = tuple(callable(v) for v in values)
res = callable(values), tuple(values), list(values), checks
print(res)
def spam_with_globals_and_builtins():
func1 = spam
func2 = spam_minimal
funcs = (func1, func2)
checks = tuple(callable(f) for f in funcs)
res = callable(funcs), tuple(funcs), list(funcs), checks
print(res)
def spam_with_global_and_attr_same_name():
try:
spam_minimal.spam_minimal
except AttributeError:
pass
def spam_full_args(a, b, /, c, d, *args, e, f, **kwargs):
return (a, b, c, d, e, f, args, kwargs)
def spam_full_args_with_defaults(a=-1, b=-2, /, c=-3, d=-4, *args,
e=-5, f=-6, **kwargs):
return (a, b, c, d, e, f, args, kwargs)
def spam_args_attrs_and_builtins(a, b, /, c, d, *args, e, f, **kwargs):
if args.__len__() > 2:
return None
return a, b, c, d, e, f, args, kwargs
def spam_returns_arg(x):
return x
def spam_raises():
raise Exception('spam!')
def spam_with_inner_not_closure():
def eggs():
pass
eggs()
def spam_with_inner_closure():
x = 42
def eggs():
print(x)
eggs()
def spam_annotated(a: int, b: str, c: object) -> tuple:
return a, b, c
def spam_full(a, b, /, c, d:int=1, *args, e, f:object=None, **kwargs) -> tuple:
# arg defaults, kwarg defaults
# annotations
# all kinds of local vars, except cells
# no free vars
# some globals
# some builtins
# some attr access (names)
#
# The signature covers every parameter kind: positional-only (a, b),
# positional-or-keyword (c, d), var-positional (*args), keyword-only (e, f)
# and var-keyword (**kwargs).
# Plain locals bound to the variadic parameters.
x = args
y = kwargs
z = (a, b, c, d)
# Stores into the caller-visible kwargs dict object.
kwargs['e'] = e
kwargs['f'] = f
# `spam` is a module-level name (global); `spam.__name__` is attribute access.
extras = list((x, y, z, spam, spam.__name__))
# NOTE(review): the builtin tuple() accepts at most one argument, so this
# call would raise TypeError if the function were actually executed.
# Presumably this fixture is only inspected via its code object and never
# called -- confirm before "fixing", since changing the expression would
# alter the names/constants the code object records.
return tuple(a, b, c, d, e, f, args, kwargs), extras
def spam(x):
return x, None
def spam_N(x):
def eggs_nested(y):
return None, y
return eggs_nested, x
def spam_C(x):
a = 1
def eggs_closure(y):
return None, y, a, x
return eggs_closure, a, x
def spam_NN(x):
def eggs_nested_N(y):
def ham_nested(z):
return None, z
return ham_nested, y
return eggs_nested_N, x
def spam_NC(x):
a = 1
def eggs_nested_C(y):
def ham_closure(z):
return None, z, y, a, x
return ham_closure, y
return eggs_nested_C, a, x
def spam_CN(x):
a = 1
def eggs_closure_N(y):
def ham_C_nested(z):
return None, z
return ham_C_nested, y, a, x
return eggs_closure_N, a, x
def spam_CC(x):
a = 1
def eggs_closure_C(y):
b = 2
def ham_C_closure(z):
return None, z, b, y, a, x
return ham_C_closure, b, y, a, x
return eggs_closure_C, a, x
eggs_nested, *_ = spam_N(1)
eggs_closure, *_ = spam_C(1)
eggs_nested_N, *_ = spam_NN(1)
eggs_nested_C, *_ = spam_NC(1)
eggs_closure_N, *_ = spam_CN(1)
eggs_closure_C, *_ = spam_CC(1)
ham_nested, *_ = eggs_nested_N(2)
ham_closure, *_ = eggs_nested_C(2)
ham_C_nested, *_ = eggs_closure_N(2)
ham_C_closure, *_ = eggs_closure_C(2)
TOP_FUNCTIONS = [
# shallow
simple_script,
complex_script,
script_with_globals,
script_with_explicit_empty_return,
script_with_return,
spam_minimal,
spam_with_builtins,
spam_with_globals_and_builtins,
spam_with_global_and_attr_same_name,
spam_full_args,
spam_full_args_with_defaults,
spam_args_attrs_and_builtins,
spam_returns_arg,
spam_raises,
spam_with_inner_not_closure,
spam_with_inner_closure,
spam_annotated,
spam_full,
spam,
# outer func
spam_N,
spam_C,
spam_NN,
spam_NC,
spam_CN,
spam_CC,
]
NESTED_FUNCTIONS = [
# inner func
eggs_nested,
eggs_closure,
eggs_nested_N,
eggs_nested_C,
eggs_closure_N,
eggs_closure_C,
# inner inner func
ham_nested,
ham_closure,
ham_C_nested,
ham_C_closure,
]
FUNCTIONS = [
*TOP_FUNCTIONS,
*NESTED_FUNCTIONS,
]
STATELESS_FUNCTIONS = [
simple_script,
complex_script,
script_with_explicit_empty_return,
script_with_return,
spam,
spam_minimal,
spam_with_builtins,
spam_full_args,
spam_args_attrs_and_builtins,
spam_returns_arg,
spam_raises,
spam_annotated,
spam_with_inner_not_closure,
spam_with_inner_closure,
spam_N,
spam_C,
spam_NN,
spam_NC,
spam_CN,
spam_CC,
eggs_nested,
eggs_nested_N,
ham_nested,
ham_C_nested
]
STATELESS_CODE = [
*STATELESS_FUNCTIONS,
script_with_globals,
spam_full_args_with_defaults,
spam_with_globals_and_builtins,
spam_with_global_and_attr_same_name,
spam_full,
]
PURE_SCRIPT_FUNCTIONS = [
simple_script,
complex_script,
script_with_explicit_empty_return,
spam_minimal,
spam_with_builtins,
spam_raises,
spam_with_inner_not_closure,
spam_with_inner_closure,
]
SCRIPT_FUNCTIONS = [
*PURE_SCRIPT_FUNCTIONS,
script_with_globals,
spam_with_globals_and_builtins,
spam_with_global_and_attr_same_name,
]
# generators
def gen_spam_1(*args):
for arg in args:
yield arg
def gen_spam_2(*args):
yield from args
# Minimal coroutine function: an `async def` with an empty body.
async def async_spam():
pass
# Calling a coroutine function only creates the coroutine object; the body
# does not run until it is awaited or driven.
coro_spam = async_spam()
# Closed right away. Presumably this avoids a "coroutine ... was never
# awaited" RuntimeWarning at interpreter shutdown -- TODO confirm.
coro_spam.close()
# An `async def` whose body contains `yield` is an async generator function.
async def asyncgen_spam(*args):
for arg in args:
yield arg
# Despite the `asynccoro_` prefix, calling asyncgen_spam() produces an async
# generator object, not a coroutine. It is never iterated or closed here.
asynccoro_spam = asyncgen_spam(1, 2, 3)
FUNCTION_LIKE = [
gen_spam_1,
gen_spam_2,
async_spam,
asyncgen_spam,
]
FUNCTION_LIKE_APPLIED = [
coro_spam, # actually FunctionType?
asynccoro_spam, # actually FunctionType?
]

859
Lib/test/test_code.py vendored

File diff suppressed because it is too large Load Diff

View File

@@ -30,7 +30,6 @@ class CodeopTests(unittest.TestCase):
except OverflowError:
self.assertTrue(not is_syntax)
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: <code object <module> at 0xc99532080 file "<input>", line 1> != <code object <module> at 0xc99532f80 file "<input>", line 1>
def test_valid(self):
av = self.assertValid

View File

@@ -1131,7 +1131,6 @@ class DisTests(DisTestBase):
# Test that negative operargs are handled properly
self.do_disassembly_test(bug46724, dis_bug46724)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_kw_names(self):
# Test that value is displayed for keyword argument names:
self.do_disassembly_test(wrap_func_w_kwargs, dis_kw_names)
@@ -1179,7 +1178,6 @@ class DisTests(DisTestBase):
self.do_disassembly_test(fn_with_annotate_str, dis_fn_with_annotate_str)
self.do_disassembly_test(compound_stmt_str, dis_compound_stmt_str)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_disassemble_bytes(self):
self.do_disassembly_test(_f.__code__.co_code, dis_f_co_code)

View File

@@ -123,7 +123,6 @@ class ExceptionTestCase(unittest.TestCase):
self.assertEqual(StopIteration, new)
class CodeTestCase(unittest.TestCase):
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_code(self):
co = ExceptionTestCase.test_exceptions.__code__
new = marshal.loads(marshal.dumps(co))

View File

@@ -1081,6 +1081,14 @@ impl Compiler {
),
};
// Set CO_NESTED for scopes defined inside another function/class/etc.
// (i.e., not at module level)
let flags = if self.code_stack.len() > 1 {
flags | bytecode::CodeFlags::NESTED
} else {
flags
};
// Get private name from parent scope
let private = if !self.code_stack.is_empty() {
self.code_stack.last().unwrap().private.clone()
@@ -1202,7 +1210,8 @@ impl Compiler {
// enter_scope sets default values based on scope_type, but push_output
// allows callers to specify exact values
if let Some(info) = self.code_stack.last_mut() {
info.flags = flags;
// Preserve NESTED flag set by enter_scope
info.flags = flags | (info.flags & bytecode::CodeFlags::NESTED);
info.metadata.argcount = arg_count;
info.metadata.posonlyargcount = posonlyarg_count;
info.metadata.kwonlyargcount = kwonlyarg_count;
@@ -2179,18 +2188,26 @@ impl Compiler {
}
}
ast::Stmt::Expr(ast::StmtExpr { value, .. }) => {
self.compile_expression(value)?;
// Optimize away constant expressions with no side effects.
// In interactive mode, always compile (to print the result).
let dominated_by_interactive =
self.interactive && !self.ctx.in_func() && !self.ctx.in_class;
if !dominated_by_interactive && Self::is_const_expression(value) {
// Skip compilation entirely - the expression has no side effects
} else {
self.compile_expression(value)?;
if self.interactive && !self.ctx.in_func() && !self.ctx.in_class {
emit!(
self,
Instruction::CallIntrinsic1 {
func: bytecode::IntrinsicFunction1::Print
}
);
if dominated_by_interactive {
emit!(
self,
Instruction::CallIntrinsic1 {
func: bytecode::IntrinsicFunction1::Print
}
);
}
emit!(self, Instruction::PopTop);
}
emit!(self, Instruction::PopTop);
}
ast::Stmt::Global(_) | ast::Stmt::Nonlocal(_) => {
// Handled during symbol table construction.
@@ -3756,19 +3773,23 @@ impl Compiler {
});
self.current_code_info().flags |= bytecode::CodeFlags::HAS_DOCSTRING;
}
// If no docstring, don't add None to co_consts
// Note: RETURN_GENERATOR + POP_TOP for async functions is emitted in enter_scope()
// Compile body statements
self.compile_statements(body)?;
// Emit None at end if needed
// Emit implicit `return None` if the body doesn't end with return.
// Also ensure None is in co_consts even when not emitting return
// (matching CPython: functions without explicit constants always
// have None in co_consts).
match body.last() {
Some(ast::Stmt::Return(_)) => {}
_ => {
self.emit_return_const(ConstantData::None);
}
}
// Functions with no other constants should still have None in co_consts
if self.current_code_info().metadata.consts.is_empty() {
self.arg_constant(ConstantData::None);
}
// Exit scope and create function object
let code = self.exit_scope();
@@ -6890,6 +6911,19 @@ impl Compiler {
Ok(send_block)
}
/// Returns true if the expression is a constant with no side effects.
fn is_const_expression(expr: &ast::Expr) -> bool {
matches!(
expr,
ast::Expr::StringLiteral(_)
| ast::Expr::BytesLiteral(_)
| ast::Expr::NumberLiteral(_)
| ast::Expr::BooleanLiteral(_)
| ast::Expr::NoneLiteral(_)
| ast::Expr::EllipsisLiteral(_)
)
}
fn compile_expression(&mut self, expression: &ast::Expr) -> CompileResult<()> {
trace!("Compiling {expression:?}");
let range = expression.range();

View File

@@ -190,13 +190,7 @@ impl CodeInfo {
) -> crate::InternalResult<CodeObject> {
// Always fold tuple constants
self.fold_tuple_constants();
// Python only applies LOAD_SMALL_INT conversion to module-level code
// (not inside functions). Module code lacks OPTIMIZED flag.
// Note: RustPython incorrectly sets NEWLOCALS on modules, so only check OPTIMIZED
let is_module_level = !self.flags.contains(CodeFlags::OPTIMIZED);
if is_module_level {
self.convert_to_load_small_int();
}
self.convert_to_load_small_int();
self.remove_unused_consts();
self.remove_nops();
@@ -786,8 +780,8 @@ impl CodeInfo {
continue;
};
// Check if it's in small int range: -5 to 256 (_PY_IS_SMALL_INT)
if let Some(small) = value.to_i32().filter(|v| (-5..=256).contains(v)) {
// LOAD_SMALL_INT oparg is unsigned, so only 0..=255 can be encoded
if let Some(small) = value.to_i32().filter(|v| (0..=255).contains(v)) {
// Convert LOAD_CONST to LOAD_SMALL_INT
instr.instr = Instruction::LoadSmallInt { i: Arg::marker() }.into();
// The arg is the i32 value stored as u32 (two's complement)

View File

@@ -371,6 +371,7 @@ bitflags! {
const NEWLOCALS = 0x0002;
const VARARGS = 0x0004;
const VARKEYWORDS = 0x0008;
const NESTED = 0x0010;
const GENERATOR = 0x0020;
const COROUTINE = 0x0080;
const ITERABLE_COROUTINE = 0x0100;

View File

@@ -446,13 +446,9 @@ impl TryFrom<u8> for Instruction {
let instrumented_start = u8::from(Self::InstrumentedEndFor);
let instrumented_end = u8::from(Self::InstrumentedLine);
// No RustPython-only opcodes anymore - all opcodes match CPython 3.14
let custom_ops: &[u8] = &[];
if (cpython_start..=cpython_end).contains(&value)
|| value == resume_id
|| value == enter_executor_id
|| custom_ops.contains(&value)
|| (specialized_start..=specialized_end).contains(&value)
|| (instrumented_start..=instrumented_end).contains(&value)
{

View File

@@ -10,7 +10,7 @@ use crate::{
convert::{ToPyException, ToPyObject},
frozen,
function::OptionalArg,
types::{Constructor, Representable},
types::{Comparable, Constructor, Hashable, Representable},
};
use alloc::fmt;
use core::{
@@ -447,6 +447,75 @@ impl Representable for PyCode {
}
}
impl Comparable for PyCode {
    /// Equality-only rich comparison for code objects.
    ///
    /// Ordering operators are rejected by `eq_only`; if `other` is not a
    /// `PyCode`, `class_or_notimplemented!` returns early from the closure.
    ///
    /// Two code objects are equal when their name, argument counts, flags,
    /// first line number, original instruction bytes, line table, exception
    /// table, name tables (`names`, `varnames`, `freevars`, `cellvars`) and
    /// constants all match.
    ///
    /// # Errors
    ///
    /// Propagates any exception raised while comparing a pair of constants.
    fn cmp(
        zelf: &Py<Self>,
        other: &PyObject,
        op: crate::types::PyComparisonOp,
        vm: &VirtualMachine,
    ) -> PyResult<crate::function::PyComparisonValue> {
        op.eq_only(|| {
            let other = class_or_notimplemented!(Self, other);
            let a = &zelf.code;
            let b = &other.code;

            // Plain Rust-side fields first: no VM calls and cannot fail.
            let metadata_eq = a.obj_name == b.obj_name
                && a.arg_count == b.arg_count
                && a.posonlyarg_count == b.posonlyarg_count
                && a.kwonlyarg_count == b.kwonlyarg_count
                && a.flags == b.flags
                && a.first_line_number == b.first_line_number
                && a.instructions.original_bytes() == b.instructions.original_bytes()
                && a.linetable == b.linetable
                && a.exceptiontable == b.exceptiontable
                && a.names == b.names
                && a.varnames == b.varnames
                && a.freevars == b.freevars
                && a.cellvars == b.cellvars;

            // Constants are compared last and only when everything else
            // matched, because each comparison goes through the VM and may
            // raise. They are compared in place; no temporary vectors or
            // reference-count bumps are needed.
            //
            // NOTE(review): `vm.bool_eq` presumably applies Python `==`, under
            // which e.g. `1`, `1.0` and `True` compare equal. CPython compares
            // type-tagged constant keys instead -- confirm whether that
            // distinction matters here.
            let eq = metadata_eq && a.constants.len() == b.constants.len() && {
                let mut all_equal = true;
                for (ac, bc) in a.constants.iter().zip(b.constants.iter()) {
                    if !vm.bool_eq(&ac.0, &bc.0)? {
                        all_equal = false;
                        break;
                    }
                }
                all_equal
            };
            Ok(eq.into())
        })
    }
}
impl Hashable for PyCode {
    /// Hashes a code object by packing a fixed set of its attributes into a
    /// Python tuple and hashing that tuple.
    ///
    /// Field order: name, argument count, positional-only count,
    /// keyword-only count, number of `varnames`, flag bits, first line number
    /// (`0` when absent), original instruction bytes, and the tuple of
    /// constants. The approach is intended to follow CPython's `code_hash`.
    ///
    /// # Errors
    ///
    /// Propagates any exception raised while hashing the tuple, for example
    /// from hashing one of the constants.
    fn hash(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<crate::common::hash::PyHash> {
        let code = &zelf.code;
        let ctx = &vm.ctx;

        // A missing first line number is hashed as 0.
        let first_line = code.first_line_number.map_or(0, |n| n.get()) as i64;
        let consts = ctx.new_tuple(code.constants.iter().map(|c| c.0.clone()).collect());

        let key = ctx.new_tuple(vec![
            ctx.new_str(code.obj_name.as_str()).into(),
            ctx.new_int(code.arg_count).into(),
            ctx.new_int(code.posonlyarg_count).into(),
            ctx.new_int(code.kwonlyarg_count).into(),
            ctx.new_int(code.varnames.len()).into(),
            ctx.new_int(code.flags.bits()).into(),
            ctx.new_int(first_line).into(),
            ctx.new_bytes(code.instructions.original_bytes()).into(),
            consts.into(),
        ]);
        key.as_object().hash(vm)
    }
}
// Arguments for code object constructor
#[derive(FromArgs)]
pub struct PyCodeNewArgs {
@@ -595,7 +664,10 @@ impl Constructor for PyCode {
}
}
#[pyclass(with(Representable, Constructor), flags(HAS_WEAKREF))]
#[pyclass(
with(Representable, Constructor, Comparable, Hashable),
flags(HAS_WEAKREF)
)]
impl PyCode {
#[pygetset]
const fn co_posonlyargcount(&self) -> usize {
@@ -721,6 +793,11 @@ impl PyCode {
vm.ctx.new_bytes(self.code.exceptiontable.to_vec())
}
// spell-checker: ignore lnotab
// co_lnotab is intentionally not implemented.
// It was deprecated since 3.12 and scheduled for removal in 3.14.
// Use co_lines() or co_linetable instead.
#[pymethod]
pub fn co_lines(&self, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
// TODO: Implement lazy iterator (lineiterator) like CPython for better performance
@@ -992,6 +1069,11 @@ impl PyCode {
vm.call_method(list.as_object(), "__iter__", ())
}
#[pymethod]
// Exposes `replace` under the `__replace__` dunder name as well; it forwards
// the arguments unchanged and returns whatever `replace` returns.
// NOTE(review): presumably added so `copy.replace()` (Python 3.13+) works on
// code objects -- confirm against the upgraded test_code expectations.
pub fn __replace__(&self, args: ReplaceArgs, vm: &VirtualMachine) -> PyResult<Self> {
self.replace(args, vm)
}
#[pymethod]
pub fn replace(&self, args: ReplaceArgs, vm: &VirtualMachine) -> PyResult<Self> {
let ReplaceArgs {