diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 7782e6ec4..b0608d115 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -3306,12 +3306,19 @@ impl Compiler { // Set qualname self.set_qualname(); - // Handle docstring + // Handle docstring - store in co_consts[0] if present let (doc_str, body) = split_doc(body, &self.opts); - self.current_code_info() - .metadata - .consts - .insert_full(ConstantData::None); + if let Some(doc) = &doc_str { + // Docstring present: store in co_consts[0] and set HAS_DOCSTRING flag + self.current_code_info() + .metadata + .consts + .insert_full(ConstantData::Str { + value: doc.to_string().into(), + }); + self.current_code_info().flags |= bytecode::CodeFlags::HAS_DOCSTRING; + } + // If no docstring, don't add None to co_consts // Compile body statements self.compile_statements(body)?; @@ -3331,16 +3338,8 @@ impl Compiler { // Create function object with closure self.make_closure(code, funcflags)?; - // Handle docstring if present - if let Some(doc) = doc_str { - emit!(self, Instruction::Copy { index: 1_u32 }); - self.emit_load_const(ConstantData::Str { - value: doc.to_string().into(), - }); - emit!(self, Instruction::Swap { index: 2 }); - let doc_attr = self.name("__doc__"); - emit!(self, Instruction::StoreAttr { idx: doc_attr }); - } + // Note: docstring is now retrieved from co_consts[0] by the VM + // when HAS_DOCSTRING flag is set, so no runtime __doc__ assignment needed Ok(()) } @@ -6100,10 +6099,7 @@ impl Compiler { in_async_scope: false, }; - self.current_code_info() - .metadata - .consts - .insert_full(ConstantData::None); + // Lambda cannot have docstrings, so no None is added to co_consts self.compile_expression(body)?; self.emit_return_value(); diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index a0054b288..c59a64fea 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -290,13 +290,16 @@ pub struct CodeObject { bitflags! { #[derive(Copy, Clone, Debug, PartialEq)] - pub struct CodeFlags: u16 { + pub struct CodeFlags: u32 { const OPTIMIZED = 0x0001; const NEWLOCALS = 0x0002; const VARARGS = 0x0004; const VARKEYWORDS = 0x0008; const GENERATOR = 0x0020; const COROUTINE = 0x0080; + /// If a code object represents a function and has a docstring, + /// this bit is set and the first item in co_consts is the docstring. + const HAS_DOCSTRING = 0x4000000; } } diff --git a/crates/compiler-core/src/marshal.rs b/crates/compiler-core/src/marshal.rs index 5b528fe7e..d9bf368ec 100644 --- a/crates/compiler-core/src/marshal.rs +++ b/crates/compiler-core/src/marshal.rs @@ -202,7 +202,7 @@ pub fn deserialize_code( }) .collect::>>()?; - let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?); + let flags = CodeFlags::from_bits_truncate(rdr.read_u32()?); let posonlyarg_count = rdr.read_u32()?; let arg_count = rdr.read_u32()?; @@ -660,7 +660,7 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) buf.write_u32(end.character_offset.to_zero_indexed() as _); } - buf.write_u16(code.flags.bits()); + buf.write_u32(code.flags.bits()); buf.write_u32(code.posonlyarg_count); buf.write_u32(code.arg_count); diff --git a/crates/vm/src/builtins/code.rs b/crates/vm/src/builtins/code.rs index 85816aabb..6507af342 100644 --- a/crates/vm/src/builtins/code.rs +++ b/crates/vm/src/builtins/code.rs @@ -152,7 +152,7 @@ pub struct ReplaceArgs { #[pyarg(named, optional)] co_names: OptionalArg>, #[pyarg(named, optional)] - co_flags: OptionalArg, + co_flags: OptionalArg, #[pyarg(named, optional)] co_varnames: OptionalArg>, #[pyarg(named, optional)] @@ -411,7 +411,7 @@ pub struct PyCodeNewArgs { kwonlyargcount: u32, nlocals: u32, stacksize: u32, - flags: u16, + flags: u32, co_code: PyBytesRef, consts: PyTupleRef, names: PyTupleRef, @@ -628,7 +628,7 @@ impl PyCode { } #[pygetset] - const fn co_flags(&self) -> u16 { + const fn co_flags(&self) -> u32 { self.code.flags.bits() } diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 3d5159e58..27a3b8fee 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -77,6 +77,17 @@ impl PyFunction { builtins }; + // Get docstring from co_consts[0] if HAS_DOCSTRING flag is set + let doc = if code.code.flags.contains(bytecode::CodeFlags::HAS_DOCSTRING) { + code.code + .constants + .first() + .map(|c| c.as_object().to_owned()) + .unwrap_or_else(|| vm.ctx.none()) + } else { + vm.ctx.none() + }; + let qualname = vm.ctx.new_str(code.qualname.as_str()); let func = Self { code: PyMutex::new(code.clone()), @@ -89,7 +100,7 @@ impl PyFunction { type_params: PyMutex::new(vm.ctx.empty_tuple.clone()), annotations: PyMutex::new(vm.ctx.new_dict()), module: PyMutex::new(module), - doc: PyMutex::new(vm.ctx.none()), + doc: PyMutex::new(doc), #[cfg(feature = "jit")] jitted_code: OnceCell::new(), }; diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index 982e11afd..eb9d226ac 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -244,6 +244,12 @@ fn object_getstate_default(obj: &PyObject, required: bool, vm: &VirtualMachine) let slots = vm.ctx.new_dict(); for i in 0..slot_names_len { let borrowed_names = slot_names.borrow_vec(); + // Check if slotnames changed during iteration + if borrowed_names.len() != slot_names_len { + return Err(vm.new_runtime_error( + "__slotnames__ changed size during iteration".to_owned(), + )); + } let name = borrowed_names[i].downcast_ref::().unwrap(); let Ok(value) = obj.get_attr(name, vm) else { continue; @@ -702,11 +708,13 @@ fn reduce_newobj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { (newobj, newargs.into()) } else { + // args == NULL with non-empty kwargs is BadInternalCall + let Some(args) = args else { + return Err(vm.new_system_error("bad internal call".to_owned())); + }; // Use copyreg.__newobj_ex__ let newobj = copyreg.get_attr("__newobj_ex__", vm)?; - let args_tuple: PyObjectRef = args - .map(|a| a.into()) - .unwrap_or_else(|| vm.ctx.empty_tuple.clone().into()); + let args_tuple: PyObjectRef = args.into(); let kwargs_dict: PyObjectRef = kwargs .map(|k| k.into()) .unwrap_or_else(|| vm.ctx.new_dict().into()); diff --git a/extra_tests/snippets/code_co_consts.py b/extra_tests/snippets/code_co_consts.py index 583556526..13f76a0d1 100644 --- a/extra_tests/snippets/code_co_consts.py +++ b/extra_tests/snippets/code_co_consts.py @@ -1,39 +1,112 @@ +""" +Test co_consts behavior for Python 3.14+ + +In Python 3.14+: +- Functions with docstrings have the docstring as co_consts[0] +- CO_HAS_DOCSTRING flag (0x4000000) indicates docstring presence +- Functions without docstrings do NOT have None added as placeholder for docstring + +Note: Other constants (small integers, code objects, etc.) may still appear in co_consts +depending on optimization level. This test focuses on docstring behavior. +""" + + +# Test function with docstring - docstring should be co_consts[0] +def with_doc(): + """This is a docstring""" + return 1 + + +assert with_doc.__code__.co_consts[0] == "This is a docstring", ( + with_doc.__code__.co_consts +) +assert with_doc.__doc__ == "This is a docstring" +# Check CO_HAS_DOCSTRING flag (0x4000000) +assert with_doc.__code__.co_flags & 0x4000000, hex(with_doc.__code__.co_flags) + + +# Test function without docstring - should NOT have HAS_DOCSTRING flag +def no_doc(): + return 1 + + +assert not (no_doc.__code__.co_flags & 0x4000000), hex(no_doc.__code__.co_flags) +assert no_doc.__doc__ is None + + +# Test async function with docstring from asyncio import sleep -def f(): - def g(): - return 1 - - assert g.__code__.co_consts[0] == None - return 2 - - -assert f.__code__.co_consts[0] == None - - -def generator(): - yield 1 - yield 2 - - -assert generator().gi_code.co_consts[0] == None - - -async def async_f(): +async def async_with_doc(): + """Async docstring""" await sleep(1) return 1 -assert async_f.__code__.co_consts[0] == None +assert async_with_doc.__code__.co_consts[0] == "Async docstring", ( + async_with_doc.__code__.co_consts +) +assert async_with_doc.__doc__ == "Async docstring" +assert async_with_doc.__code__.co_flags & 0x4000000 + +# Test async function without docstring +async def async_no_doc(): + await sleep(1) + return 1 + + +assert not (async_no_doc.__code__.co_flags & 0x4000000) +assert async_no_doc.__doc__ is None + + +# Test generator with docstring +def gen_with_doc(): + """Generator docstring""" + yield 1 + yield 2 + + +assert gen_with_doc.__code__.co_consts[0] == "Generator docstring" +assert gen_with_doc.__doc__ == "Generator docstring" +assert gen_with_doc.__code__.co_flags & 0x4000000 + + +# Test generator without docstring +def gen_no_doc(): + yield 1 + yield 2 + + +assert not (gen_no_doc.__code__.co_flags & 0x4000000) +assert gen_no_doc.__doc__ is None + + +# Test lambda - cannot have docstring lambda_f = lambda: 0 -assert lambda_f.__code__.co_consts[0] == None +assert not (lambda_f.__code__.co_flags & 0x4000000) +assert lambda_f.__doc__ is None -class cls: - def f(): +# Test class method with docstring +class cls_with_doc: + def method(): + """Method docstring""" return 1 -assert cls().f.__code__.co_consts[0] == None +assert cls_with_doc.method.__code__.co_consts[0] == "Method docstring" +assert cls_with_doc.method.__doc__ == "Method docstring" + + +# Test class method without docstring +class cls_no_doc: + def method(): + return 1 + + +assert not (cls_no_doc.method.__code__.co_flags & 0x4000000) +assert cls_no_doc.method.__doc__ is None + +print("All co_consts tests passed!") diff --git a/extra_tests/snippets/example_interactive.py b/extra_tests/snippets/example_interactive.py index f9484f15d..5958dd117 100644 --- a/extra_tests/snippets/example_interactive.py +++ b/extra_tests/snippets/example_interactive.py @@ -4,7 +4,7 @@ code_class = type(c1) def f(x, y, *args, power=1, **kwargs): - print("Constant String", 2, None, (2, 4)) + print("Constant String", 256, None, (2, 4)) assert code_class == type(c1) z = x * y return z**power @@ -19,7 +19,7 @@ assert c2.co_argcount == 2 # assert isinstance(c2.co_code, bytes) assert "Constant String" in c2.co_consts, c2.co_consts print(c2.co_consts) -assert 2 in c2.co_consts, c2.co_consts +assert 256 in c2.co_consts, c2.co_consts assert "example_interactive.py" in c2.co_filename assert c2.co_firstlineno == 6, str(c2.co_firstlineno) # assert isinstance(c2.co_flags, int) # 'OPTIMIZED, NEWLOCALS, NOFREE'