From 7620c28482ece38a8e66eba018fd62bbb7db91c5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Fri, 6 Mar 2026 00:59:53 +0900 Subject: [PATCH] Tighten specialization guards and add send_none fastpath (#7359) * vm: align specialization guards with CPython patterns * vm: tighten call specialization runtime guards * vm: add send_none fastpath for generator specialization * vm: restrict method-descriptor specialization to methods * vm: deopt call specializations on guard misses * vm: match CPython send/for-iter closed-frame guards * vm: restrict len/isinstance specialization to builtins * vm: use exact-type guards for call specializations * vm: align class-call specialization flow with CPython * vm: prefer FAST call opcodes for positional builtin calls * vm: add callable identity guard to CALL_LIST_APPEND * vm: make CALL_LIST_APPEND runtime guard pointer-based * vm: align call guard cache and fallback behavior with CPython * vm: use base vectorcall fallback for EXIT-style call misses * vm: simplify CALL_LEN/CALL_ISINSTANCE runtime guards * vm: infer call-convention flags for CPython-style CALL specialization * vm: check use_tracing in eval_frame_active, add SendGen send_none - Implement specialization_eval_frame_active to check vm.use_tracing so specializations are skipped when tracing/profiling is active - Add send_none fastpath in SendGen handler for the common None case --- crates/derive-impl/src/pyclass.rs | 24 +- crates/derive-impl/src/pymodule.rs | 9 +- crates/derive-impl/src/util.rs | 71 +++ crates/vm/src/coroutine.rs | 63 ++- crates/vm/src/frame.rs | 762 ++++++++++++++++++++--------- crates/vm/src/function/method.rs | 10 +- 6 files changed, 667 insertions(+), 272 deletions(-) diff --git a/crates/derive-impl/src/pyclass.rs b/crates/derive-impl/src/pyclass.rs index dfb02a3ed..a65320cdb 100644 --- a/crates/derive-impl/src/pyclass.rs +++ b/crates/derive-impl/src/pyclass.rs @@ -1,8 +1,8 @@ use super::Diagnostic; use crate::util::{ ALL_ALLOWED_NAMES, ClassItemMeta, ContentItem, ContentItemInner, ErrorVec, ExceptionItemMeta, - ItemMeta, ItemMetaInner, ItemNursery, SimpleItemMeta, format_doc, pyclass_ident_and_attrs, - pyexception_ident_and_attrs, text_signature, + ItemMeta, ItemMetaInner, ItemNursery, SimpleItemMeta, format_doc, infer_native_call_flags, + pyclass_ident_and_attrs, pyexception_ident_and_attrs, text_signature, }; use core::str::FromStr; use proc_macro2::{Delimiter, Group, Span, TokenStream, TokenTree}; @@ -1015,6 +1015,16 @@ where let raw = item_meta.raw()?; let sig_doc = text_signature(func.sig(), &py_name); + let has_receiver = func + .sig() + .inputs + .iter() + .any(|arg| matches!(arg, syn::FnArg::Receiver(_))); + let drop_first_typed = match self.inner.attr_name { + AttrName::Method | AttrName::ClassMethod if !has_receiver => 1, + _ => 0, + }; + let call_flags = infer_native_call_flags(func.sig(), drop_first_typed); // Add #[allow(non_snake_case)] for setter methods like set___name__ let method_name = ident.to_string(); @@ -1031,6 +1041,7 @@ where doc, raw, attr_name: self.inner.attr_name, + call_flags, }); Ok(()) } @@ -1248,6 +1259,7 @@ struct MethodNurseryItem { raw: bool, doc: Option, attr_name: AttrName, + call_flags: TokenStream, } impl MethodNursery { @@ -1278,7 +1290,7 @@ impl ToTokens for MethodNursery { } else { quote! { None } }; - let flags = match &item.attr_name { + let binding_flags = match &item.attr_name { AttrName::Method => { quote! { rustpython_vm::function::PyMethodFlags::METHOD } } @@ -1290,6 +1302,12 @@ impl ToTokens for MethodNursery { } _ => unreachable!(), }; + let call_flags = &item.call_flags; + let flags = quote! { + rustpython_vm::function::PyMethodFlags::from_bits_retain( + (#binding_flags).bits() | (#call_flags).bits() + ) + }; // TODO: intern // let py_name = if py_name.starts_with("__") && py_name.ends_with("__") { // let name_ident = Ident::new(&py_name, ident.span()); diff --git a/crates/derive-impl/src/pymodule.rs b/crates/derive-impl/src/pymodule.rs index 775e68585..b4b553520 100644 --- a/crates/derive-impl/src/pymodule.rs +++ b/crates/derive-impl/src/pymodule.rs @@ -2,8 +2,8 @@ use crate::error::Diagnostic; use crate::pystructseq::PyStructSequenceMeta; use crate::util::{ ALL_ALLOWED_NAMES, AttrItemMeta, AttributeExt, ClassItemMeta, ContentItem, ContentItemInner, - ErrorVec, ItemMeta, ItemNursery, ModuleItemMeta, SimpleItemMeta, format_doc, iter_use_idents, - pyclass_ident_and_attrs, text_signature, + ErrorVec, ItemMeta, ItemNursery, ModuleItemMeta, SimpleItemMeta, format_doc, + infer_native_call_flags, iter_use_idents, pyclass_ident_and_attrs, text_signature, }; use core::str::FromStr; use proc_macro2::{Delimiter, Group, TokenStream, TokenTree}; @@ -525,6 +525,7 @@ struct FunctionNurseryItem { cfgs: Vec, ident: Ident, doc: String, + call_flags: TokenStream, } impl FunctionNursery { @@ -550,7 +551,6 @@ struct ValidatedFunctionNursery(FunctionNursery); impl ToTokens for ValidatedFunctionNursery { fn to_tokens(&self, tokens: &mut TokenStream) { let mut inner_tokens = TokenStream::new(); - let flags = quote! { rustpython_vm::function::PyMethodFlags::empty() }; for item in &self.0.items { let ident = &item.ident; let cfgs = &item.cfgs; @@ -558,6 +558,7 @@ impl ToTokens for ValidatedFunctionNursery { let py_names = &item.py_names; let doc = &item.doc; let doc = quote!(Some(#doc)); + let flags = &item.call_flags; inner_tokens.extend(quote![ #( @@ -706,12 +707,14 @@ impl ModuleItem for FunctionItem { py_names } }; + let call_flags = infer_native_call_flags(func.sig(), 0); args.context.function_items.add_item(FunctionNurseryItem { ident: ident.to_owned(), py_names, cfgs: args.cfgs.to_vec(), doc, + call_flags, }); Ok(()) } diff --git a/crates/derive-impl/src/util.rs b/crates/derive-impl/src/util.rs index a4bf7e6a8..068bde9bc 100644 --- a/crates/derive-impl/src/util.rs +++ b/crates/derive-impl/src/util.rs @@ -732,6 +732,77 @@ pub(crate) fn text_signature(sig: &Signature, name: &str) -> String { } } +pub(crate) fn infer_native_call_flags(sig: &Signature, drop_first_typed: usize) -> TokenStream { + // Best-effort mapping of Rust function signatures to CPython-style + // METH_* calling convention flags used by CALL specialization. + let mut typed_args = Vec::new(); + for arg in &sig.inputs { + let syn::FnArg::Typed(typed) = arg else { + continue; + }; + let ty_tokens = &typed.ty; + let ty = quote!(#ty_tokens).to_string().replace(' ', ""); + // `vm: &VirtualMachine` is not a Python-level argument. + if ty.starts_with('&') && ty.ends_with("VirtualMachine") { + continue; + } + typed_args.push(ty); + } + + let mut user_args = typed_args.into_iter(); + for _ in 0..drop_first_typed { + if user_args.next().is_none() { + break; + } + } + + let mut has_keywords = false; + let mut variable_arity = false; + let mut fixed_positional = 0usize; + + for ty in user_args { + let is_named = |name: &str| { + ty == name + || ty.starts_with(&format!("{name}<")) + || ty.contains(&format!("::{name}<")) + || ty.ends_with(&format!("::{name}")) + }; + + if is_named("FuncArgs") { + has_keywords = true; + variable_arity = true; + continue; + } + if is_named("KwArgs") { + has_keywords = true; + variable_arity = true; + continue; + } + if is_named("PosArgs") || is_named("OptionalArg") || is_named("OptionalOption") { + variable_arity = true; + continue; + } + fixed_positional += 1; + } + + if has_keywords { + quote! { + rustpython_vm::function::PyMethodFlags::from_bits_retain( + rustpython_vm::function::PyMethodFlags::FASTCALL.bits() + | rustpython_vm::function::PyMethodFlags::KEYWORDS.bits() + ) + } + } else if variable_arity { + quote! { rustpython_vm::function::PyMethodFlags::FASTCALL } + } else { + match fixed_positional { + 0 => quote! { rustpython_vm::function::PyMethodFlags::NOARGS }, + 1 => quote! { rustpython_vm::function::PyMethodFlags::O }, + _ => quote! { rustpython_vm::function::PyMethodFlags::FASTCALL }, + } + } +} + fn func_sig(sig: &Signature) -> String { sig.inputs .iter() diff --git a/crates/vm/src/coroutine.rs b/crates/vm/src/coroutine.rs index ac7aeba54..c4c2df6c1 100644 --- a/crates/vm/src/coroutine.rs +++ b/crates/vm/src/coroutine.rs @@ -115,6 +115,48 @@ impl Coro { result } + fn finalize_send_result( + &self, + jen: &PyObject, + result: PyResult, + vm: &VirtualMachine, + ) -> PyResult { + self.maybe_close(&result); + match result { + Ok(exec_res) => Ok(exec_res.into_iter_return(vm)), + Err(e) => { + if e.fast_isinstance(vm.ctx.exceptions.stop_iteration) { + let err = + vm.new_runtime_error(format!("{} raised StopIteration", gen_name(jen, vm))); + err.set___cause__(Some(e)); + Err(err) + } else if jen.class().is(vm.ctx.types.async_generator) + && e.fast_isinstance(vm.ctx.exceptions.stop_async_iteration) + { + let err = vm.new_runtime_error("async generator raised StopAsyncIteration"); + err.set___cause__(Some(e)); + Err(err) + } else { + Err(e) + } + } + } + } + + pub(crate) fn send_none(&self, jen: &PyObject, vm: &VirtualMachine) -> PyResult { + if self.closed.load() { + return Ok(PyIterReturn::StopIteration(None)); + } + self.frame.locals_to_fast(vm)?; + let value = if self.frame.lasti() > 0 { + Some(vm.ctx.none()) + } else { + None + }; + let result = self.run_with_context(jen, vm, |f| f.resume(value, vm)); + self.finalize_send_result(jen, result, vm) + } + pub fn send( &self, jen: &PyObject, @@ -136,26 +178,7 @@ impl Coro { None }; let result = self.run_with_context(jen, vm, |f| f.resume(value, vm)); - self.maybe_close(&result); - match result { - Ok(exec_res) => Ok(exec_res.into_iter_return(vm)), - Err(e) => { - if e.fast_isinstance(vm.ctx.exceptions.stop_iteration) { - let err = - vm.new_runtime_error(format!("{} raised StopIteration", gen_name(jen, vm))); - err.set___cause__(Some(e)); - Err(err) - } else if jen.class().is(vm.ctx.types.async_generator) - && e.fast_isinstance(vm.ctx.exceptions.stop_async_iteration) - { - let err = vm.new_runtime_error("async generator raised StopAsyncIteration"); - err.set___cause__(Some(e)); - Err(err) - } else { - Err(e) - } - } - } + self.finalize_send_result(jen, result, vm) } pub fn throw( diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index bf725d8bc..47d583b57 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -23,7 +23,7 @@ use crate::{ convert::{ToPyObject, ToPyResult}, coroutine::Coro, exceptions::ExceptionCtor, - function::{ArgMapping, Either, FuncArgs}, + function::{ArgMapping, Either, FuncArgs, PyMethodFlags}, object::PyAtomicBorrow, object::{Traverse, TraverseFn}, protocol::{PyIter, PyIterReturn}, @@ -2030,7 +2030,7 @@ impl ExecutingFrame<'_> { Instruction::ForIter { .. } => { // Relative forward jump: target = lasti + caches + delta let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); - self.adaptive(|s, ii, cb| s.specialize_for_iter(vm, ii, cb)); + self.adaptive(|s, ii, cb| s.specialize_for_iter(vm, u32::from(arg), ii, cb)); self.execute_for_iter(vm, target)?; Ok(None) } @@ -3150,12 +3150,28 @@ impl ExecutingFrame<'_> { } Instruction::Send { .. } => { // (receiver, v -- receiver, retval) - self.adaptive(|s, ii, cb| s.specialize_send(ii, cb)); + self.adaptive(|s, ii, cb| s.specialize_send(vm, ii, cb)); let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let receiver = self.nth_value(1); + let can_fast_send = !self.specialization_eval_frame_active(vm) + && (receiver.downcast_ref_if_exact::(vm).is_some() + || receiver.downcast_ref_if_exact::(vm).is_some()) + && self + .builtin_coro(receiver) + .is_some_and(|coro| !coro.running() && !coro.closed()); let val = self.pop_value(); let receiver = self.top_value(); - - match self._send(receiver, val, vm)? { + let ret = if can_fast_send { + let coro = self.builtin_coro(receiver).unwrap(); + if vm.is_none(&val) { + coro.send_none(receiver, vm)? + } else { + coro.send(receiver, val, vm)? + } + } else { + self._send(receiver, val, vm)? + }; + match ret { PyIterReturn::Return(value) => { self.push_value(value); Ok(None) @@ -3176,13 +3192,23 @@ impl ExecutingFrame<'_> { let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); // Stack: [receiver, val] — peek receiver before popping let receiver = self.nth_value(1); - let is_coro = self.builtin_coro(receiver).is_some(); + let can_fast_send = !self.specialization_eval_frame_active(vm) + && (receiver.downcast_ref_if_exact::(vm).is_some() + || receiver.downcast_ref_if_exact::(vm).is_some()) + && self + .builtin_coro(receiver) + .is_some_and(|coro| !coro.running() && !coro.closed()); let val = self.pop_value(); - let receiver = self.top_value(); - if is_coro { + if can_fast_send { + let receiver = self.top_value(); let coro = self.builtin_coro(receiver).unwrap(); - match coro.send(receiver, val, vm)? { + let ret = if vm.is_none(&val) { + coro.send_none(receiver, vm)? + } else { + coro.send(receiver, val, vm)? + }; + match ret { PyIterReturn::Return(value) => { self.push_value(value); return Ok(None); @@ -3199,6 +3225,10 @@ impl ExecutingFrame<'_> { } } } + self.deoptimize(Instruction::Send { + delta: Arg::marker(), + }); + let receiver = self.top_value(); match self._send(receiver, val, vm)? { PyIterReturn::Return(value) => { self.push_value(value); @@ -3998,14 +4028,14 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); // Stack: [callable, self_or_null, arg1, ..., argN] let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() + if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); - let func = callable.downcast_ref::().unwrap(); + let func = callable.downcast_ref_if_exact::(vm).unwrap(); let args = if let Some(self_val) = self_or_null { let mut all_args = Vec::with_capacity(pos_args.len() + 1); all_args.push(self_val); @@ -4018,6 +4048,9 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4035,11 +4068,11 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 1); if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { let bound_function = bound_method.function_obj().clone(); let bound_self = bound_method.self_obj().clone(); - if let Some(func) = bound_function.downcast_ref::() + if let Some(func) = bound_function.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4054,28 +4087,25 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) - } else { - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallLen => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); // arg let null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - let is_len_callable = callable - .downcast_ref_if_exact::(vm) - .is_some_and(|native| native.zelf.is_none() && native.value.name == "len"); - if null.is_none() && cached_tag == callable_tag && is_len_callable { + let callable_ptr = &*callable as *const PyObject as usize; + if null.is_none() && cached_ptr == callable_ptr { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); return Ok(None); @@ -4094,7 +4124,7 @@ impl ExecutingFrame<'_> { Instruction::CallIsinstance => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self @@ -4104,13 +4134,8 @@ impl ExecutingFrame<'_> { let effective_nargs = nargs + u32::from(self_or_null_is_some); if effective_nargs == 2 { let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - let is_isinstance_callable = callable - .downcast_ref_if_exact::(vm) - .is_some_and(|native| { - native.zelf.is_none() && native.value.name == "isinstance" - }); - if cached_tag == callable_tag && is_isinstance_callable { + let callable_ptr = callable as *const PyObject as usize; + if cached_ptr == callable_ptr { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -4210,30 +4235,30 @@ impl ExecutingFrame<'_> { .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable - .downcast_ref_if_exact::(vm) - .is_some() - && effective_nargs == 1 - { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(effective_nargs as usize); - if let Some(self_val) = self_or_null { - args_vec.push(self_val); + if let Some(native) = callable.downcast_ref_if_exact::(vm) { + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + if call_conv == PyMethodFlags::O && effective_nargs == 1 { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } - args_vec.extend(pos_args); - let result = - callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; - self.push_value(result); - return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinFast => { let nargs: u32 = arg.into(); @@ -4244,29 +4269,33 @@ impl ExecutingFrame<'_> { .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable - .downcast_ref_if_exact::(vm) - .is_some() - { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(effective_nargs as usize); - if let Some(self_val) = self_or_null { - args_vec.push(self_val); + if let Some(native) = callable.downcast_ref_if_exact::(vm) { + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + if call_conv == PyMethodFlags::FASTCALL { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } - args_vec.extend(pos_args); - let result = - callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; - self.push_value(result); - return Ok(None); } self.deoptimize(Instruction::Call { argc: Arg::marker(), }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallPyGeneral => { let instr_idx = self.lasti() as usize - 1; @@ -4274,7 +4303,7 @@ impl ExecutingFrame<'_> { let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() + if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4314,11 +4343,11 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 1); if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { let bound_function = bound_method.function_obj().clone(); let bound_self = bound_method.self_obj().clone(); - if let Some(func) = bound_function.downcast_ref::() + if let Some(func) = bound_function.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4339,41 +4368,31 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) - } else { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) } + self.execute_call_vectorcall(nargs, vm) } Instruction::CallListAppend => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, item] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let self_is_exact_list = self + let callable_ptr = callable as *const PyObject as usize; + let self_is_list = self .localsplus .stack_index(stack_len - 2) .as_ref() - .is_some_and(|obj| obj.class().is(vm.ctx.types.list_type)); - let is_list_append = - callable - .downcast_ref::() - .is_some_and(|descr| { - descr.method.name == "append" - && descr.objclass.is(vm.ctx.types.list_type) - }); - if is_list_append && self_or_null_is_some && self_is_exact_list { + .is_some_and(|obj| obj.downcast_ref::().is_some()); + if cached_ptr == callable_ptr && self_or_null_is_some && self_is_list { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); if let Some(list_obj) = self_or_null.as_ref() - && let Some(list) = list_obj.downcast_ref_if_exact::(vm) + && let Some(list) = list_obj.downcast_ref::() { list.append(item); // CALL_LIST_APPEND fuses the following POP_TOP. @@ -4401,14 +4420,27 @@ impl ExecutingFrame<'_> { let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 1).is_some(); let callable = self.nth_value(1); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref_if_exact::(vm) } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == PyMethodFlags::NOARGS + && self + .localsplus + .stack_index(stack_len - 1) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let self_val = self.pop_value_opt().unwrap(); self.pop_value(); // callable let args = FuncArgs { @@ -4420,9 +4452,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorO => { @@ -4432,14 +4461,27 @@ impl ExecutingFrame<'_> { let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref_if_exact::(vm) } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == PyMethodFlags::O + && self + .localsplus + .stack_index(stack_len - 2) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let obj = self.pop_value(); let self_val = self.pop_value_opt().unwrap(); self.pop_value(); // callable @@ -4452,9 +4494,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorFast => { @@ -4465,14 +4504,27 @@ impl ExecutingFrame<'_> { .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref_if_exact::(vm) } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == PyMethodFlags::FASTCALL + && self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_val = self.pop_value_opt().unwrap(); @@ -4488,9 +4540,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinClass => { @@ -4518,6 +4567,9 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallAllocAndEnterInit => { @@ -4538,7 +4590,7 @@ impl ExecutingFrame<'_> { { // Look up __init__ (guarded by type_version) if let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref::() + && let Some(init_func) = init.downcast_ref_if_exact::(vm) && init_func.can_specialize_call(nargs + 1) { // Allocate object directly (tp_new == object.__new__) @@ -4592,14 +4644,27 @@ impl ExecutingFrame<'_> { .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref_if_exact::(vm) } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) + && self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_val = self.pop_value_opt().unwrap(); @@ -4615,9 +4680,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinFastWithKeywords => { @@ -4630,23 +4692,28 @@ impl ExecutingFrame<'_> { .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable - .downcast_ref_if_exact::(vm) - .is_some() - { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(effective_nargs as usize); - if let Some(self_val) = self_or_null { - args_vec.push(self_val); + if let Some(native) = callable.downcast_ref_if_exact::(vm) { + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + if call_conv == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } - args_vec.extend(pos_args); - let result = - callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; - self.push_value(result); - return Ok(None); } self.deoptimize(Instruction::Call { argc: Arg::marker(), @@ -4661,11 +4728,12 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - nargs as usize - 1) .is_some(); let callable = self.nth_value(nargs + 1); - if callable.downcast_ref::().is_some() - || callable.downcast_ref::().is_some() + if callable.downcast_ref_if_exact::(vm).is_some() + || callable + .downcast_ref_if_exact::(vm) + .is_some() { - let args = self.collect_positional_args(nargs); - return self.execute_call(args, vm); + return self.execute_call_vectorcall(nargs, vm); } let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); @@ -4693,7 +4761,7 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] let callable = self.nth_value(nargs + 2); - if let Some(func) = callable.downcast_ref::() + if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4745,11 +4813,11 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 2); if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { let bound_function = bound_method.function_obj().clone(); let bound_self = bound_method.self_obj().clone(); - if let Some(func) = bound_function.downcast_ref::() + if let Some(func) = bound_function.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4792,11 +4860,12 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - nargs as usize - 2) .is_some(); let callable = self.nth_value(nargs + 2); - if callable.downcast_ref::().is_some() - || callable.downcast_ref::().is_some() + if callable.downcast_ref_if_exact::(vm).is_some() + || callable + .downcast_ref_if_exact::(vm) + .is_some() { - let args = self.collect_keyword_args(nargs); - return self.execute_call(args, vm); + return self.execute_call_kw_vectorcall(nargs, vm); } let nargs_usize = nargs as usize; let kwarg_names_obj = self.pop_value(); @@ -5242,6 +5311,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5257,6 +5329,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5272,6 +5347,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5279,8 +5357,22 @@ impl ExecutingFrame<'_> { Instruction::ForIterGen => { let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); let iter = self.top_value(); + if self.specialization_eval_frame_active(vm) { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); + self.execute_for_iter(vm, target)?; + return Ok(None); + } if let Some(generator) = iter.downcast_ref_if_exact::(vm) { - match generator.as_coro().send(iter, vm.ctx.none(), vm) { + if generator.as_coro().running() || generator.as_coro().closed() { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); + self.execute_for_iter(vm, target)?; + return Ok(None); + } + match generator.as_coro().send_none(iter, vm) { Ok(PyIterReturn::Return(value)) => { self.push_value(value); } @@ -5295,6 +5387,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -6334,7 +6429,7 @@ impl ExecutingFrame<'_> { args }; - let is_python_call = callable.downcast_ref::().is_some(); + let is_python_call = callable.downcast_ref_if_exact::(vm).is_some(); // Fire CALL event let call_arg0 = if self.monitoring_mask & monitoring::EVENT_CALL != 0 { @@ -6637,7 +6732,7 @@ impl ExecutingFrame<'_> { let func = self.top_value(); // Get the function reference and call the new method let func_ref = func - .downcast_ref::() + .downcast_ref_if_exact::(vm) .expect("SET_FUNCTION_ATTRIBUTE expects function on stack"); let payload: &PyFunction = func_ref.payload(); @@ -7615,7 +7710,18 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() { + if let Some(func) = callable.downcast_ref_if_exact::(vm) { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7651,11 +7757,50 @@ impl ExecutingFrame<'_> { // Bound Python method object (`method`) specialization. if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() - && let Some(func) = bound_method.function_obj().downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { - let version = func.get_version_for_current_state(); - if version == 0 { + if let Some(func) = bound_method + .function_obj() + .downcast_ref_if_exact::(vm) + { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + + let new_op = if func.can_specialize_call(nargs + 1) { + Instruction::CallBoundMethodExactArgs + } else { + Instruction::CallBoundMethodGeneral + }; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, version); + } + self.specialize_at(instr_idx, cache_base, new_op); + } else { + // Match CPython: bound methods wrapping non-Python callables + // are not specialized as CALL_NON_PY_GENERAL. unsafe { self.code.instructions.write_adaptive_counter( cache_base, @@ -7664,25 +7809,15 @@ impl ExecutingFrame<'_> { ), ); } - return; } - - let new_op = if func.can_specialize_call(nargs + 1) { - Instruction::CallBoundMethodExactArgs - } else { - Instruction::CallBoundMethodGeneral - }; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, version); - } - self.specialize_at(instr_idx, cache_base, new_op); return; } // Try to specialize method descriptor calls - if self_or_null_is_some && let Some(descr) = callable.downcast_ref::() { + if self_or_null_is_some + && let Some(descr) = callable.downcast_ref_if_exact::(vm) + && descr.method.flags.contains(PyMethodFlags::METHOD) + { let call_cache_entries = Instruction::CallListAppend.cache_entries(); let next_idx = cache_base + call_cache_entries; let next_is_pop_top = if next_idx < self.code.instructions.len() { @@ -7692,18 +7827,58 @@ impl ExecutingFrame<'_> { false }; - let new_op = if nargs == 1 - && descr.method.name == "append" - && descr.objclass.is(vm.ctx.types.list_type) - && next_is_pop_top - { - Instruction::CallListAppend - } else { - match nargs { - 0 => Instruction::CallMethodDescriptorNoargs, - 1 => Instruction::CallMethodDescriptorO, - _ => Instruction::CallMethodDescriptorFast, + let call_conv = descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + + let new_op = if call_conv == PyMethodFlags::NOARGS { + if nargs != 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; } + Instruction::CallMethodDescriptorNoargs + } else if call_conv == PyMethodFlags::O { + if nargs != 1 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + if descr.method.name == "append" + && descr.objclass.is(vm.ctx.types.list_type) + && next_is_pop_top + { + let callable_ptr = callable as *const PyObject as usize; + unsafe { + self.code + .instructions + .write_cache_ptr(cache_base + 1, callable_ptr); + } + Instruction::CallListAppend + } else { + Instruction::CallMethodDescriptorO + } + } else if call_conv == PyMethodFlags::FASTCALL { + Instruction::CallMethodDescriptorFast + } else if call_conv == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) { + Instruction::CallMethodDescriptorFastWithKeywords + } else { + Instruction::CallNonPyGeneral }; self.specialize_at(instr_idx, cache_base, new_op); return; @@ -7712,28 +7887,54 @@ impl ExecutingFrame<'_> { // Try to specialize builtin calls if let Some(native) = callable.downcast_ref_if_exact::(vm) { let effective_nargs = nargs + u32::from(self_or_null_is_some); - let callable_tag = callable as *const PyObject as u32; - let new_op = if native.zelf.is_none() - && native.value.name == "len" - && nargs == 1 - && effective_nargs == 1 - { - Instruction::CallLen - } else if native.zelf.is_none() - && native.value.name == "isinstance" - && effective_nargs == 2 - { - Instruction::CallIsinstance - } else if effective_nargs == 1 { - Instruction::CallBuiltinO + let callable_ptr = callable as *const PyObject as usize; + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + let new_op = if call_conv == PyMethodFlags::O { + if effective_nargs != 1 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + if native.zelf.is_none() + && native.value.name == "len" + && native.module.is_some_and(|m| m.as_str() == "builtins") + && nargs == 1 + { + Instruction::CallLen + } else { + Instruction::CallBuiltinO + } + } else if call_conv == PyMethodFlags::FASTCALL { + if native.zelf.is_none() + && native.value.name == "isinstance" + && native.module.is_some_and(|m| m.as_str() == "builtins") + && effective_nargs == 2 + { + Instruction::CallIsinstance + } else { + Instruction::CallBuiltinFast + } + } else if call_conv == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) { + Instruction::CallBuiltinFastWithKeywords } else { - Instruction::CallBuiltinFast + Instruction::CallNonPyGeneral }; if matches!(new_op, Instruction::CallLen | Instruction::CallIsinstance) { unsafe { self.code .instructions - .write_cache_u32(cache_base + 1, callable_tag); + .write_cache_ptr(cache_base + 1, callable_ptr); } } self.specialize_at(instr_idx, cache_base, new_op); @@ -7741,30 +7942,38 @@ impl ExecutingFrame<'_> { } // type/str/tuple(x) and class-call specializations - if callable.class().is(vm.ctx.types.type_type) - && let Some(cls) = callable.downcast_ref::() - { - if !self_or_null_is_some && nargs == 1 { - let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { - Some(Instruction::CallType1) - } else if callable.is(&vm.ctx.types.str_type.as_object()) { - Some(Instruction::CallStr1) - } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { - Some(Instruction::CallTuple1) - } else { - None - }; - if let Some(new_op) = new_op { - self.specialize_at(instr_idx, cache_base, new_op); + if let Some(cls) = callable.downcast_ref::() { + if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) { + if !self_or_null_is_some && nargs == 1 { + let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { + Some(Instruction::CallType1) + } else if callable.is(&vm.ctx.types.str_type.as_object()) { + Some(Instruction::CallStr1) + } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { + Some(Instruction::CallTuple1) + } else { + None + }; + if let Some(new_op) = new_op { + self.specialize_at(instr_idx, cache_base, new_op); + return; + } + } + if cls.slots.vectorcall.load().is_some() { + self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); return; } - } - if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) - && cls.slots.vectorcall.load().is_some() - { - self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); return; } + + // CPython only considers CALL_ALLOC_AND_ENTER_INIT for types whose + // metaclass is exactly `type`. + if !callable.class().is(vm.ctx.types.type_type) { + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); + return; + } + // CallAllocAndEnterInit: heap type with default __new__ if !self_or_null_is_some && cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) { let object_new = vm.ctx.types.object_type.slots.new.load(); @@ -7772,7 +7981,7 @@ impl ExecutingFrame<'_> { if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) && cls_new_fn as usize == obj_new_fn as usize && let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref::() + && let Some(init_func) = init.downcast_ref_if_exact::(vm) && init_func.can_specialize_call(nargs + 1) { let version = cls.tp_version_tag.load(Acquire); @@ -7801,7 +8010,7 @@ impl ExecutingFrame<'_> { fn specialize_call_kw( &mut self, - _vm: &VirtualMachine, + vm: &VirtualMachine, nargs: u32, instr_idx: usize, cache_base: usize, @@ -7821,7 +8030,18 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 2); - if let Some(func) = callable.downcast_ref::() { + if let Some(func) = callable.downcast_ref_if_exact::(vm) { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7845,11 +8065,44 @@ impl ExecutingFrame<'_> { } if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() - && let Some(func) = bound_method.function_obj().downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { - let version = func.get_version_for_current_state(); - if version == 0 { + if let Some(func) = bound_method + .function_obj() + .downcast_ref_if_exact::(vm) + { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, version); + } + self.specialize_at(instr_idx, cache_base, Instruction::CallKwBoundMethod); + } else { + // Match CPython: bound methods wrapping non-Python callables + // are not specialized as CALL_KW_NON_PY. unsafe { self.code.instructions.write_adaptive_counter( cache_base, @@ -7858,14 +8111,7 @@ impl ExecutingFrame<'_> { ), ); } - return; } - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, version); - } - self.specialize_at(instr_idx, cache_base, Instruction::CallKwBoundMethod); return; } @@ -7873,7 +8119,7 @@ impl ExecutingFrame<'_> { self.specialize_at(instr_idx, cache_base, Instruction::CallKwNonPy); } - fn specialize_send(&mut self, instr_idx: usize, cache_base: usize) { + fn specialize_send(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { if !matches!( self.code.instructions.read_op(instr_idx), Instruction::Send { .. } @@ -7882,7 +8128,9 @@ impl ExecutingFrame<'_> { } // Stack: [receiver, val] — receiver is at position 1 let receiver = self.nth_value(1); - if self.builtin_coro(receiver).is_some() { + let is_exact_gen_or_coro = receiver.downcast_ref_if_exact::(vm).is_some() + || receiver.downcast_ref_if_exact::(vm).is_some(); + if is_exact_gen_or_coro && !self.specialization_eval_frame_active(vm) { self.specialize_at(instr_idx, cache_base, Instruction::SendGen); } else { unsafe { @@ -8032,7 +8280,13 @@ impl ExecutingFrame<'_> { self.commit_specialization(instr_idx, cache_base, new_op); } - fn specialize_for_iter(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { + fn specialize_for_iter( + &mut self, + vm: &VirtualMachine, + jump_delta: u32, + instr_idx: usize, + cache_base: usize, + ) { if !matches!( self.code.instructions.read_op(instr_idx), Instruction::ForIter { .. } @@ -8047,7 +8301,11 @@ impl ExecutingFrame<'_> { Some(Instruction::ForIterList) } else if iter.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::ForIterTuple) - } else if iter.downcast_ref_if_exact::(vm).is_some() { + } else if iter.downcast_ref_if_exact::(vm).is_some() + && jump_delta <= i16::MAX as u32 + && self.for_iter_has_end_for_shape(instr_idx, jump_delta) + && !self.specialization_eval_frame_active(vm) + { Some(Instruction::ForIterGen) } else { None @@ -8056,6 +8314,28 @@ impl ExecutingFrame<'_> { self.commit_specialization(instr_idx, cache_base, new_op); } + #[inline] + fn specialization_eval_frame_active(&self, vm: &VirtualMachine) -> bool { + vm.use_tracing.get() + } + + #[inline] + fn for_iter_has_end_for_shape(&self, instr_idx: usize, jump_delta: u32) -> bool { + let target_idx = instr_idx + + 1 + + Instruction::ForIter { + delta: Arg::marker(), + } + .cache_entries() + + jump_delta as usize; + self.code.instructions.get(target_idx).is_some_and(|unit| { + matches!( + unit.op, + Instruction::EndFor | Instruction::InstrumentedEndFor + ) + }) + } + /// Handle iterator exhaustion in specialized FOR_ITER handlers. /// Skips END_FOR if present at target and jumps. fn for_iter_jump_on_exhausted(&mut self, target: bytecode::Label) { diff --git a/crates/vm/src/function/method.rs b/crates/vm/src/function/method.rs index 52624cbbf..211f7e3ad 100644 --- a/crates/vm/src/function/method.rs +++ b/crates/vm/src/function/method.rs @@ -12,11 +12,11 @@ bitflags::bitflags! { // METH_XXX flags in CPython #[derive(Copy, Clone, Debug, PartialEq)] pub struct PyMethodFlags: u32 { - // const VARARGS = 0x0001; - // const KEYWORDS = 0x0002; + const VARARGS = 0x0001; + const KEYWORDS = 0x0002; // METH_NOARGS and METH_O must not be combined with the flags above. - // const NOARGS = 0x0004; - // const O = 0x0008; + const NOARGS = 0x0004; + const O = 0x0008; // METH_CLASS and METH_STATIC are a little different; these control // the construction of methods for a class. These cannot be used for @@ -31,7 +31,7 @@ bitflags::bitflags! { // const COEXIST = 0x0040; // if not Py_LIMITED_API - // const FASTCALL = 0x0080; + const FASTCALL = 0x0080; // This bit is preserved for Stackless Python // const STACKLESS = 0x0100;