From c6b6842e71f183c6d47def8e3b894e41685317b3 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:01:55 +0900 Subject: [PATCH 01/31] fix jit --- crates/jit/src/instructions.rs | 13 +++++++++++-- crates/vm/src/builtins/function.rs | 9 ++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/crates/jit/src/instructions.rs b/crates/jit/src/instructions.rs index 5bf4057b340..9d8be5bc6e3 100644 --- a/crates/jit/src/instructions.rs +++ b/crates/jit/src/instructions.rs @@ -210,9 +210,18 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { func_ref: FuncRef, bytecode: &CodeObject, ) -> Result<(), JitCompileError> { + // JIT should consume a stable instruction stream: de-specialized opcodes + // with zeroed CACHE entries, not runtime-mutated quickened code. + let clean_instructions: bytecode::CodeUnits = bytecode + .instructions + .original_bytes() + .as_slice() + .try_into() + .map_err(|_| JitCompileError::BadBytecode)?; + let mut label_targets = BTreeSet::new(); let mut target_arg_state = OpArgState::default(); - for (offset, &raw_instr) in bytecode.instructions.iter().enumerate() { + for (offset, &raw_instr) in clean_instructions.iter().enumerate() { let (instruction, arg) = target_arg_state.get(raw_instr); if let Some(target) = Self::instruction_target(offset as u32, instruction, arg)? { label_targets.insert(target); @@ -223,7 +232,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { // Track whether we have "returned" in the current block let mut in_unreachable_code = false; - for (offset, &raw_instr) in bytecode.instructions.iter().enumerate() { + for (offset, &raw_instr) in clean_instructions.iter().enumerate() { let label = Label(offset as u32); let (instruction, arg) = arg_state.get(raw_instr); diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 1316dd7b725..7befb2ab418 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -727,10 +727,12 @@ impl PyFunction { #[pygetset(setter)] fn set___code__(&self, code: PyRef, vm: &VirtualMachine) { + #[cfg(feature = "jit")] + let mut jit_guard = self.jitted_code.lock(); self.code.swap_to_temporary_refs(code, vm); #[cfg(feature = "jit")] { - *self.jitted_code.lock() = None; + *jit_guard = None; } self.func_version.store(0, Relaxed); } @@ -968,7 +970,8 @@ impl PyFunction { #[cfg(feature = "jit")] #[pymethod] fn __jit__(zelf: PyRef, vm: &VirtualMachine) -> PyResult<()> { - if zelf.jitted_code.lock().is_some() { + let mut jit_guard = zelf.jitted_code.lock(); + if jit_guard.is_some() { return Ok(()); } let arg_types = jit::get_jit_arg_types(&zelf, vm)?; @@ -976,7 +979,7 @@ impl PyFunction { let code: &Py = &zelf.code; let compiled = rustpython_jit::compile(&code.code, &arg_types, ret_type) .map_err(|err| jit::new_jit_error(err.to_string(), vm))?; - *zelf.jitted_code.lock() = Some(compiled); + *jit_guard = Some(compiled); Ok(()) } } From b379239da4a1986957513726e9f403c33bdd5a49 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:13:48 +0900 Subject: [PATCH 02/31] vm: complete specialized opcode dispatch paths --- crates/vm/src/frame.rs | 237 +++++++++++++++++++++++-- crates/vm/src/stdlib/sys/monitoring.rs | 2 +- 2 files changed, 226 insertions(+), 13 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 29626d104da..7a6c5412490 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -2168,7 +2168,9 @@ impl ExecutingFrame<'_> { self.jump_relative_forward(u32::from(arg), 0); Ok(None) } - Instruction::JumpBackward { .. } => { + Instruction::JumpBackward { .. } + | Instruction::JumpBackwardJit + | Instruction::JumpBackwardNoJit => { self.jump_relative_backward(u32::from(arg), 1); Ok(None) } @@ -2303,6 +2305,18 @@ impl ExecutingFrame<'_> { } Instruction::LoadConst { consti: idx } => { self.push_value(self.code.constants[idx.get(arg) as usize].clone().into()); + // Mirror CPython's LOAD_CONST family transition. RustPython does + // not currently distinguish immortal constants at runtime. + let instr_idx = self.lasti() as usize - 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadConstMortal); + } + Ok(None) + } + Instruction::LoadConstMortal | Instruction::LoadConstImmortal => { + self.push_value(self.code.constants[u32::from(arg) as usize].clone().into()); Ok(None) } Instruction::LoadCommonConstant { idx } => { @@ -2805,7 +2819,7 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::RaiseVarargs { argc: kind } => self.execute_raise(vm, kind.get(arg)), - Instruction::Resume { .. } => { + Instruction::Resume { .. } | Instruction::ResumeCheck => { // Lazy quickening: initialize adaptive counters on first execution if !self.code.quickened.swap(true, atomic::Ordering::Relaxed) { self.code.instructions.quicken(); @@ -3268,6 +3282,35 @@ impl ExecutingFrame<'_> { self.load_attr_slow(vm, oparg) } } + Instruction::LoadAttrMethodLazyDict => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && owner.dict().is_none() + { + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let func = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + let owner = self.pop_value(); + self.push_value(func); + self.push_value(owner); + Ok(None) + } else { + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); + self.load_attr_slow(vm, oparg) + } + } Instruction::LoadAttrMethodWithValues => { let oparg = LoadAttr::new(u32::from(arg)); let instr_idx = self.lasti() as usize - 1; @@ -3352,6 +3395,39 @@ impl ExecutingFrame<'_> { ); self.load_attr_slow(vm, oparg) } + Instruction::LoadAttrWithHint => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[oparg.name_idx() as usize]; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(dict) = owner.dict() + && let Some(value) = dict.get_item_opt(attr_name, vm)? + { + self.pop_value(); + if oparg.is_method() { + self.push_value(value); + self.push_value_opt(None); + } else { + self.push_value(value); + } + return Ok(None); + } + + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); + self.load_attr_slow(vm, oparg) + } Instruction::LoadAttrModule => { let oparg = LoadAttr::new(u32::from(arg)); let instr_idx = self.lasti() as usize - 1; @@ -3507,6 +3583,48 @@ impl ExecutingFrame<'_> { } self.load_attr_slow(vm, oparg) } + Instruction::LoadAttrClassWithMetaclassCheck => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + let metaclass_version = self.code.instructions.read_cache_u32(cache_base + 3); + + if type_version != 0 + && metaclass_version != 0 + && let Some(owner_type) = owner.downcast_ref::() + && owner_type.tp_version_tag.load(Acquire) == type_version + && owner.class().tp_version_tag.load(Acquire) == metaclass_version + { + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + self.pop_value(); + if oparg.is_method() { + self.push_value(attr); + self.push_value_opt(None); + } else { + self.push_value(attr); + } + return Ok(None); + } + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); + self.load_attr_slow(vm, oparg) + } + Instruction::LoadAttrGetattributeOverridden => { + let oparg = LoadAttr::new(u32::from(arg)); + self.deoptimize(Instruction::LoadAttr { + namei: Arg::marker(), + }); + self.load_attr_slow(vm, oparg) + } Instruction::LoadAttrSlot => { let oparg = LoadAttr::new(u32::from(arg)); let instr_idx = self.lasti() as usize - 1; @@ -3604,6 +3722,32 @@ impl ExecutingFrame<'_> { ); self.store_attr(vm, attr_idx) } + Instruction::StoreAttrWithHint => { + let attr_idx = u32::from(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[attr_idx as usize]; + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(dict) = owner.dict() + { + self.pop_value(); // owner + let value = self.pop_value(); + dict.set_item(attr_name, value, vm)?; + return Ok(None); + } + self.deoptimize_at( + Instruction::StoreAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); + self.store_attr(vm, attr_idx) + } Instruction::StoreAttrSlot => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -3708,6 +3852,12 @@ impl ExecutingFrame<'_> { self.execute_bin_op(vm, bytecode::BinaryOperator::Add) } } + Instruction::BinaryOpSubscrGetitem | Instruction::BinaryOpExtend => { + let op = bytecode::BinaryOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::BinaryOperator::Subscr); + self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); + self.execute_bin_op(vm, op) + } Instruction::BinaryOpSubscrListInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -6812,6 +6962,8 @@ impl ExecutingFrame<'_> { let new_op = if !class_has_dict { Instruction::LoadAttrMethodNoDict + } else if obj.dict().is_none() { + Instruction::LoadAttrMethodLazyDict } else { Instruction::LoadAttrMethodWithValues }; @@ -6901,13 +7053,34 @@ impl ExecutingFrame<'_> { } } else { // No class attr, must be in instance dict + let use_hint = if let Some(dict) = obj.dict() { + match dict.get_item_opt(attr_name, _vm) { + Ok(Some(_)) => true, + Ok(None) => false, + Err(_) => { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + } + } else { + false + }; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrInstanceValue); + self.code.instructions.replace_op( + instr_idx, + if use_hint { + Instruction::LoadAttrWithHint + } else { + Instruction::LoadAttrInstanceValue + }, + ); } } } else if let Some(ref descr) = cls_attr { @@ -6976,6 +7149,21 @@ impl ExecutingFrame<'_> { return; } } + let mut metaclass_version = 0; + if !mcl.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) { + metaclass_version = mcl.tp_version_tag.load(Acquire); + if metaclass_version == 0 { + metaclass_version = mcl.assign_version_tag(); + } + if metaclass_version == 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + } // Look up attr in the type's own MRO let cls_attr = owner_type.get_attr(attr_name); @@ -6991,10 +7179,18 @@ impl ExecutingFrame<'_> { .write_cache_u32(cache_base + 1, type_version); self.code .instructions - .write_cache_u64(cache_base + 5, descr_ptr); + .write_cache_u32(cache_base + 3, metaclass_version); self.code .instructions - .replace_op(instr_idx, Instruction::LoadAttrClass); + .write_cache_u64(cache_base + 5, descr_ptr); + self.code.instructions.replace_op( + instr_idx, + if metaclass_version == 0 { + Instruction::LoadAttrClass + } else { + Instruction::LoadAttrClassWithMetaclassCheck + }, + ); } return; } @@ -7814,7 +8010,7 @@ impl ExecutingFrame<'_> { fn specialize_store_attr( &mut self, - _vm: &VirtualMachine, + vm: &VirtualMachine, attr_idx: bytecode::NameIdx, instr_idx: usize, cache_base: usize, @@ -7890,14 +8086,31 @@ impl ExecutingFrame<'_> { .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } } - } else if owner.dict().is_some() { + } else if let Some(dict) = owner.dict() { + let use_hint = match dict.get_item_opt(attr_name, vm) { + Ok(Some(_)) => true, + Ok(None) => false, + Err(_) => { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + }; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::StoreAttrInstanceValue); + self.code.instructions.replace_op( + instr_idx, + if use_hint { + Instruction::StoreAttrWithHint + } else { + Instruction::StoreAttrInstanceValue + }, + ); } } else { unsafe { diff --git a/crates/vm/src/stdlib/sys/monitoring.rs b/crates/vm/src/stdlib/sys/monitoring.rs index 858ea83b8a7..6d1aeb9c8f3 100644 --- a/crates/vm/src/stdlib/sys/monitoring.rs +++ b/crates/vm/src/stdlib/sys/monitoring.rs @@ -322,7 +322,7 @@ pub fn instrument_code(code: &PyCode, events: u32) { .code .instructions .iter() - .position(|u| matches!(u.op, Instruction::Resume { .. })) + .position(|u| matches!(u.op, Instruction::Resume { .. } | Instruction::ResumeCheck)) .unwrap_or(0); // Phase 4: Place regular INSTRUMENTED_* opcodes From bdeca2bb58a369d9afa56d5a6dc7e21dc6ccb000 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:16:44 +0900 Subject: [PATCH 03/31] vm: cache LOAD_GLOBAL with dict entry hints --- crates/vm/src/builtins/dict.rs | 27 +++++++ crates/vm/src/dict_inner.rs | 37 ++++++++++ crates/vm/src/frame.rs | 129 +++++++++++++++++---------------- 3 files changed, 129 insertions(+), 64 deletions(-) diff --git a/crates/vm/src/builtins/dict.rs b/crates/vm/src/builtins/dict.rs index c630fc25dff..7ba173fe7e4 100644 --- a/crates/vm/src/builtins/dict.rs +++ b/crates/vm/src/builtins/dict.rs @@ -668,6 +668,33 @@ impl Py { } } + /// Return a cached-entry hint for exact dict fast paths. + pub(crate) fn hint_for_key( + &self, + key: &K, + vm: &VirtualMachine, + ) -> PyResult> { + if self.exact_dict(vm) { + self.entries.hint_for_key(vm, key) + } else { + Ok(None) + } + } + + /// Fast lookup using a cached entry index hint. + pub(crate) fn get_item_opt_hint( + &self, + key: &K, + hint: u16, + vm: &VirtualMachine, + ) -> PyResult> { + if self.exact_dict(vm) { + self.entries.get_hint(vm, key, usize::from(hint)) + } else { + self.get_item_opt(key, vm) + } + } + pub fn get_item(&self, key: &K, vm: &VirtualMachine) -> PyResult { if self.exact_dict(vm) { self.inner_getitem(key, vm) diff --git a/crates/vm/src/dict_inner.rs b/crates/vm/src/dict_inner.rs index 2a77ea7d991..139e9e57ad6 100644 --- a/crates/vm/src/dict_inner.rs +++ b/crates/vm/src/dict_inner.rs @@ -337,6 +337,43 @@ impl Dict { self._get_inner(vm, key, hash) } + /// Return a stable entry hint for `key` if present. + /// + /// The hint is the internal entry index and can be used with + /// [`Self::get_hint`]. It is invalidated by dict mutations. + pub fn hint_for_key( + &self, + vm: &VirtualMachine, + key: &K, + ) -> PyResult> { + let hash = key.key_hash(vm)?; + let (entry, _) = self.lookup(vm, key, hash, None)?; + let Some(index) = entry.index() else { + return Ok(None); + }; + Ok(u16::try_from(index).ok()) + } + + /// Fast path lookup using a cached entry index (`hint`). + /// + /// Returns `None` if the hint is stale or the key no longer matches. + pub fn get_hint( + &self, + vm: &VirtualMachine, + key: &K, + hint: usize, + ) -> PyResult> { + let inner = self.read(); + let Some(Some(entry)) = inner.entries.get(hint) else { + return Ok(None); + }; + if key.key_is(&entry.key) || key.key_eq(vm, &entry.key)? { + Ok(Some(entry.value.clone())) + } else { + Ok(None) + } + } + fn _get_inner( &self, vm: &VirtualMachine, diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 7a6c5412490..f0425031d3c 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -5214,67 +5214,57 @@ impl ExecutingFrame<'_> { let oparg = u32::from(arg); let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; - let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); - let current_version = self.globals.version() as u32; - if cached_version == current_version { - // globals unchanged — name is in globals, look up only there + let cached_version = self.code.instructions.read_cache_u16(cache_base + 1); + let cached_index = self.code.instructions.read_cache_u16(cache_base + 3); + if let Ok(current_version) = u16::try_from(self.globals.version()) + && cached_version == current_version + { let name = self.code.names[(oparg >> 1) as usize]; - if let Some(x) = self.globals.get_item_opt(name, vm)? { - self.push_value(x); - if (oparg & 1) != 0 { - self.push_value_opt(None); - } - Ok(None) - } else { - // Name was removed from globals - self.deoptimize(Instruction::LoadGlobal { - namei: Arg::marker(), - }); - let x = self.load_global_or_builtin(name, vm)?; + if let Some(x) = self.globals.get_item_opt_hint(name, cached_index, vm)? { self.push_value(x); if (oparg & 1) != 0 { self.push_value_opt(None); } - Ok(None) + return Ok(None); } - } else { - self.deoptimize(Instruction::LoadGlobal { + } + self.deoptimize_at( + Instruction::LoadGlobal { namei: Arg::marker(), - }); - let name = self.code.names[(oparg >> 1) as usize]; - let x = self.load_global_or_builtin(name, vm)?; - self.push_value(x); - if (oparg & 1) != 0 { - self.push_value_opt(None); - } - Ok(None) + }, + instr_idx, + cache_base, + ); + let name = self.code.names[(oparg >> 1) as usize]; + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); } + Ok(None) } Instruction::LoadGlobalBuiltin => { let oparg = u32::from(arg); let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; - let cached_globals_ver = self.code.instructions.read_cache_u32(cache_base + 1); - let cached_builtins_ver = self.code.instructions.read_cache_u32(cache_base + 2); - let current_globals_ver = self.globals.version() as u32; - if cached_globals_ver == current_globals_ver { - // globals unchanged — name is NOT in globals, check builtins - if let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) { - let current_builtins_ver = builtins_dict.version() as u32; - if cached_builtins_ver == current_builtins_ver { - // Both versions match — safe to look up in builtins - let name = self.code.names[(oparg >> 1) as usize]; - if let Some(x) = builtins_dict.get_item_opt(name, vm)? { - self.push_value(x); - if (oparg & 1) != 0 { - self.push_value_opt(None); - } - return Ok(None); - } + let cached_globals_ver = self.code.instructions.read_cache_u16(cache_base + 1); + let cached_builtins_ver = self.code.instructions.read_cache_u16(cache_base + 2); + let cached_index = self.code.instructions.read_cache_u16(cache_base + 3); + if let Ok(current_globals_ver) = u16::try_from(self.globals.version()) + && cached_globals_ver == current_globals_ver + && let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) + && let Ok(current_builtins_ver) = u16::try_from(builtins_dict.version()) + && cached_builtins_ver == current_builtins_ver + { + let name = self.code.names[(oparg >> 1) as usize]; + if let Some(x) = builtins_dict.get_item_opt_hint(name, cached_index, vm)? { + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); } + return Ok(None); } } - // Version mismatch or lookup failed — deoptimize self.deoptimize_at( Instruction::LoadGlobal { namei: Arg::marker(), @@ -7888,45 +7878,56 @@ impl ExecutingFrame<'_> { return; } let name = self.code.names[(oparg >> 1) as usize]; - // Check if name exists in globals - let in_globals = self.globals.get_item_opt(name, vm).ok().flatten().is_some(); - - let globals_version = self.globals.version() as u32; + let Ok(globals_version) = u16::try_from(self.globals.version()) else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + }; - if in_globals { + if let Ok(Some(globals_hint)) = self.globals.hint_for_key(name, vm) { unsafe { self.code .instructions .replace_op(instr_idx, Instruction::LoadGlobalModule); self.code .instructions - .write_cache_u32(cache_base + 1, globals_version); + .write_cache_u16(cache_base + 1, globals_version); + self.code.instructions.write_cache_u16(cache_base + 2, 0); + self.code + .instructions + .write_cache_u16(cache_base + 3, globals_hint); } - } else if let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) - && builtins_dict - .get_item_opt(name, vm) - .ok() - .flatten() - .is_some() + return; + } + + if let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) + && let Ok(Some(builtins_hint)) = builtins_dict.hint_for_key(name, vm) + && let Ok(builtins_version) = u16::try_from(builtins_dict.version()) { - let builtins_version = builtins_dict.version() as u32; unsafe { self.code .instructions .replace_op(instr_idx, Instruction::LoadGlobalBuiltin); self.code .instructions - .write_cache_u32(cache_base + 1, globals_version); + .write_cache_u16(cache_base + 1, globals_version); self.code .instructions - .write_cache_u32(cache_base + 2, builtins_version); - } - } else { - unsafe { + .write_cache_u16(cache_base + 2, builtins_version); self.code .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + .write_cache_u16(cache_base + 3, builtins_hint); } + return; + } + + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } } From ca3814afdbfa1162b4edef2604fc8440df1844d3 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:35:32 +0900 Subject: [PATCH 04/31] vm: align adaptive specialization counters with CPython backoff --- crates/compiler-core/src/bytecode.rs | 119 ++++++--- crates/vm/src/frame.rs | 367 ++++++++++++++++++--------- 2 files changed, 328 insertions(+), 158 deletions(-) diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index d3dda5090ab..8a3251d0d81 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -349,9 +349,45 @@ pub struct CodeUnit { const _: () = assert!(mem::size_of::() == 2); /// Adaptive specialization: number of executions before attempting specialization. -pub const ADAPTIVE_WARMUP_VALUE: u8 = 50; -/// Adaptive specialization: backoff counter after de-optimization. -pub const ADAPTIVE_BACKOFF_VALUE: u8 = 250; +/// +/// Matches CPython's `_Py_BackoffCounter` encoding. +pub const ADAPTIVE_WARMUP_VALUE: u16 = adaptive_counter_bits(1, 1); +/// Adaptive specialization: cooldown counter after a successful specialization. +/// +/// Value/backoff = (52, 0), matching CPython's ADAPTIVE_COOLDOWN bits. +pub const ADAPTIVE_COOLDOWN_VALUE: u16 = adaptive_counter_bits(52, 0); + +const BACKOFF_BITS: u16 = 4; +const MAX_BACKOFF: u16 = 12; +const UNREACHABLE_BACKOFF: u16 = 15; + +/// Encode an adaptive counter as `(value << 4) | backoff`. +pub const fn adaptive_counter_bits(value: u16, backoff: u16) -> u16 { + (value << BACKOFF_BITS) | backoff +} + +/// True when the adaptive counter should trigger specialization. +#[inline] +pub const fn adaptive_counter_triggers(counter: u16) -> bool { + counter < UNREACHABLE_BACKOFF +} + +/// Decrement adaptive counter by one countdown step. +#[inline] +pub const fn advance_adaptive_counter(counter: u16) -> u16 { + counter.wrapping_sub(1 << BACKOFF_BITS) +} + +/// Reset adaptive counter with exponential backoff. +#[inline] +pub const fn adaptive_counter_backoff(counter: u16) -> u16 { + let backoff = counter & ((1 << BACKOFF_BITS) - 1); + if backoff < MAX_BACKOFF { + adaptive_counter_bits((1 << (backoff + 1)) - 1, backoff + 1) + } else { + adaptive_counter_bits((1 << MAX_BACKOFF) - 1, MAX_BACKOFF) + } +} impl CodeUnit { pub const fn new(op: Instruction, arg: OpArgByte) -> Self { @@ -370,12 +406,15 @@ impl TryFrom<&[u8]> for CodeUnit { } } -pub struct CodeUnits(UnsafeCell>); +pub struct CodeUnits { + units: UnsafeCell>, + adaptive_counters: Box<[AtomicU16]>, +} // SAFETY: All cache operations use atomic read/write instructions. // - replace_op / compare_exchange_op: AtomicU8 store/CAS (Release) // - cache read/write: AtomicU16 load/store (Relaxed) -// - adaptive counter: AtomicU8 load/store (Relaxed) +// - adaptive counter: AtomicU16 load/store (Relaxed) // Ordering is established by: // - replace_op (Release) ↔ dispatch loop read_op (Acquire) for cache data visibility // - tp_version_tag (Acquire) for descriptor pointer validity @@ -385,15 +424,23 @@ impl Clone for CodeUnits { fn clone(&self) -> Self { // SAFETY: No concurrent mutation during clone — cloning is only done // during code object construction or marshaling, not while instrumented. - let inner = unsafe { &*self.0.get() }; - Self(UnsafeCell::new(inner.clone())) + let units = unsafe { &*self.units.get() }.clone(); + let adaptive_counters = self + .adaptive_counters + .iter() + .map(|c| AtomicU16::new(c.load(Ordering::Relaxed))) + .collect(); + Self { + units: UnsafeCell::new(units), + adaptive_counters, + } } } impl fmt::Debug for CodeUnits { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // SAFETY: Debug formatting doesn't race with replace_op - let inner = unsafe { &*self.0.get() }; + let inner = unsafe { &*self.units.get() }; f.debug_tuple("CodeUnits").field(inner).finish() } } @@ -406,29 +453,37 @@ impl TryFrom<&[u8]> for CodeUnits { return Err(Self::Error::InvalidBytecode); } - let units: Self = value + let units = value .chunks_exact(2) .map(CodeUnit::try_from) - .collect::>()?; - Ok(units) + .collect::, _>>()?; + Ok(units.into()) } } impl From<[CodeUnit; N]> for CodeUnits { fn from(value: [CodeUnit; N]) -> Self { - Self(UnsafeCell::new(Box::from(value))) + Self::from(Vec::from(value)) } } impl From> for CodeUnits { fn from(value: Vec) -> Self { - Self(UnsafeCell::new(value.into_boxed_slice())) + let units = value.into_boxed_slice(); + let adaptive_counters = (0..units.len()) + .map(|_| AtomicU16::new(0)) + .collect::>() + .into_boxed_slice(); + Self { + units: UnsafeCell::new(units), + adaptive_counters, + } } } impl FromIterator for CodeUnits { fn from_iter>(iter: T) -> Self { - Self(UnsafeCell::new(iter.into_iter().collect())) + Self::from(iter.into_iter().collect::>()) } } @@ -439,7 +494,7 @@ impl Deref for CodeUnits { // SAFETY: Shared references to the slice are valid even while replace_op // may update individual opcode bytes — readers tolerate stale opcodes // (they will re-read on the next iteration). - unsafe { &*self.0.get() } + unsafe { &*self.units.get() } } } @@ -452,7 +507,7 @@ impl CodeUnits { /// - `index` must be in bounds. /// - `new_op` must have the same arg semantics as the original opcode. pub unsafe fn replace_op(&self, index: usize, new_op: Instruction) { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; unsafe { &*ptr }.store(new_op.into(), Ordering::Release); } @@ -468,7 +523,7 @@ impl CodeUnits { expected: Instruction, new_op: Instruction, ) -> bool { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; unsafe { &*ptr } .compare_exchange( @@ -483,7 +538,7 @@ impl CodeUnits { /// Atomically read the opcode at `index` with Acquire ordering. /// Pairs with `replace_op` (Release) to ensure cache data visibility. pub fn read_op(&self, index: usize) -> Instruction { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; let byte = unsafe { &*ptr }.load(Ordering::Acquire); // SAFETY: Only valid Instruction values are stored via replace_op/compare_exchange_op. @@ -492,7 +547,7 @@ impl CodeUnits { /// Atomically read the arg byte at `index` with Relaxed ordering. pub fn read_arg(&self, index: usize) -> OpArgByte { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const u8; let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; OpArgByte::from(unsafe { &*arg_ptr }.load(Ordering::Relaxed)) @@ -505,7 +560,7 @@ impl CodeUnits { /// # Safety /// - `index` must be in bounds and point to a CACHE entry. pub unsafe fn write_cache_u16(&self, index: usize, value: u16) { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16; unsafe { &*ptr }.store(value, Ordering::Relaxed); } @@ -516,7 +571,7 @@ impl CodeUnits { /// # Panics /// Panics if `index` is out of bounds. pub fn read_cache_u16(&self, index: usize) -> u16 { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; assert!(index < units.len(), "read_cache_u16: index out of bounds"); let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16; unsafe { &*ptr }.load(Ordering::Relaxed) @@ -564,25 +619,19 @@ impl CodeUnits { lo | (hi << 32) } - /// Read the adaptive counter from the CACHE entry's `arg` byte at `index`. + /// Read adaptive counter bits for instruction at `index`. /// Uses Relaxed atomic load. - pub fn read_adaptive_counter(&self, index: usize) -> u8 { - let units = unsafe { &*self.0.get() }; - let ptr = units.as_ptr().wrapping_add(index) as *const u8; - let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; - unsafe { &*arg_ptr }.load(Ordering::Relaxed) + pub fn read_adaptive_counter(&self, index: usize) -> u16 { + self.adaptive_counters[index].load(Ordering::Relaxed) } - /// Write the adaptive counter to the CACHE entry's `arg` byte at `index`. + /// Write adaptive counter bits for instruction at `index`. /// Uses Relaxed atomic store. /// /// # Safety - /// - `index` must be in bounds and point to a CACHE entry. - pub unsafe fn write_adaptive_counter(&self, index: usize, value: u8) { - let units = unsafe { &*self.0.get() }; - let ptr = units.as_ptr().wrapping_add(index) as *const u8; - let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; - unsafe { &*arg_ptr }.store(value, Ordering::Relaxed); + /// - `index` must be in bounds. + pub unsafe fn write_adaptive_counter(&self, index: usize, value: u16) { + self.adaptive_counters[index].store(value, Ordering::Relaxed); } /// Produce a clean copy of the bytecode suitable for serialization @@ -611,7 +660,7 @@ impl CodeUnits { /// Initialize adaptive warmup counters for all cacheable instructions. /// Called lazily at RESUME (first execution of a code object). - /// Uses the `arg` byte of the first CACHE entry, preserving `op = Instruction::Cache`. + /// Counters are stored out-of-line to preserve `op = Instruction::Cache`. /// All writes are atomic (Relaxed) to avoid data races with concurrent readers. pub fn quicken(&self) { let len = self.len(); diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index f0425031d3c..c7fd8057717 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -18,7 +18,7 @@ use crate::{ tuple::{PyTuple, PyTupleIterator, PyTupleRef}, }, bytecode::{ - self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, + self, ADAPTIVE_COOLDOWN_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, convert::{ToPyObject, ToPyResult}, coroutine::Coro, @@ -3157,9 +3157,12 @@ impl ExecutingFrame<'_> { delta: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } match self._send(receiver, val, vm)? { @@ -3335,9 +3338,14 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code + .instructions + .read_adaptive_counter(cache_base), + ), + ); } return self.load_attr_slow(vm, oparg); } @@ -3459,9 +3467,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3493,9 +3504,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3541,9 +3555,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3577,9 +3594,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3655,9 +3675,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3690,9 +3713,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3774,9 +3800,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.store_attr(vm, attr_idx) } @@ -3996,9 +4025,12 @@ impl ExecutingFrame<'_> { argc: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) @@ -4034,9 +4066,12 @@ impl ExecutingFrame<'_> { argc: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) @@ -4762,9 +4797,12 @@ impl ExecutingFrame<'_> { }, ); let cache_base = self.lasti() as usize; - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) @@ -4842,9 +4880,12 @@ impl ExecutingFrame<'_> { }, ); let cache_base = self.lasti() as usize; - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) @@ -6890,9 +6931,12 @@ impl ExecutingFrame<'_> { .is_some_and(|f| f as usize == PyBaseObject::getattro as *const () as usize); if !is_default_getattro { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -6905,9 +6949,12 @@ impl ExecutingFrame<'_> { if type_version == 0 { // Version counter overflow — backoff to avoid re-attempting every execution unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -6964,9 +7011,12 @@ impl ExecutingFrame<'_> { } // Can't specialize this method call unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } else { // Regular attribute access @@ -7014,17 +7064,23 @@ impl ExecutingFrame<'_> { } } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } else if has_descr_get { // Non-data descriptor with __get__ — can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } else if class_has_dict { if let Some(ref descr) = cls_attr { @@ -7049,9 +7105,14 @@ impl ExecutingFrame<'_> { Ok(None) => false, Err(_) => { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code + .instructions + .read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7090,9 +7151,12 @@ impl ExecutingFrame<'_> { } else { // No dict, no class attr — can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } @@ -7115,9 +7179,12 @@ impl ExecutingFrame<'_> { } if type_version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7132,9 +7199,12 @@ impl ExecutingFrame<'_> { if attr_class.slots.descr_set.load().is_some() { // Data descriptor on metaclass — can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7147,9 +7217,12 @@ impl ExecutingFrame<'_> { } if metaclass_version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7188,9 +7261,12 @@ impl ExecutingFrame<'_> { // Can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } @@ -7317,21 +7393,21 @@ impl ExecutingFrame<'_> { self.commit_specialization(instr_idx, cache_base, new_op); } - /// Adaptive counter: decrement the warmup counter, or call the specialize - /// function when it reaches zero. + /// Adaptive counter: trigger specialization at zero, otherwise advance countdown. #[inline] fn adaptive(&mut self, specialize: impl FnOnce(&mut Self, usize, usize)) { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; let counter = self.code.instructions.read_adaptive_counter(cache_base); - if counter > 0 { + if bytecode::adaptive_counter_triggers(counter) { + specialize(self, instr_idx, cache_base); + } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, counter - 1); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::advance_adaptive_counter(counter), + ); } - } else { - specialize(self, instr_idx, cache_base); } } @@ -7345,13 +7421,19 @@ impl ExecutingFrame<'_> { ) { if let Some(new_op) = new_op { unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_COOLDOWN_VALUE); self.code.instructions.replace_op(instr_idx, new_op); } } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } @@ -7371,9 +7453,12 @@ impl ExecutingFrame<'_> { fn deoptimize_at(&mut self, base_op: Instruction, instr_idx: usize, cache_base: usize) { unsafe { self.code.instructions.replace_op(instr_idx, base_op); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } @@ -7456,9 +7541,12 @@ impl ExecutingFrame<'_> { let version = func.get_version_for_current_state(); if version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7630,9 +7718,12 @@ impl ExecutingFrame<'_> { let version = func.func_version(); if version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7680,9 +7771,12 @@ impl ExecutingFrame<'_> { } } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } @@ -7708,9 +7802,12 @@ impl ExecutingFrame<'_> { || class.downcast_ref::().is_none() { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7808,9 +7905,12 @@ impl ExecutingFrame<'_> { } } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } return; @@ -7880,9 +7980,12 @@ impl ExecutingFrame<'_> { let name = self.code.names[(oparg >> 1) as usize]; let Ok(globals_version) = u16::try_from(self.globals.version()) else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; }; @@ -7925,9 +8028,12 @@ impl ExecutingFrame<'_> { } unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } @@ -8034,9 +8140,12 @@ impl ExecutingFrame<'_> { .is_some_and(|f| f as usize == PyBaseObject::slot_setattro as *const () as usize); if !is_default_setattr { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -8048,9 +8157,12 @@ impl ExecutingFrame<'_> { } if type_version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -8082,9 +8194,12 @@ impl ExecutingFrame<'_> { } } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } else if let Some(dict) = owner.dict() { @@ -8093,9 +8208,12 @@ impl ExecutingFrame<'_> { Ok(None) => false, Err(_) => { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -8115,9 +8233,12 @@ impl ExecutingFrame<'_> { } } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } From f04bf22ff4d630f0a272f7724f89e5d13dbeef59 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:39:40 +0900 Subject: [PATCH 05/31] vm: apply cooldown counter on specialization success paths --- crates/vm/src/frame.rs | 140 +++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 77 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index c7fd8057717..ffd7815f549 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6965,10 +6965,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrModule); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrModule); return; } @@ -7004,9 +7002,7 @@ impl ExecutingFrame<'_> { } else { Instruction::LoadAttrMethodWithValues }; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - } + self.specialize_at(instr_idx, cache_base, new_op); return; } // Can't specialize this method call @@ -7042,10 +7038,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 3, offset as u32); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrSlot); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrSlot); } else if let Some(ref descr) = cls_attr && descr.downcast_ref::().is_some() { @@ -7058,10 +7052,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrProperty); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrProperty); } else { unsafe { self.code.instructions.write_adaptive_counter( @@ -7093,10 +7085,12 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrNondescriptorWithValues); } + self.specialize_at( + instr_idx, + cache_base, + Instruction::LoadAttrNondescriptorWithValues, + ); } else { // No class attr, must be in instance dict let use_hint = if let Some(dict) = obj.dict() { @@ -7124,15 +7118,16 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code.instructions.replace_op( - instr_idx, - if use_hint { - Instruction::LoadAttrWithHint - } else { - Instruction::LoadAttrInstanceValue - }, - ); } + self.specialize_at( + instr_idx, + cache_base, + if use_hint { + Instruction::LoadAttrWithHint + } else { + Instruction::LoadAttrInstanceValue + }, + ); } } else if let Some(ref descr) = cls_attr { // No dict support, plain class attr — cache directly @@ -7144,10 +7139,12 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrNondescriptorNoDict); } + self.specialize_at( + instr_idx, + cache_base, + Instruction::LoadAttrNondescriptorNoDict, + ); } else { // No dict, no class attr — can't specialize unsafe { @@ -7411,6 +7408,17 @@ impl ExecutingFrame<'_> { } } + /// Install a specialized opcode and set adaptive cooldown bits. + #[inline] + fn specialize_at(&mut self, instr_idx: usize, cache_base: usize, new_op: Instruction) { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_COOLDOWN_VALUE); + self.code.instructions.replace_op(instr_idx, new_op); + } + } + /// Commit a specialization result: replace op on success, backoff on failure. #[inline] fn commit_specialization( @@ -7420,12 +7428,7 @@ impl ExecutingFrame<'_> { new_op: Option, ) { if let Some(new_op) = new_op { - unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_COOLDOWN_VALUE); - self.code.instructions.replace_op(instr_idx, new_op); - } + self.specialize_at(instr_idx, cache_base, new_op); } else { unsafe { self.code.instructions.write_adaptive_counter( @@ -7569,11 +7572,11 @@ impl ExecutingFrame<'_> { Instruction::CallPyGeneral }; unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions .write_cache_u32(cache_base + 1, version); } + self.specialize_at(instr_idx, cache_base, new_op); return; } @@ -7586,11 +7589,11 @@ impl ExecutingFrame<'_> { _ => Instruction::CallMethodDescriptorFast, }; unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions .write_cache_u32(cache_base + 1, callable_tag); } + self.specialize_at(instr_idx, cache_base, new_op); return; } @@ -7609,11 +7612,11 @@ impl ExecutingFrame<'_> { let new_op = Some(new_op); if let Some(new_op) = new_op { unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions .write_cache_u32(cache_base + 1, callable_tag); } + self.specialize_at(instr_idx, cache_base, new_op); return; } } @@ -7632,11 +7635,11 @@ impl ExecutingFrame<'_> { if let Some(new_op) = new_op { let callable_tag = callable as *const PyObject as u32; unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions .write_cache_u32(cache_base + 1, callable_tag); } + self.specialize_at(instr_idx, cache_base, new_op); return; } } @@ -7655,13 +7658,15 @@ impl ExecutingFrame<'_> { let version = cls.tp_version_tag.load(Acquire); if version != 0 { unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallAllocAndEnterInit); self.code .instructions .write_cache_u32(cache_base + 1, version); } + self.specialize_at( + instr_idx, + cache_base, + Instruction::CallAllocAndEnterInit, + ); return; } } @@ -7669,13 +7674,11 @@ impl ExecutingFrame<'_> { // General builtin class call (any type with Callable) let callable_tag = callable as *const PyObject as u32; unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallBuiltinClass); self.code .instructions .write_cache_u32(cache_base + 1, callable_tag); } + self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); return; } } @@ -7683,13 +7686,11 @@ impl ExecutingFrame<'_> { // General fallback: cache callable identity to skip re-specialization let callable_tag = callable as *const PyObject as u32; unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallNonPyGeneral); self.code .instructions .write_cache_u32(cache_base + 1, callable_tag); } + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); } fn specialize_call_kw( @@ -7734,24 +7735,22 @@ impl ExecutingFrame<'_> { Instruction::CallKwPy }; unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions .write_cache_u32(cache_base + 1, version); } + self.specialize_at(instr_idx, cache_base, new_op); return; } // General fallback let callable_tag = callable as *const PyObject as u32; unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallKwNonPy); self.code .instructions .write_cache_u32(cache_base + 1, callable_tag); } + self.specialize_at(instr_idx, cache_base, Instruction::CallKwNonPy); } fn specialize_send(&mut self, instr_idx: usize, cache_base: usize) { @@ -7764,11 +7763,7 @@ impl ExecutingFrame<'_> { // Stack: [receiver, val] — receiver is at position 1 let receiver = self.nth_value(1); if self.builtin_coro(receiver).is_some() { - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::SendGen); - } + self.specialize_at(instr_idx, cache_base, Instruction::SendGen); } else { unsafe { self.code.instructions.write_adaptive_counter( @@ -7817,9 +7812,7 @@ impl ExecutingFrame<'_> { } else { Instruction::LoadSuperAttrAttr }; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - } + self.specialize_at(instr_idx, cache_base, new_op); } fn specialize_compare_op( @@ -7899,10 +7892,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::ToBoolAlwaysTrue); } + self.specialize_at(instr_idx, cache_base, Instruction::ToBoolAlwaysTrue); } else { unsafe { self.code.instructions.write_adaptive_counter( @@ -7992,9 +7983,6 @@ impl ExecutingFrame<'_> { if let Ok(Some(globals_hint)) = self.globals.hint_for_key(name, vm) { unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadGlobalModule); self.code .instructions .write_cache_u16(cache_base + 1, globals_version); @@ -8003,6 +7991,7 @@ impl ExecutingFrame<'_> { .instructions .write_cache_u16(cache_base + 3, globals_hint); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadGlobalModule); return; } @@ -8011,9 +8000,6 @@ impl ExecutingFrame<'_> { && let Ok(builtins_version) = u16::try_from(builtins_dict.version()) { unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadGlobalBuiltin); self.code .instructions .write_cache_u16(cache_base + 1, globals_version); @@ -8024,6 +8010,7 @@ impl ExecutingFrame<'_> { .instructions .write_cache_u16(cache_base + 3, builtins_hint); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadGlobalBuiltin); return; } @@ -8188,10 +8175,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u16(cache_base + 3, offset as u16); - self.code - .instructions - .replace_op(instr_idx, Instruction::StoreAttrSlot); } + self.specialize_at(instr_idx, cache_base, Instruction::StoreAttrSlot); } else { unsafe { self.code.instructions.write_adaptive_counter( @@ -8222,15 +8207,16 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code.instructions.replace_op( - instr_idx, - if use_hint { - Instruction::StoreAttrWithHint - } else { - Instruction::StoreAttrInstanceValue - }, - ); } + self.specialize_at( + instr_idx, + cache_base, + if use_hint { + Instruction::StoreAttrWithHint + } else { + Instruction::StoreAttrInstanceValue + }, + ); } else { unsafe { self.code.instructions.write_adaptive_counter( From 02a14957423aa9a5639dadfc7ada1e2472dd5cf3 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:41:08 +0900 Subject: [PATCH 06/31] vm: retain LOAD_GLOBAL specializations on misses --- crates/vm/src/frame.rs | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index ffd7815f549..73dae5a8951 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -5253,8 +5253,9 @@ impl ExecutingFrame<'_> { } Instruction::LoadGlobalModule => { let oparg = u32::from(arg); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; + // Keep specialized opcode on guard miss, matching CPython's + // JUMP_TO_PREDICTED(LOAD_GLOBAL) behavior. let cached_version = self.code.instructions.read_cache_u16(cache_base + 1); let cached_index = self.code.instructions.read_cache_u16(cache_base + 3); if let Ok(current_version) = u16::try_from(self.globals.version()) @@ -5269,13 +5270,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize_at( - Instruction::LoadGlobal { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); let name = self.code.names[(oparg >> 1) as usize]; let x = self.load_global_or_builtin(name, vm)?; self.push_value(x); @@ -5286,8 +5280,7 @@ impl ExecutingFrame<'_> { } Instruction::LoadGlobalBuiltin => { let oparg = u32::from(arg); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let cached_globals_ver = self.code.instructions.read_cache_u16(cache_base + 1); let cached_builtins_ver = self.code.instructions.read_cache_u16(cache_base + 2); let cached_index = self.code.instructions.read_cache_u16(cache_base + 3); @@ -5306,13 +5299,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize_at( - Instruction::LoadGlobal { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); let name = self.code.names[(oparg >> 1) as usize]; let x = self.load_global_or_builtin(name, vm)?; self.push_value(x); From 13125d8865142ed78d1096b080bc480a12bb7b22 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:45:22 +0900 Subject: [PATCH 07/31] vm: keep attr and call specializations on guard misses --- crates/vm/src/frame.rs | 1182 ++++++++++------------------------------ 1 file changed, 294 insertions(+), 888 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 73dae5a8951..4ef550ae4a2 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -46,6 +46,7 @@ use malachite_bigint::BigInt; use num_traits::Zero; use rustpython_common::atomic::{PyAtomic, Radium}; use rustpython_common::{ + boxvec::BoxVec, lock::{OnceCell, PyMutex}, wtf8::{Wtf8, Wtf8Buf, wtf8_concat}, }; @@ -66,6 +67,19 @@ enum UnwindReason { Raising { exception: PyBaseExceptionRef }, } +#[derive(Debug)] +struct FrameState { + // We need 1 stack per frame + /// The main data frame of the stack machine + stack: BoxVec>, + /// Cell and free variable references (cellvars + freevars). + cells_frees: Box<[PyCellRef]>, + /// Previous line number for LINE event suppression. + /// Stored here (not on ExecutingFrame) so it persists across + /// generator/coroutine suspend and resume. + prev_line: u32, +} + /// Tracks who owns a frame. // = `_PyFrameOwner` #[repr(i8)] @@ -90,398 +104,53 @@ impl FrameOwner { } } -/// Lock-free mutable storage for frame-internal data. +/// Lock-free storage for local variables (localsplus). /// /// # Safety -/// Frame execution is single-threaded: only one thread at a time executes -/// a given frame (enforced by the owner field and generator running flag). -/// External readers (e.g. `f_locals`) are on the same thread as execution -/// (trace callback) or the frame is not executing. -struct FrameUnsafeCell(UnsafeCell); - -impl FrameUnsafeCell { - fn new(value: T) -> Self { - Self(UnsafeCell::new(value)) - } - - /// # Safety - /// Caller must ensure no concurrent mutable access. - #[inline(always)] - unsafe fn get(&self) -> *mut T { - self.0.get() - } +/// Mutable access is serialized by the frame's state mutex in `with_exec()`. +/// External readers (e.g. `f_locals`) must use `try_lock` on the state mutex: +/// if acquired, the frame is not executing and access is exclusive; if not, +/// the caller is on the same thread as `with_exec()` (trace callback) and +/// access is safe because frame execution is single-threaded. +pub struct FastLocals { + inner: UnsafeCell]>>, } -// SAFETY: Frame execution is single-threaded. See FrameUnsafeCell doc. +// SAFETY: Frame execution is serialized by the state mutex. #[cfg(feature = "threading")] -unsafe impl Send for FrameUnsafeCell {} +unsafe impl Send for FastLocals {} #[cfg(feature = "threading")] -unsafe impl Sync for FrameUnsafeCell {} - -/// Unified storage for local variables and evaluation stack. -/// -/// Memory layout (each slot is `usize`-sized): -/// `[0..nlocalsplus)` — fastlocals (`Option`) -/// `[nlocalsplus..nlocalsplus+stack_top)` — active evaluation stack (`Option`) -/// `[nlocalsplus+stack_top..capacity)` — unused stack capacity -/// -/// Both `Option` and `Option` are `usize`-sized -/// (niche optimization on NonNull / NonZeroUsize). The raw storage is -/// `usize` to unify them; typed access is provided through methods. -pub struct LocalsPlus { - /// Backing storage. - data: LocalsPlusData, - /// Number of fastlocals slots (nlocals + ncells + nfrees). - nlocalsplus: u32, - /// Current evaluation stack depth. - stack_top: u32, -} +unsafe impl Sync for FastLocals {} -enum LocalsPlusData { - /// Heap-allocated storage (generators, coroutines, exec/eval frames). - Heap(Box<[usize]>), - /// Data stack allocated storage (normal function calls). - /// The pointer is valid while the enclosing data stack frame is alive. - DataStack { ptr: *mut usize, capacity: usize }, -} - -// SAFETY: DataStack variant points to thread-local DataStack memory. -// Frame execution is single-threaded (enforced by owner field). -#[cfg(feature = "threading")] -unsafe impl Send for LocalsPlusData {} -#[cfg(feature = "threading")] -unsafe impl Sync for LocalsPlusData {} - -const _: () = { - assert!(core::mem::size_of::>() == core::mem::size_of::()); - // PyStackRef size is checked in object/core.rs -}; - -impl LocalsPlus { - /// Create a new heap-backed LocalsPlus. All slots start as None (0). - fn new(nlocalsplus: usize, stacksize: usize) -> Self { - let capacity = nlocalsplus - .checked_add(stacksize) - .expect("LocalsPlus capacity overflow"); - let nlocalsplus_u32 = u32::try_from(nlocalsplus).expect("nlocalsplus exceeds u32"); +impl FastLocals { + fn new(data: Box<[Option]>) -> Self { Self { - data: LocalsPlusData::Heap(vec![0usize; capacity].into_boxed_slice()), - nlocalsplus: nlocalsplus_u32, - stack_top: 0, - } - } - - /// Create a new LocalsPlus backed by the thread data stack. - /// All slots are zero-initialized. - /// - /// The caller must call `materialize_localsplus()` when the frame finishes - /// to migrate data to the heap, then `datastack_pop()` to free the memory. - fn new_on_datastack(nlocalsplus: usize, stacksize: usize, vm: &VirtualMachine) -> Self { - let capacity = nlocalsplus - .checked_add(stacksize) - .expect("LocalsPlus capacity overflow"); - let byte_size = capacity - .checked_mul(core::mem::size_of::()) - .expect("LocalsPlus byte size overflow"); - let nlocalsplus_u32 = u32::try_from(nlocalsplus).expect("nlocalsplus exceeds u32"); - let ptr = vm.datastack_push(byte_size) as *mut usize; - // Zero-initialize all slots (0 = None for both PyObjectRef and PyStackRef). - unsafe { core::ptr::write_bytes(ptr, 0, capacity) }; - Self { - data: LocalsPlusData::DataStack { ptr, capacity }, - nlocalsplus: nlocalsplus_u32, - stack_top: 0, - } - } - - /// Migrate data-stack-backed storage to the heap, preserving all values. - /// Returns the data stack base pointer for `DataStack::pop()`. - /// Returns `None` if already heap-backed. - fn materialize_to_heap(&mut self) -> Option<*mut u8> { - if let LocalsPlusData::DataStack { ptr, capacity } = &self.data { - let base = *ptr as *mut u8; - let heap_data = unsafe { core::slice::from_raw_parts(*ptr, *capacity) } - .to_vec() - .into_boxed_slice(); - self.data = LocalsPlusData::Heap(heap_data); - Some(base) - } else { - None - } - } - - /// Drop all contained values without freeing the backing storage. - fn drop_values(&mut self) { - self.stack_clear(); - let fastlocals = self.fastlocals_mut(); - for slot in fastlocals.iter_mut() { - let _ = slot.take(); - } - } - - // -- Data access helpers -- - - #[inline(always)] - fn data_as_slice(&self) -> &[usize] { - match &self.data { - LocalsPlusData::Heap(b) => b, - LocalsPlusData::DataStack { ptr, capacity } => unsafe { - core::slice::from_raw_parts(*ptr, *capacity) - }, - } - } - - #[inline(always)] - fn data_as_mut_slice(&mut self) -> &mut [usize] { - match &mut self.data { - LocalsPlusData::Heap(b) => b, - LocalsPlusData::DataStack { ptr, capacity } => unsafe { - core::slice::from_raw_parts_mut(*ptr, *capacity) - }, - } - } - - /// Total capacity (fastlocals + stack). - #[inline(always)] - fn capacity(&self) -> usize { - match &self.data { - LocalsPlusData::Heap(b) => b.len(), - LocalsPlusData::DataStack { capacity, .. } => *capacity, - } - } - - /// Stack capacity (max stack depth). - #[inline(always)] - fn stack_capacity(&self) -> usize { - self.capacity() - self.nlocalsplus as usize - } - - // -- Fastlocals access -- - - /// Immutable access to fastlocals as `Option` slice. - #[inline(always)] - fn fastlocals(&self) -> &[Option] { - let data = self.data_as_slice(); - let ptr = data.as_ptr() as *const Option; - unsafe { core::slice::from_raw_parts(ptr, self.nlocalsplus as usize) } - } - - /// Mutable access to fastlocals as `Option` slice. - #[inline(always)] - fn fastlocals_mut(&mut self) -> &mut [Option] { - let nlocalsplus = self.nlocalsplus as usize; - let data = self.data_as_mut_slice(); - let ptr = data.as_mut_ptr() as *mut Option; - unsafe { core::slice::from_raw_parts_mut(ptr, nlocalsplus) } - } - - // -- Stack access -- - - /// Current stack depth. - #[inline(always)] - fn stack_len(&self) -> usize { - self.stack_top as usize - } - - /// Whether the stack is empty. - #[inline(always)] - fn stack_is_empty(&self) -> bool { - self.stack_top == 0 - } - - /// Push a value onto the evaluation stack. - #[inline(always)] - fn stack_push(&mut self, val: Option) { - let idx = self.nlocalsplus as usize + self.stack_top as usize; - debug_assert!( - idx < self.capacity(), - "stack overflow: stack_top={}, capacity={}", - self.stack_top, - self.stack_capacity() - ); - let data = self.data_as_mut_slice(); - data[idx] = unsafe { core::mem::transmute::, usize>(val) }; - self.stack_top += 1; - } - - /// Try to push; returns Err if stack is full. - #[inline(always)] - fn stack_try_push(&mut self, val: Option) -> Result<(), Option> { - let idx = self.nlocalsplus as usize + self.stack_top as usize; - if idx >= self.capacity() { - return Err(val); + inner: UnsafeCell::new(data), } - let data = self.data_as_mut_slice(); - data[idx] = unsafe { core::mem::transmute::, usize>(val) }; - self.stack_top += 1; - Ok(()) - } - - /// Pop a value from the evaluation stack. - #[inline(always)] - fn stack_pop(&mut self) -> Option { - debug_assert!(self.stack_top > 0, "stack underflow"); - self.stack_top -= 1; - let idx = self.nlocalsplus as usize + self.stack_top as usize; - let data = self.data_as_mut_slice(); - let raw = core::mem::replace(&mut data[idx], 0); - unsafe { core::mem::transmute::>(raw) } } - /// Immutable view of the active stack as `Option` slice. - #[inline(always)] - fn stack_as_slice(&self) -> &[Option] { - let data = self.data_as_slice(); - let base = self.nlocalsplus as usize; - let ptr = unsafe { (data.as_ptr().add(base)) as *const Option }; - unsafe { core::slice::from_raw_parts(ptr, self.stack_top as usize) } - } - - /// Get a reference to a stack slot by index from the bottom. - #[inline(always)] - fn stack_index(&self, idx: usize) -> &Option { - debug_assert!(idx < self.stack_top as usize); - let data = self.data_as_slice(); - let raw_idx = self.nlocalsplus as usize + idx; - unsafe { &*(data.as_ptr().add(raw_idx) as *const Option) } - } - - /// Get a mutable reference to a stack slot by index from the bottom. - #[inline(always)] - fn stack_index_mut(&mut self, idx: usize) -> &mut Option { - debug_assert!(idx < self.stack_top as usize); - let raw_idx = self.nlocalsplus as usize + idx; - let data = self.data_as_mut_slice(); - unsafe { &mut *(data.as_mut_ptr().add(raw_idx) as *mut Option) } - } - - /// Get the last stack element (top of stack). - #[inline(always)] - fn stack_last(&self) -> Option<&Option> { - if self.stack_top == 0 { - None - } else { - Some(self.stack_index(self.stack_top as usize - 1)) - } - } - - /// Get mutable reference to the last stack element. + /// # Safety + /// Caller must ensure exclusive access (frame state locked or frame + /// not executing). #[inline(always)] - fn stack_last_mut(&mut self) -> Option<&mut Option> { - if self.stack_top == 0 { - None - } else { - let idx = self.stack_top as usize - 1; - Some(self.stack_index_mut(idx)) - } + pub unsafe fn borrow(&self) -> &[Option] { + unsafe { &*self.inner.get() } } - /// Swap two stack elements. + /// # Safety + /// Caller must ensure exclusive mutable access. #[inline(always)] - fn stack_swap(&mut self, a: usize, b: usize) { - let base = self.nlocalsplus as usize; - let data = self.data_as_mut_slice(); - data.swap(base + a, base + b); - } - - /// Truncate the stack to `new_len` elements, dropping excess values. - fn stack_truncate(&mut self, new_len: usize) { - debug_assert!(new_len <= self.stack_top as usize); - while self.stack_top as usize > new_len { - let _ = self.stack_pop(); - } - } - - /// Clear the stack, dropping all values. - fn stack_clear(&mut self) { - while self.stack_top > 0 { - let _ = self.stack_pop(); - } - } - - /// Drain stack elements from `from` to the end, returning an iterator - /// that yields `Option` in forward order and shrinks the stack. - fn stack_drain( - &mut self, - from: usize, - ) -> impl ExactSizeIterator> + '_ { - let end = self.stack_top as usize; - debug_assert!(from <= end); - // Reduce stack_top now; the drain iterator owns the elements. - self.stack_top = from as u32; - LocalsPlusStackDrain { - localsplus: self, - current: from, - end, - } - } - - /// Extend the stack with values from an iterator. - fn stack_extend(&mut self, iter: impl Iterator>) { - for val in iter { - self.stack_push(val); - } - } -} - -/// Iterator for draining stack elements in forward order. -struct LocalsPlusStackDrain<'a> { - localsplus: &'a mut LocalsPlus, - /// Current read position (stack-relative index). - current: usize, - /// End position (exclusive, stack-relative index). - end: usize, -} - -impl Iterator for LocalsPlusStackDrain<'_> { - type Item = Option; - - fn next(&mut self) -> Option { - if self.current >= self.end { - return None; - } - let idx = self.localsplus.nlocalsplus as usize + self.current; - let data = self.localsplus.data_as_mut_slice(); - let raw = core::mem::replace(&mut data[idx], 0); - self.current += 1; - Some(unsafe { core::mem::transmute::>(raw) }) - } - - fn size_hint(&self) -> (usize, Option) { - let remaining = self.end - self.current; - (remaining, Some(remaining)) - } -} - -impl ExactSizeIterator for LocalsPlusStackDrain<'_> {} - -impl Drop for LocalsPlusStackDrain<'_> { - fn drop(&mut self) { - while self.current < self.end { - let idx = self.localsplus.nlocalsplus as usize + self.current; - let data = self.localsplus.data_as_mut_slice(); - let raw = core::mem::replace(&mut data[idx], 0); - let _ = unsafe { core::mem::transmute::>(raw) }; - self.current += 1; - } - } -} - -impl Drop for LocalsPlus { - fn drop(&mut self) { - // drop_values handles both stack and fastlocals. - // For DataStack-backed storage, the caller should have called - // materialize_localsplus() + datastack_pop() before drop. - // If not (e.g. panic), the DataStack memory is leaked but - // values are still dropped safely. - self.drop_values(); + #[allow(clippy::mut_from_ref)] + pub unsafe fn borrow_mut(&self) -> &mut [Option] { + unsafe { &mut *self.inner.get() } } } -unsafe impl Traverse for LocalsPlus { - fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { - self.fastlocals().traverse(tracer_fn); - self.stack_as_slice().traverse(tracer_fn); +unsafe impl Traverse for FastLocals { + fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) { + // SAFETY: GC runs on the same thread; no concurrent mutation. + let data = unsafe { &*self.inner.get() }; + data.traverse(traverse_fn); } } @@ -570,8 +239,7 @@ pub struct Frame { pub code: PyRef, pub func_obj: Option, - /// Unified storage for local variables and evaluation stack. - localsplus: FrameUnsafeCell, + pub fastlocals: FastLocals, pub locals: FrameLocals, pub globals: PyDictRef, pub builtins: PyObjectRef, @@ -580,11 +248,7 @@ pub struct Frame { pub lasti: PyAtomic, /// tracer function for this frame (usually is None) pub trace: PyMutex, - - /// Cell and free variable references (cellvars + freevars). - cells_frees: FrameUnsafeCell>, - /// Previous line number for LINE event suppression. - prev_line: FrameUnsafeCell, + state: PyMutex, // member pub trace_lines: PyMutex, @@ -620,20 +284,25 @@ impl PyPayload for Frame { } } +unsafe impl Traverse for FrameState { + fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { + self.stack.traverse(tracer_fn); + self.cells_frees.traverse(tracer_fn); + } +} + unsafe impl Traverse for Frame { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.code.traverse(tracer_fn); self.func_obj.traverse(tracer_fn); - // SAFETY: GC traversal does not run concurrently with frame execution. - unsafe { - (*self.localsplus.get()).traverse(tracer_fn); - (*self.cells_frees.get()).traverse(tracer_fn); - } + self.fastlocals.traverse(tracer_fn); self.locals.traverse(tracer_fn); self.globals.traverse(tracer_fn); self.builtins.traverse(tracer_fn); self.trace.traverse(tracer_fn); + self.state.traverse(tracer_fn); self.temporary_refs.traverse(tracer_fn); + // generator is a borrowed reference, not traversed } } @@ -653,7 +322,6 @@ impl Frame { builtins: PyObjectRef, closure: &[PyCellRef], func_obj: Option, - use_datastack: bool, vm: &VirtualMachine, ) -> Self { let nlocals = code.varnames.len(); @@ -666,24 +334,23 @@ impl Frame { .chain(closure.iter().cloned()) .collect(); - let nlocalsplus = nlocals - .checked_add(num_cells) - .and_then(|v| v.checked_add(nfrees)) - .expect("Frame::new: nlocalsplus overflow"); - let max_stackdepth = code.max_stackdepth as usize; - let mut localsplus = if use_datastack { - LocalsPlus::new_on_datastack(nlocalsplus, max_stackdepth, vm) - } else { - LocalsPlus::new(nlocalsplus, max_stackdepth) - }; + // Extend fastlocals to include varnames + cellvars + freevars (localsplus) + let total_locals = nlocals + num_cells + nfrees; + let mut fastlocals_vec: Vec> = vec![None; total_locals]; // Store cell objects at cellvars and freevars positions for (i, cell) in cells_frees.iter().enumerate() { - localsplus.fastlocals_mut()[nlocals + i] = Some(cell.clone().into()); + fastlocals_vec[nlocals + i] = Some(cell.clone().into()); } + let state = FrameState { + stack: BoxVec::new(code.max_stackdepth as usize), + cells_frees, + prev_line: 0, + }; + Self { - localsplus: FrameUnsafeCell::new(localsplus), + fastlocals: FastLocals::new(fastlocals_vec.into_boxed_slice()), locals: match scope.locals { Some(locals) => FrameLocals::with_locals(locals), None if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) => FrameLocals::lazy(), @@ -696,8 +363,7 @@ impl Frame { code, func_obj, lasti: Radium::new(0), - cells_frees: FrameUnsafeCell::new(cells_frees), - prev_line: FrameUnsafeCell::new(0), + state: PyMutex::new(state), trace: PyMutex::new(vm.ctx.none()), trace_lines: PyMutex::new(true), trace_opcodes: PyMutex::new(false), @@ -711,45 +377,12 @@ impl Frame { } } - /// Access fastlocals immutably. - /// - /// # Safety - /// Caller must ensure no concurrent mutable access (frame not executing, - /// or called from the same thread during trace callback). - #[inline(always)] - pub unsafe fn fastlocals(&self) -> &[Option] { - unsafe { (*self.localsplus.get()).fastlocals() } - } - - /// Access fastlocals mutably. - /// - /// # Safety - /// Caller must ensure exclusive access (frame not executing). - #[inline(always)] - #[allow(clippy::mut_from_ref)] - pub unsafe fn fastlocals_mut(&self) -> &mut [Option] { - unsafe { (*self.localsplus.get()).fastlocals_mut() } - } - - /// Migrate data-stack-backed storage to the heap, preserving all values, - /// and return the data stack base pointer for `DataStack::pop()`. - /// Returns `None` if already heap-backed. - /// - /// # Safety - /// Caller must ensure the frame is not executing and the returned - /// pointer is passed to `VirtualMachine::datastack_pop()`. - pub(crate) unsafe fn materialize_localsplus(&self) -> Option<*mut u8> { - unsafe { (*self.localsplus.get()).materialize_to_heap() } - } - /// Clear evaluation stack and state-owned cell/free references. /// For full local/cell cleanup, call `clear_locals_and_stack()`. pub(crate) fn clear_stack_and_cells(&self) { - // SAFETY: Called when frame is not executing (generator closed). - unsafe { - (*self.localsplus.get()).stack_clear(); - let _old = core::mem::take(&mut *self.cells_frees.get()); - } + let mut state = self.state.lock(); + state.stack.clear(); + let _old = core::mem::take(&mut state.cells_frees); } /// Clear locals and stack after generator/coroutine close. @@ -757,7 +390,7 @@ impl Frame { pub(crate) fn clear_locals_and_stack(&self) { self.clear_stack_and_cells(); // SAFETY: Frame is not executing (generator closed). - let fastlocals = unsafe { (*self.localsplus.get()).fastlocals_mut() }; + let fastlocals = unsafe { self.fastlocals.borrow_mut() }; for slot in fastlocals.iter_mut() { *slot = None; } @@ -767,7 +400,7 @@ impl Frame { pub(crate) fn get_cell_contents(&self, cell_idx: usize) -> Option { let nlocals = self.code.varnames.len(); // SAFETY: Frame not executing; no concurrent mutation. - let fastlocals = unsafe { (*self.localsplus.get()).fastlocals() }; + let fastlocals = unsafe { self.fastlocals.borrow() }; fastlocals .get(nlocals + cell_idx) .and_then(|slot| slot.as_ref()) @@ -777,8 +410,7 @@ impl Frame { /// Set cell contents by cell index. Only safe to call before frame execution starts. pub(crate) fn set_cell_contents(&self, cell_idx: usize, value: Option) { - // SAFETY: Called before frame execution starts. - unsafe { (*self.cells_frees.get())[cell_idx].set(value) }; + self.state.lock().cells_frees[cell_idx].set(value); } /// Store a borrowed back-reference to the owning generator/coroutine. @@ -837,7 +469,7 @@ impl Frame { } let code = &**self.code; // SAFETY: Called before generator resume; no concurrent access. - let fastlocals = unsafe { (*self.localsplus.get()).fastlocals_mut() }; + let fastlocals = unsafe { self.fastlocals.borrow_mut() }; let locals_map = self.locals.mapping(vm); for (i, &varname) in code.varnames.iter().enumerate() { if i >= fastlocals.len() { @@ -854,15 +486,19 @@ impl Frame { } pub fn locals(&self, vm: &VirtualMachine) -> PyResult { - // SAFETY: Either the frame is not executing (caller checked owner), - // or we're in a trace callback on the same thread that's executing. + // Acquire the state mutex to synchronize with frame execution. + // If try_lock fails, the frame is executing on this thread (e.g. + // trace callback accessing f_locals), so fastlocals access is safe. + let _guard = self.state.try_lock(); let locals = &self.locals; let code = &**self.code; let map = &code.varnames; let j = core::cmp::min(map.len(), code.varnames.len()); let locals_map = locals.mapping(vm); if !code.varnames.is_empty() { - let fastlocals = unsafe { (*self.localsplus.get()).fastlocals() }; + // SAFETY: Either _guard holds the state mutex (frame not executing), + // or we're in a trace callback on the same thread that holds it. + let fastlocals = unsafe { self.fastlocals.borrow() }; for (&k, v) in zip(&map[..j], fastlocals) { match locals_map.ass_subscript(k, v.clone(), vm) { Ok(()) => {} @@ -898,12 +534,10 @@ impl Frame { impl Py { #[inline(always)] fn with_exec(&self, vm: &VirtualMachine, f: impl FnOnce(ExecutingFrame<'_>) -> R) -> R { - // SAFETY: Frame execution is single-threaded. Only one thread at a time - // executes a given frame (enforced by the owner field and generator - // running flag). Same safety argument as FastLocals (UnsafeCell). + let mut state = self.state.lock(); let exec = ExecutingFrame { code: &self.code, - localsplus: unsafe { &mut *self.localsplus.get() }, + fastlocals: &self.fastlocals, locals: &self.locals, globals: &self.globals, builtins: &self.builtins, @@ -917,8 +551,7 @@ impl Py { }, lasti: &self.lasti, object: self, - cells_frees: unsafe { &mut *self.cells_frees.get() }, - prev_line: unsafe { &mut *self.prev_line.get() }, + state: &mut state, monitoring_mask: 0, }; f(exec) @@ -953,24 +586,19 @@ impl Py { } pub fn yield_from_target(&self) -> Option { - // If the frame is currently executing (owned by thread), it has no - // yield-from target to report. - let owner = FrameOwner::from_i8(self.owner.load(atomic::Ordering::Acquire)); - if owner == FrameOwner::Thread { - return None; - } - // SAFETY: Frame is not executing, so UnsafeCell access is safe. + // Use try_lock to avoid deadlock when the frame is currently executing. + // A running coroutine has no yield-from target. + let mut state = self.state.try_lock()?; let exec = ExecutingFrame { code: &self.code, - localsplus: unsafe { &mut *self.localsplus.get() }, + fastlocals: &self.fastlocals, locals: &self.locals, globals: &self.globals, builtins: &self.builtins, builtins_dict: None, lasti: &self.lasti, object: self, - cells_frees: unsafe { &mut *self.cells_frees.get() }, - prev_line: unsafe { &mut *self.prev_line.get() }, + state: &mut state, monitoring_mask: 0, }; exec.yield_from_target().map(PyObject::to_owned) @@ -995,11 +623,11 @@ impl Py { } } -/// An executing frame; borrows mutable frame-internal data for the duration -/// of bytecode execution. +/// An executing frame; essentially just a struct to combine the immutable data outside the mutex +/// with the mutable data inside struct ExecutingFrame<'a> { code: &'a PyRef, - localsplus: &'a mut LocalsPlus, + fastlocals: &'a FastLocals, locals: &'a FrameLocals, globals: &'a PyDictRef, builtins: &'a PyObjectRef, @@ -1010,8 +638,7 @@ struct ExecutingFrame<'a> { builtins_dict: Option<&'a PyExact>, object: &'a Py, lasti: &'a PyAtomic, - cells_frees: &'a mut Box<[PyCellRef]>, - prev_line: &'a mut u32, + state: &'a mut FrameState, /// Cached monitoring events mask. Reloaded at Resume instruction only, monitoring_mask: u32, } @@ -1020,7 +647,8 @@ impl fmt::Debug for ExecutingFrame<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ExecutingFrame") .field("code", self.code) - .field("stack_len", &self.localsplus.stack_len()) + // .field("scope", self.scope) + .field("state", self.state) .finish() } } @@ -1105,9 +733,9 @@ impl ExecutingFrame<'_> { Instruction::Resume { .. } | Instruction::InstrumentedResume ) && let Some((loc, _)) = self.code.locations.get(idx) - && loc.line.get() as u32 != *self.prev_line + && loc.line.get() as u32 != self.state.prev_line { - *self.prev_line = loc.line.get() as u32; + self.state.prev_line = loc.line.get() as u32; vm.trace_event(crate::protocol::TraceEvent::Line, None)?; // Trace callback may have changed lasti via set_f_lineno. // Re-read and restart the loop from the new position. @@ -1139,7 +767,7 @@ impl ExecutingFrame<'_> { | Instruction::InstrumentedLine ) && let Some((loc, _)) = self.code.locations.get(idx) { - *self.prev_line = loc.line.get() as u32; + self.state.prev_line = loc.line.get() as u32; } // Fire 'opcode' trace event for sys.settrace when f_trace_opcodes @@ -1359,7 +987,7 @@ impl ExecutingFrame<'_> { // 3. Stack top is the delegate (receiver) // // First check if stack is empty - if so, we can't be in yield-from - if self.localsplus.stack_is_empty() { + if self.state.stack.is_empty() { return None; } let lasti = self.lasti() as usize; @@ -1404,7 +1032,7 @@ impl ExecutingFrame<'_> { // In CPython, _Py_call_instrumentation_line has a special case // for RESUME: it fires LINE even when prev_line == current_line. // Since gen_throw bypasses RESUME, we reset prev_line instead. - *self.prev_line = 0; + self.state.prev_line = 0; if let Some(jen) = self.yield_from_target() { // Check if the exception is GeneratorExit (type or instance). // For GeneratorExit, close the sub-iterator instead of throwing. @@ -1825,9 +1453,9 @@ impl ExecutingFrame<'_> { // CopyItem { index: 2 } copies second from top // This is 1-indexed to match CPython let idx = index.get(arg) as usize; - let stack_len = self.localsplus.stack_len(); + let stack_len = self.state.stack.len(); debug_assert!(stack_len >= idx, "CopyItem: stack underflow"); - let value = self.localsplus.stack_index(stack_len - idx).clone(); + let value = self.state.stack[stack_len - idx].clone(); self.push_stackref_opt(value); Ok(None) } @@ -1837,11 +1465,11 @@ impl ExecutingFrame<'_> { } Instruction::DeleteAttr { namei: idx } => self.delete_attr(vm, idx.get(arg)), Instruction::DeleteDeref { i } => { - self.cells_frees[i.get(arg) as usize].set(None); + self.state.cells_frees[i.get(arg) as usize].set(None); Ok(None) } Instruction::DeleteFast { var_num: idx } => { - let fastlocals = self.localsplus.fastlocals_mut(); + let fastlocals = unsafe { self.fastlocals.borrow_mut() }; let idx = idx.get(arg) as usize; if fastlocals[idx].is_none() { return Err(vm.new_exception_msg( @@ -2017,7 +1645,7 @@ impl ExecutingFrame<'_> { } Instruction::GetANext => { #[cfg(debug_assertions)] // remove when GetANext is fully implemented - let orig_stack_len = self.localsplus.stack_len(); + let orig_stack_len = self.state.stack.len(); let aiter = self.top_value(); let awaitable = if aiter.class().is(vm.ctx.types.async_generator) { @@ -2057,7 +1685,7 @@ impl ExecutingFrame<'_> { }; self.push_value(awaitable); #[cfg(debug_assertions)] - debug_assert_eq!(orig_stack_len + 1, self.localsplus.stack_len()); + debug_assert_eq!(orig_stack_len + 1, self.state.stack.len()); Ok(None) } Instruction::GetAwaitable { r#where: oparg } => { @@ -2274,7 +1902,7 @@ impl ExecutingFrame<'_> { }; self.push_value(match value { Some(v) => v, - None => self.cells_frees[i] + None => self.state.cells_frees[i] .get() .ok_or_else(|| self.unbound_cell_exception(i, vm))?, }); @@ -2343,7 +1971,7 @@ impl ExecutingFrame<'_> { } Instruction::LoadDeref { i } => { let idx = i.get(arg) as usize; - let x = self.cells_frees[idx] + let x = self.state.cells_frees[idx] .get() .ok_or_else(|| self.unbound_cell_exception(idx, vm))?; self.push_value(x); @@ -2361,7 +1989,7 @@ impl ExecutingFrame<'_> { ) } let idx = idx.get(arg) as usize; - let x = self.localsplus.fastlocals()[idx] + let x = unsafe { self.fastlocals.borrow() }[idx] .clone() .ok_or_else(|| reference_error(self.code.varnames[idx], vm))?; self.push_value(x); @@ -2371,7 +1999,7 @@ impl ExecutingFrame<'_> { // Load value and clear the slot (for inlined comprehensions) // If slot is empty, push None (not an error - variable may not exist yet) let idx = idx.get(arg) as usize; - let x = self.localsplus.fastlocals_mut()[idx] + let x = unsafe { self.fastlocals.borrow_mut() }[idx] .take() .unwrap_or_else(|| vm.ctx.none()); self.push_value(x); @@ -2381,16 +2009,18 @@ impl ExecutingFrame<'_> { // Same as LoadFast but explicitly checks for unbound locals // (LoadFast in RustPython already does this check) let idx = idx.get(arg) as usize; - let x = self.localsplus.fastlocals()[idx].clone().ok_or_else(|| { - vm.new_exception_msg( - vm.ctx.exceptions.unbound_local_error.to_owned(), - format!( - "local variable '{}' referenced before assignment", - self.code.varnames[idx] + let x = unsafe { self.fastlocals.borrow() }[idx] + .clone() + .ok_or_else(|| { + vm.new_exception_msg( + vm.ctx.exceptions.unbound_local_error.to_owned(), + format!( + "local variable '{}' referenced before assignment", + self.code.varnames[idx] + ) + .into(), ) - .into(), - ) - })?; + })?; self.push_value(x); Ok(None) } @@ -2400,7 +2030,7 @@ impl ExecutingFrame<'_> { let oparg = packed.get(arg); let idx1 = (oparg >> 4) as usize; let idx2 = (oparg & 15) as usize; - let fastlocals = self.localsplus.fastlocals(); + let fastlocals = unsafe { self.fastlocals.borrow() }; let x1 = fastlocals[idx1].clone().ok_or_else(|| { vm.new_exception_msg( vm.ctx.exceptions.unbound_local_error.to_owned(), @@ -2430,16 +2060,18 @@ impl ExecutingFrame<'_> { // lifetime issues at yield/exception points are resolved. Instruction::LoadFastBorrow { var_num: idx } => { let idx = idx.get(arg) as usize; - let x = self.localsplus.fastlocals()[idx].clone().ok_or_else(|| { - vm.new_exception_msg( - vm.ctx.exceptions.unbound_local_error.to_owned(), - format!( - "local variable '{}' referenced before assignment", - self.code.varnames[idx] + let x = unsafe { self.fastlocals.borrow() }[idx] + .clone() + .ok_or_else(|| { + vm.new_exception_msg( + vm.ctx.exceptions.unbound_local_error.to_owned(), + format!( + "local variable '{}' referenced before assignment", + self.code.varnames[idx] + ) + .into(), ) - .into(), - ) - })?; + })?; self.push_value(x); Ok(None) } @@ -2447,7 +2079,7 @@ impl ExecutingFrame<'_> { let oparg = packed.get(arg); let idx1 = (oparg >> 4) as usize; let idx2 = (oparg & 15) as usize; - let fastlocals = self.localsplus.fastlocals(); + let fastlocals = unsafe { self.fastlocals.borrow() }; let x1 = fastlocals[idx1].clone().ok_or_else(|| { vm.new_exception_msg( vm.ctx.exceptions.unbound_local_error.to_owned(), @@ -2956,18 +2588,18 @@ impl ExecutingFrame<'_> { } Instruction::StoreDeref { i } => { let value = self.pop_value(); - self.cells_frees[i.get(arg) as usize].set(Some(value)); + self.state.cells_frees[i.get(arg) as usize].set(Some(value)); Ok(None) } Instruction::StoreFast { var_num: idx } => { let value = self.pop_value(); - let fastlocals = self.localsplus.fastlocals_mut(); + let fastlocals = unsafe { self.fastlocals.borrow_mut() }; fastlocals[idx.get(arg) as usize] = Some(value); Ok(None) } Instruction::StoreFastLoadFast { var_nums } => { let value = self.pop_value(); - let locals = self.localsplus.fastlocals_mut(); + let locals = unsafe { self.fastlocals.borrow_mut() }; let oparg = var_nums.get(arg); locals[oparg.store_idx() as usize] = Some(value); let load_value = locals[oparg.load_idx() as usize] @@ -2982,7 +2614,7 @@ impl ExecutingFrame<'_> { let idx2 = (oparg & 15) as usize; let value1 = self.pop_value(); let value2 = self.pop_value(); - let fastlocals = self.localsplus.fastlocals_mut(); + let fastlocals = unsafe { self.fastlocals.borrow_mut() }; fastlocals[idx1] = Some(value1); fastlocals[idx2] = Some(value2); Ok(None) @@ -3022,7 +2654,7 @@ impl ExecutingFrame<'_> { self.execute_store_subscript(vm) } Instruction::Swap { i: index } => { - let len = self.localsplus.stack_len(); + let len = self.state.stack.len(); debug_assert!(len > 0, "stack underflow in SWAP"); let i = len - 1; // TOS index let index_val = index.get(arg) as usize; @@ -3035,7 +2667,7 @@ impl ExecutingFrame<'_> { len ); let j = len - index_val; - self.localsplus.stack_swap(i, j); + self.state.stack.swap(i, j); Ok(None) } Instruction::ToBool => { @@ -3059,9 +2691,9 @@ impl ExecutingFrame<'_> { // __exit__ is at TOS-3 (below lasti, prev_exc, and exc) let exc = vm.current_exception(); - let stack_len = self.localsplus.stack_len(); + let stack_len = self.state.stack.len(); let exit = expect_unchecked( - self.localsplus.stack_index(stack_len - 4).clone(), + self.state.stack[stack_len - 4].clone(), "WithExceptStart: __exit__ is NULL", ); @@ -3078,8 +2710,8 @@ impl ExecutingFrame<'_> { } Instruction::YieldValue { arg: oparg } => { debug_assert!( - self.localsplus - .stack_as_slice() + self.state + .stack .iter() .flatten() .all(|sr| !sr.is_borrowed()), @@ -3275,13 +2907,6 @@ impl ExecutingFrame<'_> { self.push_value(owner); Ok(None) } else { - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } } @@ -3304,13 +2929,6 @@ impl ExecutingFrame<'_> { self.push_value(owner); Ok(None) } else { - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } } @@ -3329,26 +2947,7 @@ impl ExecutingFrame<'_> { match dict.get_item_opt(attr_name, vm) { Ok(Some(_)) => true, Ok(None) => false, - Err(_) => { - // Dict lookup error → deoptimize to safe path - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code - .instructions - .read_adaptive_counter(cache_base), - ), - ); - } - return self.load_attr_slow(vm, oparg); - } + Err(_) => return self.load_attr_slow(vm, oparg), } } else { false @@ -3364,13 +2963,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrInstanceValue => { @@ -3394,13 +2986,6 @@ impl ExecutingFrame<'_> { } // Not in instance dict — fall through to class lookup via slow path } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrWithHint => { @@ -3427,13 +3012,6 @@ impl ExecutingFrame<'_> { return Ok(None); } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrModule => { @@ -3459,21 +3037,6 @@ impl ExecutingFrame<'_> { } return Ok(None); } - // Deoptimize - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrNondescriptorNoDict => { @@ -3497,20 +3060,6 @@ impl ExecutingFrame<'_> { } return Ok(None); } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrNondescriptorWithValues => { @@ -3548,20 +3097,6 @@ impl ExecutingFrame<'_> { } return Ok(None); } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrClass => { @@ -3587,20 +3122,6 @@ impl ExecutingFrame<'_> { } return Ok(None); } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrClassWithMetaclassCheck => { @@ -3629,20 +3150,10 @@ impl ExecutingFrame<'_> { } return Ok(None); } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrGetattributeOverridden => { let oparg = LoadAttr::new(u32::from(arg)); - self.deoptimize(Instruction::LoadAttr { - namei: Arg::marker(), - }); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrSlot => { @@ -3668,20 +3179,6 @@ impl ExecutingFrame<'_> { } // Slot is None → AttributeError (fall through to slow path) } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrProperty => { @@ -3706,20 +3203,6 @@ impl ExecutingFrame<'_> { } } } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::StoreAttrInstanceValue => { @@ -4098,9 +3581,6 @@ impl ExecutingFrame<'_> { self.push_value_opt(_null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4127,9 +3607,6 @@ impl ExecutingFrame<'_> { self.push_value(obj); self.push_value(class_info); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4154,9 +3631,6 @@ impl ExecutingFrame<'_> { self.push_value_opt(_null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4179,9 +3653,6 @@ impl ExecutingFrame<'_> { self.push_value_opt(_null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4209,9 +3680,6 @@ impl ExecutingFrame<'_> { self.push_value_opt(_null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4240,9 +3708,6 @@ impl ExecutingFrame<'_> { self.push_value_opt(_null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4273,9 +3738,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4306,9 +3768,6 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4335,9 +3794,6 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4360,9 +3816,6 @@ impl ExecutingFrame<'_> { self.push_value_opt(self_or_null); self.push_value(item); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4373,8 +3826,9 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); if nargs == 0 { // Stack: [callable, self_or_null] — peek to get func ptr - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self.localsplus.stack_index(stack_len - 1).is_some(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - 1].is_some(); let callable = self.nth_value(1); let callable_tag = callable as *const PyObject as u32; let func = if cached_tag == callable_tag && self_or_null_is_some { @@ -4396,10 +3850,8 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallMethodDescriptorO => { let instr_idx = self.lasti() as usize - 1; @@ -4408,8 +3860,9 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, arg1] - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - 2].is_some(); let callable = self.nth_value(2); let callable_tag = callable as *const PyObject as u32; let func = if cached_tag == callable_tag && self_or_null_is_some { @@ -4432,10 +3885,8 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallMethodDescriptorFast => { let instr_idx = self.lasti() as usize - 1; @@ -4444,11 +3895,9 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); let callable_tag = callable as *const PyObject as u32; - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self - .localsplus - .stack_index(stack_len - nargs as usize - 1) - .is_some(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let func = if cached_tag == callable_tag && self_or_null_is_some { callable .downcast_ref::() @@ -4472,10 +3921,8 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallBuiltinClass => { let instr_idx = self.lasti() as usize - 1; @@ -4484,12 +3931,23 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); let callable_tag = callable as *const PyObject as u32; - if !(cached_tag == callable_tag && callable.downcast_ref::().is_some()) { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); + if cached_tag == callable_tag && callable.downcast_ref::().is_some() { + let args = self.collect_positional_args(nargs); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + let result = callable.call(final_args, vm)?; + self.push_value(result); + return Ok(None); } - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallAllocAndEnterInit => { let instr_idx = self.lasti() as usize - 1; @@ -4497,11 +3955,9 @@ impl ExecutingFrame<'_> { let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self - .localsplus - .stack_index(stack_len - nargs as usize - 1) - .is_some(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); if !self_or_null_is_some && cached_version != 0 && let Some(cls) = callable.downcast_ref::() @@ -4548,10 +4004,8 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallMethodDescriptorFastWithKeywords => { // Native function interface is uniform regardless of keyword support @@ -4561,11 +4015,9 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); let callable_tag = callable as *const PyObject as u32; - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self - .localsplus - .stack_index(stack_len - nargs as usize - 1) - .is_some(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let func = if cached_tag == callable_tag && self_or_null_is_some { callable .downcast_ref::() @@ -4589,10 +4041,8 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallBuiltinFastWithKeywords => { // Native function interface is uniform regardless of keyword support @@ -4622,10 +4072,8 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallNonPyGeneral => { let instr_idx = self.lasti() as usize - 1; @@ -4634,12 +4082,12 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); let callable_tag = callable as *const PyObject as u32; - if cached_tag != callable_tag { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); + if cached_tag == callable_tag { + let args = self.collect_positional_args(nargs); + return self.execute_call(args, vm); } - self.execute_call_vectorcall(nargs, vm) + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallKwPy => { let instr_idx = self.lasti() as usize - 1; @@ -4681,9 +4129,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::CallKw { - argc: Arg::marker(), - }); let args = self.collect_keyword_args(nargs); self.execute_call(args, vm) } @@ -4717,9 +4162,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::CallKw { - argc: Arg::marker(), - }); let args = self.collect_keyword_args(nargs); self.execute_call(args, vm) } @@ -4730,12 +4172,12 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 2); let callable_tag = callable as *const PyObject as u32; - if cached_tag != callable_tag { - self.deoptimize(Instruction::CallKw { - argc: Arg::marker(), - }); + if cached_tag == callable_tag { + let args = self.collect_keyword_args(nargs); + return self.execute_call(args, vm); } - self.execute_call_kw_vectorcall(nargs, vm) + let args = self.collect_keyword_args(nargs); + self.execute_call(args, vm) } Instruction::LoadSuperAttrAttr => { let oparg = u32::from(arg); @@ -4788,22 +4230,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - // Deoptimize - unsafe { - self.code.instructions.replace_op( - self.lasti() as usize - 1, - Instruction::LoadSuperAttr { - namei: Arg::marker(), - }, - ); - let cache_base = self.lasti() as usize; - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) } @@ -4871,22 +4297,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - // Deoptimize - unsafe { - self.code.instructions.replace_op( - self.lasti() as usize - 1, - Instruction::LoadSuperAttr { - namei: Arg::marker(), - }, - ); - let cache_base = self.lasti() as usize; - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) } @@ -5372,8 +4782,8 @@ impl ExecutingFrame<'_> { } Instruction::InstrumentedYieldValue => { debug_assert!( - self.localsplus - .stack_as_slice() + self.state + .stack .iter() .flatten() .all(|sr| !sr.is_borrowed()), @@ -5638,8 +5048,8 @@ impl ExecutingFrame<'_> { // Fire LINE event only if line changed if let Some((loc, _)) = self.code.locations.get(idx) { let line = loc.line.get() as u32; - if line != *self.prev_line && line > 0 { - *self.prev_line = line; + if line != self.state.prev_line && line > 0 { + self.state.prev_line = line; monitoring::fire_line(vm, self.code, offset, line)?; } } @@ -5933,8 +5343,8 @@ impl ExecutingFrame<'_> { } // 1. Pop stack to entry.depth - while self.localsplus.stack_len() > entry.depth as usize { - let _ = self.localsplus.stack_pop(); + while self.state.stack.len() > entry.depth as usize { + self.state.stack.pop(); } // 2. If push_lasti=true (SETUP_CLEANUP), push lasti before exception @@ -6138,19 +5548,24 @@ impl ExecutingFrame<'_> { #[inline] fn execute_call_vectorcall(&mut self, nargs: u32, vm: &VirtualMachine) -> FrameResult { let nargs_usize = nargs as usize; - let stack_len = self.localsplus.stack_len(); - debug_assert!( - stack_len >= nargs_usize + 2, - "CALL stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}" - ); + let stack_len = self.state.stack.len(); let callable_idx = stack_len - nargs_usize - 2; let self_or_null_idx = stack_len - nargs_usize - 1; let args_start = stack_len - nargs_usize; - // Build args: [self?, arg1, ..., argN] - let self_or_null = self - .localsplus - .stack_index_mut(self_or_null_idx) + // Check if callable has vectorcall slot + let has_vectorcall = self.state.stack[callable_idx] + .as_ref() + .is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some()); + + if !has_vectorcall { + // Fallback to existing FuncArgs path + let args = self.collect_positional_args(nargs); + return self.execute_call(args, vm); + } + + // Build args slice: [self_or_null?, arg1, ..., argN] + let self_or_null = self.state.stack[self_or_null_idx] .take() .map(|sr| sr.to_pyobj()); let has_self = self_or_null.is_some(); @@ -6165,24 +5580,13 @@ impl ExecutingFrame<'_> { args_vec.push(self_val); } for stack_idx in args_start..stack_len { - let val = self - .localsplus - .stack_index_mut(stack_idx) - .take() - .unwrap() - .to_pyobj(); + let val = self.state.stack[stack_idx].take().unwrap().to_pyobj(); args_vec.push(val); } - let callable_obj = self - .localsplus - .stack_index_mut(callable_idx) - .take() - .unwrap() - .to_pyobj(); - self.localsplus.stack_truncate(callable_idx); + let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj(); + self.state.stack.truncate(callable_idx); - // invoke_vectorcall falls back to FuncArgs if no vectorcall slot let result = callable_obj.vectorcall(args_vec, effective_nargs, None, vm)?; self.push_value(result); Ok(None) @@ -6199,28 +5603,50 @@ impl ExecutingFrame<'_> { .downcast_ref::() .expect("kwarg names should be tuple"); let kw_count = kwarg_names_tuple.len(); - debug_assert!(kw_count <= nargs_usize, "CALL_KW kw_count exceeds nargs"); - let stack_len = self.localsplus.stack_len(); - debug_assert!( - stack_len >= nargs_usize + 2, - "CALL_KW stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}" - ); + let stack_len = self.state.stack.len(); let callable_idx = stack_len - nargs_usize - 2; let self_or_null_idx = stack_len - nargs_usize - 1; let args_start = stack_len - nargs_usize; + // Check if callable has vectorcall slot + let has_vectorcall = self.state.stack[callable_idx] + .as_ref() + .is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some()); + + if !has_vectorcall { + // Fallback: reconstruct kwarg_names iterator and use existing path + let kwarg_names_iter = kwarg_names_tuple.as_slice().iter().map(|pyobj| { + pyobj + .downcast_ref::() + .unwrap() + .as_str() + .to_owned() + }); + let args = self.pop_multiple(nargs_usize); + let func_args = FuncArgs::with_kwargs_names(args, kwarg_names_iter); + // pop self_or_null and callable + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = func_args; + args.prepend_arg(self_val); + args + } else { + func_args + }; + let value = callable.call(final_args, vm)?; + self.push_value(value); + return Ok(None); + } + // Build args: [self?, pos_arg1, ..., pos_argM, kw_val1, ..., kw_valK] - let self_or_null = self - .localsplus - .stack_index_mut(self_or_null_idx) + let self_or_null = self.state.stack[self_or_null_idx] .take() .map(|sr| sr.to_pyobj()); let has_self = self_or_null.is_some(); - let pos_count = nargs_usize - .checked_sub(kw_count) - .expect("CALL_KW: kw_count exceeds nargs"); + let pos_count = nargs_usize - kw_count; let effective_nargs = if has_self { pos_count + 1 } else { pos_count }; // Build the full args slice: positional (including self) + kwarg values @@ -6230,24 +5656,13 @@ impl ExecutingFrame<'_> { args_vec.push(self_val); } for stack_idx in args_start..stack_len { - let val = self - .localsplus - .stack_index_mut(stack_idx) - .take() - .unwrap() - .to_pyobj(); + let val = self.state.stack[stack_idx].take().unwrap().to_pyobj(); args_vec.push(val); } - let callable_obj = self - .localsplus - .stack_index_mut(callable_idx) - .take() - .unwrap() - .to_pyobj(); - self.localsplus.stack_truncate(callable_idx); + let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj(); + self.state.stack.truncate(callable_idx); - // invoke_vectorcall falls back to FuncArgs if no vectorcall slot let kwnames = kwarg_names_tuple.as_slice(); let result = callable_obj.vectorcall(args_vec, effective_nargs, Some(kwnames), vm)?; self.push_value(result); @@ -6433,7 +5848,7 @@ impl ExecutingFrame<'_> { let mut elements = elements; // Elements on stack from right-to-left: - self.localsplus.stack_extend( + self.state.stack.extend( elements .drain(before + middle..) .rev() @@ -6445,7 +5860,7 @@ impl ExecutingFrame<'_> { self.push_value(t.into()); // Lastly the first reversed values: - self.localsplus.stack_extend( + self.state.stack.extend( elements .into_iter() .rev() @@ -6777,7 +6192,7 @@ impl ExecutingFrame<'_> { Err(vm.new_value_error(msg)) } PyIterReturn::StopIteration(_) => { - self.localsplus.stack_extend( + self.state.stack.extend( elements .into_iter() .rev() @@ -7519,11 +6934,9 @@ impl ExecutingFrame<'_> { // Stack: [callable, self_or_null, arg1, ..., argN] // callable is at position nargs + 1 from top // self_or_null is at position nargs from top - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self - .localsplus - .stack_index(stack_len - nargs as usize - 1) - .is_some(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let callable = self.nth_value(nargs + 1); if let Some(func) = callable.downcast_ref::() { @@ -7694,11 +7107,9 @@ impl ExecutingFrame<'_> { } // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] // callable is at position nargs + 2 from top - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self - .localsplus - .stack_index(stack_len - nargs as usize - 2) - .is_some(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); let callable = self.nth_value(nargs + 2); if let Some(func) = callable.downcast_ref::() { @@ -8273,7 +7684,7 @@ impl ExecutingFrame<'_> { #[inline] #[track_caller] fn push_stackref_opt(&mut self, obj: Option) { - match self.localsplus.stack_try_push(obj) { + match self.state.stack.try_push(obj) { Ok(()) => {} Err(_e) => self.fatal("tried to push value onto stack but overflowed max_stackdepth"), } @@ -8311,10 +7722,10 @@ impl ExecutingFrame<'_> { /// Pop a raw stackref from the stack, returning None if the stack slot is NULL. #[inline] fn pop_stackref_opt(&mut self) -> Option { - if self.localsplus.stack_is_empty() { - self.fatal("tried to pop from empty stack"); + match self.state.stack.pop() { + Some(slot) => slot, + None => self.fatal("tried to pop from empty stack"), } - self.localsplus.stack_pop() } /// Pop a raw stackref from the stack. Panics if NULL. @@ -8491,7 +7902,7 @@ impl ExecutingFrame<'_> { /// Pop multiple values from the stack. Panics if any slot is NULL. fn pop_multiple(&mut self, count: usize) -> impl ExactSizeIterator + '_ { - let stack_len = self.localsplus.stack_len(); + let stack_len = self.state.stack.len(); if count > stack_len { let instr = self.code.instructions.get(self.lasti() as usize); let op_name = instr @@ -8507,7 +7918,7 @@ impl ExecutingFrame<'_> { self.code.source_path() ); } - self.localsplus.stack_drain(stack_len - count).map(|obj| { + self.state.stack.drain(stack_len - count..).map(|obj| { expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.").to_pyobj() }) } @@ -8515,7 +7926,7 @@ impl ExecutingFrame<'_> { #[inline] fn replace_top(&mut self, top: Option) -> Option { let mut slot = top.map(PyStackRef::new_owned); - let last = self.localsplus.stack_last_mut().unwrap(); + let last = self.state.stack.last_mut().unwrap(); core::mem::swap(last, &mut slot); slot.map(|sr| sr.to_pyobj()) } @@ -8523,18 +7934,18 @@ impl ExecutingFrame<'_> { #[inline] #[track_caller] fn top_value(&self) -> &PyObject { - match self.localsplus.stack_last() { - Some(Some(last)) => last.as_object(), - Some(None) => self.fatal("tried to get top of stack but got NULL"), - None => self.fatal("tried to get top of stack but stack is empty"), + match &*self.state.stack { + [.., Some(last)] => last.as_object(), + [.., None] => self.fatal("tried to get top of stack but got NULL"), + [] => self.fatal("tried to get top of stack but stack is empty"), } } #[inline] #[track_caller] fn nth_value(&self, depth: u32) -> &PyObject { - let idx = self.localsplus.stack_len() - depth as usize - 1; - match self.localsplus.stack_index(idx) { + let stack = &self.state.stack; + match &stack[stack.len() - depth as usize - 1] { Some(obj) => obj.as_object(), None => unsafe { core::hint::unreachable_unchecked() }, } @@ -8551,26 +7962,21 @@ impl ExecutingFrame<'_> { impl fmt::Debug for Frame { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // SAFETY: Debug is best-effort; concurrent mutation is unlikely - // and would only affect debug output. - let localsplus = unsafe { &*self.localsplus.get() }; - let stack_str = localsplus - .stack_as_slice() - .iter() - .fold(String::new(), |mut s, slot| { - match slot { - Some(elem) if elem.downcastable::() => { - s.push_str("\n > {frame}"); - } - Some(elem) => { - core::fmt::write(&mut s, format_args!("\n > {elem:?}")).unwrap(); - } - None => { - s.push_str("\n > NULL"); - } + let state = self.state.lock(); + let stack_str = state.stack.iter().fold(String::new(), |mut s, slot| { + match slot { + Some(elem) if elem.downcastable::() => { + s.push_str("\n > {frame}"); } - s - }); + Some(elem) => { + core::fmt::write(&mut s, format_args!("\n > {elem:?}")).unwrap(); + } + None => { + s.push_str("\n > NULL"); + } + } + s + }); // TODO: fix this up write!( f, From c7007b50495594e15677c1306c02fa450ea9da22 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:46:46 +0900 Subject: [PATCH 08/31] vm: retain store-attr and store-subscr specializations on misses --- crates/vm/src/frame.rs | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 4ef550ae4a2..55e623ec30f 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3222,13 +3222,6 @@ impl ExecutingFrame<'_> { dict.set_item(attr_name, value, vm)?; return Ok(None); } - self.deoptimize_at( - Instruction::StoreAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.store_attr(vm, attr_idx) } Instruction::StoreAttrWithHint => { @@ -3248,13 +3241,6 @@ impl ExecutingFrame<'_> { dict.set_item(attr_name, value, vm)?; return Ok(None); } - self.deoptimize_at( - Instruction::StoreAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.store_attr(vm, attr_idx) } Instruction::StoreAttrSlot => { @@ -3274,22 +3260,7 @@ impl ExecutingFrame<'_> { owner.set_slot(slot_offset, Some(value)); return Ok(None); } - // Deoptimize let attr_idx = u32::from(arg); - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::StoreAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.store_attr(vm, attr_idx) } Instruction::StoreSubscrListInt => { @@ -3307,10 +3278,8 @@ impl ExecutingFrame<'_> { return Ok(None); } drop(vec); - self.deoptimize(Instruction::StoreSubscr); return Err(vm.new_index_error("list assignment index out of range")); } - self.deoptimize(Instruction::StoreSubscr); obj.set_item(&*idx, value, vm)?; Ok(None) } @@ -3323,7 +3292,6 @@ impl ExecutingFrame<'_> { dict.set_item(&*idx, value, vm)?; Ok(None) } else { - self.deoptimize(Instruction::StoreSubscr); obj.set_item(&*idx, value, vm)?; Ok(None) } From 453294d4a0468b5d8b0f2988125b7908904dd2a5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:51:11 +0900 Subject: [PATCH 09/31] vm: retain specialization opcodes on generic fallback paths --- crates/vm/src/frame.rs | 85 +----------------------------------------- 1 file changed, 2 insertions(+), 83 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 55e623ec30f..9eb66c682b8 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1269,7 +1269,6 @@ impl ExecutingFrame<'_> { self.push_value(result.to_pyobject(vm)); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::InplaceAdd) } } @@ -3328,14 +3327,12 @@ impl ExecutingFrame<'_> { self.push_value(result.to_pyobject(vm)); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Add) } } Instruction::BinaryOpSubscrGetitem | Instruction::BinaryOpExtend => { let op = bytecode::BinaryOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::BinaryOperator::Subscr); - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, op) } Instruction::BinaryOpSubscrListInt => { @@ -3356,10 +3353,8 @@ impl ExecutingFrame<'_> { return Ok(None); } drop(vec); - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(vm.new_index_error("list index out of range")); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrTupleInt => { @@ -3378,10 +3373,8 @@ impl ExecutingFrame<'_> { self.push_value(value); return Ok(None); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(vm.new_index_error("tuple index out of range")); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrDict => { @@ -3396,18 +3389,15 @@ impl ExecutingFrame<'_> { return Ok(None); } Ok(None) => { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); let key = self.pop_value(); self.pop_value(); return Err(vm.new_key_error(key)); } Err(e) => { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(e); } } } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrStrInt => { @@ -3426,12 +3416,10 @@ impl ExecutingFrame<'_> { return Ok(None); } Err(e) => { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(e); } } } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrListSlice => { @@ -3446,7 +3434,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::CallPyExactArgs => { @@ -4282,9 +4269,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::CompareOp { - opname: Arg::marker(), - }); let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::ComparisonOperator::Equal); self.execute_compare(vm, op) @@ -4309,9 +4293,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::CompareOp { - opname: Arg::marker(), - }); let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::ComparisonOperator::Equal); self.execute_compare(vm, op) @@ -4331,9 +4312,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::CompareOp { - opname: Arg::marker(), - }); let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::ComparisonOperator::Equal); self.execute_compare(vm, op) @@ -4345,7 +4323,6 @@ impl ExecutingFrame<'_> { // Already a bool, no-op Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4360,7 +4337,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4374,7 +4350,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(false).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4389,7 +4364,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4404,7 +4378,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4424,7 +4397,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(true).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4447,9 +4419,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(value).into()); Ok(None) } else { - self.deoptimize(Instruction::ContainsOp { - invert: Arg::marker(), - }); let b = self.pop_value(); let a = self.pop_value(); let invert = bytecode::Invert::try_from(u32::from(arg) as u8) @@ -4480,9 +4449,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(value).into()); Ok(None) } else { - self.deoptimize(Instruction::ContainsOp { - invert: Arg::marker(), - }); let b = self.pop_value(); let a = self.pop_value(); let invert = bytecode::Invert::try_from(u32::from(arg) as u8) @@ -4508,9 +4474,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::UnpackSequence { - count: Arg::marker(), - }); let size = u32::from(arg); self.unpack_sequence(size, vm) } @@ -4528,9 +4491,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::UnpackSequence { - count: Arg::marker(), - }); self.unpack_sequence(size as u32, vm) } Instruction::UnpackSequenceList => { @@ -4548,9 +4508,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::UnpackSequence { - count: Arg::marker(), - }); self.unpack_sequence(size as u32, vm) } Instruction::ForIterRange => { @@ -4564,9 +4521,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -4582,9 +4536,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -4600,9 +4551,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -4622,9 +4570,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -6810,32 +6755,8 @@ impl ExecutingFrame<'_> { } } - /// Deoptimize: replace specialized op with its base adaptive op and reset - /// the adaptive counter. Computes instr_idx/cache_base from lasti(). - #[inline] - fn deoptimize(&mut self, base_op: Instruction) { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - self.deoptimize_at(base_op, instr_idx, cache_base); - } - - /// Deoptimize with explicit indices (for specialized handlers that already - /// have instr_idx/cache_base in scope). - #[inline] - fn deoptimize_at(&mut self, base_op: Instruction, instr_idx: usize, cache_base: usize) { - unsafe { - self.code.instructions.replace_op(instr_idx, base_op); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } - } - /// Execute a specialized binary op on two int operands. - /// Deoptimize if either operand is not an exact int. + /// Fallback to generic binary op if either operand is not an exact int. #[inline] fn execute_binary_op_int( &mut self, @@ -6855,13 +6776,12 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bigint(&result).into()); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, deopt_op) } } /// Execute a specialized binary op on two float operands. - /// Deoptimize if either operand is not an exact float. + /// Fallback to generic binary op if either operand is not an exact float. #[inline] fn execute_binary_op_float( &mut self, @@ -6881,7 +6801,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_float(result).into()); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, deopt_op) } } From b8f6cc234c778559252a2e5030b609128109635c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:54:11 +0900 Subject: [PATCH 10/31] vm: align jump-backward specialization defaults with CPython --- crates/compiler-core/src/bytecode.rs | 9 ++++++++- crates/vm/src/frame.rs | 16 +++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 8a3251d0d81..90b58116164 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -356,6 +356,8 @@ pub const ADAPTIVE_WARMUP_VALUE: u16 = adaptive_counter_bits(1, 1); /// /// Value/backoff = (52, 0), matching CPython's ADAPTIVE_COOLDOWN bits. pub const ADAPTIVE_COOLDOWN_VALUE: u16 = adaptive_counter_bits(52, 0); +/// Initial JUMP_BACKWARD counter bits (value/backoff = 4095/12). +pub const JUMP_BACKWARD_INITIAL_VALUE: u16 = adaptive_counter_bits(4095, 12); const BACKOFF_BITS: u16 = 4; const MAX_BACKOFF: u16 = 12; @@ -674,8 +676,13 @@ impl CodeUnits { if !op.is_instrumented() { let cache_base = i + 1; if cache_base < len { + let initial_counter = if matches!(op, Instruction::JumpBackward { .. }) { + JUMP_BACKWARD_INITIAL_VALUE + } else { + ADAPTIVE_WARMUP_VALUE + }; unsafe { - self.write_adaptive_counter(cache_base, ADAPTIVE_WARMUP_VALUE); + self.write_adaptive_counter(cache_base, initial_counter); } } } diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 9eb66c682b8..a21d0c6d78a 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1795,9 +1795,19 @@ impl ExecutingFrame<'_> { self.jump_relative_forward(u32::from(arg), 0); Ok(None) } - Instruction::JumpBackward { .. } - | Instruction::JumpBackwardJit - | Instruction::JumpBackwardNoJit => { + Instruction::JumpBackward { .. } => { + // CPython rewrites JUMP_BACKWARD to JUMP_BACKWARD_NO_JIT + // when JIT is unavailable. + let instr_idx = self.lasti() as usize - 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::JumpBackwardNoJit); + } + self.jump_relative_backward(u32::from(arg), 1); + Ok(None) + } + Instruction::JumpBackwardJit | Instruction::JumpBackwardNoJit => { self.jump_relative_backward(u32::from(arg), 1); Ok(None) } From 7bfa96198c23845a6c584655e472fcb4984abca6 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:56:41 +0900 Subject: [PATCH 11/31] vm: retain exact-args call specializations on misses --- crates/vm/src/frame.rs | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index a21d0c6d78a..fec252ef448 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3465,21 +3465,6 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { - // Deoptimize - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::Call { - argc: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3506,21 +3491,6 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { - // Deoptimize - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::Call { - argc: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } From c8529949e0ded9a7660f0a6769ae8a6a292967c8 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 16:58:34 +0900 Subject: [PATCH 12/31] vm: retain SEND_GEN specialization on non-coroutine sends --- crates/vm/src/frame.rs | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index fec252ef448..6eea31fc6a2 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -18,7 +18,7 @@ use crate::{ tuple::{PyTuple, PyTupleIterator, PyTupleRef}, }, bytecode::{ - self, ADAPTIVE_COOLDOWN_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, + self, ADAPTIVE_COOLDOWN_VALUE, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, convert::{ToPyObject, ToPyResult}, coroutine::Coro, @@ -2788,24 +2788,6 @@ impl ExecutingFrame<'_> { } } } - { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::Send { - delta: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } - } match self._send(receiver, val, vm)? { PyIterReturn::Return(value) => { self.push_value(value); From 1a1bbf526c90796fdd773803f844b987371f665b Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:20:14 +0900 Subject: [PATCH 13/31] vm: specialize list.append calls like CPython CALL_LIST_APPEND --- crates/vm/src/frame.rs | 70 +++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 6eea31fc6a2..2d50fc4aa72 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3716,22 +3716,39 @@ impl ExecutingFrame<'_> { } } Instruction::CallListAppend => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { - // Stack: [list.append (bound method), self_or_null (list), item] - let item = self.pop_value(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - if let Some(list_obj) = self_or_null.as_ref() - && let Some(list) = list_obj.downcast_ref_if_exact::(vm) - { - list.append(item); - self.push_value(vm.ctx.none()); - return Ok(None); + // Stack: [callable, self_or_null, item] + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - 2].is_some(); + let callable = self.nth_value(2); + let callable_tag = callable as *const PyObject as u32; + let self_is_list = stack[stack_len - 2] + .as_ref() + .is_some_and(|obj| obj.downcast_ref::().is_some()); + if cached_tag == callable_tag && self_or_null_is_some && self_is_list { + let item = self.pop_value(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + if let Some(list_obj) = self_or_null.as_ref() + && let Some(list) = list_obj.downcast_ref::() + { + list.append(item); + // CALL_LIST_APPEND fuses the following POP_TOP. + self.jump_relative_forward( + 1, + Instruction::CallListAppend.cache_entries() as u32, + ); + return Ok(None); + } + self.push_value(callable); + self.push_value_opt(self_or_null); + self.push_value(item); } - self.push_value(callable); - self.push_value_opt(self_or_null); - self.push_value(item); } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) @@ -6829,12 +6846,29 @@ impl ExecutingFrame<'_> { } // Try to specialize method descriptor calls - if self_or_null_is_some && callable.downcast_ref::().is_some() { + if self_or_null_is_some && let Some(descr) = callable.downcast_ref::() { let callable_tag = callable as *const PyObject as u32; - let new_op = match nargs { - 0 => Instruction::CallMethodDescriptorNoargs, - 1 => Instruction::CallMethodDescriptorO, - _ => Instruction::CallMethodDescriptorFast, + let call_cache_entries = Instruction::CallListAppend.cache_entries(); + let next_idx = cache_base + call_cache_entries; + let next_is_pop_top = if next_idx < self.code.instructions.len() { + let next_op = self.code.instructions.read_op(next_idx); + matches!(next_op.to_base().unwrap_or(next_op), Instruction::PopTop) + } else { + false + }; + + let new_op = if nargs == 1 + && descr.method.name == "append" + && descr.objclass.is(vm.ctx.types.list_type) + && next_is_pop_top + { + Instruction::CallListAppend + } else { + match nargs { + 0 => Instruction::CallMethodDescriptorNoargs, + 1 => Instruction::CallMethodDescriptorO, + _ => Instruction::CallMethodDescriptorFast, + } }; unsafe { self.code From bdd87f477b46faff5ce84c82c140d1b9c4dcb036 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:21:46 +0900 Subject: [PATCH 14/31] vm: set cooldown on LOAD_ATTR_CLASS specialization --- crates/vm/src/frame.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2d50fc4aa72..8c7c84ff529 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6526,6 +6526,11 @@ impl ExecutingFrame<'_> { if !has_descr_get { // METHOD or NON_DESCRIPTOR — can cache directly let descr_ptr = &**descr as *const PyObject as u64; + let new_op = if metaclass_version == 0 { + Instruction::LoadAttrClass + } else { + Instruction::LoadAttrClassWithMetaclassCheck + }; unsafe { self.code .instructions @@ -6536,15 +6541,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code.instructions.replace_op( - instr_idx, - if metaclass_version == 0 { - Instruction::LoadAttrClass - } else { - Instruction::LoadAttrClassWithMetaclassCheck - }, - ); } + self.specialize_at(instr_idx, cache_base, new_op); return; } } From 696e57a56bd93a90141dfbfbdd94eb860d7dc404 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:27:32 +0900 Subject: [PATCH 15/31] vm: specialize bound method object CALL paths --- crates/vm/src/builtins/function.rs | 10 +++ crates/vm/src/frame.rs | 98 ++++++++++++++++++++++++++++-- 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 7befb2ab418..03663d22e5d 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -1152,6 +1152,16 @@ impl PyBoundMethod { Self { object, function } } + #[inline] + pub(crate) fn function_obj(&self) -> &PyObjectRef { + &self.function + } + + #[inline] + pub(crate) fn self_obj(&self) -> &PyObjectRef { + &self.object + } + #[deprecated(note = "Use `Self::new(object, function).into_ref(ctx)` instead")] pub fn new_ref(object: PyObjectRef, function: PyObjectRef, ctx: &Context) -> PyRef { Self::new(object, function).into_ref(ctx) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 8c7c84ff529..3e3932a7615 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -12,7 +12,7 @@ use crate::{ builtin_func::PyNativeFunction, descriptor::{MemberGetter, PyMemberDescriptor, PyMethodDescriptor}, frame::stack_analysis, - function::{PyCell, PyCellRef, PyFunction, vectorcall_function}, + function::{PyBoundMethod, PyCell, PyCellRef, PyFunction, vectorcall_function}, list::PyListIterator, range::PyRangeIterator, tuple::{PyTuple, PyTupleIterator, PyTupleRef}, @@ -3456,9 +3456,13 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - // Stack: [callable, self_val, arg1, ..., argN] + // Stack: [callable, self_or_null, arg1, ..., argN] + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() + if self_or_null_is_some + && let Some(func) = callable.downcast_ref::() && func.func_version() == cached_version && cached_version != 0 { @@ -3472,6 +3476,28 @@ impl ExecutingFrame<'_> { let result = func.invoke_exact_args(all_args, vm)?; self.push_value(result); Ok(None) + } else if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() + { + let bound_function = bound_method.function_obj().clone(); + let bound_self = bound_method.self_obj().clone(); + if let Some(func) = bound_function.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let pos_args: Vec = + self.pop_multiple(nargs as usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable (bound method) + let mut all_args = Vec::with_capacity(pos_args.len() + 1); + all_args.push(bound_self); + all_args.extend(pos_args); + let result = func.invoke_exact_args(all_args, vm)?; + self.push_value(result); + return Ok(None); + } + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } else { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) @@ -3694,8 +3720,12 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() + if self_or_null_is_some + && let Some(func) = callable.downcast_ref::() && func.func_version() == cached_version && cached_version != 0 { @@ -3710,6 +3740,34 @@ impl ExecutingFrame<'_> { vectorcall_function(&callable, args_vec, nargs_usize + 1, None, vm)?; self.push_value(result); Ok(None) + } else if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() + { + let bound_function = bound_method.function_obj().clone(); + let bound_self = bound_method.self_obj().clone(); + if let Some(func) = bound_function.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable (bound method) + let mut args_vec = Vec::with_capacity(nargs_usize + 1); + args_vec.push(bound_self); + args_vec.extend(pos_args); + let result = vectorcall_function( + &bound_function, + args_vec, + nargs_usize + 1, + None, + vm, + )?; + self.push_value(result); + return Ok(None); + } + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } else { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) @@ -6843,6 +6901,38 @@ impl ExecutingFrame<'_> { return; } + // Bound Python method object (`method`) specialization. + if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() + && let Some(func) = bound_method.function_obj().downcast_ref::() + { + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + + let new_op = if func.can_specialize_call(nargs + 1) { + Instruction::CallBoundMethodExactArgs + } else { + Instruction::CallBoundMethodGeneral + }; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, version); + } + self.specialize_at(instr_idx, cache_base, new_op); + return; + } + // Try to specialize method descriptor calls if self_or_null_is_some && let Some(descr) = callable.downcast_ref::() { let callable_tag = callable as *const PyObject as u32; From 3dc64ccdfc5053502cec35bd72c1d969cb685bff Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:30:17 +0900 Subject: [PATCH 16/31] vm: specialize CALL_KW for bound method objects --- crates/vm/src/frame.rs | 66 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 3e3932a7615..e7225890df2 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4129,9 +4129,13 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - // Stack: [callable, self_or_null(=self), arg1, ..., argN, kwarg_names] + // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); let callable = self.nth_value(nargs + 2); - if let Some(func) = callable.downcast_ref::() + if self_or_null_is_some + && let Some(func) = callable.downcast_ref::() && func.func_version() == cached_version && cached_version != 0 { @@ -4153,6 +4157,39 @@ impl ExecutingFrame<'_> { vectorcall_function(&callable, args_vec, pos_count + 1, Some(kwnames), vm)?; self.push_value(result); return Ok(None); + } else if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() + { + let bound_function = bound_method.function_obj().clone(); + let bound_self = bound_method.self_obj().clone(); + if let Some(func) = bound_function.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let nargs_usize = nargs as usize; + let kwarg_names_obj = self.pop_value(); + let kwarg_names_tuple = kwarg_names_obj + .downcast_ref::() + .expect("kwarg names should be tuple"); + let kw_count = kwarg_names_tuple.len(); + let all_args: Vec = self.pop_multiple(nargs_usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable (bound method) + let pos_count = nargs_usize - kw_count; + let mut args_vec = Vec::with_capacity(nargs_usize + 1); + args_vec.push(bound_self); + args_vec.extend(all_args); + let kwnames = kwarg_names_tuple.as_slice(); + let result = vectorcall_function( + &bound_function, + args_vec, + pos_count + 1, + Some(kwnames), + vm, + )?; + self.push_value(result); + return Ok(None); + } } let args = self.collect_keyword_args(nargs); self.execute_call(args, vm) @@ -7111,6 +7148,31 @@ impl ExecutingFrame<'_> { return; } + if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() + && let Some(func) = bound_method.function_obj().downcast_ref::() + { + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, version); + } + self.specialize_at(instr_idx, cache_base, Instruction::CallKwBoundMethod); + return; + } + // General fallback let callable_tag = callable as *const PyObject as u32; unsafe { From f220a2415923d145c0ff92a08a19c5dbf14778d6 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:31:08 +0900 Subject: [PATCH 17/31] vm: use current-state function version for CALL_KW specialization --- crates/vm/src/frame.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index e7225890df2..b24949a6332 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -7121,7 +7121,7 @@ impl ExecutingFrame<'_> { let callable = self.nth_value(nargs + 2); if let Some(func) = callable.downcast_ref::() { - let version = func.func_version(); + let version = func.get_version_for_current_state(); if version == 0 { unsafe { self.code.instructions.write_adaptive_counter( From a66c70676720035f227ed7bdaeb438f2dab78c16 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:35:20 +0900 Subject: [PATCH 18/31] vm: align CALL/CALL_KW pyfunction specialization with CPython --- crates/vm/src/frame.rs | 89 ++++++++---------------------------------- 1 file changed, 16 insertions(+), 73 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index b24949a6332..6365f6c7811 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3439,10 +3439,18 @@ impl ExecutingFrame<'_> { && func.func_version() == cached_version && cached_version != 0 { - let args: Vec = self.pop_multiple(nargs as usize).collect(); - let _null = self.pop_value_opt(); // self_or_null (NULL) + let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); + let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); let func = callable.downcast_ref::().unwrap(); + let args = if let Some(self_val) = self_or_null { + let mut all_args = Vec::with_capacity(pos_args.len() + 1); + all_args.push(self_val); + all_args.extend(pos_args); + all_args + } else { + pos_args + }; let result = func.invoke_exact_args(args, vm)?; self.push_value(result); Ok(None) @@ -3456,27 +3464,12 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - // Stack: [callable, self_or_null, arg1, ..., argN] + // Stack: [callable, self_or_null(NULL), arg1, ..., argN] let stack = &self.state.stack; let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let callable = self.nth_value(nargs + 1); - if self_or_null_is_some - && let Some(func) = callable.downcast_ref::() - && func.func_version() == cached_version - && cached_version != 0 - { - let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); - let self_val = self.pop_value(); - let callable = self.pop_value(); - let func = callable.downcast_ref::().unwrap(); - let mut all_args = Vec::with_capacity(pos_args.len() + 1); - all_args.push(self_val); - all_args.extend(pos_args); - let result = func.invoke_exact_args(all_args, vm)?; - self.push_value(result); - Ok(None) - } else if !self_or_null_is_some + if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref::() { let bound_function = bound_method.function_obj().clone(); @@ -3724,23 +3717,7 @@ impl ExecutingFrame<'_> { let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let callable = self.nth_value(nargs + 1); - if self_or_null_is_some - && let Some(func) = callable.downcast_ref::() - && func.func_version() == cached_version - && cached_version != 0 - { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_val = self.pop_value(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(nargs_usize + 1); - args_vec.push(self_val); - args_vec.extend(pos_args); - let result = - vectorcall_function(&callable, args_vec, nargs_usize + 1, None, vm)?; - self.push_value(result); - Ok(None) - } else if !self_or_null_is_some + if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref::() { let bound_function = bound_method.function_obj().clone(); @@ -4134,30 +4111,7 @@ impl ExecutingFrame<'_> { let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); let callable = self.nth_value(nargs + 2); - if self_or_null_is_some - && let Some(func) = callable.downcast_ref::() - && func.func_version() == cached_version - && cached_version != 0 - { - let nargs_usize = nargs as usize; - let kwarg_names_obj = self.pop_value(); - let kwarg_names_tuple = kwarg_names_obj - .downcast_ref::() - .expect("kwarg names should be tuple"); - let kw_count = kwarg_names_tuple.len(); - let all_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_val = self.pop_value(); - let callable = self.pop_value(); - let pos_count = nargs_usize - kw_count; - let mut args_vec = Vec::with_capacity(nargs_usize + 1); - args_vec.push(self_val); - args_vec.extend(all_args); - let kwnames = kwarg_names_tuple.as_slice(); - let result = - vectorcall_function(&callable, args_vec, pos_count + 1, Some(kwnames), vm)?; - self.push_value(result); - return Ok(None); - } else if !self_or_null_is_some + if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref::() { let bound_function = bound_method.function_obj().clone(); @@ -6919,13 +6873,7 @@ impl ExecutingFrame<'_> { }; let new_op = if func.can_specialize_call(effective_nargs) { - if self_or_null_is_some { - Instruction::CallBoundMethodExactArgs - } else { - Instruction::CallPyExactArgs - } - } else if self_or_null_is_some { - Instruction::CallBoundMethodGeneral + Instruction::CallPyExactArgs } else { Instruction::CallPyGeneral }; @@ -7134,17 +7082,12 @@ impl ExecutingFrame<'_> { return; } - let new_op = if self_or_null_is_some { - Instruction::CallKwBoundMethod - } else { - Instruction::CallKwPy - }; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, version); } - self.specialize_at(instr_idx, cache_base, new_op); + self.specialize_at(instr_idx, cache_base, Instruction::CallKwPy); return; } From 59545f5a109c6be2d16a87c4a40ea3eff9edf5f4 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:50:12 +0900 Subject: [PATCH 19/31] vm: drop call-site identity caches in generic CALL specializations --- crates/vm/src/frame.rs | 123 +++++++++++------------------------------ 1 file changed, 33 insertions(+), 90 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 6365f6c7811..b58338c767a 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3620,17 +3620,13 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallBuiltinO => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { let obj = self.pop_value(); let _null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag - && let Some(native) = callable.downcast_ref::() + if let Some(native) = callable.downcast_ref::() + && native.zelf.is_none() { let args = FuncArgs { args: vec![obj], @@ -3648,18 +3644,13 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallBuiltinFast => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag { + let func = { callable .downcast_ref::() + .filter(|n| n.zelf.is_none()) .map(|n| n.value.func) - } else { - None }; if let Some(func) = func { let positional_args: Vec = @@ -3751,9 +3742,6 @@ impl ExecutingFrame<'_> { } } Instruction::CallListAppend => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, item] @@ -3761,11 +3749,17 @@ impl ExecutingFrame<'_> { let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - 2].is_some(); let callable = self.nth_value(2); - let callable_tag = callable as *const PyObject as u32; let self_is_list = stack[stack_len - 2] .as_ref() .is_some_and(|obj| obj.downcast_ref::().is_some()); - if cached_tag == callable_tag && self_or_null_is_some && self_is_list { + let is_list_append = + callable + .downcast_ref::() + .is_some_and(|descr| { + descr.method.name == "append" + && descr.objclass.is(vm.ctx.types.list_type) + }); + if is_list_append && self_or_null_is_some && self_is_list { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -3789,9 +3783,6 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallMethodDescriptorNoargs => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 0 { // Stack: [callable, self_or_null] — peek to get func ptr @@ -3799,8 +3790,7 @@ impl ExecutingFrame<'_> { let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - 1].is_some(); let callable = self.nth_value(1); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -3823,9 +3813,6 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallMethodDescriptorO => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, arg1] @@ -3833,8 +3820,7 @@ impl ExecutingFrame<'_> { let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - 2].is_some(); let callable = self.nth_value(2); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -3858,16 +3844,12 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallMethodDescriptorFast => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; let stack = &self.state.stack; let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -3894,13 +3876,9 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallBuiltinClass => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - if cached_tag == callable_tag && callable.downcast_ref::().is_some() { + if callable.downcast_ref::().is_some() { let args = self.collect_positional_args(nargs); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -3978,16 +3956,12 @@ impl ExecutingFrame<'_> { } Instruction::CallMethodDescriptorFastWithKeywords => { // Native function interface is uniform regardless of keyword support - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; let stack = &self.state.stack; let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -4015,18 +3989,13 @@ impl ExecutingFrame<'_> { } Instruction::CallBuiltinFastWithKeywords => { // Native function interface is uniform regardless of keyword support - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag { + let func = { callable .downcast_ref::() + .filter(|n| n.zelf.is_none()) .map(|n| n.value.func) - } else { - None }; if let Some(func) = func { let positional_args: Vec = @@ -4045,13 +4014,11 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallNonPyGeneral => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - if cached_tag == callable_tag { + if callable.downcast_ref::().is_some() + || callable.downcast_ref::().is_some() + { let args = self.collect_positional_args(nargs); return self.execute_call(args, vm); } @@ -4149,13 +4116,11 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallKwNonPy => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 2); - let callable_tag = callable as *const PyObject as u32; - if cached_tag == callable_tag { + if callable.downcast_ref::().is_some() + || callable.downcast_ref::().is_some() + { let args = self.collect_keyword_args(nargs); return self.execute_call(args, vm); } @@ -6920,7 +6885,6 @@ impl ExecutingFrame<'_> { // Try to specialize method descriptor calls if self_or_null_is_some && let Some(descr) = callable.downcast_ref::() { - let callable_tag = callable as *const PyObject as u32; let call_cache_entries = Instruction::CallListAppend.cache_entries(); let next_idx = cache_base + call_cache_entries; let next_is_pop_top = if next_idx < self.code.instructions.len() { @@ -6943,11 +6907,6 @@ impl ExecutingFrame<'_> { _ => Instruction::CallMethodDescriptorFast, } }; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } self.specialize_at(instr_idx, cache_base, new_op); return; } @@ -6966,10 +6925,12 @@ impl ExecutingFrame<'_> { }; let new_op = Some(new_op); if let Some(new_op) = new_op { - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); + if matches!(new_op, Instruction::CallLen | Instruction::CallIsinstance) { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } } self.specialize_at(instr_idx, cache_base, new_op); return; @@ -7027,24 +6988,12 @@ impl ExecutingFrame<'_> { } } // General builtin class call (any type with Callable) - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); return; } } - // General fallback: cache callable identity to skip re-specialization - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } + // General fallback: specialized non-Python callable path self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); } @@ -7116,13 +7065,7 @@ impl ExecutingFrame<'_> { return; } - // General fallback - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } + // General fallback: specialized non-Python callable path self.specialize_at(instr_idx, cache_base, Instruction::CallKwNonPy); } From f8eebec26f899126949dd7645c6da2648f744d46 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 17:51:24 +0900 Subject: [PATCH 20/31] vm: align builtin type call specializations with CPython guards --- crates/vm/src/frame.rs | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index b58338c767a..4578843b86f 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3547,17 +3547,13 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallType1 => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); let _null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + if callable.is(vm.ctx.types.type_type.as_object()) { let tp = obj.class().to_owned().into(); self.push_value(tp); return Ok(None); @@ -3571,16 +3567,12 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallStr1 => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { let obj = self.pop_value(); let _null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + if callable.is(vm.ctx.types.str_type.as_object()) { let result = obj.str(vm)?; self.push_value(result.into()); return Ok(None); @@ -3593,16 +3585,12 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallTuple1 => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { let obj = self.pop_value(); let _null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + if callable.is(vm.ctx.types.tuple_type.as_object()) { // tuple(x) returns x as-is when x is already an exact tuple if let Ok(tuple) = obj.clone().downcast_exact::(vm) { self.push_value(tuple.into_pyref().into()); @@ -6949,12 +6937,6 @@ impl ExecutingFrame<'_> { None }; if let Some(new_op) = new_op { - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } self.specialize_at(instr_idx, cache_base, new_op); return; } From 58bb1ea62b2d253706feff26049a18ec8b82aa70 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 18:18:25 +0900 Subject: [PATCH 21/31] vm: align builtin CALL guards with CPython self_or_null semantics --- crates/vm/src/frame.rs | 195 +++++++++++++++++++++-------------------- 1 file changed, 100 insertions(+), 95 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 4578843b86f..7f487372f4f 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3504,17 +3504,17 @@ impl ExecutingFrame<'_> { if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); // arg - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + if null.is_none() && cached_tag == callable_tag { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); return Ok(None); } // Guard failed — re-push and fallback self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } let args = self.collect_positional_args(nargs); @@ -3525,23 +3525,27 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - if nargs == 2 { - // Stack: [callable, null, obj, class_info] - let class_info = self.pop_value(); - let obj = self.pop_value(); - let _null = self.pop_value_opt(); - let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); + if effective_nargs == 2 { + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; if cached_tag == callable_tag { - let result = obj.is_instance(&class_info, vm)?; + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + self.pop_value(); // callable + let mut all_args = Vec::with_capacity(2); + if let Some(self_val) = self_or_null { + all_args.push(self_val); + } + all_args.extend(pos_args); + let result = all_args[0].is_instance(&all_args[1], vm)?; self.push_value(vm.ctx.new_bool(result).into()); return Ok(None); } - // Guard failed — re-push and fallback - self.push_value(callable); - self.push_value_opt(_null); - self.push_value(obj); - self.push_value(class_info); } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) @@ -3551,16 +3555,16 @@ impl ExecutingFrame<'_> { if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); - if callable.is(vm.ctx.types.type_type.as_object()) { + if null.is_none() && callable.is(vm.ctx.types.type_type.as_object()) { let tp = obj.class().to_owned().into(); self.push_value(tp); return Ok(None); } // Guard failed — re-push and fallback self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } let args = self.collect_positional_args(nargs); @@ -3570,15 +3574,15 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); if nargs == 1 { let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); - if callable.is(vm.ctx.types.str_type.as_object()) { + if null.is_none() && callable.is(vm.ctx.types.str_type.as_object()) { let result = obj.str(vm)?; self.push_value(result.into()); return Ok(None); } self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } let args = self.collect_positional_args(nargs); @@ -3588,9 +3592,9 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); if nargs == 1 { let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); - if callable.is(vm.ctx.types.tuple_type.as_object()) { + if null.is_none() && callable.is(vm.ctx.types.tuple_type.as_object()) { // tuple(x) returns x as-is when x is already an exact tuple if let Ok(tuple) = obj.clone().downcast_exact::(vm) { self.push_value(tuple.into_pyref().into()); @@ -3601,7 +3605,7 @@ impl ExecutingFrame<'_> { return Ok(None); } self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } let args = self.collect_positional_args(nargs); @@ -3609,47 +3613,48 @@ impl ExecutingFrame<'_> { } Instruction::CallBuiltinO => { let nargs: u32 = arg.into(); - if nargs == 1 { - let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); + let callable = self.nth_value(nargs + 1); + if callable.downcast_ref::().is_some() && effective_nargs == 1 { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); - if let Some(native) = callable.downcast_ref::() - && native.zelf.is_none() - { - let args = FuncArgs { - args: vec![obj], - kwargs: Default::default(), - }; - let result = (native.value.func)(vm, args)?; - self.push_value(result); - return Ok(None); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); } - self.push_value(callable); - self.push_value_opt(_null); - self.push_value(obj); + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } Instruction::CallBuiltinFast => { let nargs: u32 = arg.into(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - let func = { - callable - .downcast_ref::() - .filter(|n| n.zelf.is_none()) - .map(|n| n.value.func) - }; - if let Some(func) = func { - let positional_args: Vec = - self.pop_multiple(nargs as usize).collect(); - self.pop_value_opt(); // null (self_or_null) - self.pop_value(); // callable - let args = FuncArgs { - args: positional_args, - kwargs: Default::default(), - }; - let result = func(vm, args)?; + if callable.downcast_ref::().is_some() { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; self.push_value(result); return Ok(None); } @@ -3978,23 +3983,23 @@ impl ExecutingFrame<'_> { Instruction::CallBuiltinFastWithKeywords => { // Native function interface is uniform regardless of keyword support let nargs: u32 = arg.into(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - let func = { - callable - .downcast_ref::() - .filter(|n| n.zelf.is_none()) - .map(|n| n.value.func) - }; - if let Some(func) = func { - let positional_args: Vec = - self.pop_multiple(nargs as usize).collect(); - self.pop_value_opt(); // null (self_or_null) - self.pop_value(); // callable - let args = FuncArgs { - args: positional_args, - kwargs: Default::default(), - }; - let result = func(vm, args)?; + if callable.downcast_ref::().is_some() { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; self.push_value(result); return Ok(None); } @@ -6900,30 +6905,30 @@ impl ExecutingFrame<'_> { } // Try to specialize builtin calls - if !self_or_null_is_some { - if let Some(native) = callable.downcast_ref::() - && native.zelf.is_none() - { - let callable_tag = callable as *const PyObject as u32; - let new_op = match (native.value.name, nargs) { - ("len", 1) => Instruction::CallLen, - ("isinstance", 2) => Instruction::CallIsinstance, - (_, 1) => Instruction::CallBuiltinO, - _ => Instruction::CallBuiltinFast, - }; - let new_op = Some(new_op); - if let Some(new_op) = new_op { - if matches!(new_op, Instruction::CallLen | Instruction::CallIsinstance) { - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } - } - self.specialize_at(instr_idx, cache_base, new_op); - return; + if let Some(native) = callable.downcast_ref::() { + let effective_nargs = nargs + u32::from(self_or_null_is_some); + let callable_tag = callable as *const PyObject as u32; + let new_op = if native.value.name == "len" && nargs == 1 && effective_nargs == 1 { + Instruction::CallLen + } else if native.value.name == "isinstance" && effective_nargs == 2 { + Instruction::CallIsinstance + } else if effective_nargs == 1 { + Instruction::CallBuiltinO + } else { + Instruction::CallBuiltinFast + }; + if matches!(new_op, Instruction::CallLen | Instruction::CallIsinstance) { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); } } + self.specialize_at(instr_idx, cache_base, new_op); + return; + } + + if !self_or_null_is_some { // type/str/tuple(x) specialization if callable.class().is(vm.ctx.types.type_type) { if nargs == 1 { From 157a2c39666cadec28984244bc7298690a7babeb Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 18:21:21 +0900 Subject: [PATCH 22/31] vm: require exact list in CALL_LIST_APPEND fast path --- crates/vm/src/frame.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 7f487372f4f..7c2c3ef3abb 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3742,9 +3742,9 @@ impl ExecutingFrame<'_> { let stack_len = stack.len(); let self_or_null_is_some = stack[stack_len - 2].is_some(); let callable = self.nth_value(2); - let self_is_list = stack[stack_len - 2] + let self_is_exact_list = stack[stack_len - 2] .as_ref() - .is_some_and(|obj| obj.downcast_ref::().is_some()); + .is_some_and(|obj| obj.class().is(vm.ctx.types.list_type)); let is_list_append = callable .downcast_ref::() @@ -3752,12 +3752,12 @@ impl ExecutingFrame<'_> { descr.method.name == "append" && descr.objclass.is(vm.ctx.types.list_type) }); - if is_list_append && self_or_null_is_some && self_is_list { + if is_list_append && self_or_null_is_some && self_is_exact_list { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); if let Some(list_obj) = self_or_null.as_ref() - && let Some(list) = list_obj.downcast_ref::() + && let Some(list) = list_obj.downcast_ref_if_exact::(vm) { list.append(item); // CALL_LIST_APPEND fuses the following POP_TOP. From cae26d4ecb961994c319164007a79d134da72ce9 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 18:24:52 +0900 Subject: [PATCH 23/31] vm: align CALL builtin/class specialization flow with CPython --- crates/vm/src/frame.rs | 57 ++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 7c2c3ef3abb..12c2cfb97d4 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6946,37 +6946,40 @@ impl ExecutingFrame<'_> { return; } } - // CallAllocAndEnterInit: heap type with default __new__ - if let Some(cls) = callable.downcast_ref::() - && cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) - { - let object_new = vm.ctx.types.object_type.slots.new.load(); - let cls_new = cls.slots.new.load(); - if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) - && cls_new_fn as usize == obj_new_fn as usize - && let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref::() - && init_func.can_specialize_call(nargs + 1) - { - let version = cls.tp_version_tag.load(Acquire); - if version != 0 { - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, version); + if let Some(cls) = callable.downcast_ref::() { + if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) { + self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); + return; + } + // CallAllocAndEnterInit: heap type with default __new__ + if cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) { + let object_new = vm.ctx.types.object_type.slots.new.load(); + let cls_new = cls.slots.new.load(); + if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) + && cls_new_fn as usize == obj_new_fn as usize + && let Some(init) = cls.get_attr(identifier!(vm, __init__)) + && let Some(init_func) = init.downcast_ref::() + && init_func.can_specialize_call(nargs + 1) + { + let version = cls.tp_version_tag.load(Acquire); + if version != 0 { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, version); + } + self.specialize_at( + instr_idx, + cache_base, + Instruction::CallAllocAndEnterInit, + ); + return; } - self.specialize_at( - instr_idx, - cache_base, - Instruction::CallAllocAndEnterInit, - ); - return; } } + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); + return; } - // General builtin class call (any type with Callable) - self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); - return; } } From 0d01f7e4197e8321dbde90b2b73442abc1c81cb1 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 18:27:37 +0900 Subject: [PATCH 24/31] vm: tighten len/isinstance CALL specializations to builtin guards --- crates/vm/src/frame.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 12c2cfb97d4..2b5afb47036 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3507,7 +3507,10 @@ impl ExecutingFrame<'_> { let null = self.pop_value_opt(); let callable = self.pop_value(); let callable_tag = &*callable as *const PyObject as u32; - if null.is_none() && cached_tag == callable_tag { + let is_len_callable = callable + .downcast_ref::() + .is_some_and(|native| native.zelf.is_none() && native.value.name == "len"); + if null.is_none() && cached_tag == callable_tag && is_len_callable { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); return Ok(None); @@ -3532,7 +3535,12 @@ impl ExecutingFrame<'_> { if effective_nargs == 2 { let callable = self.nth_value(nargs + 1); let callable_tag = callable as *const PyObject as u32; - if cached_tag == callable_tag { + let is_isinstance_callable = callable + .downcast_ref::() + .is_some_and(|native| { + native.zelf.is_none() && native.value.name == "isinstance" + }); + if cached_tag == callable_tag && is_isinstance_callable { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -6908,9 +6916,16 @@ impl ExecutingFrame<'_> { if let Some(native) = callable.downcast_ref::() { let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable_tag = callable as *const PyObject as u32; - let new_op = if native.value.name == "len" && nargs == 1 && effective_nargs == 1 { + let new_op = if native.zelf.is_none() + && native.value.name == "len" + && nargs == 1 + && effective_nargs == 1 + { Instruction::CallLen - } else if native.value.name == "isinstance" && effective_nargs == 2 { + } else if native.zelf.is_none() + && native.value.name == "isinstance" + && effective_nargs == 2 + { Instruction::CallIsinstance } else if effective_nargs == 1 { Instruction::CallBuiltinO From 22cccae2180f12e455ceffd0f457714b66bff50a Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 20:07:07 +0900 Subject: [PATCH 25/31] vm: gate CALL_BUILTIN_CLASS on type vectorcall like CPython --- crates/vm/src/frame.rs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2b5afb47036..5052e937b07 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3879,18 +3879,25 @@ impl ExecutingFrame<'_> { Instruction::CallBuiltinClass => { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - if callable.downcast_ref::().is_some() { - let args = self.collect_positional_args(nargs); + if let Some(cls) = callable.downcast_ref::() + && cls.slots.vectorcall.load().is_some() + { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); - let final_args = if let Some(self_val) = self_or_null { - let mut args = args; - args.prepend_arg(self_val); - args - } else { - args - }; - let result = callable.call(final_args, vm)?; + let self_is_some = self_or_null.is_some(); + let mut args_vec = Vec::with_capacity(nargs_usize + usize::from(self_is_some)); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = callable.vectorcall( + args_vec, + nargs_usize + usize::from(self_is_some), + None, + vm, + )?; self.push_value(result); return Ok(None); } @@ -6962,7 +6969,9 @@ impl ExecutingFrame<'_> { } } if let Some(cls) = callable.downcast_ref::() { - if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) { + if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) + && cls.slots.vectorcall.load().is_some() + { self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); return; } From 30e8a75d01aa38dd967a25530ff4fdf94632a943 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 20:10:58 +0900 Subject: [PATCH 26/31] vm: run non-py CALL specializations via direct vectorcall --- crates/vm/src/frame.rs | 52 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 5052e937b07..29adca3c15b 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4023,6 +4023,9 @@ impl ExecutingFrame<'_> { } Instruction::CallNonPyGeneral => { let nargs: u32 = arg.into(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let callable = self.nth_value(nargs + 1); if callable.downcast_ref::().is_some() || callable.downcast_ref::().is_some() @@ -4030,8 +4033,24 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); return self.execute_call(args, vm); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = + Vec::with_capacity(nargs_usize + usize::from(self_or_null_is_some)); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = callable.vectorcall( + args_vec, + nargs_usize + usize::from(self_or_null_is_some), + None, + vm, + )?; + self.push_value(result); + Ok(None) } Instruction::CallKwPy => { let instr_idx = self.lasti() as usize - 1; @@ -4125,6 +4144,9 @@ impl ExecutingFrame<'_> { } Instruction::CallKwNonPy => { let nargs: u32 = arg.into(); + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); let callable = self.nth_value(nargs + 2); if callable.downcast_ref::().is_some() || callable.downcast_ref::().is_some() @@ -4132,8 +4154,30 @@ impl ExecutingFrame<'_> { let args = self.collect_keyword_args(nargs); return self.execute_call(args, vm); } - let args = self.collect_keyword_args(nargs); - self.execute_call(args, vm) + let nargs_usize = nargs as usize; + let kwarg_names_obj = self.pop_value(); + let kwarg_names_tuple = kwarg_names_obj + .downcast_ref::() + .expect("kwarg names should be tuple"); + let kw_count = kwarg_names_tuple.len(); + let all_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let pos_count = nargs_usize - kw_count; + let mut args_vec = + Vec::with_capacity(nargs_usize + usize::from(self_or_null_is_some)); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(all_args); + let result = callable.vectorcall( + args_vec, + pos_count + usize::from(self_or_null_is_some), + Some(kwarg_names_tuple.as_slice()), + vm, + )?; + self.push_value(result); + Ok(None) } Instruction::LoadSuperAttrAttr => { let oparg = u32::from(arg); From da368592986dadecca9c8f6a408f163f790be469 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 20:13:27 +0900 Subject: [PATCH 27/31] vm: align class-call specialization branching with CPython --- crates/vm/src/frame.rs | 98 +++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 29adca3c15b..589114845f8 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6994,61 +6994,59 @@ impl ExecutingFrame<'_> { return; } - if !self_or_null_is_some { - // type/str/tuple(x) specialization - if callable.class().is(vm.ctx.types.type_type) { - if nargs == 1 { - let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { - Some(Instruction::CallType1) - } else if callable.is(&vm.ctx.types.str_type.as_object()) { - Some(Instruction::CallStr1) - } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { - Some(Instruction::CallTuple1) - } else { - None - }; - if let Some(new_op) = new_op { - self.specialize_at(instr_idx, cache_base, new_op); - return; - } + // type/str/tuple(x) and class-call specializations + if callable.class().is(vm.ctx.types.type_type) + && let Some(cls) = callable.downcast_ref::() + { + if !self_or_null_is_some && nargs == 1 { + let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { + Some(Instruction::CallType1) + } else if callable.is(&vm.ctx.types.str_type.as_object()) { + Some(Instruction::CallStr1) + } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { + Some(Instruction::CallTuple1) + } else { + None + }; + if let Some(new_op) = new_op { + self.specialize_at(instr_idx, cache_base, new_op); + return; } - if let Some(cls) = callable.downcast_ref::() { - if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) - && cls.slots.vectorcall.load().is_some() - { - self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); - return; - } - // CallAllocAndEnterInit: heap type with default __new__ - if cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) { - let object_new = vm.ctx.types.object_type.slots.new.load(); - let cls_new = cls.slots.new.load(); - if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) - && cls_new_fn as usize == obj_new_fn as usize - && let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref::() - && init_func.can_specialize_call(nargs + 1) - { - let version = cls.tp_version_tag.load(Acquire); - if version != 0 { - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, version); - } - self.specialize_at( - instr_idx, - cache_base, - Instruction::CallAllocAndEnterInit, - ); - return; - } + } + if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) + && cls.slots.vectorcall.load().is_some() + { + self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); + return; + } + // CallAllocAndEnterInit: heap type with default __new__ + if !self_or_null_is_some && cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) { + let object_new = vm.ctx.types.object_type.slots.new.load(); + let cls_new = cls.slots.new.load(); + if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) + && cls_new_fn as usize == obj_new_fn as usize + && let Some(init) = cls.get_attr(identifier!(vm, __init__)) + && let Some(init_func) = init.downcast_ref::() + && init_func.can_specialize_call(nargs + 1) + { + let version = cls.tp_version_tag.load(Acquire); + if version != 0 { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, version); } + self.specialize_at( + instr_idx, + cache_base, + Instruction::CallAllocAndEnterInit, + ); + return; } - self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); - return; } } + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); + return; } // General fallback: specialized non-Python callable path From 42d81ffe6cb63eb2c055941c0eb2a986afee273a Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 21:03:22 +0900 Subject: [PATCH 28/31] Fix CI: disable ForIterGen, tighten CALL guards - Disable ForIterGen specialization (falls through to generic path) because inline generator frame resumption is needed for correct debugger StopIteration visibility (test_bdb) - Use downcast_ref_if_exact for PyNativeFunction in CALL specialization guards - Add can_specialize_call guard for class __init__ specialization - Remove expectedFailure for test_bad_newobj_args (now passing) --- Lib/test/test_pickle.py | 4 ---- crates/vm/src/frame.rs | 46 ++++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index a09037034ad..c9d4a348448 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -85,10 +85,6 @@ def dumps(self, arg, proto=None, **kwargs): f.seek(0) return bytes(f.read()) - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bad_newobj_args(self): - return super().test_bad_newobj_args() - @unittest.expectedFailure # TODO: RUSTPYTHON def test_buffer_callback_error(self): return super().test_buffer_callback_error() diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 589114845f8..cdee2c99cf6 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3508,7 +3508,7 @@ impl ExecutingFrame<'_> { let callable = self.pop_value(); let callable_tag = &*callable as *const PyObject as u32; let is_len_callable = callable - .downcast_ref::() + .downcast_ref_if_exact::(vm) .is_some_and(|native| native.zelf.is_none() && native.value.name == "len"); if null.is_none() && cached_tag == callable_tag && is_len_callable { let len = obj.length(vm)?; @@ -3536,7 +3536,7 @@ impl ExecutingFrame<'_> { let callable = self.nth_value(nargs + 1); let callable_tag = callable as *const PyObject as u32; let is_isinstance_callable = callable - .downcast_ref::() + .downcast_ref_if_exact::(vm) .is_some_and(|native| { native.zelf.is_none() && native.value.name == "isinstance" }); @@ -3626,7 +3626,11 @@ impl ExecutingFrame<'_> { let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable.downcast_ref::().is_some() && effective_nargs == 1 { + if callable + .downcast_ref_if_exact::(vm) + .is_some() + && effective_nargs == 1 + { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -3651,7 +3655,10 @@ impl ExecutingFrame<'_> { let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable.downcast_ref::().is_some() { + if callable + .downcast_ref_if_exact::(vm) + .is_some() + { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -3921,6 +3928,7 @@ impl ExecutingFrame<'_> { // Look up __init__ (guarded by type_version) if let Some(init) = cls.get_attr(identifier!(vm, __init__)) && let Some(init_func) = init.downcast_ref::() + && init_func.can_specialize_call(nargs + 1) { // Allocate object directly (tp_new == object.__new__) let dict = if cls @@ -4003,7 +4011,10 @@ impl ExecutingFrame<'_> { let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable.downcast_ref::().is_some() { + if callable + .downcast_ref_if_exact::(vm) + .is_some() + { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -4601,23 +4612,12 @@ impl ExecutingFrame<'_> { } } Instruction::ForIterGen => { + // ForIterGen is not faithfully implementable without inline + // generator frame resumption (as CPython does). Fall through + // to the generic path so the debugger sees StopIteration. let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); - let iter = self.top_value(); - if let Some(generator) = iter.downcast_ref_if_exact::(vm) { - match generator.as_coro().send(iter, vm.ctx.none(), vm) { - Ok(PyIterReturn::Return(value)) => { - self.push_value(value); - } - Ok(PyIterReturn::StopIteration(_)) => { - self.for_iter_jump_on_exhausted(target); - } - Err(e) => return Err(e), - } - Ok(None) - } else { - self.execute_for_iter(vm, target)?; - Ok(None) - } + self.execute_for_iter(vm, target)?; + Ok(None) } Instruction::LoadGlobalModule => { let oparg = u32::from(arg); @@ -6964,7 +6964,7 @@ impl ExecutingFrame<'_> { } // Try to specialize builtin calls - if let Some(native) = callable.downcast_ref::() { + if let Some(native) = callable.downcast_ref_if_exact::(vm) { let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable_tag = callable as *const PyObject as u32; let new_op = if native.zelf.is_none() @@ -7299,8 +7299,6 @@ impl ExecutingFrame<'_> { Some(Instruction::ForIterList) } else if iter.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::ForIterTuple) - } else if iter.downcast_ref_if_exact::(vm).is_some() { - Some(Instruction::ForIterGen) } else { None }; From d464119937f7b738c300e39d4815d61882337e4e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 21:18:32 +0900 Subject: [PATCH 29/31] vm: restore FOR_ITER_GEN specialization and tuple index parity --- crates/vm/src/builtins/tuple.rs | 8 +++++++- crates/vm/src/frame.rs | 27 ++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/crates/vm/src/builtins/tuple.rs b/crates/vm/src/builtins/tuple.rs index b7ed066f1d1..03f88f1b5fe 100644 --- a/crates/vm/src/builtins/tuple.rs +++ b/crates/vm/src/builtins/tuple.rs @@ -327,7 +327,13 @@ impl PyTuple { fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult { match SequenceIndex::try_from_borrowed_object(vm, needle, "tuple")? { - SequenceIndex::Int(i) => self.elements.getitem_by_index(vm, i), + SequenceIndex::Int(i) => { + let index = self + .elements + .wrap_index(i) + .ok_or_else(|| vm.new_index_error("tuple index out of range"))?; + Ok(self.elements[index].clone()) + } SequenceIndex::Slice(slice) => self .elements .getitem_by_slice(vm, slice) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index cdee2c99cf6..4e0b020b28d 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4612,12 +4612,27 @@ impl ExecutingFrame<'_> { } } Instruction::ForIterGen => { - // ForIterGen is not faithfully implementable without inline - // generator frame resumption (as CPython does). Fall through - // to the generic path so the debugger sees StopIteration. let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); - self.execute_for_iter(vm, target)?; - Ok(None) + let iter = self.top_value(); + if let Some(generator) = iter.downcast_ref_if_exact::(vm) { + match generator.as_coro().send(iter, vm.ctx.none(), vm) { + Ok(PyIterReturn::Return(value)) => { + self.push_value(value); + } + Ok(PyIterReturn::StopIteration(value)) => { + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value); + self.fire_exception_trace(&stop_exc, vm)?; + } + self.for_iter_jump_on_exhausted(target); + } + Err(e) => return Err(e), + } + Ok(None) + } else { + self.execute_for_iter(vm, target)?; + Ok(None) + } } Instruction::LoadGlobalModule => { let oparg = u32::from(arg); @@ -7299,6 +7314,8 @@ impl ExecutingFrame<'_> { Some(Instruction::ForIterList) } else if iter.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::ForIterTuple) + } else if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterGen) } else { None }; From 0f044d6081f434a487e127c00ac1713563a5c5ec Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 23:56:43 +0900 Subject: [PATCH 30/31] Add datastack-backed FastLocals for non-generator frames Introduce FastLocalsData enum with Heap and DataStack variants so non-generator/coroutine frames allocate localsplus on the VM datastack instead of the heap. Includes materialize_to_heap for migration when needed (e.g. generator suspension). --- crates/vm/src/frame.rs | 1251 ++++++++++++++++++++++++++++++---------- 1 file changed, 949 insertions(+), 302 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 4e0b020b28d..8032cf2802d 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -18,7 +18,7 @@ use crate::{ tuple::{PyTuple, PyTupleIterator, PyTupleRef}, }, bytecode::{ - self, ADAPTIVE_COOLDOWN_VALUE, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, + self, ADAPTIVE_COOLDOWN_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, convert::{ToPyObject, ToPyResult}, coroutine::Coro, @@ -46,7 +46,6 @@ use malachite_bigint::BigInt; use num_traits::Zero; use rustpython_common::atomic::{PyAtomic, Radium}; use rustpython_common::{ - boxvec::BoxVec, lock::{OnceCell, PyMutex}, wtf8::{Wtf8, Wtf8Buf, wtf8_concat}, }; @@ -67,19 +66,6 @@ enum UnwindReason { Raising { exception: PyBaseExceptionRef }, } -#[derive(Debug)] -struct FrameState { - // We need 1 stack per frame - /// The main data frame of the stack machine - stack: BoxVec>, - /// Cell and free variable references (cellvars + freevars). - cells_frees: Box<[PyCellRef]>, - /// Previous line number for LINE event suppression. - /// Stored here (not on ExecutingFrame) so it persists across - /// generator/coroutine suspend and resume. - prev_line: u32, -} - /// Tracks who owns a frame. // = `_PyFrameOwner` #[repr(i8)] @@ -104,53 +90,398 @@ impl FrameOwner { } } -/// Lock-free storage for local variables (localsplus). +/// Lock-free mutable storage for frame-internal data. /// /// # Safety -/// Mutable access is serialized by the frame's state mutex in `with_exec()`. -/// External readers (e.g. `f_locals`) must use `try_lock` on the state mutex: -/// if acquired, the frame is not executing and access is exclusive; if not, -/// the caller is on the same thread as `with_exec()` (trace callback) and -/// access is safe because frame execution is single-threaded. -pub struct FastLocals { - inner: UnsafeCell]>>, +/// Frame execution is single-threaded: only one thread at a time executes +/// a given frame (enforced by the owner field and generator running flag). +/// External readers (e.g. `f_locals`) are on the same thread as execution +/// (trace callback) or the frame is not executing. +struct FrameUnsafeCell(UnsafeCell); + +impl FrameUnsafeCell { + fn new(value: T) -> Self { + Self(UnsafeCell::new(value)) + } + + /// # Safety + /// Caller must ensure no concurrent mutable access. + #[inline(always)] + unsafe fn get(&self) -> *mut T { + self.0.get() + } } -// SAFETY: Frame execution is serialized by the state mutex. +// SAFETY: Frame execution is single-threaded. See FrameUnsafeCell doc. #[cfg(feature = "threading")] -unsafe impl Send for FastLocals {} +unsafe impl Send for FrameUnsafeCell {} #[cfg(feature = "threading")] -unsafe impl Sync for FastLocals {} +unsafe impl Sync for FrameUnsafeCell {} -impl FastLocals { - fn new(data: Box<[Option]>) -> Self { +/// Unified storage for local variables and evaluation stack. +/// +/// Memory layout (each slot is `usize`-sized): +/// `[0..nlocalsplus)` — fastlocals (`Option`) +/// `[nlocalsplus..nlocalsplus+stack_top)` — active evaluation stack (`Option`) +/// `[nlocalsplus+stack_top..capacity)` — unused stack capacity +/// +/// Both `Option` and `Option` are `usize`-sized +/// (niche optimization on NonNull / NonZeroUsize). The raw storage is +/// `usize` to unify them; typed access is provided through methods. +pub struct LocalsPlus { + /// Backing storage. + data: LocalsPlusData, + /// Number of fastlocals slots (nlocals + ncells + nfrees). + nlocalsplus: u32, + /// Current evaluation stack depth. + stack_top: u32, +} + +enum LocalsPlusData { + /// Heap-allocated storage (generators, coroutines, exec/eval frames). + Heap(Box<[usize]>), + /// Data stack allocated storage (normal function calls). + /// The pointer is valid while the enclosing data stack frame is alive. + DataStack { ptr: *mut usize, capacity: usize }, +} + +// SAFETY: DataStack variant points to thread-local DataStack memory. +// Frame execution is single-threaded (enforced by owner field). +#[cfg(feature = "threading")] +unsafe impl Send for LocalsPlusData {} +#[cfg(feature = "threading")] +unsafe impl Sync for LocalsPlusData {} + +const _: () = { + assert!(core::mem::size_of::>() == core::mem::size_of::()); + // PyStackRef size is checked in object/core.rs +}; + +impl LocalsPlus { + /// Create a new heap-backed LocalsPlus. All slots start as None (0). + fn new(nlocalsplus: usize, stacksize: usize) -> Self { + let capacity = nlocalsplus + .checked_add(stacksize) + .expect("LocalsPlus capacity overflow"); + let nlocalsplus_u32 = u32::try_from(nlocalsplus).expect("nlocalsplus exceeds u32"); Self { - inner: UnsafeCell::new(data), + data: LocalsPlusData::Heap(vec![0usize; capacity].into_boxed_slice()), + nlocalsplus: nlocalsplus_u32, + stack_top: 0, } } - /// # Safety - /// Caller must ensure exclusive access (frame state locked or frame - /// not executing). + /// Create a new LocalsPlus backed by the thread data stack. + /// All slots are zero-initialized. + /// + /// The caller must call `materialize_localsplus()` when the frame finishes + /// to migrate data to the heap, then `datastack_pop()` to free the memory. + fn new_on_datastack(nlocalsplus: usize, stacksize: usize, vm: &VirtualMachine) -> Self { + let capacity = nlocalsplus + .checked_add(stacksize) + .expect("LocalsPlus capacity overflow"); + let byte_size = capacity + .checked_mul(core::mem::size_of::()) + .expect("LocalsPlus byte size overflow"); + let nlocalsplus_u32 = u32::try_from(nlocalsplus).expect("nlocalsplus exceeds u32"); + let ptr = vm.datastack_push(byte_size) as *mut usize; + // Zero-initialize all slots (0 = None for both PyObjectRef and PyStackRef). + unsafe { core::ptr::write_bytes(ptr, 0, capacity) }; + Self { + data: LocalsPlusData::DataStack { ptr, capacity }, + nlocalsplus: nlocalsplus_u32, + stack_top: 0, + } + } + + /// Migrate data-stack-backed storage to the heap, preserving all values. + /// Returns the data stack base pointer for `DataStack::pop()`. + /// Returns `None` if already heap-backed. + fn materialize_to_heap(&mut self) -> Option<*mut u8> { + if let LocalsPlusData::DataStack { ptr, capacity } = &self.data { + let base = *ptr as *mut u8; + let heap_data = unsafe { core::slice::from_raw_parts(*ptr, *capacity) } + .to_vec() + .into_boxed_slice(); + self.data = LocalsPlusData::Heap(heap_data); + Some(base) + } else { + None + } + } + + /// Drop all contained values without freeing the backing storage. + fn drop_values(&mut self) { + self.stack_clear(); + let fastlocals = self.fastlocals_mut(); + for slot in fastlocals.iter_mut() { + let _ = slot.take(); + } + } + + // -- Data access helpers -- + #[inline(always)] - pub unsafe fn borrow(&self) -> &[Option] { - unsafe { &*self.inner.get() } + fn data_as_slice(&self) -> &[usize] { + match &self.data { + LocalsPlusData::Heap(b) => b, + LocalsPlusData::DataStack { ptr, capacity } => unsafe { + core::slice::from_raw_parts(*ptr, *capacity) + }, + } } - /// # Safety - /// Caller must ensure exclusive mutable access. #[inline(always)] - #[allow(clippy::mut_from_ref)] - pub unsafe fn borrow_mut(&self) -> &mut [Option] { - unsafe { &mut *self.inner.get() } + fn data_as_mut_slice(&mut self) -> &mut [usize] { + match &mut self.data { + LocalsPlusData::Heap(b) => b, + LocalsPlusData::DataStack { ptr, capacity } => unsafe { + core::slice::from_raw_parts_mut(*ptr, *capacity) + }, + } + } + + /// Total capacity (fastlocals + stack). + #[inline(always)] + fn capacity(&self) -> usize { + match &self.data { + LocalsPlusData::Heap(b) => b.len(), + LocalsPlusData::DataStack { capacity, .. } => *capacity, + } + } + + /// Stack capacity (max stack depth). + #[inline(always)] + fn stack_capacity(&self) -> usize { + self.capacity() - self.nlocalsplus as usize + } + + // -- Fastlocals access -- + + /// Immutable access to fastlocals as `Option` slice. + #[inline(always)] + fn fastlocals(&self) -> &[Option] { + let data = self.data_as_slice(); + let ptr = data.as_ptr() as *const Option; + unsafe { core::slice::from_raw_parts(ptr, self.nlocalsplus as usize) } + } + + /// Mutable access to fastlocals as `Option` slice. + #[inline(always)] + fn fastlocals_mut(&mut self) -> &mut [Option] { + let nlocalsplus = self.nlocalsplus as usize; + let data = self.data_as_mut_slice(); + let ptr = data.as_mut_ptr() as *mut Option; + unsafe { core::slice::from_raw_parts_mut(ptr, nlocalsplus) } + } + + // -- Stack access -- + + /// Current stack depth. + #[inline(always)] + fn stack_len(&self) -> usize { + self.stack_top as usize + } + + /// Whether the stack is empty. + #[inline(always)] + fn stack_is_empty(&self) -> bool { + self.stack_top == 0 + } + + /// Push a value onto the evaluation stack. + #[inline(always)] + fn stack_push(&mut self, val: Option) { + let idx = self.nlocalsplus as usize + self.stack_top as usize; + debug_assert!( + idx < self.capacity(), + "stack overflow: stack_top={}, capacity={}", + self.stack_top, + self.stack_capacity() + ); + let data = self.data_as_mut_slice(); + data[idx] = unsafe { core::mem::transmute::, usize>(val) }; + self.stack_top += 1; + } + + /// Try to push; returns Err if stack is full. + #[inline(always)] + fn stack_try_push(&mut self, val: Option) -> Result<(), Option> { + let idx = self.nlocalsplus as usize + self.stack_top as usize; + if idx >= self.capacity() { + return Err(val); + } + let data = self.data_as_mut_slice(); + data[idx] = unsafe { core::mem::transmute::, usize>(val) }; + self.stack_top += 1; + Ok(()) + } + + /// Pop a value from the evaluation stack. + #[inline(always)] + fn stack_pop(&mut self) -> Option { + debug_assert!(self.stack_top > 0, "stack underflow"); + self.stack_top -= 1; + let idx = self.nlocalsplus as usize + self.stack_top as usize; + let data = self.data_as_mut_slice(); + let raw = core::mem::replace(&mut data[idx], 0); + unsafe { core::mem::transmute::>(raw) } + } + + /// Immutable view of the active stack as `Option` slice. + #[inline(always)] + fn stack_as_slice(&self) -> &[Option] { + let data = self.data_as_slice(); + let base = self.nlocalsplus as usize; + let ptr = unsafe { (data.as_ptr().add(base)) as *const Option }; + unsafe { core::slice::from_raw_parts(ptr, self.stack_top as usize) } + } + + /// Get a reference to a stack slot by index from the bottom. + #[inline(always)] + fn stack_index(&self, idx: usize) -> &Option { + debug_assert!(idx < self.stack_top as usize); + let data = self.data_as_slice(); + let raw_idx = self.nlocalsplus as usize + idx; + unsafe { &*(data.as_ptr().add(raw_idx) as *const Option) } + } + + /// Get a mutable reference to a stack slot by index from the bottom. + #[inline(always)] + fn stack_index_mut(&mut self, idx: usize) -> &mut Option { + debug_assert!(idx < self.stack_top as usize); + let raw_idx = self.nlocalsplus as usize + idx; + let data = self.data_as_mut_slice(); + unsafe { &mut *(data.as_mut_ptr().add(raw_idx) as *mut Option) } + } + + /// Get the last stack element (top of stack). + #[inline(always)] + fn stack_last(&self) -> Option<&Option> { + if self.stack_top == 0 { + None + } else { + Some(self.stack_index(self.stack_top as usize - 1)) + } + } + + /// Get mutable reference to the last stack element. + #[inline(always)] + fn stack_last_mut(&mut self) -> Option<&mut Option> { + if self.stack_top == 0 { + None + } else { + let idx = self.stack_top as usize - 1; + Some(self.stack_index_mut(idx)) + } + } + + /// Swap two stack elements. + #[inline(always)] + fn stack_swap(&mut self, a: usize, b: usize) { + let base = self.nlocalsplus as usize; + let data = self.data_as_mut_slice(); + data.swap(base + a, base + b); + } + + /// Truncate the stack to `new_len` elements, dropping excess values. + fn stack_truncate(&mut self, new_len: usize) { + debug_assert!(new_len <= self.stack_top as usize); + while self.stack_top as usize > new_len { + let _ = self.stack_pop(); + } + } + + /// Clear the stack, dropping all values. + fn stack_clear(&mut self) { + while self.stack_top > 0 { + let _ = self.stack_pop(); + } + } + + /// Drain stack elements from `from` to the end, returning an iterator + /// that yields `Option` in forward order and shrinks the stack. + fn stack_drain( + &mut self, + from: usize, + ) -> impl ExactSizeIterator> + '_ { + let end = self.stack_top as usize; + debug_assert!(from <= end); + // Reduce stack_top now; the drain iterator owns the elements. + self.stack_top = from as u32; + LocalsPlusStackDrain { + localsplus: self, + current: from, + end, + } + } + + /// Extend the stack with values from an iterator. + fn stack_extend(&mut self, iter: impl Iterator>) { + for val in iter { + self.stack_push(val); + } + } +} + +/// Iterator for draining stack elements in forward order. +struct LocalsPlusStackDrain<'a> { + localsplus: &'a mut LocalsPlus, + /// Current read position (stack-relative index). + current: usize, + /// End position (exclusive, stack-relative index). + end: usize, +} + +impl Iterator for LocalsPlusStackDrain<'_> { + type Item = Option; + + fn next(&mut self) -> Option { + if self.current >= self.end { + return None; + } + let idx = self.localsplus.nlocalsplus as usize + self.current; + let data = self.localsplus.data_as_mut_slice(); + let raw = core::mem::replace(&mut data[idx], 0); + self.current += 1; + Some(unsafe { core::mem::transmute::>(raw) }) + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.end - self.current; + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for LocalsPlusStackDrain<'_> {} + +impl Drop for LocalsPlusStackDrain<'_> { + fn drop(&mut self) { + while self.current < self.end { + let idx = self.localsplus.nlocalsplus as usize + self.current; + let data = self.localsplus.data_as_mut_slice(); + let raw = core::mem::replace(&mut data[idx], 0); + let _ = unsafe { core::mem::transmute::>(raw) }; + self.current += 1; + } } } -unsafe impl Traverse for FastLocals { - fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) { - // SAFETY: GC runs on the same thread; no concurrent mutation. - let data = unsafe { &*self.inner.get() }; - data.traverse(traverse_fn); +impl Drop for LocalsPlus { + fn drop(&mut self) { + // drop_values handles both stack and fastlocals. + // For DataStack-backed storage, the caller should have called + // materialize_localsplus() + datastack_pop() before drop. + // If not (e.g. panic), the DataStack memory is leaked but + // values are still dropped safely. + self.drop_values(); + } +} + +unsafe impl Traverse for LocalsPlus { + fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { + self.fastlocals().traverse(tracer_fn); + self.stack_as_slice().traverse(tracer_fn); } } @@ -239,7 +570,8 @@ pub struct Frame { pub code: PyRef, pub func_obj: Option, - pub fastlocals: FastLocals, + /// Unified storage for local variables and evaluation stack. + localsplus: FrameUnsafeCell, pub locals: FrameLocals, pub globals: PyDictRef, pub builtins: PyObjectRef, @@ -248,7 +580,11 @@ pub struct Frame { pub lasti: PyAtomic, /// tracer function for this frame (usually is None) pub trace: PyMutex, - state: PyMutex, + + /// Cell and free variable references (cellvars + freevars). + cells_frees: FrameUnsafeCell>, + /// Previous line number for LINE event suppression. + prev_line: FrameUnsafeCell, // member pub trace_lines: PyMutex, @@ -284,25 +620,20 @@ impl PyPayload for Frame { } } -unsafe impl Traverse for FrameState { - fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { - self.stack.traverse(tracer_fn); - self.cells_frees.traverse(tracer_fn); - } -} - unsafe impl Traverse for Frame { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.code.traverse(tracer_fn); self.func_obj.traverse(tracer_fn); - self.fastlocals.traverse(tracer_fn); + // SAFETY: GC traversal does not run concurrently with frame execution. + unsafe { + (*self.localsplus.get()).traverse(tracer_fn); + (*self.cells_frees.get()).traverse(tracer_fn); + } self.locals.traverse(tracer_fn); self.globals.traverse(tracer_fn); self.builtins.traverse(tracer_fn); self.trace.traverse(tracer_fn); - self.state.traverse(tracer_fn); self.temporary_refs.traverse(tracer_fn); - // generator is a borrowed reference, not traversed } } @@ -322,6 +653,7 @@ impl Frame { builtins: PyObjectRef, closure: &[PyCellRef], func_obj: Option, + use_datastack: bool, vm: &VirtualMachine, ) -> Self { let nlocals = code.varnames.len(); @@ -334,23 +666,24 @@ impl Frame { .chain(closure.iter().cloned()) .collect(); - // Extend fastlocals to include varnames + cellvars + freevars (localsplus) - let total_locals = nlocals + num_cells + nfrees; - let mut fastlocals_vec: Vec> = vec![None; total_locals]; + let nlocalsplus = nlocals + .checked_add(num_cells) + .and_then(|v| v.checked_add(nfrees)) + .expect("Frame::new: nlocalsplus overflow"); + let max_stackdepth = code.max_stackdepth as usize; + let mut localsplus = if use_datastack { + LocalsPlus::new_on_datastack(nlocalsplus, max_stackdepth, vm) + } else { + LocalsPlus::new(nlocalsplus, max_stackdepth) + }; // Store cell objects at cellvars and freevars positions for (i, cell) in cells_frees.iter().enumerate() { - fastlocals_vec[nlocals + i] = Some(cell.clone().into()); + localsplus.fastlocals_mut()[nlocals + i] = Some(cell.clone().into()); } - let state = FrameState { - stack: BoxVec::new(code.max_stackdepth as usize), - cells_frees, - prev_line: 0, - }; - Self { - fastlocals: FastLocals::new(fastlocals_vec.into_boxed_slice()), + localsplus: FrameUnsafeCell::new(localsplus), locals: match scope.locals { Some(locals) => FrameLocals::with_locals(locals), None if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) => FrameLocals::lazy(), @@ -363,7 +696,8 @@ impl Frame { code, func_obj, lasti: Radium::new(0), - state: PyMutex::new(state), + cells_frees: FrameUnsafeCell::new(cells_frees), + prev_line: FrameUnsafeCell::new(0), trace: PyMutex::new(vm.ctx.none()), trace_lines: PyMutex::new(true), trace_opcodes: PyMutex::new(false), @@ -377,12 +711,45 @@ impl Frame { } } + /// Access fastlocals immutably. + /// + /// # Safety + /// Caller must ensure no concurrent mutable access (frame not executing, + /// or called from the same thread during trace callback). + #[inline(always)] + pub unsafe fn fastlocals(&self) -> &[Option] { + unsafe { (*self.localsplus.get()).fastlocals() } + } + + /// Access fastlocals mutably. + /// + /// # Safety + /// Caller must ensure exclusive access (frame not executing). + #[inline(always)] + #[allow(clippy::mut_from_ref)] + pub unsafe fn fastlocals_mut(&self) -> &mut [Option] { + unsafe { (*self.localsplus.get()).fastlocals_mut() } + } + + /// Migrate data-stack-backed storage to the heap, preserving all values, + /// and return the data stack base pointer for `DataStack::pop()`. + /// Returns `None` if already heap-backed. + /// + /// # Safety + /// Caller must ensure the frame is not executing and the returned + /// pointer is passed to `VirtualMachine::datastack_pop()`. + pub(crate) unsafe fn materialize_localsplus(&self) -> Option<*mut u8> { + unsafe { (*self.localsplus.get()).materialize_to_heap() } + } + /// Clear evaluation stack and state-owned cell/free references. /// For full local/cell cleanup, call `clear_locals_and_stack()`. pub(crate) fn clear_stack_and_cells(&self) { - let mut state = self.state.lock(); - state.stack.clear(); - let _old = core::mem::take(&mut state.cells_frees); + // SAFETY: Called when frame is not executing (generator closed). + unsafe { + (*self.localsplus.get()).stack_clear(); + let _old = core::mem::take(&mut *self.cells_frees.get()); + } } /// Clear locals and stack after generator/coroutine close. @@ -390,7 +757,7 @@ impl Frame { pub(crate) fn clear_locals_and_stack(&self) { self.clear_stack_and_cells(); // SAFETY: Frame is not executing (generator closed). - let fastlocals = unsafe { self.fastlocals.borrow_mut() }; + let fastlocals = unsafe { (*self.localsplus.get()).fastlocals_mut() }; for slot in fastlocals.iter_mut() { *slot = None; } @@ -400,7 +767,7 @@ impl Frame { pub(crate) fn get_cell_contents(&self, cell_idx: usize) -> Option { let nlocals = self.code.varnames.len(); // SAFETY: Frame not executing; no concurrent mutation. - let fastlocals = unsafe { self.fastlocals.borrow() }; + let fastlocals = unsafe { (*self.localsplus.get()).fastlocals() }; fastlocals .get(nlocals + cell_idx) .and_then(|slot| slot.as_ref()) @@ -410,7 +777,8 @@ impl Frame { /// Set cell contents by cell index. Only safe to call before frame execution starts. pub(crate) fn set_cell_contents(&self, cell_idx: usize, value: Option) { - self.state.lock().cells_frees[cell_idx].set(value); + // SAFETY: Called before frame execution starts. + unsafe { (*self.cells_frees.get())[cell_idx].set(value) }; } /// Store a borrowed back-reference to the owning generator/coroutine. @@ -469,7 +837,7 @@ impl Frame { } let code = &**self.code; // SAFETY: Called before generator resume; no concurrent access. - let fastlocals = unsafe { self.fastlocals.borrow_mut() }; + let fastlocals = unsafe { (*self.localsplus.get()).fastlocals_mut() }; let locals_map = self.locals.mapping(vm); for (i, &varname) in code.varnames.iter().enumerate() { if i >= fastlocals.len() { @@ -486,19 +854,15 @@ impl Frame { } pub fn locals(&self, vm: &VirtualMachine) -> PyResult { - // Acquire the state mutex to synchronize with frame execution. - // If try_lock fails, the frame is executing on this thread (e.g. - // trace callback accessing f_locals), so fastlocals access is safe. - let _guard = self.state.try_lock(); + // SAFETY: Either the frame is not executing (caller checked owner), + // or we're in a trace callback on the same thread that's executing. let locals = &self.locals; let code = &**self.code; let map = &code.varnames; let j = core::cmp::min(map.len(), code.varnames.len()); let locals_map = locals.mapping(vm); if !code.varnames.is_empty() { - // SAFETY: Either _guard holds the state mutex (frame not executing), - // or we're in a trace callback on the same thread that holds it. - let fastlocals = unsafe { self.fastlocals.borrow() }; + let fastlocals = unsafe { (*self.localsplus.get()).fastlocals() }; for (&k, v) in zip(&map[..j], fastlocals) { match locals_map.ass_subscript(k, v.clone(), vm) { Ok(()) => {} @@ -534,10 +898,12 @@ impl Frame { impl Py { #[inline(always)] fn with_exec(&self, vm: &VirtualMachine, f: impl FnOnce(ExecutingFrame<'_>) -> R) -> R { - let mut state = self.state.lock(); + // SAFETY: Frame execution is single-threaded. Only one thread at a time + // executes a given frame (enforced by the owner field and generator + // running flag). Same safety argument as FastLocals (UnsafeCell). let exec = ExecutingFrame { code: &self.code, - fastlocals: &self.fastlocals, + localsplus: unsafe { &mut *self.localsplus.get() }, locals: &self.locals, globals: &self.globals, builtins: &self.builtins, @@ -551,7 +917,8 @@ impl Py { }, lasti: &self.lasti, object: self, - state: &mut state, + cells_frees: unsafe { &mut *self.cells_frees.get() }, + prev_line: unsafe { &mut *self.prev_line.get() }, monitoring_mask: 0, }; f(exec) @@ -586,19 +953,24 @@ impl Py { } pub fn yield_from_target(&self) -> Option { - // Use try_lock to avoid deadlock when the frame is currently executing. - // A running coroutine has no yield-from target. - let mut state = self.state.try_lock()?; + // If the frame is currently executing (owned by thread), it has no + // yield-from target to report. + let owner = FrameOwner::from_i8(self.owner.load(atomic::Ordering::Acquire)); + if owner == FrameOwner::Thread { + return None; + } + // SAFETY: Frame is not executing, so UnsafeCell access is safe. let exec = ExecutingFrame { code: &self.code, - fastlocals: &self.fastlocals, + localsplus: unsafe { &mut *self.localsplus.get() }, locals: &self.locals, globals: &self.globals, builtins: &self.builtins, builtins_dict: None, lasti: &self.lasti, object: self, - state: &mut state, + cells_frees: unsafe { &mut *self.cells_frees.get() }, + prev_line: unsafe { &mut *self.prev_line.get() }, monitoring_mask: 0, }; exec.yield_from_target().map(PyObject::to_owned) @@ -623,11 +995,11 @@ impl Py { } } -/// An executing frame; essentially just a struct to combine the immutable data outside the mutex -/// with the mutable data inside +/// An executing frame; borrows mutable frame-internal data for the duration +/// of bytecode execution. struct ExecutingFrame<'a> { code: &'a PyRef, - fastlocals: &'a FastLocals, + localsplus: &'a mut LocalsPlus, locals: &'a FrameLocals, globals: &'a PyDictRef, builtins: &'a PyObjectRef, @@ -638,7 +1010,8 @@ struct ExecutingFrame<'a> { builtins_dict: Option<&'a PyExact>, object: &'a Py, lasti: &'a PyAtomic, - state: &'a mut FrameState, + cells_frees: &'a mut Box<[PyCellRef]>, + prev_line: &'a mut u32, /// Cached monitoring events mask. Reloaded at Resume instruction only, monitoring_mask: u32, } @@ -647,8 +1020,7 @@ impl fmt::Debug for ExecutingFrame<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ExecutingFrame") .field("code", self.code) - // .field("scope", self.scope) - .field("state", self.state) + .field("stack_len", &self.localsplus.stack_len()) .finish() } } @@ -733,9 +1105,9 @@ impl ExecutingFrame<'_> { Instruction::Resume { .. } | Instruction::InstrumentedResume ) && let Some((loc, _)) = self.code.locations.get(idx) - && loc.line.get() as u32 != self.state.prev_line + && loc.line.get() as u32 != *self.prev_line { - self.state.prev_line = loc.line.get() as u32; + *self.prev_line = loc.line.get() as u32; vm.trace_event(crate::protocol::TraceEvent::Line, None)?; // Trace callback may have changed lasti via set_f_lineno. // Re-read and restart the loop from the new position. @@ -767,7 +1139,7 @@ impl ExecutingFrame<'_> { | Instruction::InstrumentedLine ) && let Some((loc, _)) = self.code.locations.get(idx) { - self.state.prev_line = loc.line.get() as u32; + *self.prev_line = loc.line.get() as u32; } // Fire 'opcode' trace event for sys.settrace when f_trace_opcodes @@ -987,7 +1359,7 @@ impl ExecutingFrame<'_> { // 3. Stack top is the delegate (receiver) // // First check if stack is empty - if so, we can't be in yield-from - if self.state.stack.is_empty() { + if self.localsplus.stack_is_empty() { return None; } let lasti = self.lasti() as usize; @@ -1032,7 +1404,7 @@ impl ExecutingFrame<'_> { // In CPython, _Py_call_instrumentation_line has a special case // for RESUME: it fires LINE even when prev_line == current_line. // Since gen_throw bypasses RESUME, we reset prev_line instead. - self.state.prev_line = 0; + *self.prev_line = 0; if let Some(jen) = self.yield_from_target() { // Check if the exception is GeneratorExit (type or instance). // For GeneratorExit, close the sub-iterator instead of throwing. @@ -1452,9 +1824,9 @@ impl ExecutingFrame<'_> { // CopyItem { index: 2 } copies second from top // This is 1-indexed to match CPython let idx = index.get(arg) as usize; - let stack_len = self.state.stack.len(); + let stack_len = self.localsplus.stack_len(); debug_assert!(stack_len >= idx, "CopyItem: stack underflow"); - let value = self.state.stack[stack_len - idx].clone(); + let value = self.localsplus.stack_index(stack_len - idx).clone(); self.push_stackref_opt(value); Ok(None) } @@ -1464,11 +1836,11 @@ impl ExecutingFrame<'_> { } Instruction::DeleteAttr { namei: idx } => self.delete_attr(vm, idx.get(arg)), Instruction::DeleteDeref { i } => { - self.state.cells_frees[i.get(arg) as usize].set(None); + self.cells_frees[i.get(arg) as usize].set(None); Ok(None) } Instruction::DeleteFast { var_num: idx } => { - let fastlocals = unsafe { self.fastlocals.borrow_mut() }; + let fastlocals = self.localsplus.fastlocals_mut(); let idx = idx.get(arg) as usize; if fastlocals[idx].is_none() { return Err(vm.new_exception_msg( @@ -1644,7 +2016,7 @@ impl ExecutingFrame<'_> { } Instruction::GetANext => { #[cfg(debug_assertions)] // remove when GetANext is fully implemented - let orig_stack_len = self.state.stack.len(); + let orig_stack_len = self.localsplus.stack_len(); let aiter = self.top_value(); let awaitable = if aiter.class().is(vm.ctx.types.async_generator) { @@ -1684,7 +2056,7 @@ impl ExecutingFrame<'_> { }; self.push_value(awaitable); #[cfg(debug_assertions)] - debug_assert_eq!(orig_stack_len + 1, self.state.stack.len()); + debug_assert_eq!(orig_stack_len + 1, self.localsplus.stack_len()); Ok(None) } Instruction::GetAwaitable { r#where: oparg } => { @@ -1911,7 +2283,7 @@ impl ExecutingFrame<'_> { }; self.push_value(match value { Some(v) => v, - None => self.state.cells_frees[i] + None => self.cells_frees[i] .get() .ok_or_else(|| self.unbound_cell_exception(i, vm))?, }); @@ -1980,7 +2352,7 @@ impl ExecutingFrame<'_> { } Instruction::LoadDeref { i } => { let idx = i.get(arg) as usize; - let x = self.state.cells_frees[idx] + let x = self.cells_frees[idx] .get() .ok_or_else(|| self.unbound_cell_exception(idx, vm))?; self.push_value(x); @@ -1998,7 +2370,7 @@ impl ExecutingFrame<'_> { ) } let idx = idx.get(arg) as usize; - let x = unsafe { self.fastlocals.borrow() }[idx] + let x = self.localsplus.fastlocals()[idx] .clone() .ok_or_else(|| reference_error(self.code.varnames[idx], vm))?; self.push_value(x); @@ -2008,7 +2380,7 @@ impl ExecutingFrame<'_> { // Load value and clear the slot (for inlined comprehensions) // If slot is empty, push None (not an error - variable may not exist yet) let idx = idx.get(arg) as usize; - let x = unsafe { self.fastlocals.borrow_mut() }[idx] + let x = self.localsplus.fastlocals_mut()[idx] .take() .unwrap_or_else(|| vm.ctx.none()); self.push_value(x); @@ -2018,18 +2390,16 @@ impl ExecutingFrame<'_> { // Same as LoadFast but explicitly checks for unbound locals // (LoadFast in RustPython already does this check) let idx = idx.get(arg) as usize; - let x = unsafe { self.fastlocals.borrow() }[idx] - .clone() - .ok_or_else(|| { - vm.new_exception_msg( - vm.ctx.exceptions.unbound_local_error.to_owned(), - format!( - "local variable '{}' referenced before assignment", - self.code.varnames[idx] - ) - .into(), + let x = self.localsplus.fastlocals()[idx].clone().ok_or_else(|| { + vm.new_exception_msg( + vm.ctx.exceptions.unbound_local_error.to_owned(), + format!( + "local variable '{}' referenced before assignment", + self.code.varnames[idx] ) - })?; + .into(), + ) + })?; self.push_value(x); Ok(None) } @@ -2039,7 +2409,7 @@ impl ExecutingFrame<'_> { let oparg = packed.get(arg); let idx1 = (oparg >> 4) as usize; let idx2 = (oparg & 15) as usize; - let fastlocals = unsafe { self.fastlocals.borrow() }; + let fastlocals = self.localsplus.fastlocals(); let x1 = fastlocals[idx1].clone().ok_or_else(|| { vm.new_exception_msg( vm.ctx.exceptions.unbound_local_error.to_owned(), @@ -2069,18 +2439,16 @@ impl ExecutingFrame<'_> { // lifetime issues at yield/exception points are resolved. Instruction::LoadFastBorrow { var_num: idx } => { let idx = idx.get(arg) as usize; - let x = unsafe { self.fastlocals.borrow() }[idx] - .clone() - .ok_or_else(|| { - vm.new_exception_msg( - vm.ctx.exceptions.unbound_local_error.to_owned(), - format!( - "local variable '{}' referenced before assignment", - self.code.varnames[idx] - ) - .into(), + let x = self.localsplus.fastlocals()[idx].clone().ok_or_else(|| { + vm.new_exception_msg( + vm.ctx.exceptions.unbound_local_error.to_owned(), + format!( + "local variable '{}' referenced before assignment", + self.code.varnames[idx] ) - })?; + .into(), + ) + })?; self.push_value(x); Ok(None) } @@ -2088,7 +2456,7 @@ impl ExecutingFrame<'_> { let oparg = packed.get(arg); let idx1 = (oparg >> 4) as usize; let idx2 = (oparg & 15) as usize; - let fastlocals = unsafe { self.fastlocals.borrow() }; + let fastlocals = self.localsplus.fastlocals(); let x1 = fastlocals[idx1].clone().ok_or_else(|| { vm.new_exception_msg( vm.ctx.exceptions.unbound_local_error.to_owned(), @@ -2597,18 +2965,18 @@ impl ExecutingFrame<'_> { } Instruction::StoreDeref { i } => { let value = self.pop_value(); - self.state.cells_frees[i.get(arg) as usize].set(Some(value)); + self.cells_frees[i.get(arg) as usize].set(Some(value)); Ok(None) } Instruction::StoreFast { var_num: idx } => { let value = self.pop_value(); - let fastlocals = unsafe { self.fastlocals.borrow_mut() }; + let fastlocals = self.localsplus.fastlocals_mut(); fastlocals[idx.get(arg) as usize] = Some(value); Ok(None) } Instruction::StoreFastLoadFast { var_nums } => { let value = self.pop_value(); - let locals = unsafe { self.fastlocals.borrow_mut() }; + let locals = self.localsplus.fastlocals_mut(); let oparg = var_nums.get(arg); locals[oparg.store_idx() as usize] = Some(value); let load_value = locals[oparg.load_idx() as usize] @@ -2623,7 +2991,7 @@ impl ExecutingFrame<'_> { let idx2 = (oparg & 15) as usize; let value1 = self.pop_value(); let value2 = self.pop_value(); - let fastlocals = unsafe { self.fastlocals.borrow_mut() }; + let fastlocals = self.localsplus.fastlocals_mut(); fastlocals[idx1] = Some(value1); fastlocals[idx2] = Some(value2); Ok(None) @@ -2663,7 +3031,7 @@ impl ExecutingFrame<'_> { self.execute_store_subscript(vm) } Instruction::Swap { i: index } => { - let len = self.state.stack.len(); + let len = self.localsplus.stack_len(); debug_assert!(len > 0, "stack underflow in SWAP"); let i = len - 1; // TOS index let index_val = index.get(arg) as usize; @@ -2676,7 +3044,7 @@ impl ExecutingFrame<'_> { len ); let j = len - index_val; - self.state.stack.swap(i, j); + self.localsplus.stack_swap(i, j); Ok(None) } Instruction::ToBool => { @@ -2700,9 +3068,9 @@ impl ExecutingFrame<'_> { // __exit__ is at TOS-3 (below lasti, prev_exc, and exc) let exc = vm.current_exception(); - let stack_len = self.state.stack.len(); + let stack_len = self.localsplus.stack_len(); let exit = expect_unchecked( - self.state.stack[stack_len - 4].clone(), + self.localsplus.stack_index(stack_len - 4).clone(), "WithExceptStart: __exit__ is NULL", ); @@ -2719,8 +3087,8 @@ impl ExecutingFrame<'_> { } Instruction::YieldValue { arg: oparg } => { debug_assert!( - self.state - .stack + self.localsplus + .stack_as_slice() .iter() .flatten() .all(|sr| !sr.is_borrowed()), @@ -2898,6 +3266,13 @@ impl ExecutingFrame<'_> { self.push_value(owner); Ok(None) } else { + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); self.load_attr_slow(vm, oparg) } } @@ -2920,6 +3295,13 @@ impl ExecutingFrame<'_> { self.push_value(owner); Ok(None) } else { + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); self.load_attr_slow(vm, oparg) } } @@ -2938,7 +3320,26 @@ impl ExecutingFrame<'_> { match dict.get_item_opt(attr_name, vm) { Ok(Some(_)) => true, Ok(None) => false, - Err(_) => return self.load_attr_slow(vm, oparg), + Err(_) => { + // Dict lookup error → deoptimize to safe path + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttr { + namei: Arg::marker(), + }, + ); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code + .instructions + .read_adaptive_counter(cache_base), + ), + ); + } + return self.load_attr_slow(vm, oparg); + } } } else { false @@ -2954,6 +3355,13 @@ impl ExecutingFrame<'_> { return Ok(None); } } + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrInstanceValue => { @@ -2977,6 +3385,13 @@ impl ExecutingFrame<'_> { } // Not in instance dict — fall through to class lookup via slow path } + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrWithHint => { @@ -3003,6 +3418,13 @@ impl ExecutingFrame<'_> { return Ok(None); } + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrModule => { @@ -3028,6 +3450,21 @@ impl ExecutingFrame<'_> { } return Ok(None); } + // Deoptimize + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttr { + namei: Arg::marker(), + }, + ); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrNondescriptorNoDict => { @@ -3051,6 +3488,20 @@ impl ExecutingFrame<'_> { } return Ok(None); } + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttr { + namei: Arg::marker(), + }, + ); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrNondescriptorWithValues => { @@ -3088,6 +3539,20 @@ impl ExecutingFrame<'_> { } return Ok(None); } + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttr { + namei: Arg::marker(), + }, + ); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrClass => { @@ -3113,6 +3578,20 @@ impl ExecutingFrame<'_> { } return Ok(None); } + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttr { + namei: Arg::marker(), + }, + ); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrClassWithMetaclassCheck => { @@ -3141,10 +3620,20 @@ impl ExecutingFrame<'_> { } return Ok(None); } + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrGetattributeOverridden => { let oparg = LoadAttr::new(u32::from(arg)); + self.deoptimize(Instruction::LoadAttr { + namei: Arg::marker(), + }); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrSlot => { @@ -3170,6 +3659,20 @@ impl ExecutingFrame<'_> { } // Slot is None → AttributeError (fall through to slow path) } + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttr { + namei: Arg::marker(), + }, + ); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrProperty => { @@ -3194,6 +3697,20 @@ impl ExecutingFrame<'_> { } } } + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttr { + namei: Arg::marker(), + }, + ); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } self.load_attr_slow(vm, oparg) } Instruction::StoreAttrInstanceValue => { @@ -3465,9 +3982,11 @@ impl ExecutingFrame<'_> { let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); // Stack: [callable, self_or_null(NULL), arg1, ..., argN] - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref::() @@ -3520,6 +4039,9 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3528,9 +4050,11 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); if effective_nargs == 2 { let callable = self.nth_value(nargs + 1); @@ -3555,6 +4079,9 @@ impl ExecutingFrame<'_> { return Ok(None); } } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3575,6 +4102,9 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3593,6 +4123,9 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3616,14 +4149,19 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } Instruction::CallBuiltinO => { let nargs: u32 = arg.into(); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); if callable @@ -3645,14 +4183,19 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } Instruction::CallBuiltinFast => { let nargs: u32 = arg.into(); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); if callable @@ -3673,6 +4216,9 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3703,6 +4249,9 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3712,9 +4261,11 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref::() @@ -3745,6 +4296,9 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } else { + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3753,11 +4307,12 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, item] - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - 2].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let self_is_exact_list = stack[stack_len - 2] + let self_is_exact_list = self + .localsplus + .stack_index(stack_len - 2) .as_ref() .is_some_and(|obj| obj.class().is(vm.ctx.types.list_type)); let is_list_append = @@ -3787,6 +4342,9 @@ impl ExecutingFrame<'_> { self.push_value(item); } } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3794,9 +4352,8 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); if nargs == 0 { // Stack: [callable, self_or_null] — peek to get func ptr - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self.localsplus.stack_index(stack_len - 1).is_some(); let callable = self.nth_value(1); let func = if self_or_null_is_some { callable @@ -3817,16 +4374,17 @@ impl ExecutingFrame<'_> { return Ok(None); } } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorO => { let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, arg1] - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - 2].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); let func = if self_or_null_is_some { callable @@ -3848,15 +4406,19 @@ impl ExecutingFrame<'_> { return Ok(None); } } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorFast => { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let func = if self_or_null_is_some { callable .downcast_ref::() @@ -3880,8 +4442,10 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinClass => { let nargs: u32 = arg.into(); @@ -3908,8 +4472,7 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallAllocAndEnterInit => { let instr_idx = self.lasti() as usize - 1; @@ -3917,9 +4480,11 @@ impl ExecutingFrame<'_> { let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); if !self_or_null_is_some && cached_version != 0 && let Some(cls) = callable.downcast_ref::() @@ -3967,16 +4532,20 @@ impl ExecutingFrame<'_> { return Ok(None); } } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorFastWithKeywords => { // Native function interface is uniform regardless of keyword support let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let func = if self_or_null_is_some { callable .downcast_ref::() @@ -4000,15 +4569,19 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinFastWithKeywords => { // Native function interface is uniform regardless of keyword support let nargs: u32 = arg.into(); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); if callable @@ -4029,14 +4602,18 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + self.execute_call_vectorcall(nargs, vm) } Instruction::CallNonPyGeneral => { let nargs: u32 = arg.into(); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); if callable.downcast_ref::().is_some() || callable.downcast_ref::().is_some() @@ -4103,6 +4680,9 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } + self.deoptimize(Instruction::CallKw { + argc: Arg::marker(), + }); let args = self.collect_keyword_args(nargs); self.execute_call(args, vm) } @@ -4112,9 +4692,11 @@ impl ExecutingFrame<'_> { let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 2) + .is_some(); let callable = self.nth_value(nargs + 2); if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref::() @@ -4150,14 +4732,19 @@ impl ExecutingFrame<'_> { return Ok(None); } } + self.deoptimize(Instruction::CallKw { + argc: Arg::marker(), + }); let args = self.collect_keyword_args(nargs); self.execute_call(args, vm) } Instruction::CallKwNonPy => { let nargs: u32 = arg.into(); - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 2) + .is_some(); let callable = self.nth_value(nargs + 2); if callable.downcast_ref::().is_some() || callable.downcast_ref::().is_some() @@ -4241,6 +4828,22 @@ impl ExecutingFrame<'_> { return Ok(None); } } + // Deoptimize + unsafe { + self.code.instructions.replace_op( + self.lasti() as usize - 1, + Instruction::LoadSuperAttr { + namei: Arg::marker(), + }, + ); + let cache_base = self.lasti() as usize; + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) } @@ -4308,6 +4911,22 @@ impl ExecutingFrame<'_> { return Ok(None); } } + // Deoptimize + unsafe { + self.code.instructions.replace_op( + self.lasti() as usize - 1, + Instruction::LoadSuperAttr { + namei: Arg::marker(), + }, + ); + let cache_base = self.lasti() as usize; + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) } @@ -4755,8 +5374,8 @@ impl ExecutingFrame<'_> { } Instruction::InstrumentedYieldValue => { debug_assert!( - self.state - .stack + self.localsplus + .stack_as_slice() .iter() .flatten() .all(|sr| !sr.is_borrowed()), @@ -5021,8 +5640,8 @@ impl ExecutingFrame<'_> { // Fire LINE event only if line changed if let Some((loc, _)) = self.code.locations.get(idx) { let line = loc.line.get() as u32; - if line != self.state.prev_line && line > 0 { - self.state.prev_line = line; + if line != *self.prev_line && line > 0 { + *self.prev_line = line; monitoring::fire_line(vm, self.code, offset, line)?; } } @@ -5316,8 +5935,8 @@ impl ExecutingFrame<'_> { } // 1. Pop stack to entry.depth - while self.state.stack.len() > entry.depth as usize { - self.state.stack.pop(); + while self.localsplus.stack_len() > entry.depth as usize { + let _ = self.localsplus.stack_pop(); } // 2. If push_lasti=true (SETUP_CLEANUP), push lasti before exception @@ -5521,24 +6140,19 @@ impl ExecutingFrame<'_> { #[inline] fn execute_call_vectorcall(&mut self, nargs: u32, vm: &VirtualMachine) -> FrameResult { let nargs_usize = nargs as usize; - let stack_len = self.state.stack.len(); + let stack_len = self.localsplus.stack_len(); + debug_assert!( + stack_len >= nargs_usize + 2, + "CALL stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}" + ); let callable_idx = stack_len - nargs_usize - 2; let self_or_null_idx = stack_len - nargs_usize - 1; let args_start = stack_len - nargs_usize; - // Check if callable has vectorcall slot - let has_vectorcall = self.state.stack[callable_idx] - .as_ref() - .is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some()); - - if !has_vectorcall { - // Fallback to existing FuncArgs path - let args = self.collect_positional_args(nargs); - return self.execute_call(args, vm); - } - - // Build args slice: [self_or_null?, arg1, ..., argN] - let self_or_null = self.state.stack[self_or_null_idx] + // Build args: [self?, arg1, ..., argN] + let self_or_null = self + .localsplus + .stack_index_mut(self_or_null_idx) .take() .map(|sr| sr.to_pyobj()); let has_self = self_or_null.is_some(); @@ -5553,13 +6167,24 @@ impl ExecutingFrame<'_> { args_vec.push(self_val); } for stack_idx in args_start..stack_len { - let val = self.state.stack[stack_idx].take().unwrap().to_pyobj(); + let val = self + .localsplus + .stack_index_mut(stack_idx) + .take() + .unwrap() + .to_pyobj(); args_vec.push(val); } - let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj(); - self.state.stack.truncate(callable_idx); + let callable_obj = self + .localsplus + .stack_index_mut(callable_idx) + .take() + .unwrap() + .to_pyobj(); + self.localsplus.stack_truncate(callable_idx); + // invoke_vectorcall falls back to FuncArgs if no vectorcall slot let result = callable_obj.vectorcall(args_vec, effective_nargs, None, vm)?; self.push_value(result); Ok(None) @@ -5576,50 +6201,28 @@ impl ExecutingFrame<'_> { .downcast_ref::() .expect("kwarg names should be tuple"); let kw_count = kwarg_names_tuple.len(); + debug_assert!(kw_count <= nargs_usize, "CALL_KW kw_count exceeds nargs"); - let stack_len = self.state.stack.len(); + let stack_len = self.localsplus.stack_len(); + debug_assert!( + stack_len >= nargs_usize + 2, + "CALL_KW stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}" + ); let callable_idx = stack_len - nargs_usize - 2; let self_or_null_idx = stack_len - nargs_usize - 1; let args_start = stack_len - nargs_usize; - // Check if callable has vectorcall slot - let has_vectorcall = self.state.stack[callable_idx] - .as_ref() - .is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some()); - - if !has_vectorcall { - // Fallback: reconstruct kwarg_names iterator and use existing path - let kwarg_names_iter = kwarg_names_tuple.as_slice().iter().map(|pyobj| { - pyobj - .downcast_ref::() - .unwrap() - .as_str() - .to_owned() - }); - let args = self.pop_multiple(nargs_usize); - let func_args = FuncArgs::with_kwargs_names(args, kwarg_names_iter); - // pop self_or_null and callable - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - let final_args = if let Some(self_val) = self_or_null { - let mut args = func_args; - args.prepend_arg(self_val); - args - } else { - func_args - }; - let value = callable.call(final_args, vm)?; - self.push_value(value); - return Ok(None); - } - // Build args: [self?, pos_arg1, ..., pos_argM, kw_val1, ..., kw_valK] - let self_or_null = self.state.stack[self_or_null_idx] + let self_or_null = self + .localsplus + .stack_index_mut(self_or_null_idx) .take() .map(|sr| sr.to_pyobj()); let has_self = self_or_null.is_some(); - let pos_count = nargs_usize - kw_count; + let pos_count = nargs_usize + .checked_sub(kw_count) + .expect("CALL_KW: kw_count exceeds nargs"); let effective_nargs = if has_self { pos_count + 1 } else { pos_count }; // Build the full args slice: positional (including self) + kwarg values @@ -5629,13 +6232,24 @@ impl ExecutingFrame<'_> { args_vec.push(self_val); } for stack_idx in args_start..stack_len { - let val = self.state.stack[stack_idx].take().unwrap().to_pyobj(); + let val = self + .localsplus + .stack_index_mut(stack_idx) + .take() + .unwrap() + .to_pyobj(); args_vec.push(val); } - let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj(); - self.state.stack.truncate(callable_idx); + let callable_obj = self + .localsplus + .stack_index_mut(callable_idx) + .take() + .unwrap() + .to_pyobj(); + self.localsplus.stack_truncate(callable_idx); + // invoke_vectorcall falls back to FuncArgs if no vectorcall slot let kwnames = kwarg_names_tuple.as_slice(); let result = callable_obj.vectorcall(args_vec, effective_nargs, Some(kwnames), vm)?; self.push_value(result); @@ -5821,7 +6435,7 @@ impl ExecutingFrame<'_> { let mut elements = elements; // Elements on stack from right-to-left: - self.state.stack.extend( + self.localsplus.stack_extend( elements .drain(before + middle..) .rev() @@ -5833,7 +6447,7 @@ impl ExecutingFrame<'_> { self.push_value(t.into()); // Lastly the first reversed values: - self.state.stack.extend( + self.localsplus.stack_extend( elements .into_iter() .rev() @@ -6165,7 +6779,7 @@ impl ExecutingFrame<'_> { Err(vm.new_value_error(msg)) } PyIterReturn::StopIteration(_) => { - self.state.stack.extend( + self.localsplus.stack_extend( elements .into_iter() .rev() @@ -6813,6 +7427,30 @@ impl ExecutingFrame<'_> { } } + /// Deoptimize: replace specialized op with its base adaptive op and reset + /// the adaptive counter. Computes instr_idx/cache_base from lasti(). + #[inline] + fn deoptimize(&mut self, base_op: Instruction) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + self.deoptimize_at(base_op, instr_idx, cache_base); + } + + /// Deoptimize with explicit indices (for specialized handlers that already + /// have instr_idx/cache_base in scope). + #[inline] + fn deoptimize_at(&mut self, base_op: Instruction, instr_idx: usize, cache_base: usize) { + unsafe { + self.code.instructions.replace_op(instr_idx, base_op); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + } + /// Execute a specialized binary op on two int operands. /// Fallback to generic binary op if either operand is not an exact int. #[inline] @@ -6879,9 +7517,11 @@ impl ExecutingFrame<'_> { // Stack: [callable, self_or_null, arg1, ..., argN] // callable is at position nargs + 1 from top // self_or_null is at position nargs from top - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); if let Some(func) = callable.downcast_ref::() { @@ -7083,9 +7723,11 @@ impl ExecutingFrame<'_> { } // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] // callable is at position nargs + 2 from top - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 2) + .is_some(); let callable = self.nth_value(nargs + 2); if let Some(func) = callable.downcast_ref::() { @@ -7674,7 +8316,7 @@ impl ExecutingFrame<'_> { #[inline] #[track_caller] fn push_stackref_opt(&mut self, obj: Option) { - match self.state.stack.try_push(obj) { + match self.localsplus.stack_try_push(obj) { Ok(()) => {} Err(_e) => self.fatal("tried to push value onto stack but overflowed max_stackdepth"), } @@ -7712,10 +8354,10 @@ impl ExecutingFrame<'_> { /// Pop a raw stackref from the stack, returning None if the stack slot is NULL. #[inline] fn pop_stackref_opt(&mut self) -> Option { - match self.state.stack.pop() { - Some(slot) => slot, - None => self.fatal("tried to pop from empty stack"), + if self.localsplus.stack_is_empty() { + self.fatal("tried to pop from empty stack"); } + self.localsplus.stack_pop() } /// Pop a raw stackref from the stack. Panics if NULL. @@ -7892,7 +8534,7 @@ impl ExecutingFrame<'_> { /// Pop multiple values from the stack. Panics if any slot is NULL. fn pop_multiple(&mut self, count: usize) -> impl ExactSizeIterator + '_ { - let stack_len = self.state.stack.len(); + let stack_len = self.localsplus.stack_len(); if count > stack_len { let instr = self.code.instructions.get(self.lasti() as usize); let op_name = instr @@ -7908,7 +8550,7 @@ impl ExecutingFrame<'_> { self.code.source_path() ); } - self.state.stack.drain(stack_len - count..).map(|obj| { + self.localsplus.stack_drain(stack_len - count).map(|obj| { expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.").to_pyobj() }) } @@ -7916,7 +8558,7 @@ impl ExecutingFrame<'_> { #[inline] fn replace_top(&mut self, top: Option) -> Option { let mut slot = top.map(PyStackRef::new_owned); - let last = self.state.stack.last_mut().unwrap(); + let last = self.localsplus.stack_last_mut().unwrap(); core::mem::swap(last, &mut slot); slot.map(|sr| sr.to_pyobj()) } @@ -7924,18 +8566,18 @@ impl ExecutingFrame<'_> { #[inline] #[track_caller] fn top_value(&self) -> &PyObject { - match &*self.state.stack { - [.., Some(last)] => last.as_object(), - [.., None] => self.fatal("tried to get top of stack but got NULL"), - [] => self.fatal("tried to get top of stack but stack is empty"), + match self.localsplus.stack_last() { + Some(Some(last)) => last.as_object(), + Some(None) => self.fatal("tried to get top of stack but got NULL"), + None => self.fatal("tried to get top of stack but stack is empty"), } } #[inline] #[track_caller] fn nth_value(&self, depth: u32) -> &PyObject { - let stack = &self.state.stack; - match &stack[stack.len() - depth as usize - 1] { + let idx = self.localsplus.stack_len() - depth as usize - 1; + match self.localsplus.stack_index(idx) { Some(obj) => obj.as_object(), None => unsafe { core::hint::unreachable_unchecked() }, } @@ -7952,21 +8594,26 @@ impl ExecutingFrame<'_> { impl fmt::Debug for Frame { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let state = self.state.lock(); - let stack_str = state.stack.iter().fold(String::new(), |mut s, slot| { - match slot { - Some(elem) if elem.downcastable::() => { - s.push_str("\n > {frame}"); - } - Some(elem) => { - core::fmt::write(&mut s, format_args!("\n > {elem:?}")).unwrap(); - } - None => { - s.push_str("\n > NULL"); + // SAFETY: Debug is best-effort; concurrent mutation is unlikely + // and would only affect debug output. + let localsplus = unsafe { &*self.localsplus.get() }; + let stack_str = localsplus + .stack_as_slice() + .iter() + .fold(String::new(), |mut s, slot| { + match slot { + Some(elem) if elem.downcastable::() => { + s.push_str("\n > {frame}"); + } + Some(elem) => { + core::fmt::write(&mut s, format_args!("\n > {elem:?}")).unwrap(); + } + None => { + s.push_str("\n > NULL"); + } } - } - s - }); + s + }); // TODO: fix this up write!( f, From cc3fd09611c9e8d294e6d46d08e734032a6af00f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 00:22:40 +0900 Subject: [PATCH 31/31] Drop read lock before key_eq in dict get_hint Move key_eq call outside the read lock guard to avoid potential deadlock when Python __eq__ re-enters dict mutation paths. Matches the existing pattern in lookup(). --- crates/vm/src/dict_inner.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/crates/vm/src/dict_inner.rs b/crates/vm/src/dict_inner.rs index 139e9e57ad6..763fa856319 100644 --- a/crates/vm/src/dict_inner.rs +++ b/crates/vm/src/dict_inner.rs @@ -363,12 +363,19 @@ impl Dict { key: &K, hint: usize, ) -> PyResult> { - let inner = self.read(); - let Some(Some(entry)) = inner.entries.get(hint) else { - return Ok(None); + let (entry_key, entry_value) = { + let inner = self.read(); + let Some(Some(entry)) = inner.entries.get(hint) else { + return Ok(None); + }; + if key.key_is(&entry.key) { + return Ok(Some(entry.value.clone())); + } + (entry.key.clone(), entry.value.clone()) }; - if key.key_is(&entry.key) || key.key_eq(vm, &entry.key)? { - Ok(Some(entry.value.clone())) + // key_eq may run Python __eq__, so must be outside the lock. + if key.key_eq(vm, &entry_key)? { + Ok(Some(entry_value)) } else { Ok(None) }