From 687e99f02b119b16d57b44fa470a732e44fe1c4e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 13:46:35 +0900 Subject: [PATCH 01/20] Add debug_assert to invoke_exact_args, lazy func_version reassignment - Add debug_assert preconditions in invoke_exact_args - Add get_version_for_current_state() for lazy version reassignment after func_version invalidation - Document NEXT_TYPE_VERSION overflow policy --- crates/vm/src/builtins/function.rs | 26 ++++++++++++++++++++++++++ crates/vm/src/builtins/type.rs | 7 ++++++- crates/vm/src/frame.rs | 2 +- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index e0b7116553a..d3f54f5a8b6 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -603,6 +603,22 @@ impl Py { self.func_version.load(Relaxed) } + /// _PyFunction_GetVersionForCurrentState + /// Returns the current version, assigning a fresh one if previously invalidated. + /// Returns 0 if the version counter has overflowed. + pub fn get_version_for_current_state(&self) -> u32 { + let v = self.func_version.load(Relaxed); + if v != 0 { + return v; + } + let new_v = FUNC_VERSION_COUNTER.fetch_add(1, Relaxed); + if new_v == 0 { + return 0; // Counter overflow + } + self.func_version.store(new_v, Relaxed); + new_v + } + /// Check if this function is eligible for exact-args call specialization. /// Returns true if: no VARARGS, no VARKEYWORDS, no kwonly args, not generator/coroutine, /// and effective_nargs matches co_argcount. @@ -627,6 +643,16 @@ impl Py { pub fn invoke_exact_args(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult { let code: PyRef = (*self.code).to_owned(); + debug_assert_eq!(args.len(), code.arg_count as usize); + debug_assert!(code.flags.contains(bytecode::CodeFlags::NEWLOCALS)); + debug_assert!(!code.flags.intersects( + bytecode::CodeFlags::VARARGS + | bytecode::CodeFlags::VARKEYWORDS + | bytecode::CodeFlags::GENERATOR + | bytecode::CodeFlags::COROUTINE + )); + debug_assert_eq!(code.kwonlyarg_count, 0); + let frame = Frame::new( code.clone(), Scope::new(None, self.globals.clone()), diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index d43bbd8fc3b..f69163bd8ca 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -55,6 +55,10 @@ pub struct PyType { pub tp_version_tag: AtomicU32, } +/// Monotonic counter for type version tags. Once it reaches `u32::MAX`, +/// `assign_version_tag()` returns 0 permanently, disabling new inline-cache +/// entries but not invalidating correctness (cache misses fall back to the +/// generic path). static NEXT_TYPE_VERSION: AtomicU32 = AtomicU32::new(1); unsafe impl crate::object::Traverse for PyType { @@ -199,7 +203,8 @@ fn is_subtype_with_mro(a_mro: &[PyTypeRef], a: &Py, b: &Py) -> b } impl PyType { - /// Assign a fresh version tag. Returns 0 on overflow (all caches invalidated). + /// Assign a fresh version tag. Returns 0 if the version counter has been + /// exhausted, in which case no new cache entries can be created. pub fn assign_version_tag(&self) -> u32 { loop { let current = NEXT_TYPE_VERSION.load(Ordering::Relaxed); diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index e35efff6f19..4f1feb02925 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4765,7 +4765,7 @@ impl ExecutingFrame<'_> { let callable = self.nth_value(nargs + 1); if let Some(func) = callable.downcast_ref::() { - let version = func.func_version(); + let version = func.get_version_for_current_state(); if version == 0 { unsafe { self.code From 81d307b106c7759130f4aa1b4a8ebcf2d7471f47 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 1 Mar 2026 21:52:18 +0900 Subject: [PATCH 02/20] working --- crates/vm/src/builtins/list.rs | 17 ++ crates/vm/src/builtins/range.rs | 13 ++ crates/vm/src/builtins/tuple.rs | 18 ++ crates/vm/src/dict_inner.rs | 17 +- crates/vm/src/frame.rs | 394 +++++++++++++++++++++++++++++++- 5 files changed, 456 insertions(+), 3 deletions(-) diff --git a/crates/vm/src/builtins/list.rs b/crates/vm/src/builtins/list.rs index aa25af58e9b..84d7a4e309c 100644 --- a/crates/vm/src/builtins/list.rs +++ b/crates/vm/src/builtins/list.rs @@ -637,6 +637,23 @@ impl PyListIterator { } } +impl PyListIterator { + /// Fast path for FOR_ITER specialization. + pub(crate) fn fast_next(&self) -> Option { + self.internal + .lock() + .next(|list, pos| { + let vec = list.borrow_vec(); + Ok(PyIterReturn::from_result(vec.get(pos).cloned().ok_or(None))) + }) + .ok() + .and_then(|r| match r { + PyIterReturn::Return(v) => Some(v), + PyIterReturn::StopIteration(_) => None, + }) + } +} + impl SelfIter for PyListIterator {} impl IterNext for PyListIterator { fn next(zelf: &Py, _vm: &VirtualMachine) -> PyResult { diff --git a/crates/vm/src/builtins/range.rs b/crates/vm/src/builtins/range.rs index 0d0b5ccdd5d..af74ce121c2 100644 --- a/crates/vm/src/builtins/range.rs +++ b/crates/vm/src/builtins/range.rs @@ -660,6 +660,19 @@ impl PyRangeIterator { } } +impl PyRangeIterator { + /// Fast path for FOR_ITER specialization. Returns the next isize value + /// without allocating PyInt or PyIterReturn. + pub(crate) fn fast_next(&self) -> Option { + let index = self.index.fetch_add(1); + if index < self.length { + Some(self.start + (index as isize) * self.step) + } else { + None + } + } +} + impl SelfIter for PyRangeIterator {} impl IterNext for PyRangeIterator { fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { diff --git a/crates/vm/src/builtins/tuple.rs b/crates/vm/src/builtins/tuple.rs index 046506f6f4d..8ca2f74a3bf 100644 --- a/crates/vm/src/builtins/tuple.rs +++ b/crates/vm/src/builtins/tuple.rs @@ -572,6 +572,24 @@ impl PyTupleIterator { } } +impl PyTupleIterator { + /// Fast path for FOR_ITER specialization. + pub(crate) fn fast_next(&self) -> Option { + self.internal + .lock() + .next(|tuple, pos| { + Ok(PyIterReturn::from_result( + tuple.get(pos).cloned().ok_or(None), + )) + }) + .ok() + .and_then(|r| match r { + PyIterReturn::Return(v) => Some(v), + PyIterReturn::StopIteration(_) => None, + }) + } +} + impl SelfIter for PyTupleIterator {} impl IterNext for PyTupleIterator { fn next(zelf: &Py, _vm: &VirtualMachine) -> PyResult { diff --git a/crates/vm/src/dict_inner.rs b/crates/vm/src/dict_inner.rs index 34c98ad9c75..9ed3c222fe0 100644 --- a/crates/vm/src/dict_inner.rs +++ b/crates/vm/src/dict_inner.rs @@ -17,7 +17,9 @@ use crate::{ object::{Traverse, TraverseFn}, }; use alloc::fmt; -use core::{mem::size_of, ops::ControlFlow}; +use core::mem::size_of; +use core::ops::ControlFlow; +use core::sync::atomic::{AtomicU64, Ordering::Relaxed}; use num_traits::ToPrimitive; // HashIndex is intended to be same size with hash::PyHash @@ -34,6 +36,7 @@ type EntryIndex = usize; pub struct Dict { inner: PyRwLock>, + version: AtomicU64, } unsafe impl Traverse for Dict { @@ -98,6 +101,7 @@ impl Clone for Dict { fn clone(&self) -> Self { Self { inner: PyRwLock::new(self.inner.read().clone()), + version: AtomicU64::new(0), } } } @@ -111,6 +115,7 @@ impl Default for Dict { indices: vec![IndexEntry::FREE; 8], entries: Vec::new(), }), + version: AtomicU64::new(0), } } } @@ -254,6 +259,16 @@ impl DictInner { type PopInnerResult = ControlFlow>>; impl Dict { + /// Monotonically increasing version counter for mutation tracking. + pub fn version(&self) -> u64 { + self.version.load(Relaxed) + } + + /// Bump the version counter after any mutation. + fn bump_version(&self) { + self.version.fetch_add(1, Relaxed); + } + fn read(&self) -> PyRwLockReadGuard<'_, DictInner> { self.inner.read() } diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 4f1feb02925..c32545372fd 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -10,8 +10,9 @@ use crate::{ asyncgenerator::PyAsyncGenWrappedValue, frame::stack_analysis, function::{PyCell, PyCellRef, PyFunction}, + list::PyListIterator, range::PyRangeIterator, - tuple::{PyTuple, PyTupleRef}, + tuple::{PyTuple, PyTupleIterator, PyTupleRef}, }, bytecode::{ self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, @@ -38,6 +39,7 @@ use core::sync::atomic::Ordering::{Acquire, Relaxed}; use indexmap::IndexMap; use itertools::Itertools; use malachite_bigint::BigInt; +use num_traits::Zero; use rustpython_common::atomic::{PyAtomic, Radium}; use rustpython_common::{ boxvec::BoxVec, @@ -1408,7 +1410,22 @@ impl ExecutingFrame<'_> { self.push_value(matched); Ok(None) } - Instruction::CompareOp { op } => self.execute_compare(vm, op.get(arg)), + Instruction::CompareOp { op } => { + let op_val = op.get(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_compare_op(vm, op_val, instr_idx, cache_base); + } + self.execute_compare(vm, op_val) + } Instruction::ContainsOp(invert) => { let b = self.pop_value(); let a = self.pop_value(); @@ -1593,6 +1610,18 @@ impl ExecutingFrame<'_> { Instruction::ForIter { .. } => { // Relative forward jump: target = lasti + caches + delta let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_for_iter(vm, instr_idx, cache_base); + } self.execute_for_iter(vm, target)?; Ok(None) } @@ -2618,6 +2647,18 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::ToBool => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_to_bool(vm, instr_idx, cache_base); + } let obj = self.pop_value(); let bool_val = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(bool_val).into()); @@ -3067,6 +3108,189 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } } + Instruction::CompareOpInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let op = self.compare_op_from_arg(arg); + let result = op.eval_ord(a_int.as_bigint().cmp(b_int.as_bigint())); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_compare_op(); + let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal); + self.execute_compare(vm, op) + } + } + Instruction::CompareOpFloat => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_f), Some(b_f)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let op = self.compare_op_from_arg(arg); + let result = a_f + .to_f64() + .partial_cmp(&b_f.to_f64()) + .is_some_and(|ord| op.eval_ord(ord)); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_compare_op(); + let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal); + self.execute_compare(vm, op) + } + } + Instruction::CompareOpStr => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_str)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let op = self.compare_op_from_arg(arg); + let result = op.eval_ord(a_str.as_wtf8().cmp(b_str.as_wtf8())); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_compare_op(); + let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal); + self.execute_compare(vm, op) + } + } + Instruction::ToBoolBool => { + let obj = self.top_value(); + if obj.class().is(vm.ctx.types.bool_type) { + // Already a bool, no-op + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolInt => { + let obj = self.top_value(); + if let Some(int_val) = obj.downcast_ref_if_exact::(vm) { + let result = !int_val.as_bigint().is_zero(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolNone => { + let obj = self.top_value(); + if obj.class().is(vm.ctx.types.none_type) { + self.pop_value(); + self.push_value(vm.ctx.new_bool(false).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolList => { + let obj = self.top_value(); + if let Some(list) = obj.downcast_ref_if_exact::(vm) { + let result = !list.borrow_vec().is_empty(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolStr => { + let obj = self.top_value(); + if let Some(s) = obj.downcast_ref_if_exact::(vm) { + let result = !s.is_empty(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ForIterRange => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(range_iter) = iter.downcast_ref_if_exact::(vm) { + if let Some(value) = range_iter.fast_next() { + self.push_value(vm.ctx.new_int(value).into()); + } else { + self.for_iter_jump_on_exhausted(target); + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } + Instruction::ForIterList => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(list_iter) = iter.downcast_ref_if_exact::(vm) { + if let Some(value) = list_iter.fast_next() { + self.push_value(value); + } else { + self.for_iter_jump_on_exhausted(target); + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } + Instruction::ForIterTuple => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(tuple_iter) = iter.downcast_ref_if_exact::(vm) { + if let Some(value) = tuple_iter.fast_next() { + self.push_value(value); + } else { + self.for_iter_jump_on_exhausted(target); + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } // All INSTRUMENTED_* opcodes delegate to a cold function to keep // the hot instruction loop free of monitoring overhead. _ => self.execute_instrumented(instruction, arg, vm), @@ -4805,6 +5029,172 @@ impl ExecutingFrame<'_> { } } + fn specialize_compare_op( + &mut self, + vm: &VirtualMachine, + _op: bytecode::ComparisonOperator, + instr_idx: usize, + cache_base: usize, + ) { + let b = self.top_value(); + let a = self.nth_value(1); + + let new_op = if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::CompareOpInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::CompareOpFloat) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::CompareOpStr) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + + /// Recover the ComparisonOperator from the instruction arg byte. + /// `replace_op` preserves the arg byte, so the original op remains accessible. + fn compare_op_from_arg(&self, arg: bytecode::OpArg) -> PyComparisonOp { + bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal) + .into() + } + + fn deoptimize_compare_op(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CompareOp { op: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + + fn specialize_to_bool(&mut self, vm: &VirtualMachine, instr_idx: usize, _cache_base: usize) { + let obj = self.top_value(); + let cls = obj.class(); + + let new_op = if cls.is(vm.ctx.types.bool_type) { + Some(Instruction::ToBoolBool) + } else if cls.is(PyInt::class(&vm.ctx)) { + Some(Instruction::ToBoolInt) + } else if cls.is(vm.ctx.types.none_type) { + Some(Instruction::ToBoolNone) + } else if cls.is(PyList::class(&vm.ctx)) { + Some(Instruction::ToBoolList) + } else if cls.is(PyStr::class(&vm.ctx)) { + Some(Instruction::ToBoolStr) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + + fn deoptimize_to_bool(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::ToBool); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + + fn specialize_for_iter(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { + let iter = self.top_value(); + + let new_op = if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterRange) + } else if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterList) + } else if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterTuple) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + + fn deoptimize_for_iter(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::ForIter { + target: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + + /// Handle iterator exhaustion in specialized FOR_ITER handlers. + /// Skips END_FOR if present at target and jumps. + fn for_iter_jump_on_exhausted(&mut self, target: bytecode::Label) { + let target_idx = target.0 as usize; + let jump_target = if let Some(unit) = self.code.instructions.get(target_idx) { + if matches!( + unit.op, + bytecode::Instruction::EndFor | bytecode::Instruction::InstrumentedEndFor + ) { + bytecode::Label(target.0 + 1) + } else { + target + } + } else { + target + }; + self.jump(jump_target); + } + fn load_super_attr(&mut self, vm: &VirtualMachine, oparg: LoadSuperAttr) -> FrameResult { let attr_name = self.code.names[oparg.name_idx() as usize]; From 01762234504300b2dd98b3a9767730567d32250f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 1 Mar 2026 22:43:54 +0900 Subject: [PATCH 03/20] Add COMPARE_OP, TO_BOOL, FOR_ITER, LOAD_GLOBAL specialization - COMPARE_OP: CompareOpInt, CompareOpFloat, CompareOpStr - TO_BOOL: ToBoolBool, ToBoolInt, ToBoolNone, ToBoolList, ToBoolStr - FOR_ITER: ForIterRange, ForIterList, ForIterTuple with fast_next() - LOAD_GLOBAL: LoadGlobalModule, LoadGlobalBuiltin with dict version guard - Add version counter to Dict for mutation tracking --- crates/vm/src/builtins/dict.rs | 5 ++ crates/vm/src/dict_inner.rs | 12 +++ crates/vm/src/frame.rs | 140 +++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) diff --git a/crates/vm/src/builtins/dict.rs b/crates/vm/src/builtins/dict.rs index 0fc615e442e..43891d3b7f7 100644 --- a/crates/vm/src/builtins/dict.rs +++ b/crates/vm/src/builtins/dict.rs @@ -79,6 +79,11 @@ impl PyDict { &self.entries } + /// Monotonically increasing version for mutation tracking. + pub(crate) fn version(&self) -> u64 { + self.entries.version() + } + /// Returns all keys as a Vec, atomically under a single read lock. /// Thread-safe: prevents "dictionary changed size during iteration" errors. pub fn keys_vec(&self) -> Vec { diff --git a/crates/vm/src/dict_inner.rs b/crates/vm/src/dict_inner.rs index 9ed3c222fe0..e5e264b3554 100644 --- a/crates/vm/src/dict_inner.rs +++ b/crates/vm/src/dict_inner.rs @@ -316,6 +316,7 @@ impl Dict { break None; } }; + self.bump_version(); Ok(()) } @@ -379,6 +380,7 @@ impl Dict { // defer dec rc core::mem::take(&mut inner.entries) }; + self.bump_version(); } /// Delete a key @@ -435,6 +437,9 @@ impl Dict { ControlFlow::Continue(()) => continue, } }; + if removed.is_some() { + self.bump_version(); + } Ok(removed.map(|entry| entry.value)) } @@ -457,6 +462,7 @@ impl Dict { break None; } }; + self.bump_version(); Ok(()) } @@ -490,6 +496,7 @@ impl Dict { value.clone(), index_entry, ); + self.bump_version(); return Ok(value); } } @@ -526,6 +533,7 @@ impl Dict { let key_obj = key.to_pyobject(vm); let ret = (key_obj.clone(), value.clone()); inner.unchecked_push(index_index, hash, key_obj, value, index_entry); + self.bump_version(); return Ok(ret); } } @@ -726,6 +734,9 @@ impl Dict { ControlFlow::Continue(()) => continue, } }; + if removed.is_some() { + self.bump_version(); + } Ok(removed) } @@ -742,6 +753,7 @@ impl Dict { // entry.index always refers valid index inner.indices.get_unchecked_mut(entry.index) } = IndexEntry::DUMMY; + self.bump_version(); Some((entry.key, entry.value)) } diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index c32545372fd..a2b6b44aa6a 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -2091,6 +2091,18 @@ impl ExecutingFrame<'_> { } Instruction::LoadGlobal(idx) => { let oparg = idx.get(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_load_global(vm, oparg, instr_idx, cache_base); + } let name = &self.code.names[(oparg >> 1) as usize]; let x = self.load_global_or_builtin(name, vm)?; self.push_value(x); @@ -3291,6 +3303,79 @@ impl ExecutingFrame<'_> { Ok(None) } } + Instruction::LoadGlobalModule => { + let oparg = u32::from(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let current_version = self.globals.version() as u32; + if cached_version == current_version { + // globals unchanged — name is in globals, look up only there + let name = self.code.names[(oparg >> 1) as usize]; + if let Some(x) = self.globals.get_item_opt(name, vm)? { + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } else { + // Name was removed from globals + self.deoptimize_load_global(); + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } + } else { + self.deoptimize_load_global(); + let name = self.code.names[(oparg >> 1) as usize]; + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } + } + Instruction::LoadGlobalBuiltin => { + let oparg = u32::from(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let current_version = self.globals.version() as u32; + if cached_version == current_version { + // globals unchanged — name is NOT in globals, look up in builtins + let name = self.code.names[(oparg >> 1) as usize]; + if let Some(builtins_dict) = self.builtins.downcast_ref::() + && let Some(x) = builtins_dict.get_item_opt(name, vm)? + { + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + return Ok(None); + } + // Fallback: name not found or builtins not a dict + self.deoptimize_load_global(); + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } else { + self.deoptimize_load_global(); + let name = self.code.names[(oparg >> 1) as usize]; + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } + } // All INSTRUMENTED_* opcodes delegate to a cold function to keep // the hot instruction loop free of monitoring overhead. _ => self.execute_instrumented(instruction, arg, vm), @@ -5195,6 +5280,61 @@ impl ExecutingFrame<'_> { self.jump(jump_target); } + fn specialize_load_global( + &mut self, + vm: &VirtualMachine, + oparg: u32, + instr_idx: usize, + cache_base: usize, + ) { + let name = self.code.names[(oparg >> 1) as usize]; + // Check if name exists in globals + let in_globals = self.globals.get_item_opt(name, vm).ok().flatten().is_some(); + + let globals_version = self.globals.version() as u32; + + let new_op = if in_globals { + Some(Instruction::LoadGlobalModule) + } else if self + .builtins + .downcast_ref::() + .and_then(|b| b.get_item_opt(name, vm).ok().flatten()) + .is_some() + { + Some(Instruction::LoadGlobalBuiltin) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, globals_version); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + + fn deoptimize_load_global(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadGlobal(Arg::marker())); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + fn load_super_attr(&mut self, vm: &VirtualMachine, oparg: LoadSuperAttr) -> FrameResult { let attr_name = self.code.names[oparg.name_idx() as usize]; From 9bb0c46c402d1d4551d91b0800aca3b6b22fbac0 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 1 Mar 2026 23:18:50 +0900 Subject: [PATCH 04/20] Add BINARY_SUBSCR, CONTAINS_OP, UNPACK_SEQUENCE, STORE_ATTR specialization - BinaryOpSubscrListInt, BinaryOpSubscrTupleInt, BinaryOpSubscrDict - ContainsOpDict, ContainsOpSet - UnpackSequenceTwoTuple, UnpackSequenceTuple, UnpackSequenceList - StoreAttrInstanceValue with type_version guard - Deoptimize bytecode for marshal serialization (original_bytes) - Separate co_code (deoptimized) from _co_code_adaptive (quickened) --- crates/vm/src/builtins/object.rs | 2 +- crates/vm/src/frame.rs | 432 ++++++++++++++++++++++++++++++- 2 files changed, 430 insertions(+), 4 deletions(-) diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index b6c7b263d77..8fed43cd5d7 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -365,7 +365,7 @@ impl PyBaseObject { } #[pyslot] - fn slot_setattro( + pub(crate) fn slot_setattro( obj: &PyObject, attr_name: &Py, value: PySetterValue, diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index a2b6b44aa6a..d570fc56666 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -25,6 +25,7 @@ use crate::{ object::{Traverse, TraverseFn}, protocol::{PyIter, PyIterReturn}, scope::Scope, + sliceable::SliceableSequenceOp, stdlib::{builtins, sys::monitoring, typing}, types::{PyComparisonOp, PyTypeFlags}, vm::{Context, PyMethod}, @@ -1427,6 +1428,19 @@ impl ExecutingFrame<'_> { self.execute_compare(vm, op_val) } Instruction::ContainsOp(invert) => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_contains_op(vm, instr_idx, cache_base); + } + let b = self.pop_value(); let a = self.pop_value(); @@ -2576,7 +2590,21 @@ impl ExecutingFrame<'_> { self.execute_set_function_attribute(vm, attr.get(arg)) } Instruction::SetupAnnotations => self.setup_annotations(vm), - Instruction::StoreAttr { idx } => self.store_attr(vm, idx.get(arg)), + Instruction::StoreAttr { idx } => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_store_attr(vm, idx.get(arg), instr_idx, cache_base); + } + self.store_attr(vm, idx.get(arg)) + } Instruction::StoreDeref(i) => { let value = self.pop_value(); self.state.cells_frees[i.get(arg) as usize].set(Some(value)); @@ -2640,7 +2668,21 @@ impl ExecutingFrame<'_> { container.set_item(&*slice, value, vm)?; Ok(None) } - Instruction::StoreSubscr => self.execute_store_subscript(vm), + Instruction::StoreSubscr => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_store_subscr(vm, instr_idx, cache_base); + } + self.execute_store_subscript(vm) + } Instruction::Swap { index } => { let len = self.state.stack.len(); debug_assert!(len > 0, "stack underflow in SWAP"); @@ -2680,7 +2722,21 @@ impl ExecutingFrame<'_> { let args = args.get(arg); self.execute_unpack_ex(vm, args.before, args.after) } - Instruction::UnpackSequence { size } => self.unpack_sequence(size.get(arg), vm), + Instruction::UnpackSequence { size } => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_unpack_sequence(vm, instr_idx, cache_base); + } + self.unpack_sequence(size.get(arg), vm) + } Instruction::WithExceptStart => { // Stack: [..., __exit__, lasti, prev_exc, exc] // Call __exit__(type, value, tb) and push result @@ -2944,6 +3000,34 @@ impl ExecutingFrame<'_> { } self.load_attr_slow(vm, oparg) } + Instruction::StoreAttrInstanceValue => { + let attr_idx = u32::from(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[attr_idx as usize]; + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(dict) = owner.dict() + { + self.pop_value(); // owner + let value = self.pop_value(); + dict.set_item(attr_name, value, vm)?; + return Ok(None); + } + // Deoptimize + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::StoreAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.store_attr(vm, attr_idx) + } // Specialized BINARY_OP opcodes Instruction::BinaryOpAddInt => { let b = self.top_value(); @@ -3047,6 +3131,78 @@ impl ExecutingFrame<'_> { self.execute_bin_op(vm, bytecode::BinaryOperator::Multiply) } } + Instruction::BinaryOpSubscrListInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(list), Some(idx)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Ok(i) = idx.try_to_primitive::(vm) + { + let vec = list.borrow_vec(); + if let Some(pos) = vec.wrap_index(i) { + let value = vec.do_get(pos); + drop(vec); + self.pop_value(); + self.pop_value(); + self.push_value(value); + return Ok(None); + } + drop(vec); + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + return Err(vm.new_index_error("list index out of range")); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::BinaryOpSubscrTupleInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(tuple), Some(idx)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Ok(i) = idx.try_to_primitive::(vm) + { + let elements = tuple.as_slice(); + if let Some(pos) = elements.wrap_index(i) { + let value = elements[pos].clone(); + self.pop_value(); + self.pop_value(); + self.push_value(value); + return Ok(None); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + return Err(vm.new_index_error("tuple index out of range")); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::BinaryOpSubscrDict => { + let b = self.top_value(); + let a = self.nth_value(1); + if let Some(dict) = a.downcast_ref_if_exact::(vm) { + match dict.get_item_opt(b, vm) { + Ok(Some(value)) => { + self.pop_value(); + self.pop_value(); + self.push_value(value); + return Ok(None); + } + Ok(None) => { + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + let key = self.pop_value(); + self.pop_value(); + return Err(vm.new_key_error(key)); + } + Err(e) => { + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + return Err(e); + } + } + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } Instruction::CallPyExactArgs => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -3255,6 +3411,112 @@ impl ExecutingFrame<'_> { Ok(None) } } + Instruction::ContainsOpDict => { + let b = self.top_value(); // haystack + if let Some(dict) = b.downcast_ref_if_exact::(vm) { + let a = self.nth_value(1); // needle + let found = dict.get_item_opt(a, vm)?.is_some(); + self.pop_value(); + self.pop_value(); + let invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => found, + bytecode::Invert::Yes => !found, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } else { + self.deoptimize_contains_op(); + let b = self.pop_value(); + let a = self.pop_value(); + let invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => self._in(vm, &a, &b)?, + bytecode::Invert::Yes => self._not_in(vm, &a, &b)?, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } + } + Instruction::ContainsOpSet => { + let b = self.top_value(); // haystack + if b.downcast_ref_if_exact::(vm).is_some() { + let a = self.nth_value(1); // needle + let found = vm._contains(b, a)?; + self.pop_value(); + self.pop_value(); + let invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => found, + bytecode::Invert::Yes => !found, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } else { + self.deoptimize_contains_op(); + let b = self.pop_value(); + let a = self.pop_value(); + let invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => self._in(vm, &a, &b)?, + bytecode::Invert::Yes => self._not_in(vm, &a, &b)?, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } + } + Instruction::UnpackSequenceTwoTuple => { + let obj = self.top_value(); + if let Some(tuple) = obj.downcast_ref_if_exact::(vm) { + let elements = tuple.as_slice(); + if elements.len() == 2 { + let e0 = elements[0].clone(); + let e1 = elements[1].clone(); + self.pop_value(); + self.push_value(e1); + self.push_value(e0); + return Ok(None); + } + } + self.deoptimize_unpack_sequence(); + let size = u32::from(arg); + self.unpack_sequence(size, vm) + } + Instruction::UnpackSequenceTuple => { + let size = u32::from(arg) as usize; + let obj = self.top_value(); + if let Some(tuple) = obj.downcast_ref_if_exact::(vm) { + let elements = tuple.as_slice(); + if elements.len() == size { + let elems: Vec<_> = elements.to_vec(); + self.pop_value(); + self.state.stack.extend(elems.into_iter().rev().map(Some)); + return Ok(None); + } + } + self.deoptimize_unpack_sequence(); + self.unpack_sequence(size as u32, vm) + } + Instruction::UnpackSequenceList => { + let size = u32::from(arg) as usize; + let obj = self.top_value(); + if let Some(list) = obj.downcast_ref_if_exact::(vm) { + let vec = list.borrow_vec(); + if vec.len() == size { + let elems: Vec<_> = vec.to_vec(); + drop(vec); + self.pop_value(); + self.state.stack.extend(elems.into_iter().rev().map(Some)); + return Ok(None); + } + } + self.deoptimize_unpack_sequence(); + self.unpack_sequence(size as u32, vm) + } Instruction::ForIterRange => { let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); let iter = self.top_value(); @@ -5029,6 +5291,21 @@ impl ExecutingFrame<'_> { None } } + bytecode::BinaryOperator::Subscr => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubscrListInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubscrTupleInt) + } else if a.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::BinaryOpSubscrDict) + } else { + None + } + } _ => None, }; @@ -5335,6 +5612,155 @@ impl ExecutingFrame<'_> { } } + fn specialize_contains_op(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { + let haystack = self.top_value(); // b = TOS = haystack + let new_op = if haystack.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ContainsOpDict) + } else if haystack.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ContainsOpSet) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + + fn deoptimize_contains_op(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::ContainsOp(Arg::marker())); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + + fn specialize_unpack_sequence( + &mut self, + vm: &VirtualMachine, + instr_idx: usize, + cache_base: usize, + ) { + let obj = self.top_value(); + let new_op = if let Some(tuple) = obj.downcast_ref_if_exact::(vm) { + if tuple.len() == 2 { + Some(Instruction::UnpackSequenceTwoTuple) + } else { + Some(Instruction::UnpackSequenceTuple) + } + } else if obj.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::UnpackSequenceList) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + + fn deoptimize_unpack_sequence(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::UnpackSequence { + size: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + + fn specialize_store_attr( + &mut self, + _vm: &VirtualMachine, + attr_idx: bytecode::NameIdx, + instr_idx: usize, + cache_base: usize, + ) { + // TOS = owner (the object being assigned to) + let owner = self.top_value(); + let cls = owner.class(); + + // Only specialize if setattr is the default (generic_setattr) + let is_default_setattr = cls + .slots + .setattro + .load() + .is_some_and(|f| f as usize == PyBaseObject::slot_setattro as *const () as usize); + if !is_default_setattr { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + + // Get or assign type version + let mut type_version = cls.tp_version_tag.load(Acquire); + if type_version == 0 { + type_version = cls.assign_version_tag(); + } + if type_version == 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + + // Check no data descriptor for this attr + let attr_name = self.code.names[attr_idx as usize]; + let has_data_descr = cls.get_attr(attr_name).is_some_and(|descr| { + let descr_cls = descr.class(); + descr_cls.slots.descr_get.load().is_some() && descr_cls.slots.descr_set.load().is_some() + }); + + if !has_data_descr && owner.dict().is_some() { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .replace_op(instr_idx, Instruction::StoreAttrInstanceValue); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + fn load_super_attr(&mut self, vm: &VirtualMachine, oparg: LoadSuperAttr) -> FrameResult { let attr_name = self.code.names[oparg.name_idx() as usize]; From 1c07777eee255e81c8787eb9406a93a81cf295b7 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 00:20:58 +0900 Subject: [PATCH 05/20] Add STORE_SUBSCR, BinaryOpAddUnicode, ToBoolAlwaysTrue, CallLen, CallIsinstance, CallType1 specialization --- crates/vm/src/frame.rs | 266 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 264 insertions(+), 2 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index d570fc56666..27f1ea18397 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1,3 +1,4 @@ +use crate::anystr::AnyStr as _; #[cfg(feature = "flame")] use crate::bytecode::InstructionMetadata; use crate::{ @@ -8,6 +9,7 @@ use crate::{ PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, asyncgenerator::PyAsyncGenWrappedValue, + builtin_func::PyNativeFunction, frame::stack_analysis, function::{PyCell, PyCellRef, PyFunction}, list::PyListIterator, @@ -17,7 +19,7 @@ use crate::{ bytecode::{ self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, - convert::ToPyResult, + convert::{IntoObject, ToPyObject, ToPyResult}, coroutine::Coro, exceptions::ExceptionCtor, function::{ArgMapping, Either, FuncArgs}, @@ -3028,6 +3030,42 @@ impl ExecutingFrame<'_> { } self.store_attr(vm, attr_idx) } + Instruction::StoreSubscrListInt => { + // Stack: [value, obj, idx] (TOS=idx, TOS1=obj, TOS2=value) + let idx = self.pop_value(); + let obj = self.pop_value(); + let value = self.pop_value(); + if let Some(list) = obj.downcast_ref_if_exact::(vm) + && let Some(int_idx) = idx.downcast_ref_if_exact::(vm) + && let Ok(i) = int_idx.try_to_primitive::(vm) + { + let mut vec = list.borrow_vec_mut(); + if let Some(pos) = vec.wrap_index(i) { + vec[pos] = value; + return Ok(None); + } + drop(vec); + self.deoptimize_store_subscr(); + return Err(vm.new_index_error("list assignment index out of range")); + } + self.deoptimize_store_subscr(); + obj.set_item(&*idx, value, vm)?; + Ok(None) + } + Instruction::StoreSubscrDict => { + // Stack: [value, obj, idx] (TOS=idx, TOS1=obj, TOS2=value) + let idx = self.pop_value(); + let obj = self.pop_value(); + let value = self.pop_value(); + if let Some(dict) = obj.downcast_ref_if_exact::(vm) { + dict.set_item(&*idx, value, vm)?; + Ok(None) + } else { + self.deoptimize_store_subscr(); + obj.set_item(&*idx, value, vm)?; + Ok(None) + } + } // Specialized BINARY_OP opcodes Instruction::BinaryOpAddInt => { let b = self.top_value(); @@ -3131,6 +3169,23 @@ impl ExecutingFrame<'_> { self.execute_bin_op(vm, bytecode::BinaryOperator::Multiply) } } + Instruction::BinaryOpAddUnicode => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_str)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_str.as_wtf8().py_add(b_str.as_wtf8()); + self.pop_value(); + self.pop_value(); + self.push_value(result.to_pyobject(vm)); + Ok(None) + } else { + self.deoptimize_binary_op(bytecode::BinaryOperator::Add); + self.execute_bin_op(vm, bytecode::BinaryOperator::Add) + } + } Instruction::BinaryOpSubscrListInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -3276,6 +3331,83 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } } + Instruction::CallLen => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: [callable, null, arg] + let obj = self.pop_value(); // arg + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let len = obj.length(vm)?; + self.push_value(vm.ctx.new_int(len).into()); + return Ok(None); + } + // Guard failed — re-push and fallback + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallIsinstance => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 2 { + // Stack: [callable, null, obj, classinfo] + let classinfo = self.pop_value(); + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let result = obj.is_instance(&classinfo, vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + return Ok(None); + } + // Guard failed — re-push and fallback + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + self.push_value(classinfo); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallType1 => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: [callable, null, arg] + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let tp = obj.class().to_owned().into(); + self.push_value(tp); + return Ok(None); + } + // Guard failed — re-push and fallback + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } Instruction::CompareOpInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -3411,6 +3543,26 @@ impl ExecutingFrame<'_> { Ok(None) } } + Instruction::ToBoolAlwaysTrue => { + // Objects without __bool__ or __len__ are always True. + // Guard: check the type hasn't gained these slots. + let obj = self.top_value(); + let slots = &obj.class().slots; + if slots.as_number.boolean.load().is_none() + && slots.as_mapping.length.load().is_none() + && slots.as_sequence.length.load().is_none() + { + self.pop_value(); + self.push_value(vm.ctx.new_bool(true).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } Instruction::ContainsOpDict => { let b = self.top_value(); // haystack if let Some(dict) = b.downcast_ref_if_exact::(vm) { @@ -5261,6 +5413,10 @@ impl ExecutingFrame<'_> { && b.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpAddFloat) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpAddUnicode) } else { None } @@ -5337,7 +5493,7 @@ impl ExecutingFrame<'_> { fn specialize_call( &mut self, - _vm: &VirtualMachine, + vm: &VirtualMachine, nargs: u32, instr_idx: usize, cache_base: usize, @@ -5384,6 +5540,45 @@ impl ExecutingFrame<'_> { } } + // Try to specialize builtin calls + if !self_or_null_is_some { + if let Some(native) = callable.downcast_ref::() + && native.zelf.is_none() + { + let callable_tag = callable as *const PyObject as u32; + let new_op = match (native.value.name, nargs) { + ("len", 1) => Some(Instruction::CallLen), + ("isinstance", 2) => Some(Instruction::CallIsinstance), + _ => None, + }; + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; + } + } + // type(x) specialization + if callable.class().is(vm.ctx.types.type_type) + && callable.is(&vm.ctx.types.type_type.as_object()) + && nargs == 1 + { + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CallType1); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; + } + } + unsafe { self.code .instructions @@ -5465,6 +5660,11 @@ impl ExecutingFrame<'_> { Some(Instruction::ToBoolList) } else if cls.is(PyStr::class(&vm.ctx)) { Some(Instruction::ToBoolStr) + } else if cls.slots.as_number.boolean.load().is_none() + && cls.slots.as_mapping.length.load().is_none() + && cls.slots.as_sequence.length.load().is_none() + { + Some(Instruction::ToBoolAlwaysTrue) } else { None }; @@ -5522,6 +5722,22 @@ impl ExecutingFrame<'_> { } } + fn deoptimize_call(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::Call { + nargs: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + fn deoptimize_for_iter(&mut self) { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -5612,6 +5828,52 @@ impl ExecutingFrame<'_> { } } + fn specialize_store_subscr( + &mut self, + vm: &VirtualMachine, + instr_idx: usize, + cache_base: usize, + ) { + // Stack: [value, obj, idx] — obj is TOS-1 + let obj = self.nth_value(1); + let idx = self.top_value(); + + let new_op = if obj.downcast_ref_if_exact::(vm).is_some() + && idx.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::StoreSubscrListInt) + } else if obj.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::StoreSubscrDict) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + + fn deoptimize_store_subscr(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::StoreSubscr); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + fn specialize_contains_op(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { let haystack = self.top_value(); // b = TOS = haystack let new_op = if haystack.downcast_ref_if_exact::(vm).is_some() { From 240f3ac880bc6227477446bf1cf433af4cd19188 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 00:28:40 +0900 Subject: [PATCH 06/20] Add BinaryOpSubscrStrInt, CallStr1, CallTuple1 specialization --- crates/vm/src/frame.rs | 113 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 99 insertions(+), 14 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 27f1ea18397..1b2be25403c 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3258,6 +3258,34 @@ impl ExecutingFrame<'_> { self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } + Instruction::BinaryOpSubscrStrInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Ok(i) = b_int.try_to_primitive::(vm) + { + match a_str.getitem_by_index(vm, i) { + Ok(ch) => { + self.pop_value(); + self.pop_value(); + self.push_value( + PyStr::from(ch).into_pyobject(vm), + ); + return Ok(None); + } + Err(e) => { + self.deoptimize_binary_op( + bytecode::BinaryOperator::Subscr, + ); + return Err(e); + } + } + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } Instruction::CallPyExactArgs => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -3408,6 +3436,53 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::CallStr1 => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let result = obj.str(vm)?; + self.push_value(result.into()); + return Ok(None); + } + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallTuple1 => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let elements: Vec = + vm.extract_elements_with(&obj, Ok)?; + self.push_value(vm.ctx.new_tuple(elements).into()); + return Ok(None); + } + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } Instruction::CompareOpInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -5458,6 +5533,10 @@ impl ExecutingFrame<'_> { Some(Instruction::BinaryOpSubscrTupleInt) } else if a.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpSubscrDict) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubscrStrInt) } else { None } @@ -5561,21 +5640,27 @@ impl ExecutingFrame<'_> { return; } } - // type(x) specialization - if callable.class().is(vm.ctx.types.type_type) - && callable.is(&vm.ctx.types.type_type.as_object()) - && nargs == 1 - { - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallType1); - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); + // type/str/tuple(x) specialization + if callable.class().is(vm.ctx.types.type_type) && nargs == 1 { + let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { + Some(Instruction::CallType1) + } else if callable.is(&vm.ctx.types.str_type.as_object()) { + Some(Instruction::CallStr1) + } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { + Some(Instruction::CallTuple1) + } else { + None + }; + if let Some(new_op) = new_op { + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; } - return; } } From cadb9bec87fb714e79de61221e2959f5467bf72a Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 00:30:15 +0900 Subject: [PATCH 07/20] Add BinaryOpInplaceAddUnicode specialization --- crates/vm/src/frame.rs | 44 +++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 1b2be25403c..30d31fdd334 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1245,7 +1245,21 @@ impl ExecutingFrame<'_> { // TODO: In CPython, this does in-place unicode concatenation when // refcount is 1. Falls back to regular iadd for now. Instruction::BinaryOpInplaceAddUnicode => { - self.execute_bin_op(vm, bytecode::BinaryOperator::InplaceAdd) + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_str)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_str.as_wtf8().py_add(b_str.as_wtf8()); + self.pop_value(); + self.pop_value(); + self.push_value(result.to_pyobject(vm)); + Ok(None) + } else { + self.deoptimize_binary_op(bytecode::BinaryOperator::InplaceAdd); + self.execute_bin_op(vm, bytecode::BinaryOperator::InplaceAdd) + } } Instruction::BinarySlice => { // Stack: [container, start, stop] -> [result] @@ -3270,15 +3284,11 @@ impl ExecutingFrame<'_> { Ok(ch) => { self.pop_value(); self.pop_value(); - self.push_value( - PyStr::from(ch).into_pyobject(vm), - ); + self.push_value(PyStr::from(ch).into_pyobject(vm)); return Ok(None); } Err(e) => { - self.deoptimize_binary_op( - bytecode::BinaryOperator::Subscr, - ); + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); return Err(e); } } @@ -3470,8 +3480,7 @@ impl ExecutingFrame<'_> { let callable = self.pop_value(); let callable_tag = &*callable as *const PyObject as u32; if cached_tag == callable_tag { - let elements: Vec = - vm.extract_elements_with(&obj, Ok)?; + let elements: Vec = vm.extract_elements_with(&obj, Ok)?; self.push_value(vm.ctx.new_tuple(elements).into()); return Ok(None); } @@ -3721,7 +3730,9 @@ impl ExecutingFrame<'_> { if elements.len() == size { let elems: Vec<_> = elements.to_vec(); self.pop_value(); - self.state.stack.extend(elems.into_iter().rev().map(Some)); + for elem in elems.into_iter().rev() { + self.push_value(elem); + } return Ok(None); } } @@ -3737,7 +3748,9 @@ impl ExecutingFrame<'_> { let elems: Vec<_> = vec.to_vec(); drop(vec); self.pop_value(); - self.state.stack.extend(elems.into_iter().rev().map(Some)); + for elem in elems.into_iter().rev() { + self.push_value(elem); + } return Ok(None); } } @@ -5541,6 +5554,15 @@ impl ExecutingFrame<'_> { None } } + bytecode::BinaryOperator::InplaceAdd => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpInplaceAddUnicode) + } else { + None + } + } _ => None, }; From fd098fe5091b7394a56167c36132e89255ef016e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 11:20:18 +0900 Subject: [PATCH 08/20] Add LoadAttrModule, CallBuiltinO, CallPyGeneral, CallBoundMethodGeneral, ForIterGen, CallListAppend specialization --- crates/vm/src/frame.rs | 203 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 191 insertions(+), 12 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 30d31fdd334..94642007c63 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6,8 +6,8 @@ use crate::{ TryFromObject, VirtualMachine, builtins::{ PyBaseException, PyBaseExceptionRef, PyBaseObject, PyCode, PyCoroutine, PyDict, PyDictRef, - PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, - PyTemplate, PyTraceback, PyType, PyUtf8Str, + PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PyModule, PySet, PySlice, PyStr, + PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, asyncgenerator::PyAsyncGenWrappedValue, builtin_func::PyNativeFunction, frame::stack_analysis, @@ -3016,6 +3016,40 @@ impl ExecutingFrame<'_> { } self.load_attr_slow(vm, oparg) } + Instruction::LoadAttrModule => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[oparg.name_idx() as usize]; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(module) = owner.downcast_ref_if_exact::(vm) + && let Ok(value) = module.get_attr(attr_name, vm) + { + self.pop_value(); + if oparg.is_method() { + self.push_value(value); + self.push_value_opt(None); + } else { + self.push_value(value); + } + return Ok(None); + } + // Deoptimize + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.load_attr_slow(vm, oparg) + } Instruction::StoreAttrInstanceValue => { let attr_idx = u32::from(arg); let instr_idx = self.lasti() as usize - 1; @@ -3492,6 +3526,112 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::CallBuiltinO => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag + && let Some(native) = callable.downcast_ref::() + { + let args = FuncArgs { + args: vec![obj], + kwargs: Default::default(), + }; + let result = (native.value.func)(vm, args)?; + self.push_value(result); + return Ok(None); + } + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallPyGeneral => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_positional_args(nargs); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + let result = func.invoke(final_args, vm)?; + self.push_value(result); + Ok(None) + } else { + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + } + Instruction::CallBoundMethodGeneral => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_positional_args(nargs); + let self_val = self.pop_value(); + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let mut final_args = args; + final_args.prepend_arg(self_val); + let result = func.invoke(final_args, vm)?; + self.push_value(result); + Ok(None) + } else { + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + } + Instruction::CallListAppend => { + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: [list.append (bound method), self_or_null (list), item] + let item = self.pop_value(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + if let Some(list_obj) = self_or_null.as_ref() + && let Some(list) = list_obj.downcast_ref_if_exact::(vm) + { + list.append(item); + self.push_value(vm.ctx.none()); + return Ok(None); + } + self.push_value(callable); + self.push_value_opt(self_or_null); + self.push_value(item); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } Instruction::CompareOpInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -3805,6 +3945,26 @@ impl ExecutingFrame<'_> { Ok(None) } } + Instruction::ForIterGen => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(generator) = iter.downcast_ref_if_exact::(vm) { + match generator.as_coro().send(iter, vm.ctx.none(), vm) { + Ok(PyIterReturn::Return(value)) => { + self.push_value(value); + } + Ok(PyIterReturn::StopIteration(_)) => { + self.for_iter_jump_on_exhausted(target); + } + Err(e) => return Err(e), + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } Instruction::LoadGlobalModule => { let oparg = u32::from(arg); let instr_idx = self.lasti() as usize - 1; @@ -5386,6 +5546,19 @@ impl ExecutingFrame<'_> { return; } + // Module attribute access: use LoadAttrModule + if obj.downcast_ref_if_exact::(_vm).is_some() { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrModule); + } + return; + } + let attr_name = self.code.names[oparg.name_idx() as usize]; // Look up attr in class via MRO @@ -5624,21 +5797,24 @@ impl ExecutingFrame<'_> { nargs }; - if func.can_specialize_call(effective_nargs) { - let new_op = if self_or_null_is_some { + let new_op = if func.can_specialize_call(effective_nargs) { + if self_or_null_is_some { Instruction::CallBoundMethodExactArgs } else { Instruction::CallPyExactArgs - }; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - // Store func_version in cache (after counter) - self.code - .instructions - .write_cache_u32(cache_base + 1, version); } - return; + } else if self_or_null_is_some { + Instruction::CallBoundMethodGeneral + } else { + Instruction::CallPyGeneral + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, version); } + return; } // Try to specialize builtin calls @@ -5650,6 +5826,7 @@ impl ExecutingFrame<'_> { let new_op = match (native.value.name, nargs) { ("len", 1) => Some(Instruction::CallLen), ("isinstance", 2) => Some(Instruction::CallIsinstance), + (_, 1) => Some(Instruction::CallBuiltinO), _ => None, }; if let Some(new_op) = new_op { @@ -5812,6 +5989,8 @@ impl ExecutingFrame<'_> { Some(Instruction::ForIterList) } else if iter.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::ForIterTuple) + } else if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterGen) } else { None }; From dd2911320b9a4851219d53431e465fbbcdb706ac Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 11:39:42 +0900 Subject: [PATCH 09/20] Add LoadAttrNondescriptor*, CallMethodDescriptor* specialization - LoadAttrNondescriptorNoDict: plain class attr on objects without dict - LoadAttrNondescriptorWithValues: plain class attr with dict fallback - LoadAttrClass: handler for type attribute access (not yet routed) - CallMethodDescriptorNoargs: method descriptor with 0 args - CallMethodDescriptorO: method descriptor with 1 arg - CallMethodDescriptorFast: method descriptor with multiple args - Use HAS_DICT flag instead of obj.dict().is_some() for method/nondescriptor routing --- crates/vm/src/frame.rs | 290 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 284 insertions(+), 6 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 94642007c63..a916b9a00eb 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -10,6 +10,7 @@ use crate::{ PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, asyncgenerator::PyAsyncGenWrappedValue, builtin_func::PyNativeFunction, + descriptor::PyMethodDescriptor, frame::stack_analysis, function::{PyCell, PyCellRef, PyFunction}, list::PyListIterator, @@ -3050,6 +3051,115 @@ impl ExecutingFrame<'_> { } self.load_attr_slow(vm, oparg) } + Instruction::LoadAttrNondescriptorNoDict => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version { + // Load cached class attribute directly (no dict, no data descriptor) + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + self.pop_value(); + if oparg.is_method() { + self.push_value(attr); + self.push_value_opt(None); + } else { + self.push_value(attr); + } + return Ok(None); + } + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.load_attr_slow(vm, oparg) + } + Instruction::LoadAttrNondescriptorWithValues => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[oparg.name_idx() as usize]; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version { + // Instance dict has priority — check if attr is shadowed + if let Some(dict) = owner.dict() + && let Some(value) = dict.get_item_opt(attr_name, vm)? + { + self.pop_value(); + if oparg.is_method() { + self.push_value(value); + self.push_value_opt(None); + } else { + self.push_value(value); + } + return Ok(None); + } + // Not in instance dict — use cached class attr + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + self.pop_value(); + if oparg.is_method() { + self.push_value(attr); + self.push_value_opt(None); + } else { + self.push_value(attr); + } + return Ok(None); + } + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.load_attr_slow(vm, oparg) + } + Instruction::LoadAttrClass => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && let Some(owner_type) = owner.downcast_ref::() + && owner_type.tp_version_tag.load(Acquire) == type_version + { + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + self.pop_value(); + if oparg.is_method() { + self.push_value(attr); + self.push_value_opt(None); + } else { + self.push_value(attr); + } + return Ok(None); + } + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.load_attr_slow(vm, oparg) + } Instruction::StoreAttrInstanceValue => { let attr_idx = u32::from(arg); let instr_idx = self.lasti() as usize - 1; @@ -3632,6 +3742,114 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::CallMethodDescriptorNoargs => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 0 { + // Stack: [callable, self_or_null] — peek to get func ptr + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - 1].is_some(); + let callable = self.nth_value(1); + let callable_tag = callable as *const PyObject as u32; + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let args = FuncArgs { + args: vec![self_val], + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); + } + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallMethodDescriptorO => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: [callable, self_or_null, arg1] + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - 2].is_some(); + let callable = self.nth_value(2); + let callable_tag = callable as *const PyObject as u32; + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let obj = self.pop_value(); + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let args = FuncArgs { + args: vec![self_val, obj], + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); + } + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallMethodDescriptorFast => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let mut all_args = Vec::with_capacity(nargs as usize + 1); + all_args.push(self_val); + all_args.extend(positional_args); + let args = FuncArgs { + args: all_args, + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } Instruction::CompareOpInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -5563,7 +5781,7 @@ impl ExecutingFrame<'_> { // Look up attr in class via MRO let cls_attr = cls.get_attr(attr_name); - let has_dict = obj.dict().is_some(); + let class_has_dict = cls.slots.flags.has_feature(PyTypeFlags::HAS_DICT); if oparg.is_method() { // Method specialization @@ -5584,7 +5802,7 @@ impl ExecutingFrame<'_> { .write_cache_u64(cache_base + 5, descr_ptr); } - let new_op = if !has_dict { + let new_op = if !class_has_dict { Instruction::LoadAttrMethodNoDict } else { Instruction::LoadAttrMethodWithValues @@ -5607,19 +5825,60 @@ impl ExecutingFrame<'_> { descr_cls.slots.descr_get.load().is_some() && descr_cls.slots.descr_set.load().is_some() }); + let has_descr_get = cls_attr.as_ref().is_some_and(|descr| { + descr.class().slots.descr_get.load().is_some() + }); - if !has_data_descr && has_dict { - // Instance attribute access — skip class descriptor check + if has_data_descr || has_descr_get { + // Data descriptor or non-data descriptor with __get__ — can't specialize + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } else if class_has_dict { + if let Some(ref descr) = cls_attr { + // Plain class attr + class supports dict — check dict first, fallback + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + self.code.instructions.replace_op( + instr_idx, + Instruction::LoadAttrNondescriptorWithValues, + ); + } + } else { + // No class attr, must be in instance dict + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrInstanceValue); + } + } + } else if let Some(ref descr) = cls_attr { + // No dict support, plain class attr — cache directly + let descr_ptr = &**descr as *const PyObject as u64; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, type_version); self.code .instructions - .replace_op(instr_idx, Instruction::LoadAttrInstanceValue); + .write_cache_u64(cache_base + 5, descr_ptr); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrNondescriptorNoDict); } } else { - // Data descriptor or no dict — can't easily specialize + // No dict, no class attr — can't specialize unsafe { self.code .instructions @@ -5817,6 +6076,25 @@ impl ExecutingFrame<'_> { return; } + // Try to specialize method descriptor calls + if self_or_null_is_some + && callable.downcast_ref::().is_some() + { + let callable_tag = callable as *const PyObject as u32; + let new_op = match nargs { + 0 => Instruction::CallMethodDescriptorNoargs, + 1 => Instruction::CallMethodDescriptorO, + _ => Instruction::CallMethodDescriptorFast, + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; + } + // Try to specialize builtin calls if !self_or_null_is_some { if let Some(native) = callable.downcast_ref::() From b238a27f04d15f389ea38d306d3d322e9562c910 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 11:45:11 +0900 Subject: [PATCH 10/20] Add CallBuiltinFast, CallNonPyGeneral specialization - CallBuiltinFast: native function calls with arbitrary positional args - CallNonPyGeneral: fallback for unmatched callables (custom __call__, etc.) - All builtin function calls now specialize (CallBuiltinFast as default) - specialize_call now always produces a specialized instruction --- crates/vm/src/frame.rs | 62 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index a916b9a00eb..2317785113e 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3665,6 +3665,37 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::CallBuiltinFast => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let func = if cached_tag == callable_tag { + callable + .downcast_ref::() + .map(|n| n.value.func) + } else { + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable + let args = FuncArgs { + args: positional_args, + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } Instruction::CallPyGeneral => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -3850,6 +3881,21 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::CallNonPyGeneral => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + if cached_tag == callable_tag { + let args = self.collect_positional_args(nargs); + return self.execute_call(args, vm); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } Instruction::CompareOpInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -6102,11 +6148,12 @@ impl ExecutingFrame<'_> { { let callable_tag = callable as *const PyObject as u32; let new_op = match (native.value.name, nargs) { - ("len", 1) => Some(Instruction::CallLen), - ("isinstance", 2) => Some(Instruction::CallIsinstance), - (_, 1) => Some(Instruction::CallBuiltinO), - _ => None, + ("len", 1) => Instruction::CallLen, + ("isinstance", 2) => Instruction::CallIsinstance, + (_, 1) => Instruction::CallBuiltinO, + _ => Instruction::CallBuiltinFast, }; + let new_op = Some(new_op); if let Some(new_op) = new_op { unsafe { self.code.instructions.replace_op(instr_idx, new_op); @@ -6141,10 +6188,15 @@ impl ExecutingFrame<'_> { } } + // General fallback: cache callable identity to skip re-specialization + let callable_tag = callable as *const PyObject as u32; unsafe { self.code .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + .replace_op(instr_idx, Instruction::CallNonPyGeneral); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); } } From d9500354828db05f1faf167aa35fb1e3eb3e0be6 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 11:50:41 +0900 Subject: [PATCH 11/20] Add SendGen specialization for generator/coroutine send - SendGen: direct coro.send() for generator/coroutine receivers - Add adaptive counter to Send instruction - specialize_send checks builtin_coro for PyGenerator/PyCoroutine --- crates/vm/src/frame.rs | 91 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 6 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2317785113e..13f301238a3 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -2799,22 +2799,83 @@ impl ExecutingFrame<'_> { } Instruction::Send { .. } => { // (receiver, v -- receiver, retval) - // Pops v, sends it to receiver. On yield, pushes retval - // (so stack = [..., receiver, retval]). On return/StopIteration, - // also pushes retval and jumps to END_SEND which will pop receiver. - // Relative forward: target = lasti + caches(1) + delta + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_send(instr_idx, cache_base); + } + let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let val = self.pop_value(); + let receiver = self.top_value(); + + match self._send(receiver, val, vm)? { + PyIterReturn::Return(value) => { + self.push_value(value); + Ok(None) + } + PyIterReturn::StopIteration(value) => { + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value.clone()); + self.fire_exception_trace(&stop_exc, vm)?; + } + let value = vm.unwrap_or_none(value); + self.push_value(value); + self.jump(exit_label); + Ok(None) + } + } + } + Instruction::SendGen => { let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + // Stack: [receiver, val] — peek receiver before popping + let receiver = self.nth_value(1); + let is_coro = self.builtin_coro(receiver).is_some(); let val = self.pop_value(); let receiver = self.top_value(); + if is_coro { + let coro = self.builtin_coro(receiver).unwrap(); + match coro.send(receiver, val, vm)? { + PyIterReturn::Return(value) => { + self.push_value(value); + return Ok(None); + } + PyIterReturn::StopIteration(value) => { + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value.clone()); + self.fire_exception_trace(&stop_exc, vm)?; + } + let value = vm.unwrap_or_none(value); + self.push_value(value); + self.jump(exit_label); + return Ok(None); + } + } + } + // Deoptimize + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::Send { target: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } match self._send(receiver, val, vm)? { PyIterReturn::Return(value) => { self.push_value(value); Ok(None) } PyIterReturn::StopIteration(value) => { - // Fire 'exception' trace event for StopIteration, - // matching SEND's exception handling. if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { let stop_exc = vm.new_stop_iteration(value.clone()); self.fire_exception_trace(&stop_exc, vm)?; @@ -6200,6 +6261,24 @@ impl ExecutingFrame<'_> { } } + fn specialize_send(&mut self, instr_idx: usize, cache_base: usize) { + // Stack: [receiver, val] — receiver is at position 1 + let receiver = self.nth_value(1); + if self.builtin_coro(receiver).is_some() { + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::SendGen); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } + fn specialize_compare_op( &mut self, vm: &VirtualMachine, From 32376d5a37e481dab3b7bd94d398748432b72db7 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 12:53:22 +0900 Subject: [PATCH 12/20] Add LoadAttrSlot, StoreAttrSlot specialization for __slots__ access - LoadAttrSlot: direct obj.get_slot(offset) bypassing descriptor protocol - StoreAttrSlot: direct obj.set_slot(offset, value) bypassing descriptor protocol - Detect PyMemberDescriptor with MemberGetter::Offset in specialize_load_attr/store_attr - Cache slot offset in cache_base+3 --- crates/vm/src/frame.rs | 127 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 120 insertions(+), 7 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 13f301238a3..8b22274ced5 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -10,7 +10,7 @@ use crate::{ PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, asyncgenerator::PyAsyncGenWrappedValue, builtin_func::PyNativeFunction, - descriptor::PyMethodDescriptor, + descriptor::{MemberGetter, PyMemberDescriptor, PyMethodDescriptor}, frame::stack_analysis, function::{PyCell, PyCellRef, PyFunction}, list::PyListIterator, @@ -3221,6 +3221,40 @@ impl ExecutingFrame<'_> { } self.load_attr_slow(vm, oparg) } + Instruction::LoadAttrSlot => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version + { + let slot_offset = + self.code.instructions.read_cache_u32(cache_base + 3) as usize; + if let Some(value) = owner.get_slot(slot_offset) { + self.pop_value(); + if oparg.is_method() { + self.push_value(value); + self.push_value_opt(None); + } else { + self.push_value(value); + } + return Ok(None); + } + // Slot is None → AttributeError (fall through to slow path) + } + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.load_attr_slow(vm, oparg) + } Instruction::StoreAttrInstanceValue => { let attr_idx = u32::from(arg); let instr_idx = self.lasti() as usize - 1; @@ -3249,6 +3283,35 @@ impl ExecutingFrame<'_> { } self.store_attr(vm, attr_idx) } + Instruction::StoreAttrSlot => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + let version_match = type_version != 0 && { + let owner = self.top_value(); + owner.class().tp_version_tag.load(Acquire) == type_version + }; + + if version_match { + let slot_offset = + self.code.instructions.read_cache_u32(cache_base + 3) as usize; + let owner = self.pop_value(); + let value = self.pop_value(); + owner.set_slot(slot_offset, Some(value)); + return Ok(None); + } + // Deoptimize + let attr_idx = u32::from(arg); + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::StoreAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.store_attr(vm, attr_idx) + } Instruction::StoreSubscrListInt => { // Stack: [value, obj, idx] (TOS=idx, TOS1=obj, TOS2=value) let idx = self.pop_value(); @@ -5936,8 +5999,32 @@ impl ExecutingFrame<'_> { descr.class().slots.descr_get.load().is_some() }); - if has_data_descr || has_descr_get { - // Data descriptor or non-data descriptor with __get__ — can't specialize + if has_data_descr { + // Check for member descriptor (slot access) + if let Some(ref descr) = cls_attr + && let Some(member_descr) = descr.downcast_ref::() + && let MemberGetter::Offset(offset) = member_descr.member.getter + { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u32(cache_base + 3, offset as u32); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrSlot); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } else if has_descr_get { + // Non-data descriptor with __get__ — can't specialize unsafe { self.code .instructions @@ -6693,14 +6780,40 @@ impl ExecutingFrame<'_> { return; } - // Check no data descriptor for this attr + // Check for data descriptor let attr_name = self.code.names[attr_idx as usize]; - let has_data_descr = cls.get_attr(attr_name).is_some_and(|descr| { + let cls_attr = cls.get_attr(attr_name); + let has_data_descr = cls_attr.as_ref().is_some_and(|descr| { let descr_cls = descr.class(); - descr_cls.slots.descr_get.load().is_some() && descr_cls.slots.descr_set.load().is_some() + descr_cls.slots.descr_get.load().is_some() + && descr_cls.slots.descr_set.load().is_some() }); - if !has_data_descr && owner.dict().is_some() { + if has_data_descr { + // Check for member descriptor (slot access) + if let Some(ref descr) = cls_attr + && let Some(member_descr) = descr.downcast_ref::() + && let MemberGetter::Offset(offset) = member_descr.member.getter + { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u32(cache_base + 3, offset as u32); + self.code + .instructions + .replace_op(instr_idx, Instruction::StoreAttrSlot); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } else if owner.dict().is_some() { unsafe { self.code .instructions From a7c179cb3867cdb3927c57c38441814c08372c0d Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 13:33:23 +0900 Subject: [PATCH 13/20] Add LoadSuperAttrAttr, LoadSuperAttrMethod, CallBuiltinClass, CallBuiltinFastWithKeywords, CallMethodDescriptorFastWithKeywords specialization --- crates/vm/src/frame.rs | 336 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 317 insertions(+), 19 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 8b22274ced5..fd2fbd58631 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1875,7 +1875,21 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::LoadAttr { idx } => self.load_attr(vm, idx.get(arg)), - Instruction::LoadSuperAttr { arg: idx } => self.load_super_attr(vm, idx.get(arg)), + Instruction::LoadSuperAttr { arg: idx } => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_load_super_attr(vm, idx.get(arg), instr_idx, cache_base); + } + self.load_super_attr(vm, idx.get(arg)) + } Instruction::LoadBuildClass => { let build_class = if let Some(builtins_dict) = self.builtins_dict { builtins_dict @@ -4005,6 +4019,106 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::CallBuiltinClass => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + if cached_tag == callable_tag + && callable.downcast_ref::().is_some() + { + let args = self.collect_positional_args(nargs); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + let result = callable.call(final_args, vm)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallMethodDescriptorFastWithKeywords => { + // Same as CallMethodDescriptorFast — RustPython's native function + // interface is uniform regardless of keyword support + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let mut all_args = Vec::with_capacity(nargs as usize + 1); + all_args.push(self_val); + all_args.extend(positional_args); + let args = FuncArgs { + args: all_args, + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallBuiltinFastWithKeywords => { + // Same as CallBuiltinFast — RustPython's native function + // interface is uniform regardless of keyword support + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let func = if cached_tag == callable_tag { + callable + .downcast_ref::() + .map(|n| n.value.func) + } else { + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable + let args = FuncArgs { + args: positional_args, + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } Instruction::CallNonPyGeneral => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -4020,6 +4134,145 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::LoadSuperAttrAttr => { + let oparg = u32::from(arg); + let attr_name = self.code.names[(oparg >> 2) as usize]; + // Stack: [global_super, class, self] + let self_obj = self.top_value(); + let class_obj = self.nth_value(1); + let global_super = self.nth_value(2); + // Guard: global_super is builtin super and class is a type + if global_super.is(&vm.ctx.types.super_type.as_object()) + && class_obj.downcast_ref::().is_some() + { + let class = class_obj.downcast_ref::().unwrap(); + let start_type = self_obj.class(); + // MRO lookup: skip classes up to and including `class`, then search + let mro: Vec> = start_type.mro_map_collect(|x| x.to_owned()); + let mut found = None; + let mut past_class = false; + for cls in &mro { + if !past_class { + if cls.is(class) { + past_class = true; + } + continue; + } + if let Some(descr) = cls.get_direct_attr(attr_name) { + // Call descriptor __get__ if available + let obj_arg = if self_obj.class().is(class) { + None + } else { + Some(self_obj.to_owned()) + }; + let result = vm + .call_get_descriptor_specific( + &descr, + obj_arg, + Some(start_type.as_object().to_owned()), + ) + .unwrap_or(Ok(descr))?; + found = Some(result); + break; + } + } + if let Some(attr) = found { + self.pop_value(); // self + self.pop_value(); // class + self.pop_value(); // super + self.push_value(attr); + return Ok(None); + } + } + // Deoptimize + unsafe { + self.code.instructions.replace_op( + self.lasti() as usize - 1, + Instruction::LoadSuperAttr { + arg: Arg::marker(), + }, + ); + let cache_base = self.lasti() as usize; + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + let oparg = LoadSuperAttr::new(oparg); + self.load_super_attr(vm, oparg) + } + Instruction::LoadSuperAttrMethod => { + let oparg = u32::from(arg); + let attr_name = self.code.names[(oparg >> 2) as usize]; + // Stack: [global_super, class, self] + let self_obj = self.top_value(); + let class_obj = self.nth_value(1); + let global_super = self.nth_value(2); + // Guard: global_super is builtin super and class is a type + if global_super.is(&vm.ctx.types.super_type.as_object()) + && class_obj.downcast_ref::().is_some() + { + let class = class_obj.downcast_ref::().unwrap(); + let self_val = self_obj.to_owned(); + let start_type = self_obj.class(); + // MRO lookup + let mro: Vec> = start_type.mro_map_collect(|x| x.to_owned()); + let mut found = None; + let mut past_class = false; + for cls in &mro { + if !past_class { + if cls.is(class) { + past_class = true; + } + continue; + } + if let Some(descr) = cls.get_direct_attr(attr_name) { + // Check if it's a method (has __get__) + let method = vm.call_get_descriptor_specific( + &descr, + Some(self_val.clone()), + Some(start_type.as_object().to_owned()), + ); + match method { + Some(Ok(bound)) => { + found = Some((bound, true)); + } + Some(Err(e)) => return Err(e), + None => { + found = Some((descr, false)); + } + } + break; + } + } + if let Some((attr, is_method)) = found { + self.pop_value(); // self + self.pop_value(); // class + self.pop_value(); // super + self.push_value(attr); + if is_method { + self.push_value(self_val); + } else { + self.push_null(); + } + return Ok(None); + } + } + // Deoptimize + unsafe { + self.code.instructions.replace_op( + self.lasti() as usize - 1, + Instruction::LoadSuperAttr { + arg: Arg::marker(), + }, + ); + let cache_base = self.lasti() as usize; + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + let oparg = LoadSuperAttr::new(oparg); + self.load_super_attr(vm, oparg) + } Instruction::CompareOpInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -6313,26 +6566,39 @@ impl ExecutingFrame<'_> { } } // type/str/tuple(x) specialization - if callable.class().is(vm.ctx.types.type_type) && nargs == 1 { - let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { - Some(Instruction::CallType1) - } else if callable.is(&vm.ctx.types.str_type.as_object()) { - Some(Instruction::CallStr1) - } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { - Some(Instruction::CallTuple1) - } else { - None - }; - if let Some(new_op) = new_op { - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); + if callable.class().is(vm.ctx.types.type_type) { + if nargs == 1 { + let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { + Some(Instruction::CallType1) + } else if callable.is(&vm.ctx.types.str_type.as_object()) { + Some(Instruction::CallStr1) + } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { + Some(Instruction::CallTuple1) + } else { + None + }; + if let Some(new_op) = new_op { + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; } - return; } + // General builtin class call (any type with Callable) + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CallBuiltinClass); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; } } @@ -6366,6 +6632,38 @@ impl ExecutingFrame<'_> { } } + fn specialize_load_super_attr( + &mut self, + vm: &VirtualMachine, + oparg: LoadSuperAttr, + instr_idx: usize, + cache_base: usize, + ) { + // Stack: [global_super, class, self] + let global_super = self.nth_value(2); + let class = self.nth_value(1); + + if !global_super.is(&vm.ctx.types.super_type.as_object()) + || class.downcast_ref::().is_none() + { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + + let new_op = if oparg.is_load_method() { + Instruction::LoadSuperAttrMethod + } else { + Instruction::LoadSuperAttrAttr + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } + fn specialize_compare_op( &mut self, vm: &VirtualMachine, From e1289f1f6ac85429e0cefb89acdc468291fa25e9 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 13:41:26 +0900 Subject: [PATCH 14/20] Add LoadAttrProperty specialization for property descriptor access --- crates/vm/src/builtins/property.rs | 4 +++ crates/vm/src/frame.rs | 52 +++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/crates/vm/src/builtins/property.rs b/crates/vm/src/builtins/property.rs index 6c53c4b4d98..509307c7b00 100644 --- a/crates/vm/src/builtins/property.rs +++ b/crates/vm/src/builtins/property.rs @@ -133,6 +133,10 @@ impl PyProperty { self.getter.read().clone() } + pub(crate) fn get_fget(&self) -> Option { + self.getter.read().clone() + } + #[pygetset] fn fset(&self) -> Option { self.setter.read().clone() diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index fd2fbd58631..28055e6e29e 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6,7 +6,7 @@ use crate::{ TryFromObject, VirtualMachine, builtins::{ PyBaseException, PyBaseExceptionRef, PyBaseObject, PyCode, PyCoroutine, PyDict, PyDictRef, - PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PyModule, PySet, PySlice, PyStr, + PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PyModule, PyProperty, PySet, PySlice, PyStr, PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, asyncgenerator::PyAsyncGenWrappedValue, builtin_func::PyNativeFunction, @@ -3269,6 +3269,40 @@ impl ExecutingFrame<'_> { } self.load_attr_slow(vm, oparg) } + Instruction::LoadAttrProperty => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version + { + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + if descr_ptr != 0 { + let descr = + unsafe { &*(descr_ptr as *const PyObject) }; + if let Some(prop) = descr.downcast_ref::() { + let owner = self.pop_value(); + if let Some(getter) = prop.get_fget() { + let result = getter.call((owner,), vm)?; + self.push_value(result); + return Ok(None); + } + } + } + } + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.load_attr_slow(vm, oparg) + } Instruction::StoreAttrInstanceValue => { let attr_idx = u32::from(arg); let instr_idx = self.lasti() as usize - 1; @@ -6269,6 +6303,22 @@ impl ExecutingFrame<'_> { .instructions .replace_op(instr_idx, Instruction::LoadAttrSlot); } + } else if let Some(ref descr) = cls_attr + && descr.downcast_ref::().is_some() + { + // Property descriptor — cache the property object pointer + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrProperty); + } } else { unsafe { self.code From 2350bc1f321d7cdf822bb39693ffd3af0757518d Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 13:44:26 +0900 Subject: [PATCH 15/20] Add LoadAttrClass specialization for class attribute access --- crates/vm/src/frame.rs | 79 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 28055e6e29e..2557325098e 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6191,6 +6191,12 @@ impl ExecutingFrame<'_> { let obj = self.top_value(); let cls = obj.class(); + // Check if this is a type object (class attribute access) + if obj.downcast_ref::().is_some() { + self.specialize_class_load_attr(_vm, oparg, instr_idx, cache_base); + return; + } + // Only specialize if getattro is the default (PyBaseObject::getattro) let is_default_getattro = cls .slots @@ -6385,6 +6391,79 @@ impl ExecutingFrame<'_> { } } + fn specialize_class_load_attr( + &mut self, + _vm: &VirtualMachine, + oparg: LoadAttr, + instr_idx: usize, + cache_base: usize, + ) { + let obj = self.top_value(); + let owner_type = obj.downcast_ref::().unwrap(); + + // Get or assign type version for the type object itself + let mut type_version = owner_type.tp_version_tag.load(Acquire); + if type_version == 0 { + type_version = owner_type.assign_version_tag(); + } + if type_version == 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + + let attr_name = self.code.names[oparg.name_idx() as usize]; + + // Check metaclass: ensure no data descriptor on metaclass for this name + let mcl = obj.class(); + let mcl_attr = mcl.get_attr(attr_name); + if let Some(ref attr) = mcl_attr { + let attr_class = attr.class(); + if attr_class.slots.descr_set.load().is_some() { + // Data descriptor on metaclass — can't specialize + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + } + + // Look up attr in the type's own MRO + let cls_attr = owner_type.get_attr(attr_name); + if let Some(ref descr) = cls_attr { + let descr_class = descr.class(); + let has_descr_get = descr_class.slots.descr_get.load().is_some(); + if !has_descr_get { + // METHOD or NON_DESCRIPTOR — can cache directly + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrClass); + } + return; + } + } + + // Can't specialize + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + fn load_attr_slow(&mut self, vm: &VirtualMachine, oparg: LoadAttr) -> FrameResult { let attr_name = self.code.names[oparg.name_idx() as usize]; let parent = self.pop_value(); From ba9d5282787c06b1b7ac4a416134a469cc2caa47 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 13:51:02 +0900 Subject: [PATCH 16/20] Add BinaryOpSubscrListSlice specialization --- crates/vm/src/frame.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2557325098e..e9444cc213f 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3612,6 +3612,21 @@ impl ExecutingFrame<'_> { self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } + Instruction::BinaryOpSubscrListSlice => { + let b = self.top_value(); + let a = self.nth_value(1); + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref::().is_some() + { + let b_owned = self.pop_value(); + let a_owned = self.pop_value(); + let result = a_owned.get_item(b_owned.as_object(), vm)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } Instruction::CallPyExactArgs => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -6558,6 +6573,10 @@ impl ExecutingFrame<'_> { && b.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpSubscrStrInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref::().is_some() + { + Some(Instruction::BinaryOpSubscrListSlice) } else { None } From 3c88368ce62c2fadbf676492d8d7b8cb0fdbaa74 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 13:54:28 +0900 Subject: [PATCH 17/20] Add CallKwPy, CallKwBoundMethod, CallKwNonPy specialization Fix LoadSuperAttrMethod to push unbound descriptor + self instead of bound method + self which caused double self binding. Fix LoadSuperAttrAttr obj_arg condition for classmethod detection. --- crates/vm/src/frame.rs | 253 ++++++++++++++++++++++++++++++++--------- 1 file changed, 201 insertions(+), 52 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index e9444cc213f..e003d7479e3 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6,8 +6,8 @@ use crate::{ TryFromObject, VirtualMachine, builtins::{ PyBaseException, PyBaseExceptionRef, PyBaseObject, PyCode, PyCoroutine, PyDict, PyDictRef, - PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PyModule, PyProperty, PySet, PySlice, PyStr, - PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, + PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PyModule, PyProperty, PySet, PySlice, + PyStr, PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, asyncgenerator::PyAsyncGenWrappedValue, builtin_func::PyNativeFunction, descriptor::{MemberGetter, PyMemberDescriptor, PyMethodDescriptor}, @@ -1388,8 +1388,21 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallKw { nargs } => { + let nargs = nargs.get(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_call_kw(vm, nargs, instr_idx, cache_base); + } // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] - let args = self.collect_keyword_args(nargs.get(arg)); + let args = self.collect_keyword_args(nargs); self.execute_call(args, vm) } Instruction::CallFunctionEx => { @@ -2877,9 +2890,12 @@ impl ExecutingFrame<'_> { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::Send { target: Arg::marker() }); + self.code.instructions.replace_op( + instr_idx, + Instruction::Send { + target: Arg::marker(), + }, + ); self.code .instructions .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); @@ -3243,8 +3259,7 @@ impl ExecutingFrame<'_> { let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); - if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version - { + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version { let slot_offset = self.code.instructions.read_cache_u32(cache_base + 3) as usize; if let Some(value) = owner.get_slot(slot_offset) { @@ -3277,12 +3292,10 @@ impl ExecutingFrame<'_> { let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); - if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version - { + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version { let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); if descr_ptr != 0 { - let descr = - unsafe { &*(descr_ptr as *const PyObject) }; + let descr = unsafe { &*(descr_ptr as *const PyObject) }; if let Some(prop) = descr.downcast_ref::() { let owner = self.pop_value(); if let Some(getter) = prop.get_fget() { @@ -3731,14 +3744,14 @@ impl ExecutingFrame<'_> { let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 2 { - // Stack: [callable, null, obj, classinfo] - let classinfo = self.pop_value(); + // Stack: [callable, null, obj, class_info] + let class_info = self.pop_value(); let obj = self.pop_value(); let _null = self.pop_value_opt(); let callable = self.pop_value(); let callable_tag = &*callable as *const PyObject as u32; if cached_tag == callable_tag { - let result = obj.is_instance(&classinfo, vm)?; + let result = obj.is_instance(&class_info, vm)?; self.push_value(vm.ctx.new_bool(result).into()); return Ok(None); } @@ -3746,7 +3759,7 @@ impl ExecutingFrame<'_> { self.push_value(callable); self.push_value_opt(_null); self.push_value(obj); - self.push_value(classinfo); + self.push_value(class_info); } self.deoptimize_call(); let args = self.collect_positional_args(nargs); @@ -4075,9 +4088,7 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); let callable_tag = callable as *const PyObject as u32; - if cached_tag == callable_tag - && callable.downcast_ref::().is_some() - { + if cached_tag == callable_tag && callable.downcast_ref::().is_some() { let args = self.collect_positional_args(nargs); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -4183,6 +4194,77 @@ impl ExecutingFrame<'_> { let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } + Instruction::CallKwPy => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] + // callable is at position nargs + 2 from top (nargs args + kwarg_names + self_or_null) + let callable = self.nth_value(nargs + 2); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_keyword_args(nargs); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + let result = func.invoke(final_args, vm)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call_kw(); + let args = self.collect_keyword_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallKwBoundMethod => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + // Stack: [callable, self_or_null(=self), arg1, ..., argN, kwarg_names] + let callable = self.nth_value(nargs + 2); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_keyword_args(nargs); + let self_val = self.pop_value(); // self_or_null is always Some here + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let mut final_args = args; + final_args.prepend_arg(self_val); + let result = func.invoke(final_args, vm)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call_kw(); + let args = self.collect_keyword_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallKwNonPy => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 2); + let callable_tag = callable as *const PyObject as u32; + if cached_tag == callable_tag { + let args = self.collect_keyword_args(nargs); + return self.execute_call(args, vm); + } + self.deoptimize_call_kw(); + let args = self.collect_keyword_args(nargs); + self.execute_call(args, vm) + } Instruction::LoadSuperAttrAttr => { let oparg = u32::from(arg); let attr_name = self.code.names[(oparg >> 2) as usize]; @@ -4209,7 +4291,8 @@ impl ExecutingFrame<'_> { } if let Some(descr) = cls.get_direct_attr(attr_name) { // Call descriptor __get__ if available - let obj_arg = if self_obj.class().is(class) { + // Pass None for obj when self IS its own type (classmethod) + let obj_arg = if self_obj.is(start_type.as_object()) { None } else { Some(self_obj.to_owned()) @@ -4237,9 +4320,7 @@ impl ExecutingFrame<'_> { unsafe { self.code.instructions.replace_op( self.lasti() as usize - 1, - Instruction::LoadSuperAttr { - arg: Arg::marker(), - }, + Instruction::LoadSuperAttr { arg: Arg::marker() }, ); let cache_base = self.lasti() as usize; self.code @@ -4275,20 +4356,27 @@ impl ExecutingFrame<'_> { continue; } if let Some(descr) = cls.get_direct_attr(attr_name) { - // Check if it's a method (has __get__) - let method = vm.call_get_descriptor_specific( - &descr, - Some(self_val.clone()), - Some(start_type.as_object().to_owned()), - ); - match method { - Some(Ok(bound)) => { - found = Some((bound, true)); - } - Some(Err(e)) => return Err(e), - None => { - found = Some((descr, false)); - } + let descr_cls = descr.class(); + if descr_cls + .slots + .flags + .has_feature(PyTypeFlags::METHOD_DESCRIPTOR) + { + // Method descriptor: push unbound func + self + // CALL will prepend self as first positional arg + found = Some((descr, true)); + } else if let Some(descr_get) = descr_cls.slots.descr_get.load() { + // Has __get__ but not METHOD_DESCRIPTOR: bind it + let bound = descr_get( + descr, + Some(self_val.clone()), + Some(start_type.as_object().to_owned()), + vm, + )?; + found = Some((bound, false)); + } else { + // Plain attribute + found = Some((descr, false)); } break; } @@ -4310,9 +4398,7 @@ impl ExecutingFrame<'_> { unsafe { self.code.instructions.replace_op( self.lasti() as usize - 1, - Instruction::LoadSuperAttr { - arg: Arg::marker(), - }, + Instruction::LoadSuperAttr { arg: Arg::marker() }, ); let cache_base = self.lasti() as usize; self.code @@ -6303,9 +6389,9 @@ impl ExecutingFrame<'_> { descr_cls.slots.descr_get.load().is_some() && descr_cls.slots.descr_set.load().is_some() }); - let has_descr_get = cls_attr.as_ref().is_some_and(|descr| { - descr.class().slots.descr_get.load().is_some() - }); + let has_descr_get = cls_attr + .as_ref() + .is_some_and(|descr| descr.class().slots.descr_get.load().is_some()); if has_data_descr { // Check for member descriptor (slot access) @@ -6365,10 +6451,9 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttrNondescriptorWithValues, - ); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrNondescriptorWithValues); } } else { // No class attr, must be in instance dict @@ -6672,9 +6757,7 @@ impl ExecutingFrame<'_> { } // Try to specialize method descriptor calls - if self_or_null_is_some - && callable.downcast_ref::().is_some() - { + if self_or_null_is_some && callable.downcast_ref::().is_some() { let callable_tag = callable as *const PyObject as u32; let new_op = match nargs { 0 => Instruction::CallMethodDescriptorNoargs, @@ -6762,6 +6845,57 @@ impl ExecutingFrame<'_> { } } + fn specialize_call_kw( + &mut self, + _vm: &VirtualMachine, + nargs: u32, + instr_idx: usize, + cache_base: usize, + ) { + // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] + // callable is at position nargs + 2 from top + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); + let callable = self.nth_value(nargs + 2); + + if let Some(func) = callable.downcast_ref::() { + let version = func.func_version(); + if version == 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + + let new_op = if self_or_null_is_some { + Instruction::CallKwBoundMethod + } else { + Instruction::CallKwPy + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, version); + } + return; + } + + // General fallback + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CallKwNonPy); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + } + fn specialize_send(&mut self, instr_idx: usize, cache_base: usize) { // Stack: [receiver, val] — receiver is at position 1 let receiver = self.nth_value(1); @@ -6966,6 +7100,22 @@ impl ExecutingFrame<'_> { } } + fn deoptimize_call_kw(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::CallKw { + nargs: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + fn deoptimize_for_iter(&mut self) { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; @@ -7231,8 +7381,7 @@ impl ExecutingFrame<'_> { let cls_attr = cls.get_attr(attr_name); let has_data_descr = cls_attr.as_ref().is_some_and(|descr| { let descr_cls = descr.class(); - descr_cls.slots.descr_get.load().is_some() - && descr_cls.slots.descr_set.load().is_some() + descr_cls.slots.descr_get.load().is_some() && descr_cls.slots.descr_set.load().is_some() }); if has_data_descr { From ab6bbb64112d79264e60d80bab7d26f7b04e7157 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 17:17:49 +0900 Subject: [PATCH 18/20] Clean up comments in specialization code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unnecessary CPython references, FIXME→TODO, redundant Note: prefix, and "Same as" cross-references. --- crates/vm/src/builtins/function.rs | 16 +++++++++++---- crates/vm/src/builtins/range.rs | 25 +++-------------------- crates/vm/src/dict_inner.rs | 14 +++++-------- crates/vm/src/frame.rs | 32 ++++++++++++++++-------------- 4 files changed, 37 insertions(+), 50 deletions(-) diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index d3f54f5a8b6..02198785815 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -80,6 +80,14 @@ pub struct PyFunction { static FUNC_VERSION_COUNTER: AtomicU32 = AtomicU32::new(1); +/// Atomically allocate the next function version, returning 0 if exhausted. +/// Once the counter wraps to 0, it stays at 0 permanently. +fn next_func_version() -> u32 { + FUNC_VERSION_COUNTER + .fetch_update(Relaxed, Relaxed, |v| (v != 0).then(|| v.wrapping_add(1))) + .unwrap_or(0) +} + unsafe impl Traverse for PyFunction { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.globals.traverse(tracer_fn); @@ -204,7 +212,7 @@ impl PyFunction { annotate: PyMutex::new(None), module: PyMutex::new(module), doc: PyMutex::new(doc), - func_version: AtomicU32::new(FUNC_VERSION_COUNTER.fetch_add(1, Relaxed)), + func_version: AtomicU32::new(next_func_version()), #[cfg(feature = "jit")] jitted_code: OnceCell::new(), }; @@ -603,17 +611,17 @@ impl Py { self.func_version.load(Relaxed) } - /// _PyFunction_GetVersionForCurrentState /// Returns the current version, assigning a fresh one if previously invalidated. /// Returns 0 if the version counter has overflowed. + /// `_PyFunction_GetVersionForCurrentState` pub fn get_version_for_current_state(&self) -> u32 { let v = self.func_version.load(Relaxed); if v != 0 { return v; } - let new_v = FUNC_VERSION_COUNTER.fetch_add(1, Relaxed); + let new_v = next_func_version(); if new_v == 0 { - return 0; // Counter overflow + return 0; } self.func_version.store(new_v, Relaxed); new_v diff --git a/crates/vm/src/builtins/range.rs b/crates/vm/src/builtins/range.rs index af74ce121c2..ec1a662ddad 100644 --- a/crates/vm/src/builtins/range.rs +++ b/crates/vm/src/builtins/range.rs @@ -613,19 +613,6 @@ pub struct PyRangeIterator { length: usize, } -impl PyRangeIterator { - /// Advance and return next value without going through the iterator protocol. - #[inline] - pub(crate) fn next_fast(&self) -> Option { - let index = self.index.fetch_add(1); - if index < self.length { - Some(self.start + (index as isize) * self.step) - } else { - None - } - } -} - impl PyPayload for PyRangeIterator { #[inline] fn class(ctx: &Context) -> &'static Py { @@ -676,15 +663,9 @@ impl PyRangeIterator { impl SelfIter for PyRangeIterator {} impl IterNext for PyRangeIterator { fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { - // TODO: In pathological case (index == usize::MAX) this can wrap around - // (since fetch_add wraps). This would result in the iterator spinning again - // from the beginning. - let index = zelf.index.fetch_add(1); - let r = if index < zelf.length { - let value = zelf.start + (index as isize) * zelf.step; - PyIterReturn::Return(vm.ctx.new_int(value).into()) - } else { - PyIterReturn::StopIteration(None) + let r = match zelf.fast_next() { + Some(value) => PyIterReturn::Return(vm.ctx.new_int(value).into()), + None => PyIterReturn::StopIteration(None), }; Ok(r) } diff --git a/crates/vm/src/dict_inner.rs b/crates/vm/src/dict_inner.rs index e5e264b3554..e4d8174abbd 100644 --- a/crates/vm/src/dict_inner.rs +++ b/crates/vm/src/dict_inner.rs @@ -298,6 +298,7 @@ impl Dict { }; if entry.index == index_index { let removed = core::mem::replace(&mut entry.value, value); + self.bump_version(); // defer dec RC break Some(removed); } else { @@ -313,10 +314,10 @@ impl Dict { continue; } inner.unchecked_push(index_index, hash, key.to_pyobject(vm), value, entry_index); + self.bump_version(); break None; } }; - self.bump_version(); Ok(()) } @@ -377,10 +378,10 @@ impl Dict { inner.indices.resize(8, IndexEntry::FREE); inner.used = 0; inner.filled = 0; + self.bump_version(); // defer dec rc core::mem::take(&mut inner.entries) }; - self.bump_version(); } /// Delete a key @@ -437,9 +438,6 @@ impl Dict { ControlFlow::Continue(()) => continue, } }; - if removed.is_some() { - self.bump_version(); - } Ok(removed.map(|entry| entry.value)) } @@ -459,10 +457,10 @@ impl Dict { continue; } inner.unchecked_push(index_index, hash, key.to_owned(), value, entry); + self.bump_version(); break None; } }; - self.bump_version(); Ok(()) } @@ -721,6 +719,7 @@ impl Dict { } = IndexEntry::DUMMY; inner.used -= 1; let removed = slot.take(); + self.bump_version(); Ok(ControlFlow::Break(removed)) } @@ -734,9 +733,6 @@ impl Dict { ControlFlow::Continue(()) => continue, } }; - if removed.is_some() { - self.bump_version(); - } Ok(removed) } diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index e003d7479e3..ec19078d0b8 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -772,8 +772,8 @@ impl ExecutingFrame<'_> { } // Fire 'opcode' trace event for sys.settrace when f_trace_opcodes - // is set. Skip RESUME and ExtendedArg (matching CPython's exclusion - // of these in _Py_call_instrumentation_instruction). + // is set. Skip RESUME and ExtendedArg + // (_Py_call_instrumentation_instruction). if !vm.is_none(&self.object.trace.lock()) && *self.object.trace_opcodes.lock() && !matches!( @@ -787,6 +787,7 @@ impl ExecutingFrame<'_> { } } + vm.check_signals()?; let lasti_before = self.lasti(); let result = self.execute_instruction(op, arg, &mut do_extend_arg, vm); // Skip inline cache entries if instruction fell through (no jump). @@ -1196,8 +1197,6 @@ impl ExecutingFrame<'_> { extend_arg: &mut bool, vm: &VirtualMachine, ) -> FrameResult { - vm.check_signals()?; - flame_guard!(format!( "Frame::execute_instruction({})", instruction.display(arg, &self.code.code).to_string() @@ -3824,8 +3823,13 @@ impl ExecutingFrame<'_> { let callable = self.pop_value(); let callable_tag = &*callable as *const PyObject as u32; if cached_tag == callable_tag { - let elements: Vec = vm.extract_elements_with(&obj, Ok)?; - self.push_value(vm.ctx.new_tuple(elements).into()); + // tuple(x) returns x as-is when x is already an exact tuple + if let Ok(tuple) = obj.clone().downcast_exact::(vm) { + self.push_value(tuple.into_pyref().into()); + } else { + let elements: Vec = vm.extract_elements_with(&obj, Ok)?; + self.push_value(vm.ctx.new_tuple(elements).into()); + } return Ok(None); } self.push_value(callable); @@ -4108,8 +4112,7 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallMethodDescriptorFastWithKeywords => { - // Same as CallMethodDescriptorFast — RustPython's native function - // interface is uniform regardless of keyword support + // Native function interface is uniform regardless of keyword support let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); @@ -4147,8 +4150,7 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallBuiltinFastWithKeywords => { - // Same as CallBuiltinFast — RustPython's native function - // interface is uniform regardless of keyword support + // Native function interface is uniform regardless of keyword support let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); @@ -5416,8 +5418,8 @@ impl ExecutingFrame<'_> { // Look up handler in exception table // lasti points to NEXT instruction (already incremented in run loop) // The exception occurred at the previous instruction - // Python uses signed int where INSTR_OFFSET() - 1 = -1 before first instruction - // We use u32, so check for 0 explicitly (equivalent to CPython's -1) + // Python uses signed int where INSTR_OFFSET() - 1 = -1 before first instruction. + // We use u32, so check for 0 explicitly. if self.lasti() == 0 { // No instruction executed yet, no handler can match return Err(exception); @@ -5449,7 +5451,7 @@ impl ExecutingFrame<'_> { // 3. Push exception onto stack // always push exception, PUSH_EXC_INFO transforms [exc] -> [prev_exc, exc] - // Note: Do NOT call vm.set_exception here! PUSH_EXC_INFO will do it. + // Do NOT call vm.set_exception here! PUSH_EXC_INFO will do it. // PUSH_EXC_INFO needs to get prev_exc from vm.current_exception() BEFORE setting the new one. self.push_value(exception.into()); @@ -5778,7 +5780,7 @@ impl ExecutingFrame<'_> { ) -> PyResult { match self.builtin_coro(jen) { Some(coro) => coro.send(jen, val, vm), - // FIXME: turn return type to PyResult then ExecutionResult will be simplified + // TODO: turn return type to PyResult then ExecutionResult will be simplified None if vm.is_none(&val) => PyIter::new(jen).next(vm), None => { let meth = jen.get_attr("send", vm)?; @@ -5893,7 +5895,7 @@ impl ExecutingFrame<'_> { // FOR_ITER_RANGE: bypass generic iterator protocol for range iterators if let Some(range_iter) = top.downcast_ref_if_exact::(vm) { - if let Some(value) = range_iter.next_fast() { + if let Some(value) = range_iter.fast_next() { self.push_value(vm.ctx.new_int(value).into()); return Ok(true); } From 48fd5c70bfd8ac4c694059996a0d82d2f06346ba Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 21:08:52 +0900 Subject: [PATCH 19/20] fix check_signals --- crates/vm/src/frame.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index ec19078d0b8..25514bff8a2 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -20,7 +20,7 @@ use crate::{ bytecode::{ self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, - convert::{IntoObject, ToPyObject, ToPyResult}, + convert::{ToPyObject, ToPyResult}, coroutine::Coro, exceptions::ExceptionCtor, function::{ArgMapping, Either, FuncArgs}, @@ -787,7 +787,33 @@ impl ExecutingFrame<'_> { } } - vm.check_signals()?; + if let Err(exception) = vm.check_signals() { + #[cold] + fn handle_signal_exception( + frame: &mut ExecutingFrame<'_>, + exception: PyBaseExceptionRef, + idx: usize, + vm: &VirtualMachine, + ) -> FrameResult { + let (loc, _end_loc) = frame.code.locations[idx]; + let next = exception.__traceback__(); + let new_traceback = + PyTraceback::new(next, frame.object.to_owned(), idx as u32 * 2, loc.line); + exception.set_traceback_typed(Some(new_traceback.into_ref(&vm.ctx))); + vm.contextualize_exception(&exception); + frame.unwind_blocks(vm, UnwindReason::Raising { exception }) + } + match handle_signal_exception(self, exception, idx, vm) { + Ok(None) => {} + Ok(Some(value)) => { + break Ok(value); + } + Err(exception) => { + break Err(exception); + } + } + continue; + } let lasti_before = self.lasti(); let result = self.execute_instruction(op, arg, &mut do_extend_arg, vm); // Skip inline cache entries if instruction fell through (no jump). From 51accdb3cb7e8a560aebd5fdb71baf1ae73ba255 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Tue, 3 Mar 2026 09:53:11 +0900 Subject: [PATCH 20/20] fix import --- crates/vm/src/frame.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 25514bff8a2..ad077132ef1 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1,4 +1,4 @@ -use crate::anystr::AnyStr as _; +use crate::anystr::AnyStr; #[cfg(feature = "flame")] use crate::bytecode::InstructionMetadata; use crate::{