diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index f74057f4816..3b7d7bf14ea 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -13,7 +13,7 @@ use crate::{ bytecode, class::PyClassImpl, common::wtf8::{Wtf8Buf, wtf8_concat}, - frame::Frame, + frame::{Frame, FrameRef}, function::{FuncArgs, OptionalArg, PyComparisonValue, PySetterValue}, scope::Scope, types::{ @@ -673,27 +673,14 @@ impl Py { /// Returns `None` for generator/coroutine code paths that do not push a /// regular datastack-backed frame in the fast call path. pub(crate) fn datastack_frame_size_bytes(&self) -> Option { - let code: &Py = &self.code; - if code - .flags - .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) - { - return None; - } - let nlocalsplus = code - .varnames - .len() - .checked_add(code.cellvars.len())? - .checked_add(code.freevars.len())?; - let capacity = nlocalsplus.checked_add(code.max_stackdepth as usize)?; - capacity.checked_mul(core::mem::size_of::()) + datastack_frame_size_bytes_for_code(&self.code) } - /// Fast path for calling a simple function with exact positional args. - /// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args. - /// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs, - /// and nargs == co_argcount. - pub fn invoke_exact_args(&self, mut args: Vec, vm: &VirtualMachine) -> PyResult { + pub(crate) fn prepare_exact_args_frame( + &self, + mut args: Vec, + vm: &VirtualMachine, + ) -> FrameRef { let code: PyRef = (*self.code).to_owned(); debug_assert_eq!(args.len(), code.arg_count as usize); @@ -704,16 +691,11 @@ impl Py { .intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS) ); debug_assert_eq!(code.kwonlyarg_count, 0); - - // Generator/coroutine code objects are SIMPLE_FUNCTION in call - // specialization classification, but their call path must still - // go through invoke() to produce generator/coroutine objects. - if code - .flags - .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) - { - return self.invoke(FuncArgs::from(args), vm); - } + debug_assert!( + !code + .flags + .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) + ); let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) { None @@ -727,12 +709,11 @@ impl Py { self.builtins.clone(), self.closure.as_ref().map_or(&[], |c| c.as_slice()), Some(self.to_owned().into()), - true, // Always use datastack (invoke_exact_args is never gen/coro) + true, // Exact-args fast path is only used for non-gen/coro functions. vm, ) .into_ref(&vm.ctx); - // Move args directly into fastlocals (no clone/refcount needed) { let fastlocals = unsafe { frame.fastlocals_mut() }; for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..)) { @@ -740,7 +721,6 @@ impl Py { } } - // Handle cell2arg if let Some(cell2arg) = code.cell2arg.as_deref() { let fastlocals = unsafe { frame.fastlocals_mut() }; for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) { @@ -749,6 +729,36 @@ impl Py { } } + frame + } + + /// Fast path for calling a simple function with exact positional args. + /// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args. + /// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs, + /// and nargs == co_argcount. + pub fn invoke_exact_args(&self, args: Vec, vm: &VirtualMachine) -> PyResult { + let code: PyRef = (*self.code).to_owned(); + + debug_assert_eq!(args.len(), code.arg_count as usize); + debug_assert!(code.flags.contains(bytecode::CodeFlags::OPTIMIZED)); + debug_assert!( + !code + .flags + .intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS) + ); + debug_assert_eq!(code.kwonlyarg_count, 0); + + // Generator/coroutine code objects are SIMPLE_FUNCTION in call + // specialization classification, but their call path must still + // go through invoke() to produce generator/coroutine objects. + if code + .flags + .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) + { + return self.invoke(FuncArgs::from(args), vm); + } + let frame = self.prepare_exact_args_frame(args, vm); + let result = vm.run_frame(frame.clone()); unsafe { if let Some(base) = frame.materialize_localsplus() { @@ -759,6 +769,22 @@ impl Py { } } +pub(crate) fn datastack_frame_size_bytes_for_code(code: &Py) -> Option { + if code + .flags + .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) + { + return None; + } + let nlocalsplus = code + .varnames + .len() + .checked_add(code.cellvars.len())? + .checked_add(code.freevars.len())?; + let capacity = nlocalsplus.checked_add(code.max_stackdepth as usize)?; + capacity.checked_mul(core::mem::size_of::()) +} + impl PyPayload for PyFunction { #[inline] fn class(ctx: &Context) -> &'static Py { @@ -1351,6 +1377,7 @@ pub(crate) fn vectorcall_function( let has_kwargs = kwnames.is_some_and(|kw| !kw.is_empty()); let is_simple = !has_kwargs + && code.flags.contains(bytecode::CodeFlags::OPTIMIZED) && !code.flags.contains(bytecode::CodeFlags::VARARGS) && !code.flags.contains(bytecode::CodeFlags::VARKEYWORDS) && code.kwonlyarg_count == 0 @@ -1361,37 +1388,8 @@ pub(crate) fn vectorcall_function( if is_simple && nargs == code.arg_count as usize { // FAST PATH: simple positional-only call, exact arg count. // Move owned args directly into fastlocals — no clone needed. - let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) { - None // lazy allocation — most frames never access locals dict - } else { - Some(ArgMapping::from_dict_exact(zelf.globals.clone())) - }; - - let frame = Frame::new( - code.to_owned(), - Scope::new(locals, zelf.globals.clone()), - zelf.builtins.clone(), - zelf.closure.as_ref().map_or(&[], |c| c.as_slice()), - Some(zelf.to_owned().into()), - true, // Always use datastack (is_simple excludes gen/coro) - vm, - ) - .into_ref(&vm.ctx); - - { - let fastlocals = unsafe { frame.fastlocals_mut() }; - for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..nargs)) { - *slot = Some(arg); - } - } - - if let Some(cell2arg) = code.cell2arg.as_deref() { - let fastlocals = unsafe { frame.fastlocals_mut() }; - for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) { - let x = fastlocals[*arg_idx as usize].take(); - frame.set_cell_contents(cell_idx, x); - } - } + args.truncate(nargs); + let frame = zelf.prepare_exact_args_frame(args, vm); let result = vm.run_frame(frame.clone()); unsafe { diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs index 6322c5bee7f..8e98fc6e5c4 100644 --- a/crates/vm/src/builtins/str.rs +++ b/crates/vm/src/builtins/str.rs @@ -1691,7 +1691,7 @@ impl ToPyObject for char { fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef { let cp = self as u32; if cp <= u8::MAX as u32 { - vm.ctx.latin1_char_cache[cp as usize].clone().into() + vm.ctx.latin1_char(cp as u8).into() } else { vm.ctx.new_str(self).into() } @@ -1702,7 +1702,7 @@ impl ToPyObject for CodePoint { fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef { let cp = self.to_u32(); if cp <= u8::MAX as u32 { - vm.ctx.latin1_char_cache[cp as usize].clone().into() + vm.ctx.latin1_char(cp as u8).into() } else { vm.ctx.new_str(self).into() } @@ -1747,7 +1747,7 @@ impl ToPyObject for AsciiString { impl ToPyObject for AsciiChar { fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef { - vm.ctx.new_str(self).into() + vm.ctx.latin1_char(u8::from(self)).into() } } diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index f383441e4ef..3a1cebb9d10 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -3,8 +3,8 @@ use super::{ PyUtf8StrRef, PyWeak, mappingproxy::PyMappingProxy, object, union_, }; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, - VirtualMachine, + AsObject, Context, Py, PyAtomicRef, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + TryFromObject, VirtualMachine, builtins::{ PyBaseExceptionRef, descriptor::{ @@ -18,7 +18,7 @@ use crate::{ common::{ ascii, borrow::BorrowedValue, - lock::{PyRwLock, PyRwLockReadGuard}, + lock::{PyMutex, PyRwLock, PyRwLockReadGuard}, }, function::{FuncArgs, KwArgs, OptionalArg, PyMethodDef, PySetterValue}, object::{Traverse, TraverseFn}, @@ -228,8 +228,7 @@ unsafe impl crate::object::Traverse for PyType { .map(|(_, v)| v.traverse(tracer_fn)) .count(); if let Some(ext) = self.heaptype_ext.as_ref() { - ext.specialization_init.read().traverse(tracer_fn); - ext.specialization_getitem.read().traverse(tracer_fn); + ext.specialization_cache.traverse(tracer_fn); } } @@ -259,18 +258,7 @@ unsafe impl crate::object::Traverse for PyType { } } if let Some(ext) = self.heaptype_ext.as_ref() { - if let Some(mut guard) = ext.specialization_init.try_write() - && let Some(init) = guard.take() - { - out.push(init.into()); - } - if let Some(mut guard) = ext.specialization_getitem.try_write() - && let Some(getitem) = guard.take() - { - out.push(getitem.into()); - ext.specialization_getitem_version - .store(0, Ordering::Release); - } + ext.specialization_cache.clear_into(out); } } } @@ -281,9 +269,99 @@ pub struct HeapTypeExt { pub qualname: PyRwLock, pub slots: Option>>, pub type_data: PyRwLock>, - pub specialization_init: PyRwLock>>, - pub specialization_getitem: PyRwLock>>, - pub specialization_getitem_version: AtomicU32, + pub specialization_cache: TypeSpecializationCache, +} + +pub struct TypeSpecializationCache { + pub init: PyAtomicRef>, + pub getitem: PyAtomicRef>, + pub getitem_version: AtomicU32, + // Serialize cache writes/invalidation similar to CPython's BEGIN_TYPE_LOCK. + write_lock: PyMutex<()>, + retired: PyRwLock>, +} + +impl TypeSpecializationCache { + fn new() -> Self { + Self { + init: PyAtomicRef::from(None::>), + getitem: PyAtomicRef::from(None::>), + getitem_version: AtomicU32::new(0), + write_lock: PyMutex::new(()), + retired: PyRwLock::new(Vec::new()), + } + } + + #[inline] + fn retire_old_function(&self, old: Option>) { + if let Some(old) = old { + self.retired.write().push(old.into()); + } + } + + #[inline] + fn swap_init(&self, new_init: Option>, vm: Option<&VirtualMachine>) { + if let Some(vm) = vm { + // Keep replaced refs alive for the currently executing frame, matching + // CPython-style "old pointer remains valid during ongoing execution" + // without accumulating global retired refs. + self.init.swap_to_temporary_refs(new_init, vm); + return; + } + // SAFETY: old value is moved to `retired`, so it stays alive while + // concurrent readers may still hold borrowed references. + let old = unsafe { self.init.swap(new_init) }; + self.retire_old_function(old); + } + + #[inline] + fn swap_getitem(&self, new_getitem: Option>, vm: Option<&VirtualMachine>) { + if let Some(vm) = vm { + self.getitem.swap_to_temporary_refs(new_getitem, vm); + return; + } + // SAFETY: old value is moved to `retired`, so it stays alive while + // concurrent readers may still hold borrowed references. + let old = unsafe { self.getitem.swap(new_getitem) }; + self.retire_old_function(old); + } + + #[inline] + fn invalidate_for_type_modified(&self) { + let _guard = self.write_lock.lock(); + // _spec_cache contract: type modification invalidates all cached + // specialization functions. + self.swap_init(None, None); + self.swap_getitem(None, None); + } + + fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { + if let Some(init) = self.init.deref() { + tracer_fn(init.as_object()); + } + if let Some(getitem) = self.getitem.deref() { + tracer_fn(getitem.as_object()); + } + self.retired + .read() + .iter() + .map(|obj| obj.traverse(tracer_fn)) + .count(); + } + + fn clear_into(&self, out: &mut Vec) { + let _guard = self.write_lock.lock(); + let old_init = unsafe { self.init.swap(None) }; + if let Some(old_init) = old_init { + out.push(old_init.into()); + } + let old_getitem = unsafe { self.getitem.swap(None) }; + if let Some(old_getitem) = old_getitem { + out.push(old_getitem.into()); + } + self.getitem_version.store(0, Ordering::Release); + out.extend(self.retired.write().drain(..)); + } } pub struct PointerSlot(NonNull); @@ -412,10 +490,7 @@ impl PyType { /// Invalidate this type's version tag and cascade to all subclasses. pub fn modified(&self) { if let Some(ext) = self.heaptype_ext.as_ref() { - *ext.specialization_init.write() = None; - *ext.specialization_getitem.write() = None; - ext.specialization_getitem_version - .store(0, Ordering::Release); + ext.specialization_cache.invalidate_for_type_modified(); } // If already invalidated, all subclasses must also be invalidated // (guaranteed by the MRO invariant in assign_version_tag). @@ -470,9 +545,7 @@ impl PyType { qualname: PyRwLock::new(name), slots: None, type_data: PyRwLock::new(None), - specialization_init: PyRwLock::new(None), - specialization_getitem: PyRwLock::new(None), - specialization_getitem_version: AtomicU32::new(0), + specialization_cache: TypeSpecializationCache::new(), }; let base = bases[0].clone(); @@ -831,6 +904,7 @@ impl PyType { &self, init: PyRef, tp_version: u32, + vm: &VirtualMachine, ) -> bool { let Some(ext) = self.heaptype_ext.as_ref() else { return false; @@ -838,11 +912,14 @@ impl PyType { if tp_version == 0 { return false; } - let mut guard = ext.specialization_init.write(); if self.tp_version_tag.load(Ordering::Acquire) != tp_version { return false; } - *guard = Some(init); + let _guard = ext.specialization_cache.write_lock.lock(); + if self.tp_version_tag.load(Ordering::Acquire) != tp_version { + return false; + } + ext.specialization_cache.swap_init(Some(init), Some(vm)); true } @@ -855,11 +932,12 @@ impl PyType { if tp_version == 0 { return None; } - let guard = ext.specialization_init.read(); if self.tp_version_tag.load(Ordering::Acquire) != tp_version { return None; } - guard.as_ref().map(|init| init.to_owned()) + ext.specialization_cache + .init + .to_owned_ordering(Ordering::Acquire) } /// Cache __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization. @@ -868,6 +946,7 @@ impl PyType { &self, getitem: PyRef, tp_version: u32, + vm: &VirtualMachine, ) -> bool { let Some(ext) = self.heaptype_ext.as_ref() else { return false; @@ -875,34 +954,38 @@ impl PyType { if tp_version == 0 { return false; } - let func_version = getitem.get_version_for_current_state(); - if func_version == 0 { + let _guard = ext.specialization_cache.write_lock.lock(); + if self.tp_version_tag.load(Ordering::Acquire) != tp_version { return false; } - let mut guard = ext.specialization_getitem.write(); - if self.tp_version_tag.load(Ordering::Acquire) != tp_version { + let func_version = getitem.get_version_for_current_state(); + if func_version == 0 { return false; } - *guard = Some(getitem); - ext.specialization_getitem_version - .store(func_version, Ordering::Release); + ext.specialization_cache + .swap_getitem(Some(getitem), Some(vm)); + ext.specialization_cache + .getitem_version + .store(func_version, Ordering::Relaxed); true } /// Read cached __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization. pub(crate) fn get_cached_getitem_for_specialization(&self) -> Option<(PyRef, u32)> { let ext = self.heaptype_ext.as_ref()?; - let cached_version = ext.specialization_getitem_version.load(Ordering::Acquire); + // Match CPython check order: pointer (Acquire) then function version. + let getitem = ext + .specialization_cache + .getitem + .to_owned_ordering(Ordering::Acquire)?; + let cached_version = ext + .specialization_cache + .getitem_version + .load(Ordering::Relaxed); if cached_version == 0 { return None; } - let guard = ext.specialization_getitem.read(); - if self.tp_version_tag.load(Ordering::Acquire) == 0 { - return None; - } - guard - .as_ref() - .map(|getitem| (getitem.to_owned(), cached_version)) + Some((getitem, cached_version)) } pub fn get_direct_attr(&self, attr_name: &'static PyStrInterned) -> Option { @@ -2001,9 +2084,7 @@ impl Constructor for PyType { qualname: PyRwLock::new(qualname), slots: heaptype_slots.clone(), type_data: PyRwLock::new(None), - specialization_init: PyRwLock::new(None), - specialization_getitem: PyRwLock::new(None), - specialization_getitem_version: AtomicU32::new(0), + specialization_cache: TypeSpecializationCache::new(), }; (slots, heaptype_ext) }; diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 7aea8927663..fc61a8b3899 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1,3 +1,5 @@ +// spell-checker: ignore compactlong compactlongs + use crate::anystr::AnyStr; #[cfg(feature = "flame")] use crate::bytecode::InstructionMetadata; @@ -12,7 +14,10 @@ use crate::{ builtin_func::PyNativeFunction, descriptor::{MemberGetter, PyMemberDescriptor, PyMethodDescriptor}, frame::stack_analysis, - function::{PyBoundMethod, PyCell, PyCellRef, PyFunction, vectorcall_function}, + function::{ + PyBoundMethod, PyCell, PyCellRef, PyFunction, datastack_frame_size_bytes_for_code, + vectorcall_function, + }, list::PyListIterator, range::PyRangeIterator, tuple::{PyTuple, PyTupleIterator, PyTupleRef}, @@ -1045,6 +1050,208 @@ struct ExecutingFrame<'a> { monitoring_mask: u32, } +#[inline] +fn specialization_compact_int_value(i: &PyInt, vm: &VirtualMachine) -> Option { + // _PyLong_IsCompact(): a one-digit PyLong (base 2^30), + // i.e. abs(value) <= 2^30 - 1. + const CPYTHON_COMPACT_LONG_ABS_MAX: i64 = (1i64 << 30) - 1; + let v = i.try_to_primitive::(vm).ok()?; + if (-CPYTHON_COMPACT_LONG_ABS_MAX..=CPYTHON_COMPACT_LONG_ABS_MAX).contains(&v) { + Some(v as isize) + } else { + None + } +} + +#[inline] +fn compact_int_from_obj(obj: &PyObject, vm: &VirtualMachine) -> Option { + obj.downcast_ref_if_exact::(vm) + .and_then(|i| specialization_compact_int_value(i, vm)) +} + +#[inline] +fn exact_float_from_obj(obj: &PyObject, vm: &VirtualMachine) -> Option { + obj.downcast_ref_if_exact::(vm).map(|f| f.to_f64()) +} + +#[inline] +fn specialization_nonnegative_compact_index(i: &PyInt, vm: &VirtualMachine) -> Option { + // _PyLong_IsNonNegativeCompact(): a single base-2^30 digit. + const CPYTHON_COMPACT_LONG_MAX: u64 = (1u64 << 30) - 1; + let v = i.try_to_primitive::(vm).ok()?; + if v <= CPYTHON_COMPACT_LONG_MAX { + Some(v as usize) + } else { + None + } +} + +fn release_datastack_frame(frame: &Py, vm: &VirtualMachine) { + unsafe { + if let Some(base) = frame.materialize_localsplus() { + vm.datastack_pop(base); + } + } +} + +type BinaryOpExtendGuard = fn(&PyObject, &PyObject, &VirtualMachine) -> bool; +type BinaryOpExtendAction = fn(&PyObject, &PyObject, &VirtualMachine) -> Option; + +struct BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator, + guard: BinaryOpExtendGuard, + action: BinaryOpExtendAction, +} + +const BINARY_OP_EXTEND_EXTERNAL_CACHE_OFFSET: usize = 1; + +#[inline] +fn compactlongs_guard(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> bool { + compact_int_from_obj(lhs, vm).is_some() && compact_int_from_obj(rhs, vm).is_some() +} + +macro_rules! bitwise_longs_action { + ($name:ident, $op:tt) => { + #[inline] + fn $name(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> Option { + let lhs_val = compact_int_from_obj(lhs, vm)?; + let rhs_val = compact_int_from_obj(rhs, vm)?; + Some(vm.ctx.new_int(lhs_val $op rhs_val).into()) + } + }; +} +bitwise_longs_action!(compactlongs_or, |); +bitwise_longs_action!(compactlongs_and, &); +bitwise_longs_action!(compactlongs_xor, ^); + +#[inline] +fn float_compactlong_guard(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> bool { + exact_float_from_obj(lhs, vm).is_some_and(|f| !f.is_nan()) + && compact_int_from_obj(rhs, vm).is_some() +} + +#[inline] +fn nonzero_float_compactlong_guard(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> bool { + float_compactlong_guard(lhs, rhs, vm) && compact_int_from_obj(rhs, vm).is_some_and(|v| v != 0) +} + +macro_rules! float_long_action { + ($name:ident, $op:tt) => { + #[inline] + fn $name(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> Option { + let lhs_val = exact_float_from_obj(lhs, vm)?; + let rhs_val = compact_int_from_obj(rhs, vm)?; + Some(vm.ctx.new_float(lhs_val $op rhs_val as f64).into()) + } + }; +} +float_long_action!(float_compactlong_add, +); +float_long_action!(float_compactlong_subtract, -); +float_long_action!(float_compactlong_multiply, *); +float_long_action!(float_compactlong_true_div, /); + +#[inline] +fn compactlong_float_guard(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> bool { + compact_int_from_obj(lhs, vm).is_some() + && exact_float_from_obj(rhs, vm).is_some_and(|f| !f.is_nan()) +} + +#[inline] +fn nonzero_compactlong_float_guard(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> bool { + compactlong_float_guard(lhs, rhs, vm) && exact_float_from_obj(rhs, vm).is_some_and(|f| f != 0.0) +} + +macro_rules! long_float_action { + ($name:ident, $op:tt) => { + #[inline] + fn $name(lhs: &PyObject, rhs: &PyObject, vm: &VirtualMachine) -> Option { + let lhs_val = compact_int_from_obj(lhs, vm)?; + let rhs_val = exact_float_from_obj(rhs, vm)?; + Some(vm.ctx.new_float(lhs_val as f64 $op rhs_val).into()) + } + }; +} +long_float_action!(compactlong_float_add, +); +long_float_action!(compactlong_float_subtract, -); +long_float_action!(compactlong_float_multiply, *); +long_float_action!(compactlong_float_true_div, /); + +static BINARY_OP_EXTEND_DESCRIPTORS: &[BinaryOpExtendSpecializationDescr] = &[ + // long-long arithmetic + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Or, + guard: compactlongs_guard, + action: compactlongs_or, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::And, + guard: compactlongs_guard, + action: compactlongs_and, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Xor, + guard: compactlongs_guard, + action: compactlongs_xor, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::InplaceOr, + guard: compactlongs_guard, + action: compactlongs_or, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::InplaceAnd, + guard: compactlongs_guard, + action: compactlongs_and, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::InplaceXor, + guard: compactlongs_guard, + action: compactlongs_xor, + }, + // float-long arithmetic + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Add, + guard: float_compactlong_guard, + action: float_compactlong_add, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Subtract, + guard: float_compactlong_guard, + action: float_compactlong_subtract, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::TrueDivide, + guard: nonzero_float_compactlong_guard, + action: float_compactlong_true_div, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Multiply, + guard: float_compactlong_guard, + action: float_compactlong_multiply, + }, + // long-float arithmetic + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Add, + guard: compactlong_float_guard, + action: compactlong_float_add, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Subtract, + guard: compactlong_float_guard, + action: compactlong_float_subtract, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::TrueDivide, + guard: nonzero_compactlong_float_guard, + action: compactlong_float_true_div, + }, + BinaryOpExtendSpecializationDescr { + oparg: bytecode::BinaryOperator::Multiply, + guard: compactlong_float_guard, + action: compactlong_float_multiply, + }, +]; + impl fmt::Debug for ExecutingFrame<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ExecutingFrame") @@ -1055,6 +1262,57 @@ impl fmt::Debug for ExecutingFrame<'_> { } impl ExecutingFrame<'_> { + #[inline] + fn monitoring_disabled_for_code(&self, vm: &VirtualMachine) -> bool { + self.code.is(&vm.ctx.init_cleanup_code) + } + + fn specialization_new_init_cleanup_frame(&self, vm: &VirtualMachine) -> FrameRef { + Frame::new( + vm.ctx.init_cleanup_code.clone(), + Scope::new( + Some(ArgMapping::from_dict_exact(vm.ctx.new_dict())), + self.globals.clone(), + ), + self.builtins.clone(), + &[], + None, + true, + vm, + ) + .into_ref(&vm.ctx) + } + + fn specialization_run_init_cleanup_shim( + &self, + new_obj: PyObjectRef, + init_func: &Py, + pos_args: Vec, + vm: &VirtualMachine, + ) -> PyResult { + let shim = self.specialization_new_init_cleanup_frame(vm); + let shim_result = vm.with_frame_untraced(shim.clone(), |shim| { + shim.with_exec(vm, |mut exec| exec.push_value(new_obj.clone())); + + let mut all_args = Vec::with_capacity(pos_args.len() + 1); + all_args.push(new_obj.clone()); + all_args.extend(pos_args); + + let init_frame = init_func.prepare_exact_args_frame(all_args, vm); + let init_result = vm.run_frame(init_frame.clone()); + release_datastack_frame(&init_frame, vm); + let init_result = init_result?; + + shim.with_exec(vm, |mut exec| exec.push_value(init_result)); + match shim.run(vm)? { + ExecutionResult::Return(value) => Ok(value), + ExecutionResult::Yield(_) => unreachable!("_Py_InitCleanup shim cannot yield"), + } + }); + release_datastack_frame(&shim, vm); + shim_result + } + #[inline(always)] fn update_lasti(&mut self, f: impl FnOnce(&mut u32)) { let mut val = self.lasti.load(Relaxed); @@ -2896,6 +3154,17 @@ impl ExecutingFrame<'_> { self.code.instructions.quicken(); atomic::fence(atomic::Ordering::Release); } + if self.monitoring_disabled_for_code(vm) { + let global_ver = vm + .state + .instrumentation_version + .load(atomic::Ordering::Acquire); + monitoring::instrument_code(self.code, 0); + self.code + .instrumentation_version + .store(global_ver, atomic::Ordering::Release); + return Ok(None); + } // Check if bytecode needs re-instrumentation let global_ver = vm .state @@ -3732,7 +4001,7 @@ impl ExecutingFrame<'_> { let value = self.pop_value(); if let Some(list) = obj.downcast_ref_if_exact::(vm) && let Some(int_idx) = idx.downcast_ref_if_exact::(vm) - && let Some(i) = Self::specialization_nonnegative_compact_index(int_idx, vm) + && let Some(i) = specialization_nonnegative_compact_index(int_idx, vm) { let mut vec = list.borrow_vec_mut(); if i < vec.len() { @@ -3812,133 +4081,12 @@ impl ExecutingFrame<'_> { let op = self.binary_op_from_arg(arg); let b = self.top_value(); let a = self.nth_value(1); - - let fast = match op { - bytecode::BinaryOperator::And | bytecode::BinaryOperator::InplaceAnd => { - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let (Some(a_val), Some(b_val)) = ( - Self::specialization_compact_int_value(a_int, vm), - Self::specialization_compact_int_value(b_int, vm), - ) { - Some(vm.ctx.new_int(a_val & b_val).into()) - } else { - None - } - } - bytecode::BinaryOperator::Or | bytecode::BinaryOperator::InplaceOr => { - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let (Some(a_val), Some(b_val)) = ( - Self::specialization_compact_int_value(a_int, vm), - Self::specialization_compact_int_value(b_int, vm), - ) { - Some(vm.ctx.new_int(a_val | b_val).into()) - } else { - None - } - } - bytecode::BinaryOperator::Xor | bytecode::BinaryOperator::InplaceXor => { - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let (Some(a_val), Some(b_val)) = ( - Self::specialization_compact_int_value(a_int, vm), - Self::specialization_compact_int_value(b_int, vm), - ) { - Some(vm.ctx.new_int(a_val ^ b_val).into()) - } else { - None - } - } - bytecode::BinaryOperator::Add => { - if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) - && !a_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_float.to_f64() + b_val as f64).into()) - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(a_val) = - Self::specialization_compact_int_value(a_int, vm) - && !b_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_val as f64 + b_float.to_f64()).into()) - } else { - None - } - } - bytecode::BinaryOperator::Subtract => { - if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) - && !a_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_float.to_f64() - b_val as f64).into()) - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(a_val) = - Self::specialization_compact_int_value(a_int, vm) - && !b_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_val as f64 - b_float.to_f64()).into()) - } else { - None - } - } - bytecode::BinaryOperator::Multiply => { - if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) - && !a_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_float.to_f64() * b_val as f64).into()) - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(a_val) = - Self::specialization_compact_int_value(a_int, vm) - && !b_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_val as f64 * b_float.to_f64()).into()) - } else { - None - } - } - bytecode::BinaryOperator::TrueDivide => { - if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) - && b_val != 0 - && !a_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_float.to_f64() / b_val as f64).into()) - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(a_val) = - Self::specialization_compact_int_value(a_int, vm) - && b_float.to_f64() != 0.0 - && !b_float.to_f64().is_nan() - { - Some(vm.ctx.new_float(a_val as f64 / b_float.to_f64()).into()) - } else { - None - } - } - _ => None, - }; - - if let Some(result) = fast { + let cache_base = self.lasti() as usize; + if let Some(descr) = self.read_cached_binary_op_extend_descr(cache_base) + && descr.oparg == op + && (descr.guard)(a, b, vm) + && let Some(result) = (descr.action)(a, b, vm) + { self.pop_value(); self.pop_value(); self.push_value(result); @@ -3953,7 +4101,7 @@ impl ExecutingFrame<'_> { if let (Some(list), Some(idx)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Some(i) = Self::specialization_nonnegative_compact_index(idx, vm) + ) && let Some(i) = specialization_nonnegative_compact_index(idx, vm) { let vec = list.borrow_vec(); if i < vec.len() { @@ -3973,7 +4121,7 @@ impl ExecutingFrame<'_> { if let (Some(tuple), Some(idx)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Some(i) = Self::specialization_nonnegative_compact_index(idx, vm) + ) && let Some(i) = specialization_nonnegative_compact_index(idx, vm) { let elements = tuple.as_slice(); if i < elements.len() { @@ -4015,7 +4163,7 @@ impl ExecutingFrame<'_> { if let (Some(a_str), Some(b_int)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Some(i) = Self::specialization_nonnegative_compact_index(b_int, vm) + ) && let Some(i) = specialization_nonnegative_compact_index(b_int, vm) && let Ok(ch) = a_str.getitem_by_index(vm, i as isize) && ch.is_ascii() { @@ -4455,7 +4603,6 @@ impl ExecutingFrame<'_> { let self_index = stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); if let Some(descr) = callable.downcast_ref_if_exact::(vm) - && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS | PyMethodFlags::FASTCALL @@ -4503,7 +4650,6 @@ impl ExecutingFrame<'_> { let self_index = stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); if let Some(descr) = callable.downcast_ref_if_exact::(vm) - && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS | PyMethodFlags::FASTCALL @@ -4551,7 +4697,6 @@ impl ExecutingFrame<'_> { stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); if total_nargs > 0 && let Some(descr) = callable.downcast_ref_if_exact::(vm) - && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS | PyMethodFlags::FASTCALL @@ -4631,17 +4776,25 @@ impl ExecutingFrame<'_> { && let Some(init_func) = cls.get_cached_init_for_specialization(cached_version) && let Some(cls_alloc) = cls.slots.alloc.load() { - // co_framesize + _Py_InitCleanup.co_framesize guard. - // We do not materialize frame-specials on datastack, so use - // only the cleanup shim's eval-stack payload (2 stack slots). - const INIT_CLEANUP_STACK_BYTES: usize = 2 * core::mem::size_of::(); + // Match CPython's `code->co_framesize + _Py_InitCleanup.co_framesize` + // shape, using RustPython's datastack-backed frame size + // equivalent for the extra shim frame. + let init_cleanup_stack_bytes = + datastack_frame_size_bytes_for_code(&vm.ctx.init_cleanup_code) + .expect("_Py_InitCleanup shim is not a generator/coroutine"); if !self.specialization_has_datastack_space_for_func_with_extra( vm, &init_func, - INIT_CLEANUP_STACK_BYTES, + init_cleanup_stack_bytes, ) { return self.execute_call_vectorcall(nargs, vm); } + // CPython creates `_Py_InitCleanup` + `__init__` frames here. + // Keep the guard conservative and deopt when the effective + // recursion budget for those two frames is not available. + if self.specialization_call_recursion_guard_with_extra_frames(vm, 1) { + return self.execute_call_vectorcall(nargs, vm); + } // Allocate object directly (tp_new == object.__new__, tp_alloc == generic). let cls_ref = cls.to_owned(); let new_obj = cls_alloc(cls_ref, 0, vm)?; @@ -4650,25 +4803,9 @@ impl ExecutingFrame<'_> { let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); let _null = self.pop_value_opt(); // self_or_null (None) let _callable = self.pop_value(); // callable (type) - - let mut all_args = Vec::with_capacity(pos_args.len() + 1); - all_args.push(new_obj.clone()); - all_args.extend(pos_args); - - let init_callable: PyObjectRef = init_func.into(); - let effective_nargs = all_args.len(); - let init_result = - vectorcall_function(&init_callable, all_args, effective_nargs, None, vm)?; - - // EXIT_INIT_CHECK: __init__ must return None - if !vm.is_none(&init_result) { - return Err(vm.new_type_error(format!( - "__init__() should return None, not '{}'", - init_result.class().name() - ))); - } - - self.push_value(new_obj); + let result = self + .specialization_run_init_cleanup_shim(new_obj, &init_func, pos_args, vm)?; + self.push_value(result); return Ok(None); } self.execute_call_vectorcall(nargs, vm) @@ -4687,7 +4824,6 @@ impl ExecutingFrame<'_> { stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); if total_nargs > 0 && let Some(descr) = callable.downcast_ref_if_exact::(vm) - && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS | PyMethodFlags::FASTCALL @@ -5057,8 +5193,8 @@ impl ExecutingFrame<'_> { a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), ) && let (Some(a_val), Some(b_val)) = ( - Self::specialization_compact_int_value(a_int, vm), - Self::specialization_compact_int_value(b_int, vm), + specialization_compact_int_value(a_int, vm), + specialization_compact_int_value(b_int, vm), ) { let op = self.compare_op_from_arg(arg); let result = op.eval_ord(a_val.cmp(&b_val)); @@ -5463,6 +5599,18 @@ impl ExecutingFrame<'_> { instruction.is_instrumented(), "execute_instrumented called with non-instrumented opcode {instruction:?}" ); + if self.monitoring_disabled_for_code(vm) { + let global_ver = vm + .state + .instrumentation_version + .load(atomic::Ordering::Acquire); + monitoring::instrument_code(self.code, 0); + self.code + .instrumentation_version + .store(global_ver, atomic::Ordering::Release); + self.update_lasti(|i| *i -= 1); + return Ok(None); + } self.monitoring_mask = vm.state.monitoring_events.load(); match instruction { Instruction::InstrumentedResume => { @@ -7094,6 +7242,51 @@ impl ExecutingFrame<'_> { } } + #[inline] + unsafe fn write_cached_binary_op_extend_descr( + &self, + cache_base: usize, + descr: Option<&'static BinaryOpExtendSpecializationDescr>, + ) { + let ptr = descr.map_or(0, |d| { + d as *const BinaryOpExtendSpecializationDescr as usize + }); + unsafe { + self.code + .instructions + .write_cache_ptr(cache_base + BINARY_OP_EXTEND_EXTERNAL_CACHE_OFFSET, ptr); + } + } + + #[inline] + fn read_cached_binary_op_extend_descr( + &self, + cache_base: usize, + ) -> Option<&'static BinaryOpExtendSpecializationDescr> { + let ptr = self + .code + .instructions + .read_cache_ptr(cache_base + BINARY_OP_EXTEND_EXTERNAL_CACHE_OFFSET); + if ptr == 0 { + return None; + } + // SAFETY: We only store pointers to entries in `BINARY_OP_EXTEND_DESCRIPTORS`. + Some(unsafe { &*(ptr as *const BinaryOpExtendSpecializationDescr) }) + } + + #[inline] + fn binary_op_extended_specialization( + &self, + op: bytecode::BinaryOperator, + lhs: &PyObject, + rhs: &PyObject, + vm: &VirtualMachine, + ) -> Option<&'static BinaryOpExtendSpecializationDescr> { + BINARY_OP_EXTEND_DESCRIPTORS + .iter() + .find(|d| d.oparg == op && (d.guard)(lhs, rhs, vm)) + } + fn load_attr(&mut self, vm: &VirtualMachine, oparg: LoadAttr) -> FrameResult { self.adaptive(|s, ii, cb| s.specialize_load_attr(vm, oparg, ii, cb)); self.load_attr_slow(vm, oparg) @@ -7187,19 +7380,38 @@ impl ExecutingFrame<'_> { return; } - // Module attribute access: use LoadAttrModule - if obj.downcast_ref_if_exact::(_vm).is_some() { - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, type_version); + let attr_name = self.code.names[oparg.name_idx() as usize]; + + // Match CPython: only specialize module attribute loads when the + // current module dict has no __getattr__ override and the attribute is + // already present. + if let Some(module) = obj.downcast_ref_if_exact::(_vm) { + let module_dict = module.dict(); + match ( + module_dict.get_item_opt(identifier!(_vm, __getattr__), _vm), + module_dict.get_item_opt(attr_name, _vm), + ) { + (Ok(None), Ok(Some(_))) => { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrModule); + } + (Ok(_), Ok(_)) => self.cooldown_adaptive_at(cache_base), + _ => unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + }, } - self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrModule); return; } - let attr_name = self.code.names[oparg.name_idx() as usize]; - // Look up attr in class via MRO let cls_attr = cls.get_attr(attr_name); let class_has_dict = cls.slots.flags.has_feature(PyTypeFlags::HAS_DICT); @@ -7309,8 +7521,11 @@ impl ExecutingFrame<'_> { Instruction::LoadAttrNondescriptorWithValues, ); } else { - // No class attr, must be in instance dict - let use_hint = if let Some(dict) = obj.dict() { + // Match CPython ABSENT/no-shadow behavior: if the + // attribute is missing on both the class and the current + // instance, keep the generic opcode and just enter + // cooldown instead of specializing a repeated miss path. + let has_instance_attr = if let Some(dict) = obj.dict() { match dict.get_item_opt(attr_name, _vm) { Ok(Some(_)) => true, Ok(None) => false, @@ -7331,20 +7546,16 @@ impl ExecutingFrame<'_> { } else { false }; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, type_version); + if has_instance_attr { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrWithHint); + } else { + self.cooldown_adaptive_at(cache_base); } - self.specialize_at( - instr_idx, - cache_base, - if use_hint { - Instruction::LoadAttrWithHint - } else { - Instruction::LoadAttrInstanceValue - }, - ); } } else if let Some(ref descr) = cls_attr { // No dict support, plain class attr — cache directly @@ -7358,15 +7569,8 @@ impl ExecutingFrame<'_> { Instruction::LoadAttrNondescriptorNoDict, ); } else { - // No dict, no class attr — can't specialize - unsafe { - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } + // No dict and no class attr: repeated miss path, so cooldown. + self.cooldown_adaptive_at(cache_base); } } } @@ -7518,6 +7722,11 @@ impl ExecutingFrame<'_> { } let b = self.top_value(); let a = self.nth_value(1); + // `external_cache` in _PyBinaryOpCache is used only by BINARY_OP_EXTEND. + unsafe { + self.write_cached_binary_op_extend_descr(cache_base, None); + } + let mut cached_extend_descr = None; let new_op = match op { bytecode::BinaryOperator::Add => { @@ -7540,28 +7749,9 @@ impl ExecutingFrame<'_> { } else { Some(Instruction::BinaryOpAddUnicode) } - } else if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !a_float.to_f64().is_nan() - && Self::specialization_compact_int_value(b_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !b_float.to_f64().is_nan() - && Self::specialization_compact_int_value(a_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } + } else if let Some(descr) = self.binary_op_extended_specialization(op, a, b, vm) { + cached_extend_descr = Some(descr); + Some(Instruction::BinaryOpExtend) } else { None } @@ -7575,28 +7765,9 @@ impl ExecutingFrame<'_> { && b.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpSubtractFloat) - } else if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !a_float.to_f64().is_nan() - && Self::specialization_compact_int_value(b_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !b_float.to_f64().is_nan() - && Self::specialization_compact_int_value(a_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } + } else if let Some(descr) = self.binary_op_extended_specialization(op, a, b, vm) { + cached_extend_descr = Some(descr); + Some(Instruction::BinaryOpExtend) } else { None } @@ -7610,64 +7781,25 @@ impl ExecutingFrame<'_> { && b.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpMultiplyFloat) - } else if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !a_float.to_f64().is_nan() - && Self::specialization_compact_int_value(b_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !b_float.to_f64().is_nan() - && Self::specialization_compact_int_value(a_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } + } else if let Some(descr) = self.binary_op_extended_specialization(op, a, b, vm) { + cached_extend_descr = Some(descr); + Some(Instruction::BinaryOpExtend) } else { None } } bytecode::BinaryOperator::TrueDivide => { - if let (Some(a_float), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !a_float.to_f64().is_nan() - && Self::specialization_compact_int_value(b_int, vm).is_some_and(|x| x != 0) - { - Some(Instruction::BinaryOpExtend) - } else { - None - } - } else if let (Some(a_int), Some(b_float)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if !b_float.to_f64().is_nan() - && b_float.to_f64() != 0.0 - && Self::specialization_compact_int_value(a_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } + if let Some(descr) = self.binary_op_extended_specialization(op, a, b, vm) { + cached_extend_descr = Some(descr); + Some(Instruction::BinaryOpExtend) } else { None } } bytecode::BinaryOperator::Subscr => { - let b_is_nonnegative_int = b.downcast_ref_if_exact::(vm).is_some_and(|i| { - Self::specialization_nonnegative_compact_index(i, vm).is_some() - }); + let b_is_nonnegative_int = b + .downcast_ref_if_exact::(vm) + .is_some_and(|i| specialization_nonnegative_compact_index(i, vm).is_some()); if a.downcast_ref_if_exact::(vm).is_some() && b_is_nonnegative_int { Some(Instruction::BinaryOpSubscrListInt) } else if a.downcast_ref_if_exact::(vm).is_some() && b_is_nonnegative_int { @@ -7693,7 +7825,11 @@ impl ExecutingFrame<'_> { type_version = cls.assign_version_tag(); } if type_version != 0 { - if cls.cache_getitem_for_specialization(func.to_owned(), type_version) { + if cls.cache_getitem_for_specialization( + func.to_owned(), + type_version, + vm, + ) { Some(Instruction::BinaryOpSubscrGetitem) } else { None @@ -7762,17 +7898,9 @@ impl ExecutingFrame<'_> { | bytecode::BinaryOperator::InplaceAnd | bytecode::BinaryOperator::InplaceOr | bytecode::BinaryOperator::InplaceXor => { - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - if Self::specialization_compact_int_value(a_int, vm).is_some() - && Self::specialization_compact_int_value(b_int, vm).is_some() - { - Some(Instruction::BinaryOpExtend) - } else { - None - } + if let Some(descr) = self.binary_op_extended_specialization(op, a, b, vm) { + cached_extend_descr = Some(descr); + Some(Instruction::BinaryOpExtend) } else { None } @@ -7780,6 +7908,11 @@ impl ExecutingFrame<'_> { _ => None, }; + if matches!(new_op, Some(Instruction::BinaryOpExtend)) { + unsafe { + self.write_cached_binary_op_extend_descr(cache_base, cached_extend_descr); + } + } self.commit_specialization(instr_idx, cache_base, new_op); } @@ -7833,6 +7966,15 @@ impl ExecutingFrame<'_> { } } + #[inline] + fn cooldown_adaptive_at(&mut self, cache_base: usize) { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_COOLDOWN_VALUE); + } + } + /// Commit a specialization result: replace op on success, backoff on failure. #[inline] fn commit_specialization( @@ -8054,9 +8196,7 @@ impl ExecutingFrame<'_> { } // Try to specialize method descriptor calls - if let Some(descr) = callable.downcast_ref_if_exact::(vm) - && descr.method.flags.contains(PyMethodFlags::METHOD) - { + if let Some(descr) = callable.downcast_ref_if_exact::(vm) { let call_cache_entries = Instruction::CallListAppend.cache_entries(); let next_idx = cache_base + call_cache_entries; let next_is_pop_top = if next_idx < self.code.instructions.len() { @@ -8221,16 +8361,27 @@ impl ExecutingFrame<'_> { (cls_new, object_new, cls_alloc, object_alloc) && cls_new_fn as usize == obj_new_fn as usize && cls_alloc_fn as usize == obj_alloc_fn as usize - && let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref_if_exact::(vm) - && init_func.is_simple_for_call_specialization() { + let init = cls.get_attr(identifier!(vm, __init__)); let mut version = cls.tp_version_tag.load(Acquire); if version == 0 { version = cls.assign_version_tag(); } - if version != 0 - && cls.cache_init_for_specialization(init_func.to_owned(), version) + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + if let Some(init) = init + && let Some(init_func) = init.downcast_ref_if_exact::(vm) + && init_func.is_simple_for_call_specialization() + && cls.cache_init_for_specialization(init_func.to_owned(), version, vm) { unsafe { self.code @@ -8471,8 +8622,8 @@ impl ExecutingFrame<'_> { a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), ) { - if Self::specialization_compact_int_value(a_int, vm).is_some() - && Self::specialization_compact_int_value(b_int, vm).is_some() + if specialization_compact_int_value(a_int, vm).is_some() + && specialization_compact_int_value(b_int, vm).is_some() { Some(Instruction::CompareOpInt) } else { @@ -8623,38 +8774,25 @@ impl ExecutingFrame<'_> { Some(frame_size) => frame_size .checked_add(extra_bytes) .is_some_and(|size| vm.datastack_has_space(size)), - None => true, - } - } - - #[inline] - fn specialization_compact_int_value(i: &PyInt, vm: &VirtualMachine) -> Option { - // _PyLong_IsCompact(): a one-digit PyLong (base 2^30), - // i.e. abs(value) <= 2^30 - 1. - const CPYTHON_COMPACT_LONG_ABS_MAX: i64 = (1i64 << 30) - 1; - let v = i.try_to_primitive::(vm).ok()?; - if (-CPYTHON_COMPACT_LONG_ABS_MAX..=CPYTHON_COMPACT_LONG_ABS_MAX).contains(&v) { - Some(v as isize) - } else { - None + None => extra_bytes == 0 || vm.datastack_has_space(extra_bytes), } } #[inline] - fn specialization_nonnegative_compact_index(i: &PyInt, vm: &VirtualMachine) -> Option { - // _PyLong_IsNonNegativeCompact(): a single base-2^30 digit. - const CPYTHON_COMPACT_LONG_MAX: u64 = (1u64 << 30) - 1; - let v = i.try_to_primitive::(vm).ok()?; - if v <= CPYTHON_COMPACT_LONG_MAX { - Some(v as usize) - } else { - None - } + fn specialization_call_recursion_guard(&self, vm: &VirtualMachine) -> bool { + self.specialization_call_recursion_guard_with_extra_frames(vm, 0) } #[inline] - fn specialization_call_recursion_guard(&self, vm: &VirtualMachine) -> bool { - vm.current_recursion_depth().saturating_add(1) >= vm.recursion_limit.get() + fn specialization_call_recursion_guard_with_extra_frames( + &self, + vm: &VirtualMachine, + extra_frames: usize, + ) -> bool { + vm.current_recursion_depth() + .saturating_add(1) + .saturating_add(extra_frames) + >= vm.recursion_limit.get() } #[inline] @@ -8783,9 +8921,7 @@ impl ExecutingFrame<'_> { idx.downcast_ref_if_exact::(vm), ) { let list_len = list.borrow_vec().len(); - if Self::specialization_nonnegative_compact_index(int_idx, vm) - .is_some_and(|i| i < list_len) - { + if specialization_nonnegative_compact_index(int_idx, vm).is_some_and(|i| i < list_len) { Some(Instruction::StoreSubscrListInt) } else { None diff --git a/crates/vm/src/object/ext.rs b/crates/vm/src/object/ext.rs index 11cee6af3ec..e39d1c7765f 100644 --- a/crates/vm/src/object/ext.rs +++ b/crates/vm/src/object/ext.rs @@ -355,11 +355,19 @@ impl From>> for PyAtomicRef> { impl PyAtomicRef> { pub fn deref(&self) -> Option<&Py> { - unsafe { self.inner.load(Ordering::Relaxed).cast::>().as_ref() } + self.deref_ordering(Ordering::Relaxed) + } + + pub fn deref_ordering(&self, ordering: Ordering) -> Option<&Py> { + unsafe { self.inner.load(ordering).cast::>().as_ref() } } pub fn to_owned(&self) -> Option> { - self.deref().map(|x| x.to_owned()) + self.to_owned_ordering(Ordering::Relaxed) + } + + pub fn to_owned_ordering(&self, ordering: Ordering) -> Option> { + self.deref_ordering(ordering).map(|x| x.to_owned()) } /// # Safety @@ -441,16 +449,19 @@ impl From> for PyAtomicRef> { impl PyAtomicRef> { pub fn deref(&self) -> Option<&PyObject> { - unsafe { - self.inner - .load(Ordering::Relaxed) - .cast::() - .as_ref() - } + self.deref_ordering(Ordering::Relaxed) + } + + pub fn deref_ordering(&self, ordering: Ordering) -> Option<&PyObject> { + unsafe { self.inner.load(ordering).cast::().as_ref() } } pub fn to_owned(&self) -> Option { - self.deref().map(|x| x.to_owned()) + self.to_owned_ordering(Ordering::Relaxed) + } + + pub fn to_owned_ordering(&self, ordering: Ordering) -> Option { + self.deref_ordering(ordering).map(|x| x.to_owned()) } /// # Safety diff --git a/crates/vm/src/protocol/callable.rs b/crates/vm/src/protocol/callable.rs index cecb9431fbb..6ff988abbe6 100644 --- a/crates/vm/src/protocol/callable.rs +++ b/crates/vm/src/protocol/callable.rs @@ -146,6 +146,14 @@ pub(crate) enum TraceEvent { } impl TraceEvent { + /// Whether sys.settrace receives this event. + fn is_trace_event(&self) -> bool { + matches!( + self, + Self::Call | Self::Return | Self::Exception | Self::Line | Self::Opcode + ) + } + /// Whether sys.setprofile receives this event. /// In legacy_tracing.c, profile callbacks are only registered for /// PY_RETURN, PY_UNWIND, C_CALL, C_RETURN, C_RAISE. @@ -211,6 +219,7 @@ impl VirtualMachine { return Ok(None); } + let is_trace_event = event.is_trace_event(); let is_profile_event = event.is_profile_event(); let is_opcode_event = event.is_opcode_event(); @@ -231,7 +240,7 @@ impl VirtualMachine { // temporarily disable tracing, during the call to the // tracing function itself. - if !self.is_none(&trace_func) { + if is_trace_event && !self.is_none(&trace_func) { self.use_tracing.set(false); let res = trace_func.call(args.clone(), self); self.use_tracing.set(true); diff --git a/crates/vm/src/stdlib/builtins.rs b/crates/vm/src/stdlib/builtins.rs index c145c5f8a41..ad8819194ba 100644 --- a/crates/vm/src/stdlib/builtins.rs +++ b/crates/vm/src/stdlib/builtins.rs @@ -998,9 +998,7 @@ mod builtins { }; let write = |obj: PyStrRef| vm.call_method(&file, "write", (obj,)); - let sep = options - .sep - .unwrap_or_else(|| PyStr::from(" ").into_ref(&vm.ctx)); + let sep = options.sep.unwrap_or_else(|| vm.ctx.new_str(" ")); let mut first = true; for object in objects { @@ -1013,9 +1011,7 @@ mod builtins { write(object.str(vm)?)?; } - let end = options - .end - .unwrap_or_else(|| PyStr::from("\n").into_ref(&vm.ctx)); + let end = options.end.unwrap_or_else(|| vm.ctx.new_str("\n")); write(end)?; if options.flush.into() { diff --git a/crates/vm/src/types/slot.rs b/crates/vm/src/types/slot.rs index af404d5c956..222d827c7f5 100644 --- a/crates/vm/src/types/slot.rs +++ b/crates/vm/src/types/slot.rs @@ -614,7 +614,7 @@ fn init_wrapper(obj: PyObjectRef, args: FuncArgs, vm: &VirtualMachine) -> PyResu let res = vm.call_special_method(&obj, identifier!(vm, __init__), args)?; if !vm.is_none(&res) { return Err(vm.new_type_error(format!( - "__init__ should return None, not '{:.200}'", + "__init__() should return None, not '{:.200}'", res.class().name() ))); } diff --git a/crates/vm/src/vm/context.rs b/crates/vm/src/vm/context.rs index dfd8829549f..70c409d7af2 100644 --- a/crates/vm/src/vm/context.rs +++ b/crates/vm/src/vm/context.rs @@ -14,6 +14,7 @@ use crate::{ object, pystr, type_::PyAttributes, }, + bytecode::{self, CodeFlags, CodeUnit, Instruction}, class::StaticType, common::rc::PyRc, exceptions, @@ -29,6 +30,7 @@ use malachite_bigint::BigInt; use num_complex::Complex64; use num_traits::ToPrimitive; use rustpython_common::lock::PyRwLock; +use rustpython_compiler_core::{OneIndexed, SourceLocation}; #[derive(Debug)] pub struct Context { @@ -49,6 +51,7 @@ pub struct Context { pub int_cache_pool: Vec, pub(crate) latin1_char_cache: Vec>, pub(crate) ascii_char_cache: Vec>, + pub(crate) init_cleanup_code: PyRef, // there should only be exact objects of str in here, no non-str objects and no subclasses pub(crate) string_pool: StringPool, pub(crate) slot_new_wrapper: PyMethodDef, @@ -353,6 +356,7 @@ impl Context { PyMethodFlags::METHOD, None, ); + let init_cleanup_code = Self::new_init_cleanup_code(&types, &names); let empty_str = unsafe { string_pool.intern("", types.str_type.to_owned()) }; let empty_bytes = create_object(PyBytes::from(Vec::new()), types.bytes_type); @@ -379,6 +383,7 @@ impl Context { int_cache_pool, latin1_char_cache, ascii_char_cache, + init_cleanup_code, string_pool, slot_new_wrapper, names, @@ -388,6 +393,51 @@ impl Context { } } + fn new_init_cleanup_code(types: &TypeZoo, names: &ConstName) -> PyRef { + let loc = SourceLocation { + line: OneIndexed::MIN, + character_offset: OneIndexed::from_zero_indexed(0), + }; + let instructions = [ + CodeUnit { + op: Instruction::ExitInitCheck, + arg: 0.into(), + }, + CodeUnit { + op: Instruction::ReturnValue, + arg: 0.into(), + }, + CodeUnit { + op: Instruction::Resume { + context: bytecode::Arg::marker(), + }, + arg: 0.into(), + }, + ]; + let code = bytecode::CodeObject { + instructions: instructions.into(), + locations: vec![(loc, loc); instructions.len()].into_boxed_slice(), + flags: CodeFlags::OPTIMIZED, + posonlyarg_count: 0, + arg_count: 0, + kwonlyarg_count: 0, + source_path: names.__init__, + first_line_number: None, + max_stackdepth: 2, + obj_name: names.__init__, + qualname: names.__init__, + cell2arg: None, + constants: core::iter::empty().collect(), + names: Vec::new().into_boxed_slice(), + varnames: Vec::new().into_boxed_slice(), + cellvars: Vec::new().into_boxed_slice(), + freevars: Vec::new().into_boxed_slice(), + linetable: Vec::new().into_boxed_slice(), + exceptiontable: Vec::new().into_boxed_slice(), + }; + PyRef::new_ref(PyCode::new(code), types.code_type.to_owned(), None) + } + pub fn intern_str(&self, s: S) -> &'static PyStrInterned { unsafe { self.string_pool.intern(s, self.types.str_type.to_owned()) } } @@ -458,9 +508,28 @@ impl Context { PyComplex::from(value).into_ref(self) } + #[inline] + pub fn latin1_char(&self, ch: u8) -> PyRef { + self.latin1_char_cache[ch as usize].clone() + } + + #[inline] + fn latin1_singleton_index(s: &PyStr) -> Option { + let mut cps = s.as_wtf8().code_points(); + let cp = cps.next()?; + if cps.next().is_some() { + return None; + } + u8::try_from(cp.to_u32()).ok() + } + #[inline] pub fn new_str(&self, s: impl Into) -> PyRef { - s.into().into_ref(self) + let s = s.into(); + if let Some(ch) = Self::latin1_singleton_index(&s) { + return self.latin1_char(ch); + } + s.into_ref(self) } #[inline] diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 72899016675..48d800f09dd 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -1550,7 +1550,7 @@ impl VirtualMachine { frame: FrameRef, f: F, ) -> PyResult { - self.with_frame_exc(frame, None, f) + self.with_frame_impl(frame, None, true, f) } /// Like `with_frame` but allows specifying the initial exception state. @@ -1559,6 +1559,24 @@ impl VirtualMachine { frame: FrameRef, exc: Option, f: F, + ) -> PyResult { + self.with_frame_impl(frame, exc, true, f) + } + + pub(crate) fn with_frame_untraced PyResult>( + &self, + frame: FrameRef, + f: F, + ) -> PyResult { + self.with_frame_impl(frame, None, false, f) + } + + fn with_frame_impl PyResult>( + &self, + frame: FrameRef, + exc: Option, + traced: bool, + f: F, ) -> PyResult { self.with_recursion("", || { // SAFETY: `frame` (FrameRef) stays alive for the entire closure scope, @@ -1594,7 +1612,11 @@ impl VirtualMachine { crate::vm::thread::pop_thread_frame(); } - self.dispatch_traced_frame(&frame, |frame| f(frame.to_owned())) + if traced { + self.dispatch_traced_frame(&frame, |frame| f(frame.to_owned())) + } else { + f(frame.to_owned()) + } }) }