diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index f48bb332334..819d6875b58 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -54,6 +54,8 @@ eofs evalloop excepthandler exceptiontable +fastlocal +fastlocals fblock fblocks fdescr @@ -171,6 +173,7 @@ SMALLBUF SOABI SSLEOF stackdepth +stackref staticbase stginfo storefast diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 132ad61a65b..f1175401676 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1,7 +1,8 @@ #[cfg(feature = "flame")] use crate::bytecode::InstructionMetadata; use crate::{ - AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, + AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, PyStackRef, TryFromObject, + VirtualMachine, builtins::{ PyBaseException, PyBaseExceptionRef, PyCode, PyCoroutine, PyDict, PyDictRef, PyGenerator, PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, PyTemplate, PyTraceback, @@ -60,7 +61,7 @@ enum UnwindReason { struct FrameState { // We need 1 stack per frame /// The main data frame of the stack machine - stack: BoxVec>, + stack: BoxVec>, /// Cell and free variable references (cellvars + freevars). cells_frees: Box<[PyCellRef]>, /// Previous line number for LINE event suppression. @@ -1280,14 +1281,9 @@ impl ExecutingFrame<'_> { // This is 1-indexed to match CPython let idx = index.get(arg) as usize; let stack_len = self.state.stack.len(); - if stack_len < idx { - eprintln!("CopyItem ERROR: stack_len={}, idx={}", stack_len, idx); - eprintln!(" code: {}", self.code.obj_name); - eprintln!(" lasti: {}", self.lasti()); - panic!("CopyItem: stack underflow"); - } + debug_assert!(stack_len >= idx, "CopyItem: stack underflow"); let value = self.state.stack[stack_len - idx].clone(); - self.push_value_opt(value); + self.push_stackref_opt(value); Ok(None) } Instruction::CopyFreeVars { .. } => { @@ -1868,8 +1864,9 @@ impl ExecutingFrame<'_> { self.push_value(x2); Ok(None) } - // TODO: Implement true borrow optimization (skip Arc::clone). - // Currently this just clones like LoadFast. + // Borrow optimization not yet active; falls back to clone. + // push_borrowed() is available but disabled until stack + // lifetime issues at yield/exception points are resolved. Instruction::LoadFastBorrow(idx) => { let idx = idx.get(arg) as usize; let x = unsafe { self.fastlocals.borrow() }[idx] @@ -2502,6 +2499,14 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::YieldValue { arg: oparg } => { + debug_assert!( + self.state + .stack + .iter() + .flatten() + .all(|sr| !sr.is_borrowed()), + "borrowed refs on stack at yield point" + ); let value = self.pop_value(); // arg=0: direct yield (wrapped for async generators) // arg=1: yield from await/yield-from (NOT wrapped) @@ -2681,6 +2686,14 @@ impl ExecutingFrame<'_> { self.unwind_blocks(vm, UnwindReason::Returning { value }) } Instruction::InstrumentedYieldValue => { + debug_assert!( + self.state + .stack + .iter() + .flatten() + .all(|sr| !sr.is_borrowed()), + "borrowed refs on stack at yield point" + ); let value = self.pop_value(); if self.monitoring_mask & monitoring::EVENT_PY_YIELD != 0 { let offset = (self.lasti() - 1) * 2; @@ -3604,18 +3617,24 @@ impl ExecutingFrame<'_> { let mut elements = elements; // Elements on stack from right-to-left: - self.state - .stack - .extend(elements.drain(before + middle..).rev().map(Some)); + self.state.stack.extend( + elements + .drain(before + middle..) + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); let middle_elements = elements.drain(before..).collect(); let t = vm.ctx.new_list(middle_elements); self.push_value(t.into()); // Lastly the first reversed values: - self.state - .stack - .extend(elements.into_iter().rev().map(Some)); + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); Ok(None) } @@ -3854,9 +3873,12 @@ impl ExecutingFrame<'_> { if let Some(elements) = fast_elements { return match elements.len().cmp(&size) { core::cmp::Ordering::Equal => { - self.state - .stack - .extend(elements.into_iter().rev().map(Some)); + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); Ok(None) } core::cmp::Ordering::Greater => Err(vm.new_value_error(format!( @@ -3922,9 +3944,12 @@ impl ExecutingFrame<'_> { Err(vm.new_value_error(msg)) } PyIterReturn::StopIteration(_) => { - self.state - .stack - .extend(elements.into_iter().rev().map(Some)); + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); Ok(None) } } @@ -4057,43 +4082,75 @@ impl ExecutingFrame<'_> { // Block stack functions removed - exception table handles all exception/cleanup #[inline] - #[track_caller] // not a real track_caller but push_value is less useful for debugging - fn push_value_opt(&mut self, obj: Option) { + #[track_caller] + fn push_stackref_opt(&mut self, obj: Option) { match self.state.stack.try_push(obj) { Ok(()) => {} Err(_e) => self.fatal("tried to push value onto stack but overflowed max_stackdepth"), } } + #[inline] + #[track_caller] // not a real track_caller but push_value is less useful for debugging + fn push_value_opt(&mut self, obj: Option) { + self.push_stackref_opt(obj.map(PyStackRef::new_owned)); + } + #[inline] #[track_caller] fn push_value(&mut self, obj: PyObjectRef) { - self.push_value_opt(Some(obj)); + self.push_stackref_opt(Some(PyStackRef::new_owned(obj))); + } + + /// Push a borrowed reference onto the stack (no refcount increment). + /// + /// # Safety + /// The object must remain alive until the borrowed ref is consumed. + /// The compiler guarantees consumption within the same basic block. + #[inline] + #[track_caller] + #[allow(dead_code)] + unsafe fn push_borrowed(&mut self, obj: &PyObject) { + self.push_stackref_opt(Some(unsafe { PyStackRef::new_borrowed(obj) })); } #[inline] fn push_null(&mut self) { - self.push_value_opt(None); + self.push_stackref_opt(None); } - /// Pop a value from the stack, returning None if the stack slot is NULL + /// Pop a raw stackref from the stack, returning None if the stack slot is NULL. #[inline] - fn pop_value_opt(&mut self) -> Option { + fn pop_stackref_opt(&mut self) -> Option { match self.state.stack.pop() { - Some(slot) => slot, // slot is Option + Some(slot) => slot, None => self.fatal("tried to pop from empty stack"), } } + /// Pop a raw stackref from the stack. Panics if NULL. #[inline] #[track_caller] - fn pop_value(&mut self) -> PyObjectRef { + fn pop_stackref(&mut self) -> PyStackRef { expect_unchecked( - self.pop_value_opt(), - "pop value but null found. This is a compiler bug.", + self.pop_stackref_opt(), + "pop stackref but null found. This is a compiler bug.", ) } + /// Pop a value from the stack, returning None if the stack slot is NULL. + /// Automatically promotes borrowed refs to owned. + #[inline] + fn pop_value_opt(&mut self) -> Option { + self.pop_stackref_opt().map(|sr| sr.to_pyobj()) + } + + #[inline] + #[track_caller] + fn pop_value(&mut self) -> PyObjectRef { + self.pop_stackref().to_pyobj() + } + fn call_intrinsic_1( &mut self, func: bytecode::IntrinsicFunction1, @@ -4262,22 +4319,23 @@ impl ExecutingFrame<'_> { ); } self.state.stack.drain(stack_len - count..).map(|obj| { - expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.") + expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.").to_pyobj() }) } #[inline] - fn replace_top(&mut self, mut top: Option) -> Option { + fn replace_top(&mut self, top: Option) -> Option { + let mut slot = top.map(PyStackRef::new_owned); let last = self.state.stack.last_mut().unwrap(); - core::mem::swap(last, &mut top); - top + core::mem::swap(last, &mut slot); + slot.map(|sr| sr.to_pyobj()) } #[inline] #[track_caller] fn top_value(&self) -> &PyObject { match &*self.state.stack { - [.., Some(last)] => last, + [.., Some(last)] => last.as_object(), [.., None] => self.fatal("tried to get top of stack but got NULL"), [] => self.fatal("tried to get top of stack but stack is empty"), } @@ -4288,7 +4346,7 @@ impl ExecutingFrame<'_> { fn nth_value(&self, depth: u32) -> &PyObject { let stack = &self.state.stack; match &stack[stack.len() - depth as usize - 1] { - Some(obj) => obj, + Some(obj) => obj.as_object(), None => unsafe { core::hint::unreachable_unchecked() }, } } @@ -4415,7 +4473,7 @@ fn is_module_initializing(module: &PyObject, vm: &VirtualMachine) -> bool { initializing_attr.try_to_bool(vm).unwrap_or(false) } -fn expect_unchecked(optional: Option, err_msg: &'static str) -> PyObjectRef { +fn expect_unchecked(optional: Option, err_msg: &'static str) -> T { if cfg!(debug_assertions) { optional.expect(err_msg) } else { diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index 8a15688f591..df347124f43 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -98,7 +98,7 @@ pub mod windows; pub use self::convert::{TryFromBorrowedObject, TryFromObject}; pub use self::object::{ AsObject, Py, PyAtomicRef, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, - PyResult, PyWeakRef, + PyResult, PyStackRef, PyWeakRef, }; pub use self::vm::{Context, Interpreter, InterpreterBuilder, Settings, VirtualMachine}; diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index ea661103c39..b48045f2163 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -40,6 +40,7 @@ use core::{ cell::UnsafeCell, marker::PhantomData, mem::ManuallyDrop, + num::NonZeroUsize, ops::Deref, ptr::{self, NonNull}, }; @@ -1400,6 +1401,153 @@ impl fmt::Debug for PyObjectRef { } } +const STACKREF_BORROW_TAG: usize = 1; + +/// A tagged stack reference to a Python object. +/// +/// Uses the lowest bit of the pointer to distinguish owned vs borrowed: +/// - bit 0 = 0 → **owned**: refcount was incremented; Drop will decrement. +/// - bit 0 = 1 → **borrowed**: no refcount change; Drop is a no-op. +/// +/// Same size as `PyObjectRef` (one pointer-width). `PyObject` is at least +/// 8-byte aligned, so the low bit is always available for tagging. +/// +/// Uses `NonZeroUsize` so that `Option` has the same size as +/// `PyStackRef` via niche optimization (matching `Option`). +#[repr(transparent)] +pub struct PyStackRef { + bits: NonZeroUsize, +} + +impl PyStackRef { + /// Create an owned stack reference, consuming the `PyObjectRef`. + /// Refcount is NOT incremented — ownership is transferred. + #[inline(always)] + pub fn new_owned(obj: PyObjectRef) -> Self { + let ptr = obj.into_raw(); + let bits = ptr.as_ptr() as usize; + debug_assert!( + bits & STACKREF_BORROW_TAG == 0, + "PyObject pointer must be aligned" + ); + Self { + // SAFETY: valid PyObject pointers are never null + bits: unsafe { NonZeroUsize::new_unchecked(bits) }, + } + } + + /// Create a borrowed stack reference from a `&PyObject`. + /// + /// # Safety + /// The caller must guarantee that the pointed-to object lives at least as + /// long as this `PyStackRef`. In practice the compiler guarantees that + /// borrowed refs are consumed within the same basic block, before any + /// `STORE_FAST`/`DELETE_FAST` could overwrite the source slot. + #[inline(always)] + pub unsafe fn new_borrowed(obj: &PyObject) -> Self { + let bits = (obj as *const PyObject as usize) | STACKREF_BORROW_TAG; + Self { + // SAFETY: valid PyObject pointers are never null, and ORing with 1 keeps it non-zero + bits: unsafe { NonZeroUsize::new_unchecked(bits) }, + } + } + + /// Whether this is a borrowed (non-owning) reference. + #[inline(always)] + pub fn is_borrowed(&self) -> bool { + self.bits.get() & STACKREF_BORROW_TAG != 0 + } + + /// Get a `&PyObject` reference. Works for both owned and borrowed. + #[inline(always)] + pub fn as_object(&self) -> &PyObject { + unsafe { &*((self.bits.get() & !STACKREF_BORROW_TAG) as *const PyObject) } + } + + /// Convert to an owned `PyObjectRef`. + /// + /// * If **borrowed** → increments refcount, forgets self. + /// * If **owned** → reconstructs `PyObjectRef` from the raw pointer, forgets self. + #[inline(always)] + pub fn to_pyobj(self) -> PyObjectRef { + let obj = if self.is_borrowed() { + self.as_object().to_owned() // inc refcount + } else { + let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) }; + unsafe { PyObjectRef::from_raw(ptr) } + }; + core::mem::forget(self); // don't run Drop + obj + } + + /// Promote a borrowed ref to owned **in place** (increments refcount, + /// clears the borrow tag). No-op if already owned. + #[inline(always)] + pub fn promote(&mut self) { + if self.is_borrowed() { + self.as_object().0.ref_count.inc(); + // SAFETY: clearing the low bit of a non-null pointer keeps it non-zero + self.bits = + unsafe { NonZeroUsize::new_unchecked(self.bits.get() & !STACKREF_BORROW_TAG) }; + } + } +} + +impl Drop for PyStackRef { + #[inline] + fn drop(&mut self) { + if !self.is_borrowed() { + // Owned: decrement refcount (potentially deallocate). + let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) }; + drop(unsafe { PyObjectRef::from_raw(ptr) }); + } + // Borrowed: nothing to do. + } +} + +impl core::ops::Deref for PyStackRef { + type Target = PyObject; + + #[inline(always)] + fn deref(&self) -> &PyObject { + self.as_object() + } +} + +impl Clone for PyStackRef { + /// Cloning always produces an **owned** reference (increments refcount). + #[inline(always)] + fn clone(&self) -> Self { + Self::new_owned(self.as_object().to_owned()) + } +} + +impl fmt::Debug for PyStackRef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_borrowed() { + write!(f, "PyStackRef(borrowed, ")?; + } else { + write!(f, "PyStackRef(owned, ")?; + } + self.as_object().fmt(f)?; + write!(f, ")") + } +} + +cfg_if::cfg_if! { + if #[cfg(feature = "threading")] { + unsafe impl Send for PyStackRef {} + unsafe impl Sync for PyStackRef {} + } +} + +// Ensure Option uses niche optimization and matches Option in size +const _: () = assert!( + core::mem::size_of::>() == core::mem::size_of::>() +); +const _: () = + assert!(core::mem::size_of::>() == core::mem::size_of::()); + #[repr(transparent)] pub struct Py(PyInner); diff --git a/crates/vm/src/object/traverse.rs b/crates/vm/src/object/traverse.rs index 367076b78e3..9a5ae324baf 100644 --- a/crates/vm/src/object/traverse.rs +++ b/crates/vm/src/object/traverse.rs @@ -2,7 +2,9 @@ use core::ptr::NonNull; use rustpython_common::lock::{PyMutex, PyRwLock}; -use crate::{AsObject, PyObject, PyObjectRef, PyRef, function::Either, object::PyObjectPayload}; +use crate::{ + AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either, object::PyObjectPayload, +}; pub type TraverseFn<'a> = dyn FnMut(&PyObject) + 'a; @@ -45,6 +47,12 @@ unsafe impl Traverse for PyObjectRef { } } +unsafe impl Traverse for PyStackRef { + fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) { + traverse_fn(self.as_object()) + } +} + unsafe impl Traverse for PyRef { fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) { traverse_fn(self.as_object())