From 73179e4db1c7605f6e405119e0358630aa3a2f86 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 1 Mar 2026 15:50:05 +0900 Subject: [PATCH 1/3] Implement PyStackRef borrowed stack references for LOAD_FAST_BORROW --- crates/vm/src/frame.rs | 175 ++++++++++++++++++++----------- crates/vm/src/lib.rs | 2 +- crates/vm/src/object/core.rs | 130 +++++++++++++++++++++++ crates/vm/src/object/traverse.rs | 11 +- 4 files changed, 254 insertions(+), 64 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 132ad61a65b..eec7b3dd416 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1,7 +1,8 @@ #[cfg(feature = "flame")] use crate::bytecode::InstructionMetadata; use crate::{ - AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, + AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, PyStackRef, TryFromObject, + VirtualMachine, builtins::{ PyBaseException, PyBaseExceptionRef, PyCode, PyCoroutine, PyDict, PyDictRef, PyGenerator, PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, PyTemplate, PyTraceback, @@ -60,7 +61,7 @@ enum UnwindReason { struct FrameState { // We need 1 stack per frame /// The main data frame of the stack machine - stack: BoxVec>, + stack: BoxVec>, /// Cell and free variable references (cellvars + freevars). cells_frees: Box<[PyCellRef]>, /// Previous line number for LINE event suppression. @@ -1261,8 +1262,8 @@ impl ExecutingFrame<'_> { } Instruction::CompareOp { op } => self.execute_compare(vm, op.get(arg)), Instruction::ContainsOp(invert) => { - let b = self.pop_value(); - let a = self.pop_value(); + let b = self.pop_stackref(); + let a = self.pop_stackref(); let value = match invert.get(arg) { bytecode::Invert::No => self._in(vm, &a, &b)?, @@ -1287,7 +1288,7 @@ impl ExecutingFrame<'_> { panic!("CopyItem: stack underflow"); } let value = self.state.stack[stack_len - idx].clone(); - self.push_value_opt(value); + self.push_stackref_opt(value); Ok(None) } Instruction::CopyFreeVars { .. } => { @@ -1912,6 +1913,7 @@ impl ExecutingFrame<'_> { .into(), ) })?; + drop(fastlocals); self.push_value(x1); self.push_value(x2); Ok(None) @@ -2502,6 +2504,10 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::YieldValue { arg: oparg } => { + debug_assert!( + self.state.stack.iter().flatten().all(|sr| !sr.is_borrowed()), + "borrowed refs on stack at yield point" + ); let value = self.pop_value(); // arg=0: direct yield (wrapped for async generators) // arg=1: yield from await/yield-from (NOT wrapped) @@ -3604,18 +3610,24 @@ impl ExecutingFrame<'_> { let mut elements = elements; // Elements on stack from right-to-left: - self.state - .stack - .extend(elements.drain(before + middle..).rev().map(Some)); + self.state.stack.extend( + elements + .drain(before + middle..) + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); let middle_elements = elements.drain(before..).collect(); let t = vm.ctx.new_list(middle_elements); self.push_value(t.into()); // Lastly the first reversed values: - self.state - .stack - .extend(elements.into_iter().rev().map(Some)); + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); Ok(None) } @@ -3767,37 +3779,37 @@ impl ExecutingFrame<'_> { #[cfg_attr(feature = "flame-it", flame("Frame"))] fn execute_bin_op(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult { - let b_ref = &self.pop_value(); - let a_ref = &self.pop_value(); + let b_ref = self.pop_stackref(); + let a_ref = self.pop_stackref(); let value = match op { - bytecode::BinaryOperator::Subtract => vm._sub(a_ref, b_ref), - bytecode::BinaryOperator::Add => vm._add(a_ref, b_ref), - bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref), - bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref), - bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref, vm.ctx.none.as_object()), - bytecode::BinaryOperator::TrueDivide => vm._truediv(a_ref, b_ref), - bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref), - bytecode::BinaryOperator::Remainder => vm._mod(a_ref, b_ref), - bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref), - bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref), - bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref), - bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref), - bytecode::BinaryOperator::And => vm._and(a_ref, b_ref), - bytecode::BinaryOperator::InplaceSubtract => vm._isub(a_ref, b_ref), - bytecode::BinaryOperator::InplaceAdd => vm._iadd(a_ref, b_ref), - bytecode::BinaryOperator::InplaceMultiply => vm._imul(a_ref, b_ref), - bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(a_ref, b_ref), + bytecode::BinaryOperator::Subtract => vm._sub(&a_ref, &b_ref), + bytecode::BinaryOperator::Add => vm._add(&a_ref, &b_ref), + bytecode::BinaryOperator::Multiply => vm._mul(&a_ref, &b_ref), + bytecode::BinaryOperator::MatrixMultiply => vm._matmul(&a_ref, &b_ref), + bytecode::BinaryOperator::Power => vm._pow(&a_ref, &b_ref, vm.ctx.none.as_object()), + bytecode::BinaryOperator::TrueDivide => vm._truediv(&a_ref, &b_ref), + bytecode::BinaryOperator::FloorDivide => vm._floordiv(&a_ref, &b_ref), + bytecode::BinaryOperator::Remainder => vm._mod(&a_ref, &b_ref), + bytecode::BinaryOperator::Lshift => vm._lshift(&a_ref, &b_ref), + bytecode::BinaryOperator::Rshift => vm._rshift(&a_ref, &b_ref), + bytecode::BinaryOperator::Xor => vm._xor(&a_ref, &b_ref), + bytecode::BinaryOperator::Or => vm._or(&a_ref, &b_ref), + bytecode::BinaryOperator::And => vm._and(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceSubtract => vm._isub(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceAdd => vm._iadd(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceMultiply => vm._imul(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(&a_ref, &b_ref), bytecode::BinaryOperator::InplacePower => { - vm._ipow(a_ref, b_ref, vm.ctx.none.as_object()) - } - bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(a_ref, b_ref), - bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(a_ref, b_ref), - bytecode::BinaryOperator::InplaceRemainder => vm._imod(a_ref, b_ref), - bytecode::BinaryOperator::InplaceLshift => vm._ilshift(a_ref, b_ref), - bytecode::BinaryOperator::InplaceRshift => vm._irshift(a_ref, b_ref), - bytecode::BinaryOperator::InplaceXor => vm._ixor(a_ref, b_ref), - bytecode::BinaryOperator::InplaceOr => vm._ior(a_ref, b_ref), - bytecode::BinaryOperator::InplaceAnd => vm._iand(a_ref, b_ref), + vm._ipow(&a_ref, &b_ref, vm.ctx.none.as_object()) + } + bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceRemainder => vm._imod(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceLshift => vm._ilshift(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceRshift => vm._irshift(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceXor => vm._ixor(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceOr => vm._ior(&a_ref, &b_ref), + bytecode::BinaryOperator::InplaceAnd => vm._iand(&a_ref, &b_ref), bytecode::BinaryOperator::Subscr => a_ref.get_item(b_ref.as_object(), vm), }?; @@ -3854,9 +3866,12 @@ impl ExecutingFrame<'_> { if let Some(elements) = fast_elements { return match elements.len().cmp(&size) { core::cmp::Ordering::Equal => { - self.state - .stack - .extend(elements.into_iter().rev().map(Some)); + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); Ok(None) } core::cmp::Ordering::Greater => Err(vm.new_value_error(format!( @@ -3922,9 +3937,12 @@ impl ExecutingFrame<'_> { Err(vm.new_value_error(msg)) } PyIterReturn::StopIteration(_) => { - self.state - .stack - .extend(elements.into_iter().rev().map(Some)); + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); Ok(None) } } @@ -4057,43 +4075,74 @@ impl ExecutingFrame<'_> { // Block stack functions removed - exception table handles all exception/cleanup #[inline] - #[track_caller] // not a real track_caller but push_value is less useful for debugging - fn push_value_opt(&mut self, obj: Option) { + #[track_caller] + fn push_stackref_opt(&mut self, obj: Option) { match self.state.stack.try_push(obj) { Ok(()) => {} Err(_e) => self.fatal("tried to push value onto stack but overflowed max_stackdepth"), } } + #[inline] + #[track_caller] // not a real track_caller but push_value is less useful for debugging + fn push_value_opt(&mut self, obj: Option) { + self.push_stackref_opt(obj.map(PyStackRef::new_owned)); + } + #[inline] #[track_caller] fn push_value(&mut self, obj: PyObjectRef) { - self.push_value_opt(Some(obj)); + self.push_stackref_opt(Some(PyStackRef::new_owned(obj))); + } + + /// Push a borrowed reference onto the stack (no refcount increment). + /// + /// # Safety + /// The object must remain alive until the borrowed ref is consumed. + /// The compiler guarantees consumption within the same basic block. + #[inline] + #[track_caller] + unsafe fn push_borrowed(&mut self, obj: &PyObject) { + self.push_stackref_opt(Some(unsafe { PyStackRef::new_borrowed(obj) })); } #[inline] fn push_null(&mut self) { - self.push_value_opt(None); + self.push_stackref_opt(None); } - /// Pop a value from the stack, returning None if the stack slot is NULL + /// Pop a raw stackref from the stack, returning None if the stack slot is NULL. #[inline] - fn pop_value_opt(&mut self) -> Option { + fn pop_stackref_opt(&mut self) -> Option { match self.state.stack.pop() { - Some(slot) => slot, // slot is Option + Some(slot) => slot, None => self.fatal("tried to pop from empty stack"), } } + /// Pop a raw stackref from the stack. Panics if NULL. #[inline] #[track_caller] - fn pop_value(&mut self) -> PyObjectRef { + fn pop_stackref(&mut self) -> PyStackRef { expect_unchecked( - self.pop_value_opt(), - "pop value but null found. This is a compiler bug.", + self.pop_stackref_opt(), + "pop stackref but null found. This is a compiler bug.", ) } + /// Pop a value from the stack, returning None if the stack slot is NULL. + /// Automatically promotes borrowed refs to owned. + #[inline] + fn pop_value_opt(&mut self) -> Option { + self.pop_stackref_opt().map(|sr| sr.to_pyobj()) + } + + #[inline] + #[track_caller] + fn pop_value(&mut self) -> PyObjectRef { + self.pop_stackref().to_pyobj() + } + fn call_intrinsic_1( &mut self, func: bytecode::IntrinsicFunction1, @@ -4263,21 +4312,23 @@ impl ExecutingFrame<'_> { } self.state.stack.drain(stack_len - count..).map(|obj| { expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.") + .to_pyobj() }) } #[inline] - fn replace_top(&mut self, mut top: Option) -> Option { + fn replace_top(&mut self, top: Option) -> Option { + let mut slot = top.map(PyStackRef::new_owned); let last = self.state.stack.last_mut().unwrap(); - core::mem::swap(last, &mut top); - top + core::mem::swap(last, &mut slot); + slot.map(|sr| sr.to_pyobj()) } #[inline] #[track_caller] fn top_value(&self) -> &PyObject { match &*self.state.stack { - [.., Some(last)] => last, + [.., Some(last)] => last.as_object(), [.., None] => self.fatal("tried to get top of stack but got NULL"), [] => self.fatal("tried to get top of stack but stack is empty"), } @@ -4288,7 +4339,7 @@ impl ExecutingFrame<'_> { fn nth_value(&self, depth: u32) -> &PyObject { let stack = &self.state.stack; match &stack[stack.len() - depth as usize - 1] { - Some(obj) => obj, + Some(obj) => obj.as_object(), None => unsafe { core::hint::unreachable_unchecked() }, } } @@ -4415,7 +4466,7 @@ fn is_module_initializing(module: &PyObject, vm: &VirtualMachine) -> bool { initializing_attr.try_to_bool(vm).unwrap_or(false) } -fn expect_unchecked(optional: Option, err_msg: &'static str) -> PyObjectRef { +fn expect_unchecked(optional: Option, err_msg: &'static str) -> T { if cfg!(debug_assertions) { optional.expect(err_msg) } else { diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index 8a15688f591..df347124f43 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -98,7 +98,7 @@ pub mod windows; pub use self::convert::{TryFromBorrowedObject, TryFromObject}; pub use self::object::{ AsObject, Py, PyAtomicRef, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, - PyResult, PyWeakRef, + PyResult, PyStackRef, PyWeakRef, }; pub use self::vm::{Context, Interpreter, InterpreterBuilder, Settings, VirtualMachine}; diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index ea661103c39..c65b7f0b33f 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -1400,6 +1400,136 @@ impl fmt::Debug for PyObjectRef { } } +// --------------------------------------------------------------------------- +// PyStackRef – tagged stack reference (owned or borrowed) +// --------------------------------------------------------------------------- + +const STACKREF_BORROW_TAG: usize = 1; + +/// A tagged stack reference to a Python object. +/// +/// Uses the lowest bit of the pointer to distinguish owned vs borrowed: +/// - bit 0 = 0 → **owned**: refcount was incremented; Drop will decrement. +/// - bit 0 = 1 → **borrowed**: no refcount change; Drop is a no-op. +/// +/// Same size as `PyObjectRef` (one pointer-width). `PyObject` is at least +/// 8-byte aligned, so the low bit is always available for tagging. +#[repr(transparent)] +pub struct PyStackRef { + bits: usize, +} + +impl PyStackRef { + /// Create an owned stack reference, consuming the `PyObjectRef`. + /// Refcount is NOT incremented — ownership is transferred. + #[inline(always)] + pub fn new_owned(obj: PyObjectRef) -> Self { + let ptr = obj.into_raw(); + let bits = ptr.as_ptr() as usize; + debug_assert!(bits & STACKREF_BORROW_TAG == 0, "PyObject pointer must be aligned"); + Self { bits } + } + + /// Create a borrowed stack reference from a `&PyObject`. + /// + /// # Safety + /// The caller must guarantee that the pointed-to object lives at least as + /// long as this `PyStackRef`. In practice the compiler guarantees that + /// borrowed refs are consumed within the same basic block, before any + /// `STORE_FAST`/`DELETE_FAST` could overwrite the source slot. + #[inline(always)] + pub unsafe fn new_borrowed(obj: &PyObject) -> Self { + let bits = (obj as *const PyObject as usize) | STACKREF_BORROW_TAG; + Self { bits } + } + + /// Whether this is a borrowed (non-owning) reference. + #[inline(always)] + pub fn is_borrowed(&self) -> bool { + self.bits & STACKREF_BORROW_TAG != 0 + } + + /// Get a `&PyObject` reference. Works for both owned and borrowed. + #[inline(always)] + pub fn as_object(&self) -> &PyObject { + unsafe { &*((self.bits & !STACKREF_BORROW_TAG) as *const PyObject) } + } + + /// Convert to an owned `PyObjectRef`. + /// + /// * If **borrowed** → increments refcount, forgets self. + /// * If **owned** → reconstructs `PyObjectRef` from the raw pointer, forgets self. + #[inline(always)] + pub fn to_pyobj(self) -> PyObjectRef { + let obj = if self.is_borrowed() { + self.as_object().to_owned() // inc refcount + } else { + let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) }; + unsafe { PyObjectRef::from_raw(ptr) } + }; + core::mem::forget(self); // don't run Drop + obj + } + + /// Promote a borrowed ref to owned **in place** (increments refcount, + /// clears the borrow tag). No-op if already owned. + #[inline(always)] + pub fn promote(&mut self) { + if self.is_borrowed() { + self.as_object().0.ref_count.inc(); + self.bits &= !STACKREF_BORROW_TAG; + } + } +} + +impl Drop for PyStackRef { + #[inline] + fn drop(&mut self) { + if !self.is_borrowed() { + // Owned: decrement refcount (potentially deallocate). + let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) }; + drop(unsafe { PyObjectRef::from_raw(ptr) }); + } + // Borrowed: nothing to do. + } +} + +impl core::ops::Deref for PyStackRef { + type Target = PyObject; + + #[inline(always)] + fn deref(&self) -> &PyObject { + self.as_object() + } +} + +impl Clone for PyStackRef { + /// Cloning always produces an **owned** reference (increments refcount). + #[inline(always)] + fn clone(&self) -> Self { + Self::new_owned(self.as_object().to_owned()) + } +} + +impl fmt::Debug for PyStackRef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_borrowed() { + write!(f, "PyStackRef(borrowed, ")?; + } else { + write!(f, "PyStackRef(owned, ")?; + } + self.as_object().fmt(f)?; + write!(f, ")") + } +} + +cfg_if::cfg_if! { + if #[cfg(feature = "threading")] { + unsafe impl Send for PyStackRef {} + unsafe impl Sync for PyStackRef {} + } +} + #[repr(transparent)] pub struct Py(PyInner); diff --git a/crates/vm/src/object/traverse.rs b/crates/vm/src/object/traverse.rs index 367076b78e3..309f9f25ce8 100644 --- a/crates/vm/src/object/traverse.rs +++ b/crates/vm/src/object/traverse.rs @@ -2,7 +2,10 @@ use core::ptr::NonNull; use rustpython_common::lock::{PyMutex, PyRwLock}; -use crate::{AsObject, PyObject, PyObjectRef, PyRef, function::Either, object::PyObjectPayload}; +use crate::{ + AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either, + object::PyObjectPayload, +}; pub type TraverseFn<'a> = dyn FnMut(&PyObject) + 'a; @@ -45,6 +48,12 @@ unsafe impl Traverse for PyObjectRef { } } +unsafe impl Traverse for PyStackRef { + fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) { + traverse_fn(self.as_object()) + } +} + unsafe impl Traverse for PyRef { fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) { traverse_fn(self.as_object()) From 5c25fc7b1501cac677eb7698d4722dfad750480e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 1 Mar 2026 20:43:27 +0900 Subject: [PATCH 2/3] Fix PyStackRef: use NonZeroUsize for niche optimization, revert borrowed refs - Change PyStackRef.bits from usize to NonZeroUsize so Option has the same size as Option via niche optimization - Revert LoadFastBorrow to use clone instead of actual borrowed refs to avoid borrowed refs remaining on stack at yield points - Add static size assertions for Option - Add stackref, fastlocal to cspell dictionary - Remove debug eprintln statements - Fix clippy warning for unused push_borrowed --- .cspell.dict/cpython.txt | 3 ++ crates/vm/src/frame.rs | 80 ++++++++++++++++---------------- crates/vm/src/object/core.rs | 40 ++++++++++++---- crates/vm/src/object/traverse.rs | 3 +- 4 files changed, 74 insertions(+), 52 deletions(-) diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index f48bb332334..819d6875b58 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -54,6 +54,8 @@ eofs evalloop excepthandler exceptiontable +fastlocal +fastlocals fblock fblocks fdescr @@ -171,6 +173,7 @@ SMALLBUF SOABI SSLEOF stackdepth +stackref staticbase stginfo storefast diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index eec7b3dd416..2c6b07fc7c5 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1262,8 +1262,8 @@ impl ExecutingFrame<'_> { } Instruction::CompareOp { op } => self.execute_compare(vm, op.get(arg)), Instruction::ContainsOp(invert) => { - let b = self.pop_stackref(); - let a = self.pop_stackref(); + let b = self.pop_value(); + let a = self.pop_value(); let value = match invert.get(arg) { bytecode::Invert::No => self._in(vm, &a, &b)?, @@ -1281,12 +1281,7 @@ impl ExecutingFrame<'_> { // This is 1-indexed to match CPython let idx = index.get(arg) as usize; let stack_len = self.state.stack.len(); - if stack_len < idx { - eprintln!("CopyItem ERROR: stack_len={}, idx={}", stack_len, idx); - eprintln!(" code: {}", self.code.obj_name); - eprintln!(" lasti: {}", self.lasti()); - panic!("CopyItem: stack underflow"); - } + debug_assert!(stack_len >= idx, "CopyItem: stack underflow"); let value = self.state.stack[stack_len - idx].clone(); self.push_stackref_opt(value); Ok(None) @@ -1913,7 +1908,6 @@ impl ExecutingFrame<'_> { .into(), ) })?; - drop(fastlocals); self.push_value(x1); self.push_value(x2); Ok(None) @@ -2505,7 +2499,11 @@ impl ExecutingFrame<'_> { } Instruction::YieldValue { arg: oparg } => { debug_assert!( - self.state.stack.iter().flatten().all(|sr| !sr.is_borrowed()), + self.state + .stack + .iter() + .flatten() + .all(|sr| !sr.is_borrowed()), "borrowed refs on stack at yield point" ); let value = self.pop_value(); @@ -3779,37 +3777,37 @@ impl ExecutingFrame<'_> { #[cfg_attr(feature = "flame-it", flame("Frame"))] fn execute_bin_op(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult { - let b_ref = self.pop_stackref(); - let a_ref = self.pop_stackref(); + let b_ref = &self.pop_value(); + let a_ref = &self.pop_value(); let value = match op { - bytecode::BinaryOperator::Subtract => vm._sub(&a_ref, &b_ref), - bytecode::BinaryOperator::Add => vm._add(&a_ref, &b_ref), - bytecode::BinaryOperator::Multiply => vm._mul(&a_ref, &b_ref), - bytecode::BinaryOperator::MatrixMultiply => vm._matmul(&a_ref, &b_ref), - bytecode::BinaryOperator::Power => vm._pow(&a_ref, &b_ref, vm.ctx.none.as_object()), - bytecode::BinaryOperator::TrueDivide => vm._truediv(&a_ref, &b_ref), - bytecode::BinaryOperator::FloorDivide => vm._floordiv(&a_ref, &b_ref), - bytecode::BinaryOperator::Remainder => vm._mod(&a_ref, &b_ref), - bytecode::BinaryOperator::Lshift => vm._lshift(&a_ref, &b_ref), - bytecode::BinaryOperator::Rshift => vm._rshift(&a_ref, &b_ref), - bytecode::BinaryOperator::Xor => vm._xor(&a_ref, &b_ref), - bytecode::BinaryOperator::Or => vm._or(&a_ref, &b_ref), - bytecode::BinaryOperator::And => vm._and(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceSubtract => vm._isub(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceAdd => vm._iadd(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceMultiply => vm._imul(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(&a_ref, &b_ref), + bytecode::BinaryOperator::Subtract => vm._sub(a_ref, b_ref), + bytecode::BinaryOperator::Add => vm._add(a_ref, b_ref), + bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref), + bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref), + bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref, vm.ctx.none.as_object()), + bytecode::BinaryOperator::TrueDivide => vm._truediv(a_ref, b_ref), + bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref), + bytecode::BinaryOperator::Remainder => vm._mod(a_ref, b_ref), + bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref), + bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref), + bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref), + bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref), + bytecode::BinaryOperator::And => vm._and(a_ref, b_ref), + bytecode::BinaryOperator::InplaceSubtract => vm._isub(a_ref, b_ref), + bytecode::BinaryOperator::InplaceAdd => vm._iadd(a_ref, b_ref), + bytecode::BinaryOperator::InplaceMultiply => vm._imul(a_ref, b_ref), + bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(a_ref, b_ref), bytecode::BinaryOperator::InplacePower => { - vm._ipow(&a_ref, &b_ref, vm.ctx.none.as_object()) - } - bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceRemainder => vm._imod(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceLshift => vm._ilshift(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceRshift => vm._irshift(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceXor => vm._ixor(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceOr => vm._ior(&a_ref, &b_ref), - bytecode::BinaryOperator::InplaceAnd => vm._iand(&a_ref, &b_ref), + vm._ipow(a_ref, b_ref, vm.ctx.none.as_object()) + } + bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(a_ref, b_ref), + bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(a_ref, b_ref), + bytecode::BinaryOperator::InplaceRemainder => vm._imod(a_ref, b_ref), + bytecode::BinaryOperator::InplaceLshift => vm._ilshift(a_ref, b_ref), + bytecode::BinaryOperator::InplaceRshift => vm._irshift(a_ref, b_ref), + bytecode::BinaryOperator::InplaceXor => vm._ixor(a_ref, b_ref), + bytecode::BinaryOperator::InplaceOr => vm._ior(a_ref, b_ref), + bytecode::BinaryOperator::InplaceAnd => vm._iand(a_ref, b_ref), bytecode::BinaryOperator::Subscr => a_ref.get_item(b_ref.as_object(), vm), }?; @@ -4102,6 +4100,7 @@ impl ExecutingFrame<'_> { /// The compiler guarantees consumption within the same basic block. #[inline] #[track_caller] + #[allow(dead_code)] unsafe fn push_borrowed(&mut self, obj: &PyObject) { self.push_stackref_opt(Some(unsafe { PyStackRef::new_borrowed(obj) })); } @@ -4311,8 +4310,7 @@ impl ExecutingFrame<'_> { ); } self.state.stack.drain(stack_len - count..).map(|obj| { - expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.") - .to_pyobj() + expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.").to_pyobj() }) } diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index c65b7f0b33f..2a95e84f611 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -40,6 +40,7 @@ use core::{ cell::UnsafeCell, marker::PhantomData, mem::ManuallyDrop, + num::NonZeroUsize, ops::Deref, ptr::{self, NonNull}, }; @@ -1414,9 +1415,12 @@ const STACKREF_BORROW_TAG: usize = 1; /// /// Same size as `PyObjectRef` (one pointer-width). `PyObject` is at least /// 8-byte aligned, so the low bit is always available for tagging. +/// +/// Uses `NonZeroUsize` so that `Option` has the same size as +/// `PyStackRef` via niche optimization (matching `Option`). #[repr(transparent)] pub struct PyStackRef { - bits: usize, + bits: NonZeroUsize, } impl PyStackRef { @@ -1426,8 +1430,14 @@ impl PyStackRef { pub fn new_owned(obj: PyObjectRef) -> Self { let ptr = obj.into_raw(); let bits = ptr.as_ptr() as usize; - debug_assert!(bits & STACKREF_BORROW_TAG == 0, "PyObject pointer must be aligned"); - Self { bits } + debug_assert!( + bits & STACKREF_BORROW_TAG == 0, + "PyObject pointer must be aligned" + ); + Self { + // SAFETY: valid PyObject pointers are never null + bits: unsafe { NonZeroUsize::new_unchecked(bits) }, + } } /// Create a borrowed stack reference from a `&PyObject`. @@ -1440,19 +1450,22 @@ impl PyStackRef { #[inline(always)] pub unsafe fn new_borrowed(obj: &PyObject) -> Self { let bits = (obj as *const PyObject as usize) | STACKREF_BORROW_TAG; - Self { bits } + Self { + // SAFETY: valid PyObject pointers are never null, and ORing with 1 keeps it non-zero + bits: unsafe { NonZeroUsize::new_unchecked(bits) }, + } } /// Whether this is a borrowed (non-owning) reference. #[inline(always)] pub fn is_borrowed(&self) -> bool { - self.bits & STACKREF_BORROW_TAG != 0 + self.bits.get() & STACKREF_BORROW_TAG != 0 } /// Get a `&PyObject` reference. Works for both owned and borrowed. #[inline(always)] pub fn as_object(&self) -> &PyObject { - unsafe { &*((self.bits & !STACKREF_BORROW_TAG) as *const PyObject) } + unsafe { &*((self.bits.get() & !STACKREF_BORROW_TAG) as *const PyObject) } } /// Convert to an owned `PyObjectRef`. @@ -1464,7 +1477,7 @@ impl PyStackRef { let obj = if self.is_borrowed() { self.as_object().to_owned() // inc refcount } else { - let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) }; + let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) }; unsafe { PyObjectRef::from_raw(ptr) } }; core::mem::forget(self); // don't run Drop @@ -1477,7 +1490,9 @@ impl PyStackRef { pub fn promote(&mut self) { if self.is_borrowed() { self.as_object().0.ref_count.inc(); - self.bits &= !STACKREF_BORROW_TAG; + // SAFETY: clearing the low bit of a non-null pointer keeps it non-zero + self.bits = + unsafe { NonZeroUsize::new_unchecked(self.bits.get() & !STACKREF_BORROW_TAG) }; } } } @@ -1487,7 +1502,7 @@ impl Drop for PyStackRef { fn drop(&mut self) { if !self.is_borrowed() { // Owned: decrement refcount (potentially deallocate). - let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) }; + let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) }; drop(unsafe { PyObjectRef::from_raw(ptr) }); } // Borrowed: nothing to do. @@ -1530,6 +1545,13 @@ cfg_if::cfg_if! { } } +// Ensure Option uses niche optimization and matches Option in size +const _: () = assert!( + core::mem::size_of::>() == core::mem::size_of::>() +); +const _: () = + assert!(core::mem::size_of::>() == core::mem::size_of::()); + #[repr(transparent)] pub struct Py(PyInner); diff --git a/crates/vm/src/object/traverse.rs b/crates/vm/src/object/traverse.rs index 309f9f25ce8..9a5ae324baf 100644 --- a/crates/vm/src/object/traverse.rs +++ b/crates/vm/src/object/traverse.rs @@ -3,8 +3,7 @@ use core::ptr::NonNull; use rustpython_common::lock::{PyMutex, PyRwLock}; use crate::{ - AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either, - object::PyObjectPayload, + AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either, object::PyObjectPayload, }; pub type TraverseFn<'a> = dyn FnMut(&PyObject) + 'a; From e6b181039a3b5008b7a599fd50b99f9ab44b9446 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 1 Mar 2026 21:55:12 +0900 Subject: [PATCH 3/3] Add borrowed ref debug_assert to InstrumentedYieldValue, clean up comments --- crates/vm/src/frame.rs | 13 +++++++++++-- crates/vm/src/object/core.rs | 4 ---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2c6b07fc7c5..f1175401676 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1864,8 +1864,9 @@ impl ExecutingFrame<'_> { self.push_value(x2); Ok(None) } - // TODO: Implement true borrow optimization (skip Arc::clone). - // Currently this just clones like LoadFast. + // Borrow optimization not yet active; falls back to clone. + // push_borrowed() is available but disabled until stack + // lifetime issues at yield/exception points are resolved. Instruction::LoadFastBorrow(idx) => { let idx = idx.get(arg) as usize; let x = unsafe { self.fastlocals.borrow() }[idx] @@ -2685,6 +2686,14 @@ impl ExecutingFrame<'_> { self.unwind_blocks(vm, UnwindReason::Returning { value }) } Instruction::InstrumentedYieldValue => { + debug_assert!( + self.state + .stack + .iter() + .flatten() + .all(|sr| !sr.is_borrowed()), + "borrowed refs on stack at yield point" + ); let value = self.pop_value(); if self.monitoring_mask & monitoring::EVENT_PY_YIELD != 0 { let offset = (self.lasti() - 1) * 2; diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index 2a95e84f611..b48045f2163 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -1401,10 +1401,6 @@ impl fmt::Debug for PyObjectRef { } } -// --------------------------------------------------------------------------- -// PyStackRef – tagged stack reference (owned or borrowed) -// --------------------------------------------------------------------------- - const STACKREF_BORROW_TAG: usize = 1; /// A tagged stack reference to a Python object.