From 3a073f7d11592670fb62ed82d2e205b9f1cbaf79 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Tue, 3 Feb 2026 15:56:47 +0900 Subject: [PATCH] Add GC infrastructure: object tracking, tp_clear, and helper methods Object tracking (CPython _PyObject_GC_TRACK/UNTRACK equivalent): - PyRef::new_ref() now calls gc_state().track_object() for traceable objects - track_object() sets GcBits::TRACKED flag via set_gc_tracked() - Static types call clear_gc_tracked() since they are immortal - is_gc_tracked() now checks the TRACKED bit flag tp_clear infrastructure for breaking reference cycles: - Add clear_for_gc_collect_callbacks() to WeakRefList (CPython: handle_weakrefs) - Add try_call_finalizer() for GC to call __del__ (CPython: PyObject_CallFinalizerFromDealloc) - Add gc_clear_raw()/gc_clear() for tp_clear (CPython: subtype_clear) - Add gc_get_referent_ptrs() for raw pointer traversal - Add gc_has_clear() to check clear capability GC state improvements: - maybe_collect() now checks gen0 threshold (CPython: _PyObject_GC_Alloc) - collect()/collect_force() reset gen0 count WeakRefList::clear() cleanup: - Drop lock before invoking callbacks to avoid deadlock --- .cspell.dict/cpython.txt | 1 + crates/vm/src/gc_state.rs | 22 +++- crates/vm/src/object/core.rs | 218 +++++++++++++++++++++++++++++------ 3 files changed, 205 insertions(+), 36 deletions(-) diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index c70e46cb207..d99f823976b 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -55,6 +55,7 @@ fielddesc fieldlist fileutils finalbody +finalizers flowgraph formatfloat freevar diff --git a/crates/vm/src/gc_state.rs b/crates/vm/src/gc_state.rs index e3bac79ca27..87dd1152d9c 100644 --- a/crates/vm/src/gc_state.rs +++ b/crates/vm/src/gc_state.rs @@ -366,8 +366,22 @@ impl GcState { /// Check if automatic GC should run and run it if needed. /// Called after object allocation. - /// Currently a stub — returns false. 
+ /// Returns true if GC was run, false otherwise. pub fn maybe_collect(&self) -> bool { + if !self.is_enabled() { + return false; + } + + // _PyObject_GC_Alloc checks thresholds + + // Check gen0 threshold + let count0 = self.generations[0].count.load(Ordering::SeqCst) as u32; + let threshold0 = self.generations[0].threshold(); + if threshold0 > 0 && count0 >= threshold0 { + self.collect(0); + return true; + } + false } @@ -377,12 +391,18 @@ impl GcState { /// Currently a stub — the actual collection algorithm requires EBR /// and will be added in a follow-up. pub fn collect(&self, _generation: usize) -> (usize, usize) { + // gc_collect_main + // Reset gen0 count even though we're not actually collecting + self.generations[0].count.store(0, Ordering::SeqCst); (0, 0) } /// Force collection even if GC is disabled (for manual gc.collect() calls). + /// gc.collect() always runs regardless of gc.isenabled() /// Currently a stub. pub fn collect_force(&self, _generation: usize) -> (usize, usize) { + // Reset gen0 count even though we're not actually collecting + self.generations[0].count.store(0, Ordering::SeqCst); (0, 0) } diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index 2ea3a5d91c3..55626a0d8d4 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -82,7 +82,7 @@ use core::{ pub(super) struct Erased; /// Default dealloc: handles __del__, weakref clearing, tp_clear, and memory free. -/// Equivalent to subtype_dealloc in CPython. +/// Equivalent to subtype_dealloc. pub(super) unsafe fn default_dealloc(obj: *mut PyObject) { let obj_ref = unsafe { &*(obj as *const PyObject) }; if let Err(()) = obj_ref.drop_slow_inner() { @@ -383,58 +383,101 @@ impl WeakRefList { weak } - /// PyObject_ClearWeakRefs: clear all weakrefs when the referent dies. + /// Clear all weakrefs and call their callbacks. + /// Called when the owner object is being dropped. 
+ // PyObject_ClearWeakRefs fn clear(&self, obj: &PyObject) { let obj_addr = obj as *const PyObject as usize; - let mut to_callback: Vec<(PyRef, PyObjectRef)> = Vec::new(); + let _lock = weakref_lock::lock(obj_addr); - { - let _lock = weakref_lock::lock(obj_addr); + // Clear generic cache + self.generic.store(ptr::null_mut(), Ordering::Relaxed); - // Walk the list, collecting weakrefs with callbacks - let mut current = NonNull::new(self.head.load(Ordering::Relaxed)); - while let Some(node) = current { - let next = unsafe { WeakLink::pointers(node).as_ref().get_next() }; + // Walk the list, collecting weakrefs with callbacks + let mut callbacks: Vec<(PyRef, PyObjectRef)> = Vec::new(); + let mut current = NonNull::new(self.head.load(Ordering::Relaxed)); + while let Some(node) = current { + let next = unsafe { WeakLink::pointers(node).as_ref().get_next() }; - let wr = unsafe { node.as_ref() }; + let wr = unsafe { node.as_ref() }; - // Set wr_object to null (marks weakref as dead) - wr.0.payload - .wr_object - .store(ptr::null_mut(), Ordering::Relaxed); + // Mark weakref as dead + wr.0.payload + .wr_object + .store(ptr::null_mut(), Ordering::Relaxed); - // Unlink from list - unsafe { - let mut ptrs = WeakLink::pointers(node); - ptrs.as_mut().set_prev(None); - ptrs.as_mut().set_next(None); - } + // Unlink from list + unsafe { + let mut ptrs = WeakLink::pointers(node); + ptrs.as_mut().set_prev(None); + ptrs.as_mut().set_next(None); + } - // Collect callback if weakref is still alive (strong_count > 0) - if wr.0.ref_count.get() > 0 { - let cb = unsafe { wr.0.payload.callback.get().replace(None) }; - if let Some(cb) = cb { - to_callback.push((wr.to_owned(), cb)); - } + // Collect callback if present and weakref is still alive + if wr.0.ref_count.get() > 0 { + let cb = unsafe { wr.0.payload.callback.get().replace(None) }; + if let Some(cb) = cb { + callbacks.push((wr.to_owned(), cb)); } - - current = next; } - self.head.store(ptr::null_mut(), Ordering::Relaxed); - 
self.generic.store(ptr::null_mut(), Ordering::Relaxed); + current = next; } + self.head.store(ptr::null_mut(), Ordering::Relaxed); - // Call callbacks without holding the lock - for (wr, cb) in to_callback { + // Invoke callbacks outside the lock + drop(_lock); + for (wr, cb) in callbacks { crate::vm::thread::with_vm(&cb, |vm| { - // TODO: handle unraisable exception - let wr_obj: PyObjectRef = wr.clone().into(); - let _ = cb.call((wr_obj,), vm); + let _ = cb.call((wr.clone(),), vm); }); } } + /// Clear all weakrefs but DON'T call callbacks. Instead, return them for later invocation. + /// Used by GC to ensure ALL weakrefs are cleared BEFORE any callbacks are invoked. + /// handle_weakrefs() clears all weakrefs first, then invokes callbacks. + fn clear_for_gc_collect_callbacks(&self, obj: &PyObject) -> Vec<(PyRef, PyObjectRef)> { + let obj_addr = obj as *const PyObject as usize; + let _lock = weakref_lock::lock(obj_addr); + + // Clear generic cache + self.generic.store(ptr::null_mut(), Ordering::Relaxed); + + let mut callbacks = Vec::new(); + let mut current = NonNull::new(self.head.load(Ordering::Relaxed)); + while let Some(node) = current { + let next = unsafe { WeakLink::pointers(node).as_ref().get_next() }; + + let wr = unsafe { node.as_ref() }; + + // Mark weakref as dead + wr.0.payload + .wr_object + .store(ptr::null_mut(), Ordering::Relaxed); + + // Unlink from list + unsafe { + let mut ptrs = WeakLink::pointers(node); + ptrs.as_mut().set_prev(None); + ptrs.as_mut().set_next(None); + } + + // Collect callback without invoking + if wr.0.ref_count.get() > 0 { + let cb = unsafe { wr.0.payload.callback.get().replace(None) }; + if let Some(cb) = cb { + callbacks.push((wr.to_owned(), cb)); + } + } + + current = next; + } + self.head.store(ptr::null_mut(), Ordering::Relaxed); + + callbacks + } + fn count(&self, obj: &PyObject) -> usize { let _lock = weakref_lock::lock(obj as *const PyObject as usize); let mut count = 0usize; @@ -1044,6 +1087,8 @@ impl PyObject { } 
// __del__ should only be called once (like _PyGC_FINALIZED check in GIL_DISABLED) + // We call __del__ BEFORE clearing weakrefs to allow the finalizer to access + // the object's weak references if needed. let del = self.class().slots.del.load(); if let Some(slot_del) = del && !self.gc_finalized() @@ -1051,6 +1096,11 @@ impl PyObject { self.set_gc_finalized(); call_slot_del(self, slot_del)?; } + + // Clear weak refs AFTER __del__. + // Note: This differs from GC behavior which clears weakrefs before finalizers, + // but for direct deallocation (drop_slow_inner), we need to allow the finalizer + // to run without triggering use-after-free from WeakRefList operations. if let Some(wrl) = self.weak_ref_list() { wrl.clear(self); } @@ -1097,6 +1147,104 @@ impl PyObject { }); result } + + /// Call __del__ if present, without triggering object deallocation. + /// Used by GC to call finalizers before breaking cycles. + /// This allows proper resurrection detection. + /// CPython: PyObject_CallFinalizerFromDealloc in Objects/object.c + pub fn try_call_finalizer(&self) { + let del = self.class().slots.del.load(); + if let Some(slot_del) = del + && !self.gc_finalized() + { + // Mark as finalized BEFORE calling __del__ to prevent double-call + // This ensures drop_slow_inner() won't call __del__ again + self.set_gc_finalized(); + let result = crate::vm::thread::with_vm(self, |vm| { + if let Err(e) = slot_del(self, vm) + && let Some(del_method) = self.get_class_attr(identifier!(vm, __del__)) + { + vm.run_unraisable(e, None, del_method); + } + }); + let _ = result; + } + } + + /// Clear weakrefs but collect callbacks instead of calling them. + /// This is used by GC to ensure ALL weakrefs are cleared BEFORE any callbacks run. + /// Returns collected callbacks as (PyRef, callback) pairs. 
+ // = handle_weakrefs + pub fn gc_clear_weakrefs_collect_callbacks(&self) -> Vec<(PyRef, PyObjectRef)> { + if let Some(wrl) = self.weak_ref_list() { + wrl.clear_for_gc_collect_callbacks(self) + } else { + vec![] + } + } + + /// Get raw pointers to referents without incrementing reference counts. + /// This is used during GC to avoid reference count manipulation. + /// tp_traverse visits objects without incref + /// + /// # Safety + /// The returned pointers are only valid as long as the object is alive + /// and its contents haven't been modified. + pub unsafe fn gc_get_referent_ptrs(&self) -> Vec<NonNull<PyObject>> { + let mut result = Vec::new(); + // Traverse the entire object including dict and slots + self.0.traverse(&mut |child: &PyObject| { + result.push(NonNull::from(child)); + }); + result + } + + /// Pop edges from this object for cycle breaking. + /// Returns extracted child references that were removed from this object (tp_clear). + /// This is used during garbage collection to break circular references. + /// + /// # Safety + /// - ptr must be a valid pointer to a PyObject + /// - The caller must have exclusive access (no other references exist) + /// - This is only safe during GC when the object is unreachable + pub unsafe fn gc_clear_raw(ptr: *mut PyObject) -> Vec<PyObjectRef> { + let mut result = Vec::new(); + let obj = unsafe { &*ptr }; + + // 1. Clear payload-specific references (vtable.clear / tp_clear) + if let Some(clear_fn) = obj.0.vtable.clear { + unsafe { clear_fn(ptr, &mut result) }; + } + + // 2. Clear member slots (subtype_clear) + for slot in obj.0.slots.iter() { + if let Some(val) = slot.write().take() { + result.push(val); + } + } + + result + } + + /// Clear this object for cycle breaking (tp_clear). + /// This version takes &self but should only be called during GC + /// when exclusive access is guaranteed.
+ /// + /// # Safety + /// - The caller must guarantee exclusive access (no other references exist) + /// - This is only safe during GC when the object is unreachable + pub unsafe fn gc_clear(&self) -> Vec<PyObjectRef> { + // SAFETY: During GC collection, this object is unreachable (gc_refs == 0), + // meaning no other code has a reference to it. The only references are + // internal cycle references which we're about to break. + unsafe { Self::gc_clear_raw(self as *const _ as *mut PyObject) } + } + + /// Check if this object has clear capability (tp_clear) + // Py_TPFLAGS_HAVE_GC types have tp_clear + pub fn gc_has_clear(&self) -> bool { + self.0.vtable.clear.is_some() || self.0.dict.is_some() || !self.0.slots.is_empty() + } } impl Borrow<PyObject> for PyObjectRef {