From d2269157ef2dd30d440234cda786d4b88c1026ad Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Mon, 27 Jan 2025 16:12:17 -0800 Subject: [PATCH 1/2] feature gated gc Signed-off-by: Ashwin Naren --- Cargo.toml | 1 + vm/Cargo.toml | 1 + vm/src/object/gc/mod.rs | 145 ++++++++++++++++++++++++++++++++++++++++ vm/src/object/mod.rs | 1 + 4 files changed, 148 insertions(+) create mode 100644 vm/src/object/gc/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 33fb96e84c4..0be338d5d4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ bz2 = ["stdlib", "rustpython-stdlib/bz2"] sqlite = ["rustpython-stdlib/sqlite"] ssl = ["rustpython-stdlib/ssl"] ssl-vendor = ["ssl", "rustpython-stdlib/ssl-vendor"] +gc = ["rustpython-vm/gc"] [dependencies] rustpython-compiler = { workspace = true } diff --git a/vm/Cargo.toml b/vm/Cargo.toml index acc645bb74a..a4fc0acaf7a 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -24,6 +24,7 @@ codegen = ["rustpython-codegen", "ast"] parser = ["rustpython-parser", "ast"] serde = ["dep:serde"] wasmbind = ["chrono/wasmbind", "getrandom/wasm_js", "wasm-bindgen"] +gc = [] [dependencies] rustpython-compiler = { workspace = true, optional = true } diff --git a/vm/src/object/gc/mod.rs b/vm/src/object/gc/mod.rs new file mode 100644 index 00000000000..027b6e3837f --- /dev/null +++ b/vm/src/object/gc/mod.rs @@ -0,0 +1,145 @@ + + +#[derive(Debug, Default)] +pub struct GcResult { + acyclic_cnt: usize, + cyclic_cnt: usize, +} + +impl GcResult { + fn new(tuple: (usize, usize)) -> Self { + Self { + acyclic_cnt: tuple.0, + cyclic_cnt: tuple.1, + } + } +} + +impl From<(usize, usize)> for GcResult { + fn from(t: (usize, usize)) -> Self { + Self::new(t) + } +} + +impl From for (usize, usize) { + fn from(g: GcResult) -> Self { + (g.acyclic_cnt, g.cyclic_cnt) + } +} + +impl From for usize { + fn from(g: GcResult) -> Self { + g.acyclic_cnt + g.cyclic_cnt + } +} + +#[derive(PartialEq, Eq)] +pub enum GcStatus { + /// should be drop by caller + ShouldDrop, + /// because object is part of a garbage cycle, we don't want double dealloc + /// or use after drop, so run `__del__` only. Drop(destructor)&dealloc is handle by gc + GarbageCycle, + /// already buffered, will be dealloc by collector, caller should call [`PyObject::del_Drop`] to run destructor only but not dealloc memory region + BufferedDrop, + /// should keep and not drop by caller + ShouldKeep, + /// Do Nothing, perhaps because it is RAII's deeds + DoNothing, +} + +impl GcStatus { + /// if ref cnt already dropped to zero, then can drop + pub fn can_drop(&self) -> bool { + let stat = self; + *stat == GcStatus::ShouldDrop + || *stat == GcStatus::BufferedDrop + || *stat == GcStatus::GarbageCycle + } +} + +pub fn collect() -> GcResult { + #[cfg(feature = "gc")] + { + #[cfg(feature = "threading")] + { + GLOBAL_COLLECTOR.force_gc() + } + #[cfg(not(feature = "threading"))] + { + GLOBAL_COLLECTOR.with(|v| v.force_gc()) + } + } + #[cfg(not(feature = "gc"))] + { + Default::default() + } +} + +pub fn try_gc() -> GcResult { + #[cfg(feature = "gc")] + { + #[cfg(feature = "threading")] + { + GLOBAL_COLLECTOR.fast_try_gc() + } + #[cfg(not(feature = "threading"))] + { + GLOBAL_COLLECTOR.with(|v| v.fast_try_gc()) + } + } + #[cfg(not(feature = "gc"))] + { + Default::default() + } +} + +pub fn isenabled() -> bool { + #[cfg(feature = "gc")] + { + #[cfg(feature = "threading")] + { + GLOBAL_COLLECTOR.is_enabled() + } + #[cfg(not(feature = "threading"))] + { + GLOBAL_COLLECTOR.with(|v| v.is_enabled()) + } + } + #[cfg(not(feature = "gc"))] + { + false + } +} + +pub fn enable() { + #[cfg(feature = "gc")] + { + #[cfg(feature = "threading")] + { + GLOBAL_COLLECTOR.enable() + } + #[cfg(not(feature = "threading"))] + { + GLOBAL_COLLECTOR.with(|v| v.enable()) + } + } + #[cfg(not(feature = "gc"))] + return; +} + +pub fn disable() { + #[cfg(feature = "gc")] + { + #[cfg(feature = "threading")] + { + GLOBAL_COLLECTOR.disable() + } + #[cfg(not(feature = "threading"))] + { + GLOBAL_COLLECTOR.with(|v| v.disable()) + } + } + #[cfg(not(feature = "gc"))] + return; +} diff --git a/vm/src/object/mod.rs b/vm/src/object/mod.rs index 034523afe5a..02e99183a2a 100644 --- a/vm/src/object/mod.rs +++ b/vm/src/object/mod.rs @@ -3,6 +3,7 @@ mod ext; mod payload; mod traverse; mod traverse_object; +mod gc; pub use self::core::*; pub use self::ext::*; From 0e3475fa7fc9f8d01a050bb3e6096e616b1dd982 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 27 Feb 2025 10:08:17 -0800 Subject: [PATCH 2/2] attem Signed-off-by: Ashwin Naren --- vm/src/frame.rs | 11 ++ vm/src/object/gc/mod.rs | 227 +++++++++++++++++++--------------------- 2 files changed, 119 insertions(+), 119 deletions(-) diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 7cbe25909b6..16b38c1cf9f 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -351,7 +351,18 @@ impl ExecutingFrame<'_> { // Execute until return or exception: let instrs = &self.code.instructions; let mut arg_state = bytecode::OpArgState::default(); + #[allow(unused_variables)] + #[allow(unused_mut)] + let mut gc_count = 0; loop { + #[cfg(feature = "gc")] + { + gc_count += 1; + if gc_count > 1000 { + crate::object::gc::try_gc(); + gc_count = 0; + } + } let idx = self.lasti() as usize; // eprintln!( // "location: {:?} {}", diff --git a/vm/src/object/gc/mod.rs b/vm/src/object/gc/mod.rs index 027b6e3837f..ca21bde4654 100644 --- a/vm/src/object/gc/mod.rs +++ b/vm/src/object/gc/mod.rs @@ -1,145 +1,134 @@ +use std::collections::HashSet; +use rustpython_common::lock::PyMutex; +use rustpython_common::rc::PyRc; +use crate::object::core::{Erased, PyInner}; +use crate::object::Traverse; +use crate::PyObject; - -#[derive(Debug, Default)] -pub struct GcResult { - acyclic_cnt: usize, - cyclic_cnt: usize, +/// A very basic tracing, stop-the-world garbage collector. +/// +/// It maintains a list of allocated objects and, when triggered, +/// stops the world, marks all objects reachable from a set of root objects, +/// and sweeps away the rest. +pub struct GarbageCollector { + /// All objects allocated on the GC heap. + heap: Vec<*mut PyObject>, + /// Set of objects reached during the mark phase. + marked: HashSet<*mut PyObject>, } -impl GcResult { - fn new(tuple: (usize, usize)) -> Self { - Self { - acyclic_cnt: tuple.0, - cyclic_cnt: tuple.1, +impl GarbageCollector { + /// Create a new GC instance. + pub fn new() -> Self { + GarbageCollector { + heap: Vec::new(), + marked: HashSet::new(), } } -} - -impl From<(usize, usize)> for GcResult { - fn from(t: (usize, usize)) -> Self { - Self::new(t) - } -} -impl From for (usize, usize) { - fn from(g: GcResult) -> Self { - (g.acyclic_cnt, g.cyclic_cnt) + /// Register a newly allocated object with the GC. + /// + /// # Safety + /// + /// The caller must ensure that `obj` is a valid pointer to a PyObject. + pub unsafe fn add_object(&mut self, obj: *mut PyObject) { + self.heap.push(obj); } -} -impl From for usize { - fn from(g: GcResult) -> Self { - g.acyclic_cnt + g.cyclic_cnt + /// The mark phase: starting from the roots, mark all reachable objects. + /// + /// The `roots` slice should contain pointers to all the root objects. + pub unsafe fn mark(&mut self, roots: &[*mut PyObject]) { + for &root in roots { + unsafe { + self.mark_object(root); + } + } } -} -#[derive(PartialEq, Eq)] -pub enum GcStatus { - /// should be drop by caller - ShouldDrop, - /// because object is part of a garbage cycle, we don't want double dealloc - /// or use after drop, so run `__del__` only. Drop(destructor)&dealloc is handle by gc - GarbageCycle, - /// already buffered, will be dealloc by collector, caller should call [`PyObject::del_Drop`] to run destructor only but not dealloc memory region - BufferedDrop, - /// should keep and not drop by caller - ShouldKeep, - /// Do Nothing, perhaps because it is RAII's deeds - DoNothing, -} + /// Recursively mark an object and its children. + /// + /// If the object is null or already marked, the function returns immediately. + unsafe fn mark_object(&mut self, obj: *mut PyObject) { + if obj.is_null() || self.marked.contains(&obj) { + return; + } + self.marked.insert(obj); -impl GcStatus { - /// if ref cnt already dropped to zero, then can drop - pub fn can_drop(&self) -> bool { - let stat = self; - *stat == GcStatus::ShouldDrop - || *stat == GcStatus::BufferedDrop - || *stat == GcStatus::GarbageCycle - } -} + // Define a tracer callback that recursively marks child objects. + // We assume that `traverse` is implemented to call this callback + // on each child. + let mut tracer = |child: &PyObject| { + // Safety: We assume that rust borrow checking rules are not violated. + let child = child as *const PyObject as *mut PyObject; + unsafe { + self.mark_object(child); + } + }; -pub fn collect() -> GcResult { - #[cfg(feature = "gc")] - { - #[cfg(feature = "threading")] - { - GLOBAL_COLLECTOR.force_gc() - } - #[cfg(not(feature = "threading"))] - { - GLOBAL_COLLECTOR.with(|v| v.force_gc()) + // Traverse the object’s children. + // Safety: We assume that `obj` is a valid pointer with a properly implemented traverse. + unsafe { + (*obj).traverse(&mut tracer); } } - #[cfg(not(feature = "gc"))] - { - Default::default() + + /// The sweep phase: deallocate any object not marked as reachable. + /// + /// Unmarked objects are freed using `drop_dealloc_obj`. After sweeping, + /// the `marked` set is cleared for the next GC cycle. + pub unsafe fn sweep(&mut self) { + self.heap.retain(|&obj| { + if self.marked.contains(&obj) { + // Object is reachable; keep it. + true + } else { + // Object is unreachable; deallocate it. + unsafe { + drop(unsafe { Box::from_raw(obj as *mut PyInner) }); + } + false + } + }); + self.marked.clear(); } -} -pub fn try_gc() -> GcResult { - #[cfg(feature = "gc")] - { - #[cfg(feature = "threading")] - { - GLOBAL_COLLECTOR.fast_try_gc() + /// Perform a full garbage collection cycle. + /// + /// This stops the world, marks all objects reachable from `roots`, + /// and then sweeps away the unmarked objects. + /// + /// # Safety + /// + /// The caller must ensure that no new allocations or mutations occur during GC. + pub unsafe fn collect_garbage(&mut self) { + unsafe { + // TODO: Collect roots. + self.mark(roots); + self.sweep(); } - #[cfg(not(feature = "threading"))] - { - GLOBAL_COLLECTOR.with(|v| v.fast_try_gc()) - } - } - #[cfg(not(feature = "gc"))] - { - Default::default() } } -pub fn isenabled() -> bool { - #[cfg(feature = "gc")] - { - #[cfg(feature = "threading")] - { - GLOBAL_COLLECTOR.is_enabled() - } - #[cfg(not(feature = "threading"))] - { - GLOBAL_COLLECTOR.with(|v| v.is_enabled()) - } - } - #[cfg(not(feature = "gc"))] - { - false +impl Default for GarbageCollector { + fn default() -> Self { + Self::new() } } -pub fn enable() { - #[cfg(feature = "gc")] - { - #[cfg(feature = "threading")] - { - GLOBAL_COLLECTOR.enable() - } - #[cfg(not(feature = "threading"))] - { - GLOBAL_COLLECTOR.with(|v| v.enable()) - } - } - #[cfg(not(feature = "gc"))] - return; +#[cfg(feature = "threading")] +pub static GLOBAL_COLLECTOR: once_cell::sync::Lazy>> = + once_cell::sync::Lazy::new(|| PyRc::new(PyMutex::new(Default::default()))); + +#[cfg(not(feature = "threading"))] +thread_local! { + pub static GLOBAL_COLLECTOR: PyRc> = PyRc::new(PyMutex::new(Default::default())); } -pub fn disable() { - #[cfg(feature = "gc")] - { - #[cfg(feature = "threading")] - { - GLOBAL_COLLECTOR.disable() - } - #[cfg(not(feature = "threading"))] - { - GLOBAL_COLLECTOR.with(|v| v.disable()) - } - } - #[cfg(not(feature = "gc"))] - return; +pub unsafe fn register_object(obj: *mut PyObject) { + GLOBAL_COLLECTOR.lock().add_object(obj); +} + +pub unsafe fn try_gc() { + GLOBAL_COLLECTOR.lock().collect_garbage(); }