From e36962b92ad8b09de306e1684e413d837f5067a9 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 2 Mar 2026 11:23:43 +0900 Subject: [PATCH] Lazy locals dict for NEWLOCALS frames Defer locals dict allocation for function frames until first access. Most function frames only use fastlocals and never touch the locals dict, so this skips one dict heap allocation per function call. Also remove a redundant code.clone() in invoke_with_locals. Func call ~23% faster, method call ~15% faster in benchmarks. --- benches/microbenchmarks.rs | 2 + crates/vm/src/builtins/function.rs | 21 ++-- crates/vm/src/frame.rs | 150 ++++++++++++++++++++++------- crates/vm/src/scope.rs | 5 +- crates/vm/src/vm/mod.rs | 5 +- crates/wasm/src/vm_class.rs | 12 ++- 6 files changed, 142 insertions(+), 53 deletions(-) diff --git a/benches/microbenchmarks.rs b/benches/microbenchmarks.rs index ba5dcd6c2ec..3fef2f8dd7e 100644 --- a/benches/microbenchmarks.rs +++ b/benches/microbenchmarks.rs @@ -134,6 +134,8 @@ fn bench_rustpython_code(group: &mut BenchmarkGroup, bench: &MicroBenc if let Some(idx) = iterations { scope .locals + .as_ref() + .expect("new_scope_with_builtins always provides locals") .as_object() .set_item("ITERATIONS", vm.new_pyobj(idx), vm) .expect("Error adding ITERATIONS local variable"); diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 489482d1933..bb45b5766b9 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -548,17 +548,20 @@ impl Py { let code = self.code.lock().clone(); let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) { - ArgMapping::from_dict_exact(vm.ctx.new_dict()) + None } else if let Some(locals) = locals { - locals + Some(locals) } else { - ArgMapping::from_dict_exact(self.globals.clone()) + Some(ArgMapping::from_dict_exact(self.globals.clone())) }; + let is_gen = code.flags.contains(bytecode::CodeFlags::GENERATOR); + let is_coro = code.flags.contains(bytecode::CodeFlags::COROUTINE); + // Construct frame: let frame = Frame::new( - code.clone(), - Scope::new(Some(locals), self.globals.clone()), + code, + Scope::new(locals, self.globals.clone()), self.builtins.clone(), self.closure.as_ref().map_or(&[], |c| c.as_slice()), Some(self.to_owned().into()), @@ -567,10 +570,6 @@ impl Py { .into_ref(&vm.ctx); self.fill_locals_from_args(&frame, func_args, vm)?; - - // If we have a generator, create a new generator - let is_gen = code.flags.contains(bytecode::CodeFlags::GENERATOR); - let is_coro = code.flags.contains(bytecode::CodeFlags::COROUTINE); match (is_gen, is_coro) { (true, false) => { let obj = PyGenerator::new(frame.clone(), self.__name__(), self.__qualname__()) @@ -629,11 +628,9 @@ impl Py { pub fn invoke_exact_args(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult { let code = self.code.lock().clone(); - let locals = ArgMapping::from_dict_exact(vm.ctx.new_dict()); - let frame = Frame::new( code.clone(), - Scope::new(Some(locals), self.globals.clone()), + Scope::new(None, self.globals.clone()), self.builtins.clone(), self.closure.as_ref().map_or(&[], |c| c.as_slice()), Some(self.to_owned().into()), diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 08ce117fd48..7be72b4ea0a 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -16,7 +16,7 @@ use crate::{ bytecode::{ self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, - convert::{IntoObject, ToPyResult}, + convert::ToPyResult, coroutine::Coro, exceptions::ExceptionCtor, function::{ArgMapping, Either, FuncArgs}, @@ -41,7 +41,7 @@ use malachite_bigint::BigInt; use rustpython_common::atomic::{PyAtomic, Radium}; use rustpython_common::{ boxvec::BoxVec, - lock::PyMutex, + lock::{OnceCell, PyMutex}, wtf8::{Wtf8, Wtf8Buf, wtf8_concat}, }; use rustpython_compiler_core::SourceLocation; @@ -148,13 +148,93 @@ unsafe impl Traverse for FastLocals { } } +/// Lazy locals dict for frames. For NEWLOCALS frames, the dict is +/// only allocated on first access (most function frames never need it). +pub struct FrameLocals { + inner: OnceCell, +} + +impl FrameLocals { + /// Create with an already-initialized locals mapping (non-NEWLOCALS frames). + fn with_locals(locals: ArgMapping) -> Self { + let cell = OnceCell::new(); + let _ = cell.set(locals); + Self { inner: cell } + } + + /// Create an empty lazy locals (for NEWLOCALS frames). + /// The dict will be created on first access. + fn lazy() -> Self { + Self { + inner: OnceCell::new(), + } + } + + /// Get the locals mapping, creating it lazily if needed. + #[inline] + pub fn get_or_create(&self, vm: &VirtualMachine) -> &ArgMapping { + self.inner + .get_or_init(|| ArgMapping::from_dict_exact(vm.ctx.new_dict())) + } + + /// Get the locals mapping if already created. + #[inline] + pub fn get(&self) -> Option<&ArgMapping> { + self.inner.get() + } + + #[inline] + pub fn mapping(&self, vm: &VirtualMachine) -> crate::protocol::PyMapping<'_> { + self.get_or_create(vm).mapping() + } + + #[inline] + pub fn clone_mapping(&self, vm: &VirtualMachine) -> ArgMapping { + self.get_or_create(vm).clone() + } + + pub fn into_object(&self, vm: &VirtualMachine) -> PyObjectRef { + self.clone_mapping(vm).into() + } + + pub fn as_object(&self, vm: &VirtualMachine) -> &PyObject { + self.get_or_create(vm).obj() + } +} + +impl fmt::Debug for FrameLocals { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FrameLocals") + .field("initialized", &self.inner.get().is_some()) + .finish() + } +} + +impl Clone for FrameLocals { + fn clone(&self) -> Self { + let cell = OnceCell::new(); + if let Some(locals) = self.inner.get() { + let _ = cell.set(locals.clone()); + } + Self { inner: cell } + } +} + +unsafe impl Traverse for FrameLocals { + fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { + if let Some(locals) = self.inner.get() { + locals.traverse(tracer_fn); + } + } +} + #[pyclass(module = false, name = "frame", traverse = "manual")] pub struct Frame { pub code: PyRef, pub func_obj: Option, pub fastlocals: FastLocals, - pub locals: ArgMapping, + pub locals: FrameLocals, pub globals: PyDictRef, pub builtins: PyObjectRef, @@ -265,7 +345,13 @@ impl Frame { Self { fastlocals: FastLocals::new(fastlocals_vec.into_boxed_slice()), - locals: scope.locals, + locals: match scope.locals { + Some(locals) => FrameLocals::with_locals(locals), + None if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) => FrameLocals::lazy(), + None => { + FrameLocals::with_locals(ArgMapping::from_dict_exact(scope.globals.clone())) + } + }, globals: scope.globals, builtins, code, @@ -378,11 +464,12 @@ impl Frame { let code = &**self.code; // SAFETY: Called before generator resume; no concurrent access. let fastlocals = unsafe { self.fastlocals.borrow_mut() }; + let locals_map = self.locals.mapping(vm); for (i, &varname) in code.varnames.iter().enumerate() { if i >= fastlocals.len() { break; } - match self.locals.mapping().subscript(varname, vm) { + match locals_map.subscript(varname, vm) { Ok(value) => fastlocals[i] = Some(value), Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {} Err(e) => return Err(e), @@ -401,12 +488,13 @@ impl Frame { let code = &**self.code; let map = &code.varnames; let j = core::cmp::min(map.len(), code.varnames.len()); + let locals_map = locals.mapping(vm); if !code.varnames.is_empty() { // SAFETY: Either _guard holds the state mutex (frame not executing), // or we're in a trace callback on the same thread that holds it. let fastlocals = unsafe { self.fastlocals.borrow() }; for (&k, v) in zip(&map[..j], fastlocals) { - match locals.mapping().ass_subscript(k, v.clone(), vm) { + match locals_map.ass_subscript(k, v.clone(), vm) { Ok(()) => {} Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {} Err(e) => return Err(e), @@ -416,7 +504,7 @@ impl Frame { if !code.cellvars.is_empty() || !code.freevars.is_empty() { for (i, &k) in code.cellvars.iter().enumerate() { let cell_value = self.get_cell_contents(i); - match locals.mapping().ass_subscript(k, cell_value, vm) { + match locals_map.ass_subscript(k, cell_value, vm) { Ok(()) => {} Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {} Err(e) => return Err(e), @@ -425,7 +513,7 @@ impl Frame { if code.flags.contains(bytecode::CodeFlags::OPTIMIZED) { for (i, &k) in code.freevars.iter().enumerate() { let cell_value = self.get_cell_contents(code.cellvars.len() + i); - match locals.mapping().ass_subscript(k, cell_value, vm) { + match locals_map.ass_subscript(k, cell_value, vm) { Ok(()) => {} Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {} Err(e) => return Err(e), @@ -433,7 +521,7 @@ impl Frame { } } } - Ok(locals.clone()) + Ok(locals.clone_mapping(vm)) } } @@ -534,7 +622,7 @@ impl Py { struct ExecutingFrame<'a> { code: &'a PyRef, fastlocals: &'a FastLocals, - locals: &'a ArgMapping, + locals: &'a FrameLocals, globals: &'a PyDictRef, builtins: &'a PyObjectRef, /// Cached downcast of builtins to PyDict for fast LOAD_GLOBAL. @@ -1377,7 +1465,7 @@ impl ExecutingFrame<'_> { } Instruction::DeleteName(idx) => { let name = self.code.names[idx.get(arg) as usize]; - let res = self.locals.mapping().ass_subscript(name, None, vm); + let res = self.locals.mapping(vm).ass_subscript(name, None, vm); match res { Ok(()) => {} @@ -1750,7 +1838,7 @@ impl ExecutingFrame<'_> { } Instruction::LoadLocals => { // Push the locals dict onto the stack - let locals = self.locals.clone().into_object(); + let locals = self.locals.into_object(vm); self.push_value(locals); Ok(None) } @@ -1977,7 +2065,7 @@ impl ExecutingFrame<'_> { } Instruction::LoadName(idx) => { let name = self.code.names[idx.get(arg) as usize]; - let result = self.locals.mapping().subscript(name, vm); + let result = self.locals.mapping(vm).subscript(name, vm); match result { Ok(x) => self.push_value(x), Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => { @@ -2483,7 +2571,9 @@ impl ExecutingFrame<'_> { Instruction::StoreName(idx) => { let name = self.code.names[idx.get(arg) as usize]; let value = self.pop_value(); - self.locals.mapping().ass_subscript(name, Some(value), vm)?; + self.locals + .mapping(vm) + .ass_subscript(name, Some(value), vm)?; Ok(None) } Instruction::StoreSlice => { @@ -3535,20 +3625,19 @@ impl ExecutingFrame<'_> { }) }; + let locals_map = self.locals.mapping(vm); if let Ok(all) = dict.get_item(identifier!(vm, __all__), vm) { let items: Vec = all.try_to_value(vm)?; for item in items { let name = require_str(item, "__all__")?; let value = module.get_attr(&*name, vm)?; - self.locals - .mapping() - .ass_subscript(&name, Some(value), vm)?; + locals_map.ass_subscript(&name, Some(value), vm)?; } } else { for (k, v) in dict { let k = require_str(k, "__dict__")?; if !k.as_bytes().starts_with(b"_") { - self.locals.mapping().ass_subscript(&k, Some(v), vm)?; + locals_map.ass_subscript(&k, Some(v), vm)?; } } } @@ -4240,23 +4329,15 @@ impl ExecutingFrame<'_> { #[cold] fn setup_annotations(&mut self, vm: &VirtualMachine) -> FrameResult { let __annotations__ = identifier!(vm, __annotations__); + let locals_obj = self.locals.as_object(vm); // Try using locals as dict first, if not, fallback to generic method. - let has_annotations = match self - .locals - .clone() - .into_object() - .downcast_exact::(vm) - { - Ok(d) => d.contains_key(__annotations__, vm), - Err(o) => { - let needle = __annotations__.as_object(); - self._in(vm, needle, &o)? - } + let has_annotations = if let Some(d) = locals_obj.downcast_ref_if_exact::(vm) { + d.contains_key(__annotations__, vm) + } else { + self._in(vm, __annotations__.as_object(), locals_obj)? }; if !has_annotations { - self.locals - .as_object() - .set_item(__annotations__, vm.ctx.new_dict().into(), vm)?; + locals_obj.set_item(__annotations__, vm.ctx.new_dict().into(), vm)?; } Ok(None) } @@ -5076,12 +5157,11 @@ impl fmt::Debug for Frame { s }); // TODO: fix this up - let locals = self.locals.clone(); write!( f, - "Frame Object {{ \n Stack:{}\n Locals:{:?}\n}}", + "Frame Object {{ \n Stack:{}\n Locals initialized:{}\n}}", stack_str, - locals.into_object() + self.locals.get().is_some() ) } } diff --git a/crates/vm/src/scope.rs b/crates/vm/src/scope.rs index 74392dc9b73..5c6d92a8deb 100644 --- a/crates/vm/src/scope.rs +++ b/crates/vm/src/scope.rs @@ -3,7 +3,7 @@ use alloc::fmt; #[derive(Clone)] pub struct Scope { - pub locals: ArgMapping, + pub locals: Option, pub globals: PyDictRef, } @@ -17,7 +17,6 @@ impl fmt::Debug for Scope { impl Scope { #[inline] pub fn new(locals: Option, globals: PyDictRef) -> Self { - let locals = locals.unwrap_or_else(|| ArgMapping::from_dict_exact(globals.clone())); Self { locals, globals } } @@ -31,6 +30,8 @@ impl Scope { .set_item("__builtins__", vm.builtins.clone().into(), vm) .unwrap(); } + // For module-level code, locals defaults to globals + let locals = locals.or_else(|| Some(ArgMapping::from_dict_exact(globals.clone()))); Self::new(locals, globals) } diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index d5425eb603c..d4e7f5b0536 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -1271,7 +1271,10 @@ impl VirtualMachine { .map_err(|_| self.new_import_error("__import__ not found", module.to_owned()))?; let (locals, globals) = if let Some(frame) = self.current_frame() { - (Some(frame.locals.clone()), Some(frame.globals.clone())) + ( + Some(frame.locals.clone_mapping(self)), + Some(frame.globals.clone()), + ) } else { (None, None) }; diff --git a/crates/wasm/src/vm_class.rs b/crates/wasm/src/vm_class.rs index 4ad347a19e1..0657b0bc08e 100644 --- a/crates/wasm/src/vm_class.rs +++ b/crates/wasm/src/vm_class.rs @@ -8,7 +8,7 @@ use core::cell::RefCell; use js_sys::{Object, TypeError}; use rustpython_vm::{ Interpreter, PyObjectRef, PyRef, PyResult, Settings, VirtualMachine, builtins::PyWeak, - compiler::Mode, scope::Scope, + compiler::Mode, function::ArgMapping, scope::Scope, }; use std::collections::HashMap; use wasm_bindgen::prelude::*; @@ -275,8 +275,14 @@ impl WASMVirtualMachine { } } - vm.run_code_obj(code, Scope::new(None, attrs.clone())) - .into_js(vm)?; + vm.run_code_obj( + code, + Scope::new( + Some(ArgMapping::from_dict_exact(attrs.clone())), + attrs.clone(), + ), + ) + .into_js(vm)?; let module = vm.new_module(&name, attrs, None);