From dbd7210deb8397a6e35b63d5438c4694a0f6b878 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 1 Mar 2026 10:34:39 +0900 Subject: [PATCH] Implement trace/profile events and opcode tracing for generators - Add TraceEvent::Exception and Opcode variants with profile filtering - Extract dispatch_traced_frame helper for Call/Return trace events - Fire exception trace on new raises, SEND StopIteration, FOR_ITER StopIteration - Fire opcode trace events gated by f_trace_opcodes - Move prev_line to FrameState for persistence across generator suspend/resume - Reset prev_line in gen_throw for correct LINE monitoring after yield - Add per-code event mask (events_for_code) to prevent unrelated code instrumentation - Remove expectedFailure markers from test_bdb (5) and test_sys_setprofile (14) --- .cspell.dict/cpython.txt | 1 + Lib/test/test_bdb.py | 5 -- Lib/test/test_sys_setprofile.py | 14 ---- crates/vm/src/frame.rs | 89 ++++++++++++++++++++++---- crates/vm/src/protocol/callable.rs | 31 ++++++++- crates/vm/src/stdlib/sys/monitoring.rs | 20 +++++- crates/vm/src/vm/mod.rs | 86 ++++++++++++------------- 7 files changed, 167 insertions(+), 79 deletions(-) diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index c00cfb3b7e5..f48bb332334 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -28,6 +28,7 @@ carg cellarg cellvar cellvars +ceval cfield CLASSDEREF classdict diff --git a/Lib/test/test_bdb.py b/Lib/test/test_bdb.py index f1077d91fdd..f15dae13eb3 100644 --- a/Lib/test/test_bdb.py +++ b/Lib/test/test_bdb.py @@ -614,7 +614,6 @@ def test_step_next_on_last_statement(self): with TracerRun(self) as tracer: tracer.runcall(tfunc_main) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: All paired tuples have not been processed, the last one was number 1 [('next',), ('quit',)] def test_stepinstr(self): self.expect_set = [ ('line', 2, 'tfunc_main'), ('stepinstr', ), @@ -1084,7 +1083,6 @@ def func(): with TracerRun(self) as tracer: tracer.runcall(tfunc_import) - @unittest.expectedFailure # TODO: RUSTPYTHON; Error in atexit._run_exitfuncs def test_next_until_return_in_generator(self): # Issue #16596. # Check that set_next(), set_until() and set_return() do not treat the @@ -1126,7 +1124,6 @@ def main(): with TracerRun(self) as tracer: tracer.runcall(tfunc_import) - @unittest.expectedFailure # TODO: RUSTPYTHON; Error in atexit._run_exitfuncs def test_next_command_in_generator_for_loop(self): # Issue #16596. code = """ @@ -1158,7 +1155,6 @@ def main(): with TracerRun(self) as tracer: tracer.runcall(tfunc_import) - @unittest.expectedFailure # TODO: RUSTPYTHON; Error in atexit._run_exitfuncs def test_next_command_in_generator_with_subiterator(self): # Issue #16596. code = """ @@ -1190,7 +1186,6 @@ def main(): with TracerRun(self) as tracer: tracer.runcall(tfunc_import) - @unittest.expectedFailure # TODO: RUSTPYTHON; Error in atexit._run_exitfuncs def test_return_command_in_generator_with_subiterator(self): # Issue #16596. code = """ diff --git a/Lib/test/test_sys_setprofile.py b/Lib/test/test_sys_setprofile.py index d23f5bf6d9b..21a09b51926 100644 --- a/Lib/test/test_sys_setprofile.py +++ b/Lib/test/test_sys_setprofile.py @@ -108,7 +108,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (112, 'f'))] def test_exception(self): def f(p): 1/0 @@ -135,7 +134,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (138, 'f'))] def test_nested_exception(self): def f(p): 1/0 @@ -147,7 +145,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; (1, 'return', (151, 'g'))] def test_exception_in_except_clause(self): def f(p): 1/0 @@ -167,7 +164,6 @@ def g(p): (1, 'return', g_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; (1, 'falling through', (170, 'g'))] def test_exception_propagation(self): def f(p): 1/0 @@ -183,7 +179,6 @@ def g(p): (1, 'return', g_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (183, 'f'))] def test_raise_twice(self): def f(p): try: 1/0 @@ -193,7 +188,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (192, 'f'))] def test_raise_reraise(self): def f(p): try: 1/0 @@ -203,7 +197,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (201, 'f'))] def test_raise(self): def f(p): raise Exception() @@ -212,7 +205,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; (5, 'call', (209, 'f'))] def test_distant_exception(self): def f(): 1/0 @@ -297,7 +289,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (293, 'f'))] def test_basic_exception(self): def f(p): 1/0 @@ -315,7 +306,6 @@ def f(p): (1, 'return', f_ident), ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; (5, 'call', (310, 'f'))] def test_distant_exception(self): def f(): 1/0 @@ -354,7 +344,6 @@ def f(p): (1, 'return', f_ident)]) # Test an invalid call (bpo-34126) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (348, 'f'))] def test_unbound_method_no_args(self): def f(p): dict.get() @@ -363,7 +352,6 @@ def f(p): (1, 'return', f_ident)]) # Test an invalid call (bpo-34126) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (356, 'f'))] def test_unbound_method_invalid_args(self): def f(p): dict.get(print, 42) @@ -372,7 +360,6 @@ def f(p): (1, 'return', f_ident)]) # Test an invalid call (bpo-34125) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (365, 'f'))] def test_unbound_method_no_keyword_args(self): kwargs = {} def f(p): @@ -382,7 +369,6 @@ def f(p): (1, 'return', f_ident)]) # Test an invalid call (bpo-34125) - @unittest.expectedFailure # TODO: RUSTPYTHON; [(1, 'call', (374, 'f'))] def test_unbound_method_invalid_keyword_args(self): kwargs = {} def f(p): diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 5224a54ed54..f7ca44a22d8 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -62,6 +62,10 @@ struct FrameState { stack: BoxVec>, /// Cell and free variable references (cellvars + freevars). cells_frees: Box<[PyCellRef]>, + /// Previous line number for LINE event suppression. + /// Stored here (not on ExecutingFrame) so it persists across + /// generator/coroutine suspend and resume. + prev_line: u32, } /// Tracks who owns a frame. @@ -200,6 +204,7 @@ impl Frame { let state = FrameState { stack: BoxVec::new(code.max_stackdepth as usize), cells_frees, + prev_line: 0, }; Self { @@ -383,7 +388,6 @@ impl Py { object: self, state: &mut state, monitoring_mask: 0, - prev_line: 0, }; f(exec) } @@ -430,7 +434,6 @@ impl Py { object: self, state: &mut state, monitoring_mask: 0, - prev_line: 0, }; exec.yield_from_target().map(PyObject::to_owned) } @@ -467,8 +470,6 @@ struct ExecutingFrame<'a> { state: &'a mut FrameState, /// Cached monitoring events mask. Reloaded at Resume instruction only, monitoring_mask: u32, - /// Previous line number for LINE event suppression. - prev_line: u32, } impl fmt::Debug for ExecutingFrame<'_> { @@ -518,6 +519,23 @@ impl ExecutingFrame<'_> { } } + /// Fire 'exception' trace event (sys.settrace) with (type, value, traceback) tuple. + /// Matches `_PyEval_MonitorRaise` → `PY_MONITORING_EVENT_RAISE` → + /// `sys_trace_exception_func` in legacy_tracing.c. + fn fire_exception_trace(&self, exc: &PyBaseExceptionRef, vm: &VirtualMachine) -> PyResult<()> { + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let exc_type: PyObjectRef = exc.class().to_owned().into(); + let exc_value: PyObjectRef = exc.clone().into(); + let exc_tb: PyObjectRef = exc + .__traceback__() + .map(|tb| -> PyObjectRef { tb.into() }) + .unwrap_or_else(|| vm.ctx.none()); + let tuple = vm.ctx.new_tuple(vec![exc_type, exc_value, exc_tb]).into(); + vm.trace_event(crate::protocol::TraceEvent::Exception, Some(tuple))?; + } + Ok(()) + } + fn run(&mut self, vm: &VirtualMachine) -> PyResult { flame_guard!(format!( "Frame::run({obj_name})", @@ -545,9 +563,9 @@ impl ExecutingFrame<'_> { Some(Instruction::Resume { .. } | Instruction::InstrumentedResume) ) && let Some((loc, _)) = self.code.locations.get(idx) - && loc.line.get() as u32 != self.prev_line + && loc.line.get() as u32 != self.state.prev_line { - self.prev_line = loc.line.get() as u32; + self.state.prev_line = loc.line.get() as u32; vm.trace_event(crate::protocol::TraceEvent::Line, None)?; // Trace callback may have changed lasti via set_f_lineno. // Re-read and restart the loop from the new position. @@ -575,7 +593,23 @@ impl ExecutingFrame<'_> { | Instruction::InstrumentedLine ) && let Some((loc, _)) = self.code.locations.get(idx) { - self.prev_line = loc.line.get() as u32; + self.state.prev_line = loc.line.get() as u32; + } + + // Fire 'opcode' trace event for sys.settrace when f_trace_opcodes + // is set. Skip RESUME and ExtendedArg (matching CPython's exclusion + // of these in _Py_call_instrumentation_instruction). + if vm.use_tracing.get() + && !vm.is_none(&self.object.trace.lock()) + && *self.object.trace_opcodes.lock() + && !matches!( + op, + Instruction::Resume { .. } + | Instruction::InstrumentedResume + | Instruction::ExtendedArg + ) + { + vm.trace_event(crate::protocol::TraceEvent::Opcode, None)?; } let result = self.execute_instruction(op, arg, &mut do_extend_arg, vm); @@ -691,6 +725,13 @@ impl ExecutingFrame<'_> { } }; + // Fire 'exception' trace event for sys.settrace. + // Only for new raises, not re-raises (matching the + // `error` label that calls _PyEval_MonitorRaise). + if !is_reraise { + self.fire_exception_trace(&exception, vm)?; + } + match handle_exception(self, exception, idx, is_reraise, is_new_raise, vm) { Ok(None) => {} Ok(Some(result)) => break Ok(result), @@ -777,6 +818,12 @@ impl ExecutingFrame<'_> { exc_tb: PyObjectRef, ) -> PyResult { self.monitoring_mask = vm.state.monitoring_events.load(); + // Reset prev_line so that LINE monitoring events fire even if + // the exception handler is on the same line as the yield point. + // In CPython, _Py_call_instrumentation_line has a special case + // for RESUME: it fires LINE even when prev_line == current_line. + // Since gen_throw bypasses RESUME, we reset prev_line instead. + self.state.prev_line = 0; if let Some(jen) = self.yield_from_target() { // Check if the exception is GeneratorExit (type or instance). // For GeneratorExit, close the sub-iterator instead of throwing. @@ -2167,7 +2214,10 @@ impl ExecutingFrame<'_> { .instrumentation_version .load(atomic::Ordering::Acquire); if code_ver != global_ver { - let events = vm.state.monitoring_events.load(); + let events = { + let state = vm.state.monitoring.lock(); + state.events_for_code(self.code.get_id()) + }; monitoring::instrument_code(self.code, events); self.code .instrumentation_version @@ -2424,6 +2474,12 @@ impl ExecutingFrame<'_> { Ok(None) } PyIterReturn::StopIteration(value) => { + // Fire 'exception' trace event for StopIteration, + // matching SEND's exception handling. + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value.clone()); + self.fire_exception_trace(&stop_exc, vm)?; + } let value = vm.unwrap_or_none(value); self.push_value(value); self.jump(exit_label); @@ -2538,7 +2594,10 @@ impl ExecutingFrame<'_> { .instrumentation_version .load(atomic::Ordering::Acquire); if code_ver != global_ver { - let events = vm.state.monitoring_events.load(); + let events = { + let state = vm.state.monitoring.lock(); + state.events_for_code(self.code.get_id()) + }; monitoring::instrument_code(self.code, events); self.code .instrumentation_version @@ -2809,8 +2868,8 @@ impl ExecutingFrame<'_> { // Fire LINE event only if line changed if let Some((loc, _)) = self.code.locations.get(idx) { let line = loc.line.get() as u32; - if line != self.prev_line && line > 0 { - self.prev_line = line; + if line != self.state.prev_line && line > 0 { + self.state.prev_line = line; monitoring::fire_line(vm, self.code, offset, line)?; } } @@ -3529,7 +3588,13 @@ impl ExecutingFrame<'_> { self.push_value(value); Ok(true) } - Ok(PyIterReturn::StopIteration(_)) => { + Ok(PyIterReturn::StopIteration(value)) => { + // Fire 'exception' trace event for StopIteration, matching + // FOR_ITER's inline call to _PyEval_MonitorRaise. + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value); + self.fire_exception_trace(&stop_exc, vm)?; + } // Skip END_FOR (base or instrumented) and jump to POP_ITER. let target_idx = target.0 as usize; let jump_target = if let Some(unit) = self.code.instructions.get(target_idx) { diff --git a/crates/vm/src/protocol/callable.rs b/crates/vm/src/protocol/callable.rs index 316ed36dd19..1f01221ff0c 100644 --- a/crates/vm/src/protocol/callable.rs +++ b/crates/vm/src/protocol/callable.rs @@ -74,19 +74,40 @@ impl<'a> PyCallable<'a> { pub(crate) enum TraceEvent { Call, Return, + Exception, Line, + Opcode, CCall, CReturn, CException, } +impl TraceEvent { + /// Whether sys.setprofile receives this event. + /// In legacy_tracing.c, profile callbacks are only registered for + /// PY_RETURN, PY_UNWIND, C_CALL, C_RETURN, C_RAISE. + fn is_profile_event(&self) -> bool { + matches!( + self, + Self::Call | Self::Return | Self::CCall | Self::CReturn | Self::CException + ) + } + + /// Whether this event is dispatched only when f_trace_opcodes is set. + pub(crate) fn is_opcode_event(&self) -> bool { + matches!(self, Self::Opcode) + } +} + impl core::fmt::Display for TraceEvent { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use TraceEvent::*; match self { Call => write!(f, "call"), Return => write!(f, "return"), + Exception => write!(f, "exception"), Line => write!(f, "line"), + Opcode => write!(f, "opcode"), CCall => write!(f, "c_call"), CReturn => write!(f, "c_return"), CException => write!(f, "c_exception"), @@ -127,10 +148,18 @@ impl VirtualMachine { return Ok(None); } + let is_profile_event = event.is_profile_event(); + let is_opcode_event = event.is_opcode_event(); + let Some(frame_ref) = self.current_frame() else { return Ok(None); }; + // Opcode events are only dispatched when f_trace_opcodes is set. + if is_opcode_event && !*frame_ref.trace_opcodes.lock() { + return Ok(None); + } + let frame: PyObjectRef = frame_ref.into(); let event = self.ctx.new_str(event.to_string()).into(); let args = vec![frame, event, arg.unwrap_or_else(|| self.ctx.none())]; @@ -160,7 +189,7 @@ impl VirtualMachine { } } - if !self.is_none(&profile_func) { + if is_profile_event && !self.is_none(&profile_func) { self.use_tracing.set(false); let res = profile_func.call(args, self); self.use_tracing.set(true); diff --git a/crates/vm/src/stdlib/sys/monitoring.rs b/crates/vm/src/stdlib/sys/monitoring.rs index 05c427357dd..bc28335700e 100644 --- a/crates/vm/src/stdlib/sys/monitoring.rs +++ b/crates/vm/src/stdlib/sys/monitoring.rs @@ -119,6 +119,20 @@ impl MonitoringState { let local = self.local_events.values().fold(0, |acc, &e| acc | e); global | local } + + /// Compute the events that apply to a specific code object: + /// global events OR'd with local events registered for that code. + /// This prevents events like INSTRUCTION that are local to one code + /// from being applied to unrelated code objects. + pub fn events_for_code(&self, code_id: usize) -> u32 { + let global = self.global_events.iter().fold(0, |acc, &e| acc | e); + let local = self + .local_events + .iter() + .filter(|((_, cid), _)| *cid == code_id) + .fold(0, |acc, (_, &e)| acc | e); + global | local + } } /// Global atomic mask: OR of all tools' events. Checked in the hot path @@ -464,13 +478,17 @@ fn update_events_mask(vm: &VirtualMachine, state: &MonitoringState) { + 1; // Eagerly re-instrument all frames on the current thread's stack so that // code objects already past their RESUME pick up the new event set. + // Each code object gets only the events that apply to it (global + its + // own local events), preventing e.g. INSTRUCTION from being applied to + // unrelated code objects. for fp in vm.frames.borrow().iter() { // SAFETY: frames in the Vec are alive while their FrameRef is on the call stack. let frame = unsafe { fp.as_ref() }; let code = &frame.code; let code_ver = code.instrumentation_version.load(Ordering::Acquire); if code_ver != new_ver { - instrument_code(code, events); + let code_events = state.events_for_code(code.get_id()); + instrument_code(code, code_events); code.instrumentation_version .store(new_ver, Ordering::Release); } diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 3ee421e04ff..d5425eb603c 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -34,7 +34,7 @@ use crate::{ frozen::FrozenModule, function::{ArgMapping, FuncArgs, PySetterValue}, import, - protocol::{PyIterIter, TraceEvent}, + protocol::PyIterIter, scope::Scope, signal, stdlib, warn::WarningsState, @@ -1082,33 +1082,7 @@ impl VirtualMachine { crate::vm::thread::pop_thread_frame(); } - // Fire 'call' trace event after pushing frame - // (current_frame() now returns the callee's frame) - // - // trace_dispatch protocol (matching CPython's trace_trampoline): - // - For 'call' events, the global trace function is called. - // If it returns non-None, set f_trace to that value (trace this frame). - // If it returns None, leave f_trace unset (skip tracing this frame). - // - For 'return' events, fire if this frame has f_trace set OR if - // a profile function is active (profiling is independent of f_trace). - match self.trace_event(TraceEvent::Call, None) { - Ok(trace_result) => { - if let Some(local_trace) = trace_result { - *frame.trace.lock() = local_trace; - } - let result = f(frame.clone()); - // Fire 'return' event if frame is being traced or profiled - if result.is_ok() - && self.use_tracing.get() - && (!self.is_none(&frame.trace.lock()) - || !self.is_none(&self.profile_func.borrow())) - { - let _ = self.trace_event(TraceEvent::Return, None); - } - result - } - Err(e) => Err(e), - } + self.dispatch_traced_frame(&frame, |frame| f(frame.to_owned())) }) } @@ -1159,26 +1133,46 @@ impl VirtualMachine { self.recursion_depth.update(|d| d - 1); } + self.dispatch_traced_frame(frame, |frame| f(frame)) + } + + /// Fire trace/profile 'call' and 'return' events around a frame body. + /// + /// Matches `call_trace_protected` / `trace_trampoline` protocol: + /// - Fire `TraceEvent::Call`; if the trace function returns non-None, + /// install it as the per-frame `f_trace`. + /// - Execute the closure (the actual frame body). + /// - Fire `TraceEvent::Return` on both normal return **and** exception + /// unwind (`PY_UNWIND` → `PyTrace_RETURN` with `arg = None`). + /// Propagate any trace-function error, replacing the original exception. + fn dispatch_traced_frame) -> PyResult>( + &self, + frame: &Py, + f: F, + ) -> PyResult { use crate::protocol::TraceEvent; - match self.trace_event(TraceEvent::Call, None) { - Ok(trace_result) => { - // Update per-frame trace if trace function returned a new local trace - if let Some(local_trace) = trace_result { - *frame.trace.lock() = local_trace; - } - let result = f(frame); - // Fire 'return' event if frame is being traced or profiled - if result.is_ok() - && self.use_tracing.get() - && (!self.is_none(&frame.trace.lock()) - || !self.is_none(&self.profile_func.borrow())) - { - let _ = self.trace_event(TraceEvent::Return, None); - } - result - } - Err(e) => Err(e), + + // Fire 'call' trace event. current_frame() now returns the callee. + let trace_result = self.trace_event(TraceEvent::Call, None)?; + if let Some(local_trace) = trace_result { + *frame.trace.lock() = local_trace; } + + let result = f(frame); + + // Fire 'return' event if frame is being traced or profiled. + // PY_UNWIND fires PyTrace_RETURN with arg=None — so we fire for + // both Ok and Err, matching `call_trace_protected` behavior. + if self.use_tracing.get() + && (!self.is_none(&frame.trace.lock()) || !self.is_none(&self.profile_func.borrow())) + { + let ret_result = self.trace_event(TraceEvent::Return, None); + // call_trace_protected: if trace function raises, its error + // replaces the original exception. + ret_result?; + } + + result } /// Returns a basic CompileOpts instance with options accurate to the vm. Used