From aa607b2645353b84b68f9e1d34b1f86347565b31 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Thu, 5 Mar 2026 11:25:32 +0100 Subject: [PATCH 1/2] Newtype ConstIdx, Constants --- crates/codegen/src/compile.rs | 6 +-- crates/codegen/src/ir.rs | 16 +++++--- crates/compiler-core/src/bytecode.rs | 40 +++++++++++++++---- .../compiler-core/src/bytecode/instruction.rs | 37 +++++++++-------- crates/compiler-core/src/bytecode/oparg.rs | 36 +++++++++++++++++ crates/compiler-core/src/marshal.rs | 2 +- crates/jit/src/instructions.rs | 5 +-- crates/jit/tests/common.rs | 12 +++--- crates/vm/src/builtins/code.rs | 8 ++-- crates/vm/src/frame.rs | 6 +-- 10 files changed, 116 insertions(+), 52 deletions(-) diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index b4e24a30461..0b5733a6d46 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -29,7 +29,7 @@ use rustpython_compiler_core::{ self, AnyInstruction, Arg as OpArgMarker, BinaryOperator, BuildSliceArgCount, CodeObject, ComparisonOperator, ConstantData, ConvertValueOparg, Instruction, IntrinsicFunction1, Invert, LoadAttr, LoadSuperAttr, OpArg, OpArgType, PseudoInstruction, SpecialMethod, - UnpackExArgs, + UnpackExArgs, oparg, }, }; use rustpython_wtf8::Wtf8Buf; @@ -8075,9 +8075,9 @@ impl Compiler { // fn block_done() - fn arg_constant(&mut self, constant: ConstantData) -> u32 { + fn arg_constant(&mut self, constant: ConstantData) -> oparg::ConstIdx { let info = self.current_code_info(); - info.metadata.consts.insert_full(constant).0.to_u32() + info.metadata.consts.insert_full(constant).0.to_u32().into() } fn emit_load_const(&mut self, constant: ConstantData) { diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 52499664fe0..a5936ef722b 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -10,7 +10,7 @@ use rustpython_compiler_core::{ bytecode::{ AnyInstruction, Arg, CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, ExceptionTableEntry, InstrDisplayContext, Instruction, InstructionMetadata, Label, OpArg, - PseudoInstruction, PyCodeLocationInfoKind, encode_exception_table, + PseudoInstruction, PyCodeLocationInfoKind, encode_exception_table, oparg, }, varint::{write_signed_varint, write_varint}, }; @@ -695,15 +695,15 @@ impl CodeInfo { } (Instruction::LoadConst { consti }, Instruction::ToBool) => { let consti = consti.get(curr.arg); - let constant = &self.metadata.consts[consti as usize]; + let constant = &self.metadata.consts[consti.as_usize()]; if let ConstantData::Boolean { .. } = constant { - Some((curr_instr, OpArg::from(consti))) + Some((curr_instr, OpArg::from(consti.as_u32()))) } else { None } } (Instruction::LoadConst { consti }, Instruction::UnaryNot) => { - let constant = &self.metadata.consts[consti.get(curr.arg) as usize]; + let constant = &self.metadata.consts[consti.get(curr.arg).as_usize()]; match constant { ConstantData::Boolean { value } => { let (const_idx, _) = self @@ -1100,15 +1100,19 @@ impl CodeInfo { impl InstrDisplayContext for CodeInfo { type Constant = ConstantData; - fn get_constant(&self, i: usize) -> &ConstantData { - &self.metadata.consts[i] + + fn get_constant(&self, consti: oparg::ConstIdx) -> &ConstantData { + &self.metadata.consts[consti.as_usize()] } + fn get_name(&self, i: usize) -> &str { self.metadata.names[i].as_ref() } + fn get_varname(&self, i: usize) -> &str { self.metadata.varnames[i].as_ref() } + fn get_cell_name(&self, i: usize) -> &str { self.metadata .cellvars diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 46182962654..d7ed55930f6 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -11,7 +11,7 @@ use bitflags::bitflags; use core::{ cell::UnsafeCell, hash, mem, - ops::Deref, + ops::{Deref, Index}, sync::atomic::{AtomicU8, AtomicU16, AtomicUsize, Ordering}, }; use itertools::Itertools; @@ -32,7 +32,7 @@ pub use crate::bytecode::{ }; mod instruction; -mod oparg; +pub mod oparg; /// Exception table entry for zero-cost exception handling /// Format: (start, size, target, depth<<1|lasti) @@ -293,6 +293,31 @@ impl ConstantBag for BasicBag { } } +#[derive(Clone)] +pub struct Constants(Box<[C]>); + +impl Deref for Constants { + type Target = [C]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Index for Constants { + type Output = C; + + fn index(&self, consti: oparg::ConstIdx) -> &Self::Output { + &self.0[consti.as_usize()] + } +} + +impl FromIterator for Constants { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + /// Primary container of a single code object. Each python function has /// a code object. Also a module has a code object. #[derive(Clone)] @@ -312,7 +337,7 @@ pub struct CodeObject { /// Qualified name of the object (like CPython's co_qualname) pub qualname: C::Name, pub cell2arg: Option>, - pub constants: Box<[C]>, + pub constants: Constants, pub names: Box<[C::Name]>, pub varnames: Box<[C::Name]>, pub cellvars: Box<[C::Name]>, @@ -1020,8 +1045,7 @@ impl CodeObject { CodeObject { constants: self .constants - .into_vec() - .into_iter() + .iter() .map(|x| bag.make_constant(x.borrow_constant())) .collect(), names: map_names(self.names), @@ -1095,7 +1119,7 @@ impl fmt::Display for CodeObject { pub trait InstrDisplayContext { type Constant: Constant; - fn get_constant(&self, i: usize) -> &Self::Constant; + fn get_constant(&self, consti: oparg::ConstIdx) -> &Self::Constant; fn get_name(&self, i: usize) -> &str; @@ -1107,8 +1131,8 @@ pub trait InstrDisplayContext { impl InstrDisplayContext for CodeObject { type Constant = C; - fn get_constant(&self, i: usize) -> &C { - &self.constants[i] + fn get_constant(&self, consti: oparg::ConstIdx) -> &C { + &self.constants[consti] } fn get_name(&self, i: usize) -> &str { diff --git a/crates/compiler-core/src/bytecode/instruction.rs b/crates/compiler-core/src/bytecode/instruction.rs index 754447956fa..e1b2ffd65c7 100644 --- a/crates/compiler-core/src/bytecode/instruction.rs +++ b/crates/compiler-core/src/bytecode/instruction.rs @@ -4,7 +4,7 @@ use crate::{ bytecode::{ BorrowedConstant, Constant, InstrDisplayContext, oparg::{ - BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, + self, BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, ConvertValueOparg, IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, LoadSuperAttr, MakeFunctionFlags, NameIdx, OpArg, OpArgByte, OpArgType, RaiseKind, SpecialMethod, StoreFastLoadFast, UnpackExArgs, @@ -186,7 +186,7 @@ pub enum Instruction { idx: Arg, } = 81, LoadConst { - consti: Arg, + consti: Arg, } = 82, LoadDeref { i: Arg, @@ -1124,22 +1124,25 @@ impl InstructionMetadata for Instruction { let name = |i: u32| ctx.get_name(i as usize); let cell_name = |i: u32| ctx.get_cell_name(i as usize); - let fmt_const = - |op: &str, arg: OpArg, f: &mut fmt::Formatter<'_>, idx: &Arg| -> fmt::Result { - let value = ctx.get_constant(idx.get(arg) as usize); - match value.borrow_constant() { - BorrowedConstant::Code { code } if expand_code_objects => { - write!(f, "{op:pad$}({code:?}):")?; - code.display_inner(f, true, level + 1)?; - Ok(()) - } - c => { - write!(f, "{op:pad$}(")?; - c.fmt_display(f)?; - write!(f, ")") - } + let fmt_const = |op: &str, + arg: OpArg, + f: &mut fmt::Formatter<'_>, + consti: &Arg| + -> fmt::Result { + let value = ctx.get_constant(consti.get(arg)); + match value.borrow_constant() { + BorrowedConstant::Code { code } if expand_code_objects => { + write!(f, "{op:pad$}({code:?}):")?; + code.display_inner(f, true, level + 1)?; + Ok(()) } - }; + c => { + write!(f, "{op:pad$}(")?; + c.fmt_display(f)?; + write!(f, ")") + } + } + }; match self { Self::BinarySlice => w!(BINARY_SLICE), diff --git a/crates/compiler-core/src/bytecode/oparg.rs b/crates/compiler-core/src/bytecode/oparg.rs index 729b84db591..f57a2b6fdab 100644 --- a/crates/compiler-core/src/bytecode/oparg.rs +++ b/crates/compiler-core/src/bytecode/oparg.rs @@ -872,3 +872,39 @@ impl LoadAttrBuilder { self } } + +#[derive(Clone, Copy)] +pub struct ConstIdx(u32); + +impl ConstIdx { + #[must_use] + pub const fn from_u32(value: u32) -> Self { + Self(value) + } + + /// Returns the index as a `u32` value. + #[must_use] + pub const fn as_u32(self) -> u32 { + self.0 + } + + /// Returns the index as a `usize` value. + #[must_use] + pub const fn as_usize(self) -> usize { + self.0 as usize + } +} + +impl From for ConstIdx { + fn from(value: u32) -> Self { + Self::from_u32(value) + } +} + +impl From for u32 { + fn from(consti: ConstIdx) -> Self { + consti.as_u32() + } +} + +impl OpArgType for ConstIdx {} diff --git a/crates/compiler-core/src/marshal.rs b/crates/compiler-core/src/marshal.rs index 310bad9d868..ba3cf7a35c3 100644 --- a/crates/compiler-core/src/marshal.rs +++ b/crates/compiler-core/src/marshal.rs @@ -240,7 +240,7 @@ pub fn deserialize_code( let len = rdr.read_u32()?; let constants = (0..len) .map(|_| deserialize_value(rdr, bag)) - .collect::>>()?; + .collect::>()?; let mut read_names = || { let len = rdr.read_u32()?; diff --git a/crates/jit/src/instructions.rs b/crates/jit/src/instructions.rs index 9d8be5bc6e3..01f13e9d289 100644 --- a/crates/jit/src/instructions.rs +++ b/crates/jit/src/instructions.rs @@ -637,9 +637,8 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { Ok(()) } Instruction::LoadConst { consti } => { - let val = self.prepare_const( - bytecode.constants[consti.get(arg) as usize].borrow_constant(), - )?; + let val = + self.prepare_const(bytecode.constants[consti.get(arg)].borrow_constant())?; self.stack.push(val); Ok(()) } diff --git a/crates/jit/tests/common.rs b/crates/jit/tests/common.rs index 095bb87d7be..38f4b255d0e 100644 --- a/crates/jit/tests/common.rs +++ b/crates/jit/tests/common.rs @@ -1,6 +1,6 @@ use core::ops::ControlFlow; use rustpython_compiler_core::bytecode::{ - CodeObject, ConstantData, Instruction, OpArg, OpArgState, + CodeObject, ConstantData, Constants, Instruction, OpArg, OpArgState, }; use rustpython_jit::{CompiledCode, JitType}; use rustpython_wtf8::{Wtf8, Wtf8Buf}; @@ -77,7 +77,7 @@ fn extract_annotations_from_annotate_code(code: &CodeObject) -> HashMap { - stack.push((true, consti.get(arg) as usize)); + stack.push((true, consti.get(arg).as_usize())); } Instruction::LoadName { namei } => { stack.push((false, namei.get(arg) as usize)); @@ -99,7 +99,8 @@ fn extract_annotations_from_annotate_code(code: &CodeObject) -> HashMap ControlFlow<()> { match instruction { @@ -193,8 +194,7 @@ impl StackMachine { // No-op for JIT tests } Instruction::LoadConst { consti } => { - let idx = consti.get(arg); - self.stack.push(constants[idx as usize].clone().into()) + self.stack.push(constants[consti.get(arg)].clone().into()) } Instruction::LoadName { namei } => self .stack diff --git a/crates/vm/src/builtins/code.rs b/crates/vm/src/builtins/code.rs index a41bc5b03b0..57d03ec6de0 100644 --- a/crates/vm/src/builtins/code.rs +++ b/crates/vm/src/builtins/code.rs @@ -538,16 +538,14 @@ impl Constructor for PyCode { .map_err(|e| vm.new_value_error(format!("invalid bytecode: {}", e)))?; // Convert constants - let constants: Box<[Literal]> = args + let constants = args .consts .iter() .map(|obj| { - // Convert PyObject to Literal constant - // For now, just wrap it + // Convert PyObject to Literal constant. For now, just wrap it Literal(obj.clone()) }) - .collect::>() - .into_boxed_slice(); + .collect(); // Create locations (start and end pairs) let row = if args.firstlineno > 0 { diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 47d583b578c..80b199be5f7 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -2354,8 +2354,8 @@ impl ExecutingFrame<'_> { }); Ok(None) } - Instruction::LoadConst { consti: idx } => { - self.push_value(self.code.constants[idx.get(arg) as usize].clone().into()); + Instruction::LoadConst { consti } => { + self.push_value(self.code.constants[consti.get(arg)].clone().into()); // Mirror CPython's LOAD_CONST family transition. RustPython does // not currently distinguish immortal constants at runtime. let instr_idx = self.lasti() as usize - 1; @@ -2367,7 +2367,7 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::LoadConstMortal | Instruction::LoadConstImmortal => { - self.push_value(self.code.constants[u32::from(arg) as usize].clone().into()); + self.push_value(self.code.constants[u32::from(arg).into()].clone().into()); Ok(None) } Instruction::LoadCommonConstant { idx } => { From c17bd4924b1751965b427032a4b5459fcffbbb50 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 6 Mar 2026 09:53:19 +0100 Subject: [PATCH 2/2] Set generic --- crates/jit/tests/common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/jit/tests/common.rs b/crates/jit/tests/common.rs index 38f4b255d0e..0bdfd77d856 100644 --- a/crates/jit/tests/common.rs +++ b/crates/jit/tests/common.rs @@ -186,7 +186,7 @@ impl StackMachine { &mut self, instruction: Instruction, arg: OpArg, - constants: &Constants, + constants: &Constants, names: &[String], ) -> ControlFlow<()> { match instruction {