diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index c6069f6fb2e..dc5b23b0f93 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -660,7 +660,6 @@ def f(): with self.assertRaises(RuntimeError): gen.close() - @unittest.expectedFailure # TODO: RUSTPYTHON; no deterministic GC finalization def test_close_releases_frame_locals(self): # See gh-118272 @@ -684,6 +683,7 @@ def genfn(): # See https://github.com/python/cpython/issues/125723 class GeneratorDeallocTest(unittest.TestCase): + @unittest.expectedFailure # TODO: RUSTPYTHON; frame uses shared Arc, no ownership transfer def test_frame_outlives_generator(self): def g1(): a = 42 diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 0fd4f66df28..d63dc60be31 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -5312,7 +5312,6 @@ def test_bytes_like(self): with self.assertRaises(TypeError): os.scandir(path_bytes) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: not found in {, , , , , , , , , , , } @unittest.skipUnless(os.listdir in os.supports_fd, 'fd support for listdir required for this test.') def test_fd(self): diff --git a/crates/common/src/os.rs b/crates/common/src/os.rs index ef8547289a2..2a318f477e5 100644 --- a/crates/common/src/os.rs +++ b/crates/common/src/os.rs @@ -118,7 +118,7 @@ pub mod ffi { fn from_bytes(slice: &[u8]) -> &OsStr { // WASI strings are guaranteed to be UTF-8 - OsStr::new(std::str::from_utf8(slice).expect("wasip2 strings are UTF-8")) + OsStr::new(core::str::from_utf8(slice).expect("wasip2 strings are UTF-8")) } } diff --git a/crates/vm/src/builtins/frame.rs b/crates/vm/src/builtins/frame.rs index ed2e1e672fd..6ee8620b7d2 100644 --- a/crates/vm/src/builtins/frame.rs +++ b/crates/vm/src/builtins/frame.rs @@ -179,8 +179,8 @@ impl Py { } } - // Clear the evaluation stack - self.clear_value_stack(); + // Clear the evaluation stack and cell references + self.clear_stack_and_cells(); // Clear temporary refs self.temporary_refs.lock().clear(); diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 9a6a6d49e3c..43615711e5d 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -431,7 +431,7 @@ impl PyFunction { if let Some(cell2arg) = code.cell2arg.as_deref() { for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) { let x = fastlocals[*arg_idx as usize].take(); - frame.cells_frees[cell_idx].set(x); + frame.set_cell_contents(cell_idx, x); } } diff --git a/crates/vm/src/builtins/super.rs b/crates/vm/src/builtins/super.rs index b7bc3004332..8286c2a5e5f 100644 --- a/crates/vm/src/builtins/super.rs +++ b/crates/vm/src/builtins/super.rs @@ -85,15 +85,17 @@ impl Initializer for PySuper { if frame.code.arg_count == 0 { return Err(vm.new_runtime_error("super(): no arguments")); } - let obj = frame.fastlocals.lock()[0] - .clone() + // Lock fastlocals briefly to get arg[0], then drop the guard + // before calling get_cell_contents (which also locks fastlocals). + let first_arg = frame.fastlocals.lock()[0].clone(); + let obj = first_arg .or_else(|| { if let Some(cell2arg) = frame.code.cell2arg.as_deref() { cell2arg[..frame.code.cellvars.len()] .iter() .enumerate() .find(|(_, arg_idx)| **arg_idx == 0) - .and_then(|(cell_idx, _)| frame.cells_frees[cell_idx].get()) + .and_then(|(cell_idx, _)| frame.get_cell_contents(cell_idx)) } else { None } @@ -104,8 +106,8 @@ impl Initializer for PySuper { for (i, var) in frame.code.freevars.iter().enumerate() { if var.as_bytes() == b"__class__" { let i = frame.code.cellvars.len() + i; - let class = frame.cells_frees[i] - .get() + let class = frame + .get_cell_contents(i) .ok_or_else(|| vm.new_runtime_error("super(): empty __class__ cell"))?; typ = Some(class.downcast().map_err(|o| { vm.new_type_error(format!( diff --git a/crates/vm/src/coroutine.rs b/crates/vm/src/coroutine.rs index ac44e33f799..857d7403d5c 100644 --- a/crates/vm/src/coroutine.rs +++ b/crates/vm/src/coroutine.rs @@ -207,6 +207,9 @@ impl Coro { ) }); self.closed.store(true); + // Release frame locals and stack to free references held by the + // closed generator, matching gen_send_ex2 with close_on_completion. + self.frame.clear_locals_and_stack(); match result { Ok(ExecutionResult::Yield(_)) => { Err(vm.new_runtime_error(format!("{} ignored GeneratorExit", gen_name(jen, vm)))) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index f03aeeaa0c3..2e3905f4f14 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -53,6 +53,8 @@ struct FrameState { // We need 1 stack per frame /// The main data frame of the stack machine stack: BoxVec>, + /// Cell and free variable references (cellvars + freevars). + cells_frees: Box<[PyCellRef]>, /// index of last instruction ran #[cfg(feature = "threading")] lasti: u32, @@ -93,7 +95,6 @@ pub struct Frame { pub func_obj: Option, pub fastlocals: PyMutex]>>, - pub(crate) cells_frees: Box<[PyCellRef]>, pub locals: ArgMapping, pub globals: PyDictRef, pub builtins: PyObjectRef, @@ -135,6 +136,7 @@ impl PyPayload for Frame { unsafe impl Traverse for FrameState { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.stack.traverse(tracer_fn); + self.cells_frees.traverse(tracer_fn); } } @@ -143,7 +145,6 @@ unsafe impl Traverse for Frame { self.code.traverse(tracer_fn); self.func_obj.traverse(tracer_fn); self.fastlocals.traverse(tracer_fn); - self.cells_frees.traverse(tracer_fn); self.locals.traverse(tracer_fn); self.globals.traverse(tracer_fn); self.builtins.traverse(tracer_fn); @@ -193,13 +194,13 @@ impl Frame { let state = FrameState { stack: BoxVec::new(code.max_stackdepth as usize), + cells_frees, #[cfg(feature = "threading")] lasti: 0, }; Self { fastlocals: PyMutex::new(fastlocals_vec.into_boxed_slice()), - cells_frees, locals: scope.locals, globals: scope.globals, builtins, @@ -218,9 +219,38 @@ impl Frame { } } - /// Clear the evaluation stack. Used by frame.clear() to break reference cycles. - pub(crate) fn clear_value_stack(&self) { - self.state.lock().stack.clear(); + /// Clear evaluation stack and state-owned cell/free references. + /// For full local/cell cleanup, call `clear_locals_and_stack()`. + pub(crate) fn clear_stack_and_cells(&self) { + let mut state = self.state.lock(); + state.stack.clear(); + let _old = core::mem::take(&mut state.cells_frees); + } + + /// Clear locals and stack after generator/coroutine close. + /// Releases references held by the frame, matching _PyFrame_ClearLocals. + pub(crate) fn clear_locals_and_stack(&self) { + self.clear_stack_and_cells(); + let mut fastlocals = self.fastlocals.lock(); + for slot in fastlocals.iter_mut() { + *slot = None; + } + } + + /// Get cell contents by cell index. Reads through fastlocals (no state lock needed). + pub(crate) fn get_cell_contents(&self, cell_idx: usize) -> Option { + let nlocals = self.code.varnames.len(); + let fastlocals = self.fastlocals.lock(); + fastlocals + .get(nlocals + cell_idx) + .and_then(|slot| slot.as_ref()) + .and_then(|obj| obj.downcast_ref::()) + .and_then(|cell| cell.get()) + } + + /// Set cell contents by cell index. Only safe to call before frame execution starts. + pub(crate) fn set_cell_contents(&self, cell_idx: usize, value: Option) { + self.state.lock().cells_frees[cell_idx].set(value); } /// Store a borrowed back-reference to the owning generator/coroutine. @@ -296,23 +326,25 @@ impl Frame { } } if !code.cellvars.is_empty() || !code.freevars.is_empty() { - let map_to_dict = |keys: &[&PyStrInterned], values: &[PyCellRef]| { - for (&k, v) in zip(keys, values) { - if let Some(value) = v.get() { - locals.mapping().ass_subscript(k, Some(value), vm)?; - } else { - match locals.mapping().ass_subscript(k, None, vm) { - Ok(()) => {} - Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {} - Err(e) => return Err(e), - } - } + // Access cells through fastlocals to avoid locking state + // (state may be held by with_exec during frame execution). + for (i, &k) in code.cellvars.iter().enumerate() { + let cell_value = self.get_cell_contents(i); + match locals.mapping().ass_subscript(k, cell_value, vm) { + Ok(()) => {} + Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {} + Err(e) => return Err(e), } - Ok(()) - }; - map_to_dict(&code.cellvars, &self.cells_frees)?; + } if code.flags.contains(bytecode::CodeFlags::OPTIMIZED) { - map_to_dict(&code.freevars, &self.cells_frees[code.cellvars.len()..])?; + for (i, &k) in code.freevars.iter().enumerate() { + let cell_value = self.get_cell_contents(code.cellvars.len() + i); + match locals.mapping().ass_subscript(k, cell_value, vm) { + Ok(()) => {} + Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {} + Err(e) => return Err(e), + } + } } } Ok(locals.clone()) @@ -326,7 +358,6 @@ impl Py { let exec = ExecutingFrame { code: &self.code, fastlocals: &self.fastlocals, - cells_frees: &self.cells_frees, locals: &self.locals, globals: &self.globals, builtins: &self.builtins, @@ -372,7 +403,6 @@ impl Py { let exec = ExecutingFrame { code: &self.code, fastlocals: &self.fastlocals, - cells_frees: &self.cells_frees, locals: &self.locals, globals: &self.globals, builtins: &self.builtins, @@ -407,7 +437,6 @@ impl Py { struct ExecutingFrame<'a> { code: &'a PyRef, fastlocals: &'a PyMutex]>>, - cells_frees: &'a [PyCellRef], locals: &'a ArgMapping, globals: &'a PyDictRef, builtins: &'a PyObjectRef, @@ -1011,7 +1040,7 @@ impl ExecutingFrame<'_> { } Instruction::DeleteAttr { idx } => self.delete_attr(vm, idx.get(arg)), Instruction::DeleteDeref(i) => { - self.cells_frees[i.get(arg) as usize].set(None); + self.state.cells_frees[i.get(arg) as usize].set(None); Ok(None) } Instruction::DeleteFast(idx) => { @@ -1436,7 +1465,7 @@ impl ExecutingFrame<'_> { }; self.push_value(match value { Some(v) => v, - None => self.cells_frees[i] + None => self.state.cells_frees[i] .get() .ok_or_else(|| self.unbound_cell_exception(i, vm))?, }); @@ -1493,7 +1522,7 @@ impl ExecutingFrame<'_> { } Instruction::LoadDeref(i) => { let idx = i.get(arg) as usize; - let x = self.cells_frees[idx] + let x = self.state.cells_frees[idx] .get() .ok_or_else(|| self.unbound_cell_exception(idx, vm))?; self.push_value(x); @@ -2083,7 +2112,7 @@ impl ExecutingFrame<'_> { Instruction::StoreAttr { idx } => self.store_attr(vm, idx.get(arg)), Instruction::StoreDeref(i) => { let value = self.pop_value(); - self.cells_frees[i.get(arg) as usize].set(Some(value)); + self.state.cells_frees[i.get(arg) as usize].set(Some(value)); Ok(None) } Instruction::StoreFast(idx) => { diff --git a/crates/vm/src/stdlib/os.rs b/crates/vm/src/stdlib/os.rs index 2fb71d9ec01..d67e97874f4 100644 --- a/crates/vm/src/stdlib/os.rs +++ b/crates/vm/src/stdlib/os.rs @@ -2,10 +2,10 @@ use crate::{ AsObject, Py, PyObjectRef, PyPayload, PyResult, TryFromObject, VirtualMachine, - builtins::{PyDictRef, PyModule, PySet}, + builtins::{PyModule, PySet}, common::crt_fd, convert::{IntoPyException, ToPyException, ToPyObject}, - function::{ArgMapping, ArgumentError, FromArgs, FuncArgs}, + function::{ArgumentError, FromArgs, FuncArgs}, }; use std::{fs, io, path::Path}; @@ -186,6 +186,9 @@ pub(super) mod _os { const STAT_DIR_FD: bool = cfg!(not(any(windows, target_os = "redox"))); const UTIME_DIR_FD: bool = cfg!(not(any(windows, target_os = "redox"))); pub(crate) const SYMLINK_DIR_FD: bool = cfg!(not(any(windows, target_os = "redox"))); + pub(crate) const UNLINK_DIR_FD: bool = cfg!(not(any(windows, target_os = "redox"))); + const RMDIR_DIR_FD: bool = cfg!(not(any(windows, target_os = "redox"))); + const SCANDIR_FD: bool = cfg!(all(unix, not(target_os = "redox"))); #[pyattr] use libc::{O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY}; @@ -357,6 +360,30 @@ pub(super) mod _os { fs::create_dir_all(path.as_str()).map_err(|err| err.into_pyexception(vm)) } + #[cfg(not(windows))] + #[pyfunction] + fn rmdir( + path: OsPath, + dir_fd: DirFd<'_, { RMDIR_DIR_FD as usize }>, + vm: &VirtualMachine, + ) -> PyResult<()> { + #[cfg(not(target_os = "redox"))] + if let Some(fd) = dir_fd.raw_opt() { + let c_path = path.clone().into_cstring(vm)?; + let res = unsafe { libc::unlinkat(fd, c_path.as_ptr(), libc::AT_REMOVEDIR) }; + return if res < 0 { + let err = crate::common::os::errno_io_error(); + Err(OSErrorBuilder::with_filename(&err, path, vm)) + } else { + Ok(()) + }; + } + #[cfg(target_os = "redox")] + let [] = dir_fd.0; + fs::remove_dir(&path).map_err(|err| OSErrorBuilder::with_filename(&err, path, vm)) + } + + #[cfg(windows)] #[pyfunction] fn rmdir(path: OsPath, dir_fd: DirFd<'_, 0>, vm: &VirtualMachine) -> PyResult<()> { let [] = dir_fd.0; @@ -400,22 +427,36 @@ pub(super) mod _os { #[cfg(all(unix, not(target_os = "redox")))] { use rustpython_common::os::ffi::OsStrExt; + use std::os::unix::io::IntoRawFd; let new_fd = nix::unistd::dup(fno).map_err(|e| e.into_pyexception(vm))?; - let mut dir = - nix::dir::Dir::from_fd(new_fd).map_err(|e| e.into_pyexception(vm))?; - dir.iter() - .filter_map_ok(|entry| { - let fname = entry.file_name().to_bytes(); - match fname { - b"." | b".." => None, - _ => Some( - OutputMode::String - .process_path(std::ffi::OsStr::from_bytes(fname), vm), - ), + let raw_fd = new_fd.into_raw_fd(); + let dir = OwnedDir::from_fd(raw_fd).map_err(|e| { + unsafe { libc::close(raw_fd) }; + e.into_pyexception(vm) + })?; + // OwnedDir::drop calls rewinddir (reset to start) then closedir. + let mut list = Vec::new(); + loop { + nix::errno::Errno::clear(); + let entry = unsafe { libc::readdir(dir.as_ptr()) }; + if entry.is_null() { + let err = nix::errno::Errno::last(); + if err != nix::errno::Errno::UnknownErrno { + return Err(io::Error::from(err).into_pyexception(vm)); } - }) - .collect::>() - .map_err(|e| e.into_pyexception(vm))? + break; + } + let fname = unsafe { core::ffi::CStr::from_ptr((*entry).d_name.as_ptr()) } + .to_bytes(); + match fname { + b"." | b".." => continue, + _ => list.push( + OutputMode::String + .process_path(std::ffi::OsStr::from_bytes(fname), vm), + ), + } + } + list } } }; @@ -563,6 +604,12 @@ pub(super) mod _os { file_name: std::ffi::OsString, pathval: PathBuf, file_type: io::Result, + /// dirent d_type value, used when file_type is unavailable (fd-based scandir) + #[cfg(unix)] + d_type: Option, + /// Parent directory fd for fd-based scandir, used for fstatat + #[cfg(not(any(windows, target_os = "redox")))] + dir_fd: Option, mode: OutputMode, stat: OnceCell, lstat: OnceCell, @@ -586,53 +633,116 @@ pub(super) mod _os { Ok(self.mode.process_path(&self.pathval, vm)) } + /// Build the DirFd to use for stat calls. + /// If this entry was produced by fd-based scandir, use the stored dir_fd + /// so that fstatat(dir_fd, name, ...) is used instead of stat(full_path). + fn stat_dir_fd(&self) -> DirFd<'_, { STAT_DIR_FD as usize }> { + #[cfg(not(any(windows, target_os = "redox")))] + if let Some(raw_fd) = self.dir_fd { + // Safety: the fd came from os.open() and is borrowed for + // the lifetime of this DirEntry reference. + let borrowed = unsafe { crt_fd::Borrowed::borrow_raw(raw_fd) }; + return DirFd([borrowed; STAT_DIR_FD as usize]); + } + DirFd::default() + } + + /// Stat-based mode test fallback. Uses fstatat when dir_fd is available. + #[cfg(unix)] + fn test_mode_via_stat( + &self, + follow_symlinks: bool, + mode_bits: u32, + vm: &VirtualMachine, + ) -> PyResult { + match self.stat(self.stat_dir_fd(), FollowSymlinks(follow_symlinks), vm) { + Ok(stat_obj) => { + let st_mode: i32 = stat_obj.get_attr("st_mode", vm)?.try_into_value(vm)?; + #[allow(clippy::unnecessary_cast)] + Ok((st_mode as u32 & libc::S_IFMT as u32) == mode_bits) + } + Err(e) => { + if e.fast_isinstance(vm.ctx.exceptions.file_not_found_error) { + Ok(false) + } else { + Err(e) + } + } + } + } + #[pymethod] fn is_dir(&self, follow_symlinks: FollowSymlinks, vm: &VirtualMachine) -> PyResult { - // Use cached file_type first to avoid stat() calls that may fail if let Ok(file_type) = &self.file_type && (!follow_symlinks.0 || !file_type.is_symlink()) { return Ok(file_type.is_dir()); } + #[cfg(unix)] + if let Some(dt) = self.d_type { + let is_symlink = dt == libc::DT_LNK; + let need_stat = dt == libc::DT_UNKNOWN || (follow_symlinks.0 && is_symlink); + if !need_stat { + return Ok(dt == libc::DT_DIR); + } + } + #[cfg(unix)] + return self.test_mode_via_stat(follow_symlinks.0, libc::S_IFDIR as _, vm); + #[cfg(not(unix))] match super::fs_metadata(&self.pathval, follow_symlinks.0) { Ok(meta) => Ok(meta.is_dir()), - Err(e) => { - if e.kind() == io::ErrorKind::NotFound { - Ok(false) - } else { - Err(e.into_pyexception(vm)) - } - } + Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false), + Err(e) => Err(e.into_pyexception(vm)), } } #[pymethod] fn is_file(&self, follow_symlinks: FollowSymlinks, vm: &VirtualMachine) -> PyResult { - // Use cached file_type first to avoid stat() calls that may fail if let Ok(file_type) = &self.file_type && (!follow_symlinks.0 || !file_type.is_symlink()) { return Ok(file_type.is_file()); } + #[cfg(unix)] + if let Some(dt) = self.d_type { + let is_symlink = dt == libc::DT_LNK; + let need_stat = dt == libc::DT_UNKNOWN || (follow_symlinks.0 && is_symlink); + if !need_stat { + return Ok(dt == libc::DT_REG); + } + } + #[cfg(unix)] + return self.test_mode_via_stat(follow_symlinks.0, libc::S_IFREG as _, vm); + #[cfg(not(unix))] match super::fs_metadata(&self.pathval, follow_symlinks.0) { Ok(meta) => Ok(meta.is_file()), - Err(e) => { - if e.kind() == io::ErrorKind::NotFound { - Ok(false) - } else { - Err(e.into_pyexception(vm)) - } - } + Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false), + Err(e) => Err(e.into_pyexception(vm)), } } #[pymethod] fn is_symlink(&self, vm: &VirtualMachine) -> PyResult { - Ok(self - .file_type - .as_ref() - .map_err(|err| err.into_pyexception(vm))? - .is_symlink()) + if let Ok(file_type) = &self.file_type { + return Ok(file_type.is_symlink()); + } + #[cfg(unix)] + if let Some(dt) = self.d_type + && dt != libc::DT_UNKNOWN + { + return Ok(dt == libc::DT_LNK); + } + #[cfg(unix)] + return self.test_mode_via_stat(false, libc::S_IFLNK as _, vm); + #[cfg(not(unix))] + match &self.file_type { + Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false), + Err(e) => { + use crate::convert::ToPyException; + Err(e.to_pyexception(vm)) + } + Ok(_) => Ok(false), + } } #[pymethod] @@ -642,6 +752,12 @@ pub(super) mod _os { follow_symlinks: FollowSymlinks, vm: &VirtualMachine, ) -> PyResult { + // Use stored dir_fd if the caller didn't provide one + let effective_dir_fd = if dir_fd == DirFd::default() { + self.stat_dir_fd() + } else { + dir_fd + }; let do_stat = |follow_symlinks| { stat( OsPath { @@ -649,7 +765,7 @@ pub(super) mod _os { origin: None, } .into(), - dir_fd, + effective_dir_fd, FollowSymlinks(follow_symlinks), vm, ) @@ -663,7 +779,6 @@ pub(super) mod _os { } }; let stat = if follow_symlinks.0 { - // if follow_symlinks == true and we aren't a symlink, cache both stat and lstat match self.stat.get() { Some(val) => val, None => { @@ -873,6 +988,10 @@ pub(super) mod _os { file_name: entry.file_name(), pathval, file_type: entry.file_type(), + #[cfg(unix)] + d_type: None, + #[cfg(not(any(windows, target_os = "redox")))] + dir_fd: None, mode: zelf.mode, lstat, stat: OnceCell::new(), @@ -893,17 +1012,202 @@ pub(super) mod _os { } } + /// Wrapper around a raw `libc::DIR*` for fd-based scandir. + #[cfg(all(unix, not(target_os = "redox")))] + struct OwnedDir(core::ptr::NonNull); + + #[cfg(all(unix, not(target_os = "redox")))] + impl OwnedDir { + fn from_fd(fd: crt_fd::Raw) -> io::Result { + let ptr = unsafe { libc::fdopendir(fd) }; + core::ptr::NonNull::new(ptr) + .map(OwnedDir) + .ok_or_else(io::Error::last_os_error) + } + + fn as_ptr(&self) -> *mut libc::DIR { + self.0.as_ptr() + } + } + + #[cfg(all(unix, not(target_os = "redox")))] + impl Drop for OwnedDir { + fn drop(&mut self) { + unsafe { + libc::rewinddir(self.0.as_ptr()); + libc::closedir(self.0.as_ptr()); + } + } + } + + #[cfg(all(unix, not(target_os = "redox")))] + impl core::fmt::Debug for OwnedDir { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("OwnedDir").field(&self.0).finish() + } + } + + // Safety: OwnedDir wraps a *mut libc::DIR. All access is synchronized + // through the PyMutex in ScandirIteratorFd. + #[cfg(all(unix, not(target_os = "redox")))] + unsafe impl Send for OwnedDir {} + #[cfg(all(unix, not(target_os = "redox")))] + unsafe impl Sync for OwnedDir {} + + #[cfg(all(unix, not(target_os = "redox")))] + #[pyattr] + #[pyclass(name = "ScandirIter")] + #[derive(Debug, PyPayload)] + struct ScandirIteratorFd { + dir: crate::common::lock::PyMutex>, + /// The original fd passed to scandir(), stored in DirEntry for fstatat + orig_fd: crt_fd::Raw, + } + + #[cfg(all(unix, not(target_os = "redox")))] + #[pyclass(flags(DISALLOW_INSTANTIATION), with(Destructor, IterNext, Iterable))] + impl ScandirIteratorFd { + #[pymethod] + fn close(&self) { + let _dropped = self.dir.lock().take(); + } + + #[pymethod] + const fn __enter__(zelf: PyRef) -> PyRef { + zelf + } + + #[pymethod] + fn __exit__(zelf: PyRef, _args: FuncArgs) { + zelf.close() + } + + #[pymethod] + fn __reduce__(&self, vm: &VirtualMachine) -> PyResult { + Err(vm.new_type_error("cannot pickle 'ScandirIterator' object".to_owned())) + } + } + + #[cfg(all(unix, not(target_os = "redox")))] + impl Destructor for ScandirIteratorFd { + fn del(zelf: &Py, vm: &VirtualMachine) -> PyResult<()> { + if zelf.dir.lock().is_some() { + let _ = crate::stdlib::warnings::warn( + vm.ctx.exceptions.resource_warning, + format!("unclosed scandir iterator {:?}", zelf.as_object()), + 1, + vm, + ); + zelf.close(); + } + Ok(()) + } + } + + #[cfg(all(unix, not(target_os = "redox")))] + impl SelfIter for ScandirIteratorFd {} + + #[cfg(all(unix, not(target_os = "redox")))] + impl IterNext for ScandirIteratorFd { + fn next(zelf: &crate::Py, vm: &VirtualMachine) -> PyResult { + use rustpython_common::os::ffi::OsStrExt; + let mut guard = zelf.dir.lock(); + let dir = match guard.as_mut() { + None => return Ok(PyIterReturn::StopIteration(None)), + Some(dir) => dir, + }; + loop { + nix::errno::Errno::clear(); + let entry = unsafe { + let ptr = libc::readdir(dir.as_ptr()); + if ptr.is_null() { + let err = nix::errno::Errno::last(); + if err != nix::errno::Errno::UnknownErrno { + return Err(io::Error::from(err).into_pyexception(vm)); + } + drop(guard.take()); + return Ok(PyIterReturn::StopIteration(None)); + } + &*ptr + }; + let fname = unsafe { core::ffi::CStr::from_ptr(entry.d_name.as_ptr()) }.to_bytes(); + if fname == b"." || fname == b".." { + continue; + } + let file_name = std::ffi::OsString::from(std::ffi::OsStr::from_bytes(fname)); + let pathval = PathBuf::from(&file_name); + #[cfg(target_os = "freebsd")] + let ino = entry.d_fileno; + #[cfg(not(target_os = "freebsd"))] + let ino = entry.d_ino; + let d_type = entry.d_type; + return Ok(PyIterReturn::Return( + DirEntry { + file_name, + pathval, + file_type: Err(io::Error::other( + "file_type unavailable for fd-based scandir", + )), + d_type: if d_type == libc::DT_UNKNOWN { + None + } else { + Some(d_type) + }, + dir_fd: Some(zelf.orig_fd), + mode: OutputMode::String, + lstat: OnceCell::new(), + stat: OnceCell::new(), + ino: AtomicCell::new(ino as _), + } + .into_ref(&vm.ctx) + .into(), + )); + } + } + } + #[pyfunction] - fn scandir(path: OptionalArg, vm: &VirtualMachine) -> PyResult { - let path = path.unwrap_or_else(|| OsPath::new_str(".")); - let entries = fs::read_dir(&path.path) - .map_err(|err| OSErrorBuilder::with_filename(&err, path.clone(), vm))?; - Ok(ScandirIterator { - entries: PyRwLock::new(Some(entries)), - mode: path.mode(), + fn scandir(path: OptionalArg>>, vm: &VirtualMachine) -> PyResult { + let path = path + .flatten() + .unwrap_or_else(|| OsPathOrFd::Path(OsPath::new_str("."))); + match path { + OsPathOrFd::Path(path) => { + let entries = fs::read_dir(&path.path) + .map_err(|err| OSErrorBuilder::with_filename(&err, path.clone(), vm))?; + Ok(ScandirIterator { + entries: PyRwLock::new(Some(entries)), + mode: path.mode(), + } + .into_ref(&vm.ctx) + .into()) + } + OsPathOrFd::Fd(fno) => { + #[cfg(not(all(unix, not(target_os = "redox"))))] + { + let _ = fno; + Err(vm.new_not_implemented_error("can't pass fd to scandir on this platform")) + } + #[cfg(all(unix, not(target_os = "redox")))] + { + use std::os::unix::io::IntoRawFd; + // closedir() closes the fd, so duplicate it first + let new_fd = nix::unistd::dup(fno).map_err(|e| e.into_pyexception(vm))?; + let raw_fd = new_fd.into_raw_fd(); + let dir = OwnedDir::from_fd(raw_fd).map_err(|e| { + // fdopendir failed, close the dup'd fd + unsafe { libc::close(raw_fd) }; + e.into_pyexception(vm) + })?; + Ok(ScandirIteratorFd { + dir: crate::common::lock::PyMutex::new(Some(dir)), + orig_fd: fno.as_raw(), + } + .into_ref(&vm.ctx) + .into()) + } + } } - .into_ref(&vm.ctx) - .into()) } #[derive(Debug, FromArgs)] @@ -1960,12 +2264,12 @@ pub(super) mod _os { // mkfifo Some Some None // mknod Some Some None SupportFunc::new("readlink", Some(false), None, Some(false)), - SupportFunc::new("remove", Some(false), None, Some(false)), - SupportFunc::new("unlink", Some(false), None, Some(false)), + SupportFunc::new("remove", Some(false), Some(UNLINK_DIR_FD), Some(false)), + SupportFunc::new("unlink", Some(false), Some(UNLINK_DIR_FD), Some(false)), SupportFunc::new("rename", Some(false), None, Some(false)), SupportFunc::new("replace", Some(false), None, Some(false)), // TODO: Fix replace - SupportFunc::new("rmdir", Some(false), None, Some(false)), - SupportFunc::new("scandir", None, Some(false), Some(false)), + SupportFunc::new("rmdir", Some(false), Some(RMDIR_DIR_FD), Some(false)), + SupportFunc::new("scandir", Some(SCANDIR_FD), Some(false), Some(false)), SupportFunc::new("stat", Some(true), Some(STAT_DIR_FD), Some(true)), SupportFunc::new("fstat", Some(true), Some(STAT_DIR_FD), Some(true)), SupportFunc::new("symlink", Some(false), Some(SYMLINK_DIR_FD), Some(false)), @@ -2043,7 +2347,11 @@ pub fn module_exec(vm: &VirtualMachine, module: &Py) -> PyResult<()> { /// For `os._Environ`, accesses the internal `_data` dict directly at the Rust level. /// This avoids Python-level method calls that can deadlock after fork() when /// parking_lot locks are held by threads that no longer exist. -pub(crate) fn envobj_to_dict(env: ArgMapping, vm: &VirtualMachine) -> PyResult { +#[cfg(any(unix, windows))] +pub(crate) fn envobj_to_dict( + env: crate::function::ArgMapping, + vm: &VirtualMachine, +) -> PyResult { let obj = env.obj(); if let Some(dict) = obj.downcast_ref_if_exact::(vm) { return Ok(dict.to_owned()); diff --git a/crates/vm/src/stdlib/posix.rs b/crates/vm/src/stdlib/posix.rs index fcec8298d13..35ab3d48f97 100644 --- a/crates/vm/src/stdlib/posix.rs +++ b/crates/vm/src/stdlib/posix.rs @@ -507,7 +507,23 @@ pub mod module { #[pyfunction] #[pyfunction(name = "unlink")] - fn remove(path: OsPath, dir_fd: DirFd<'_, 0>, vm: &VirtualMachine) -> PyResult<()> { + fn remove( + path: OsPath, + dir_fd: DirFd<'_, { _os::UNLINK_DIR_FD as usize }>, + vm: &VirtualMachine, + ) -> PyResult<()> { + #[cfg(not(target_os = "redox"))] + if let Some(fd) = dir_fd.raw_opt() { + let c_path = path.clone().into_cstring(vm)?; + let res = unsafe { libc::unlinkat(fd, c_path.as_ptr(), 0) }; + return if res < 0 { + let err = crate::common::os::errno_io_error(); + Err(OSErrorBuilder::with_filename(&err, path, vm)) + } else { + Ok(()) + }; + } + #[cfg(target_os = "redox")] let [] = dir_fd.0; fs::remove_file(&path).map_err(|err| OSErrorBuilder::with_filename(&err, path, vm)) }