Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
GH-107465: Add pathlib.Path.from_uri() classmethod.
This method supports file URIs (including variants) as described in
RFC 8089, such as URIs generated by `pathlib.Path.as_uri()` and
`urllib.request.pathname2url`.

The method is added to `Path` rather than `PurePath` because it uses
`os.fsdecode()`, and so its results vary from system to system. I intend to
deprecate `PurePath.as_uri()` and move it to `Path` for the same reason.
  • Loading branch information
barneygale committed Aug 4, 2023
commit 0f51181345dcf3a3bc5978ef451dcdce41914368
45 changes: 45 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,51 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. classmethod:: Path.from_uri(uri)

Return a new path object from parsing a 'file' URI conforming to
:rfc:`8089`. For example::

>>> Path.from_uri('file:///etc/hosts')
PosixPath('/etc/hosts')

On Windows, DOS device and UNC paths may be parsed from URIs::

>>> Path.from_uri('file:///c:/windows')
WindowsPath('c:/windows')
>>> Path.from_uri('file://server/share')
WindowsPath('//server/share')

Several variant forms are supported::

>>> Path.from_uri('file:////server/share')
WindowsPath('//server/share')
>>> Path.from_uri('file://///server/share')
WindowsPath('//server/share')
>>> Path.from_uri('file:c:/windows')
WindowsPath('c:/windows')
>>> Path.from_uri('file:/c|/windows')
WindowsPath('c:/windows')
Comment thread
barneygale marked this conversation as resolved.
>>> p = Path.from_uri('file://///c:/windows')
WindowsPath('c:/windows')

URIs with no slash after the scheme (and no drive letter) are parsed as
relative paths::

>>> Path.from_uri('file:foo/bar')
WindowsPath('foo/bar')
Comment thread
barneygale marked this conversation as resolved.
Outdated

Users may wish to test the result with :meth:`~PurePath.is_absolute` and
reject relative paths, as these are not portable across processes with
differing working directories.

:func:`os.fsdecode` is used to decode percent-escaped byte sequences, and
so file URIs are not portable across machines with differing
:ref:`filesystem encodings <filesystem-encoding>`.

.. versionadded:: 3.13


.. method:: Path.stat(*, follow_symlinks=True)

Return a :class:`os.stat_result` object containing information about this path, like :func:`os.stat`.
Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ pathlib
:exc:`NotImplementedError` when a path operation isn't supported.
(Contributed by Barney Gale in :gh:`89812`.)

* Add :meth:`pathlib.Path.from_uri` classmethod.
(Contributed by Barney Gale in :gh:`107465`.)

* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
(Contributed by Barney Gale in :gh:`73435`.)

Expand Down
20 changes: 18 additions & 2 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from _collections_abc import Sequence
from errno import ENOENT, ENOTDIR, EBADF, ELOOP
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
from urllib.parse import quote_from_bytes as urlquote_from_bytes


__all__ = [
Expand Down Expand Up @@ -433,7 +432,8 @@ def as_uri(self):
# It's a posix path => 'file:///etc/hosts'
prefix = 'file://'
path = str(self)
return prefix + urlquote_from_bytes(os.fsencode(path))
from urllib.parse import quote_from_bytes
return prefix + quote_from_bytes(os.fsencode(path))

@property
def _str_normcase(self):
Expand Down Expand Up @@ -1178,6 +1178,22 @@ def __new__(cls, *args, **kwargs):
cls = WindowsPath if os.name == 'nt' else PosixPath
return object.__new__(cls)

@classmethod
def from_uri(cls, uri):
    """Return a new path from the given 'file' URI.

    A leading 'file:' scheme is stripped if present.  An empty authority
    ('file:///...') is removed, the extra slash that precedes a DOS drive
    ('/c:/...', '/c|/...') or a UNC path ('///server/...') is dropped, and
    a legacy 'c|' drive marker is rewritten as 'c:'.  Percent-escapes are
    decoded to bytes and then to text with os.fsdecode(), so the result
    depends on the filesystem encoding.

    URIs without a leading slash (and without a drive) produce relative
    paths; callers that need an absolute path should check is_absolute().
    """
    uri = uri.removeprefix('file:')
    if uri[:3] == '///':
        # Remove empty authority
        uri = uri[2:]
    # Compare against a tuple rather than the string ':|': for short paths
    # such as '/a' the slice uri[2:3] is '', and '' is "in" every string,
    # which would wrongly strip the root slash.
    if uri[:1] == '/' and (uri[2:3] in (':', '|') or uri[1:3] == '//'):
        # Remove slash before DOS device/UNC path
        uri = uri[1:]
    if uri[1:2] == '|':
        # Replace bar with colon in DOS drive
        uri = uri[:1] + ':' + uri[2:]
    # Imported lazily so that importing pathlib does not pull in urllib.
    from urllib.parse import unquote_to_bytes
    return cls(os.fsdecode(unquote_to_bytes(uri)))

@classmethod
def cwd(cls):
"""Return a new path pointing to the current working directory."""
Expand Down
31 changes: 31 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import tempfile
import unittest
from unittest import mock
from urllib.request import pathname2url

from test.support import import_helper
from test.support import set_recursion_limit
Expand Down Expand Up @@ -2913,6 +2914,20 @@ def test_passing_kwargs_deprecated(self):
with self.assertWarns(DeprecationWarning):
self.cls(foo="bar")

def test_from_uri_common(self):
P = self.cls
self.assertEqual(P.from_uri('file:foo/bar'), P('foo/bar'))
Comment thread
barneygale marked this conversation as resolved.
Outdated
self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar'))
self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar'))
self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar'))
self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar'))

def test_from_uri_pathname2url_common(self):
P = self.cls
self.assertEqual(P.from_uri(pathname2url('foo/bar')), P('foo/bar'))
self.assertEqual(P.from_uri(pathname2url('/foo/bar')), P('/foo/bar'))
self.assertEqual(P.from_uri(pathname2url('//foo/bar')), P('//foo/bar'))


class WalkTests(unittest.TestCase):

Expand Down Expand Up @@ -3441,7 +3456,23 @@ def check():
env['HOME'] = 'C:\\Users\\eve'
check()

def test_from_uri(self):
P = self.cls
# DOS drive paths
self.assertEqual(P.from_uri('file:c:/path/to/file'), P('c:/path/to/file'))
self.assertEqual(P.from_uri('file:c|/path/to/file'), P('c:/path/to/file'))
self.assertEqual(P.from_uri('file:/c|/path/to/file'), P('c:/path/to/file'))
self.assertEqual(P.from_uri('file:///c|/path/to/file'), P('c:/path/to/file'))
self.assertEqual(P.from_uri('file://///c|/path/to/file'), P('c:/path/to/file'))
# UNC paths
self.assertEqual(P.from_uri('file://server/path/to/file'), P('//server/path/to/file'))
self.assertEqual(P.from_uri('file:////server/path/to/file'), P('//server/path/to/file'))
self.assertEqual(P.from_uri('file://///server/path/to/file'), P('//server/path/to/file'))

def test_from_uri_pathname2url(self):
P = self.cls
self.assertEqual(P.from_uri(pathname2url(r'c:\path\to\file')), P('c:/path/to/file'))
self.assertEqual(P.from_uri(pathname2url(r'\\server\path\to\file')), P('//server/path/to/file'))

class PathSubclassTest(PathTest):
class cls(pathlib.Path):
Expand Down