From f62798de7c56bf902c51cd7cf02fb6a861359586 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Mon, 20 Nov 2023 14:32:58 -0600 Subject: [PATCH] feat: fsspec for all non-object writing - %-encoded urls no longer decoded (#1034) * writing goes through fsspec * increase rc version * type hints and docs * add helper methods, create * throw more specific error * add additional test for `create` failure with scheme other than local * simplify source selection * remove windows specific code * raise exception if invalid combination of handler / input (file-like object and fsspec) * use softer check for file-like object * cover problematic case with additional slash (file:///c:/file.root) * test "file:" scheme (no slash) * test backslash --- src/uproot/_util.py | 139 ++++++++--------------- src/uproot/reading.py | 6 +- src/uproot/sink/file.py | 123 +++++++++++--------- src/uproot/version.py | 2 +- src/uproot/writing/writable.py | 73 ++++-------- tests/test_0325_fix_windows_file_uris.py | 91 --------------- tests/test_0692_fsspec_reading.py | 30 +++++ tests/test_0692_fsspec_writing.py | 46 ++++++++ tests/test_0976_path_object_split.py | 7 ++ 9 files changed, 225 insertions(+), 292 deletions(-) delete mode 100644 tests/test_0325_fix_windows_file_uris.py diff --git a/src/uproot/_util.py b/src/uproot/_util.py index 2017c30ad..44f38e1ca 100644 --- a/src/uproot/_util.py +++ b/src/uproot/_util.py @@ -12,17 +12,20 @@ import itertools import numbers import os -import platform import re import warnings from collections.abc import Iterable -from urllib.parse import unquote, urlparse +from pathlib import Path +from typing import IO +from urllib.parse import urlparse import fsspec import numpy import packaging.version -win = platform.system().lower().startswith("win") +import uproot.source.chunk +import uproot.source.fsspec +import uproot.source.object def tobytes(array): @@ -36,7 +39,7 @@ def tobytes(array): return array.tostring() -def isint(x): +def isint(x) -> bool: """ Returns True if and only if ``x`` is an integer (including NumPy, not including bool). @@ -46,7 +49,7 @@ def isint(x): ) -def isnum(x): +def isnum(x) -> bool: """ Returns True if and only if ``x`` is a number (including NumPy, not including bool). @@ -56,7 +59,7 @@ def isnum(x): ) -def ensure_str(x): +def ensure_str(x) -> str: """ Ensures that ``x`` is a string (decoding with 'surrogateescape' if necessary). """ @@ -94,18 +97,17 @@ def is_file_like( obj, readable: bool = False, writable: bool = False, seekable: bool = False ) -> bool: return ( - callable(getattr(obj, "read", None)) - and callable(getattr(obj, "write", None)) - and callable(getattr(obj, "seek", None)) - and callable(getattr(obj, "tell", None)) - and callable(getattr(obj, "flush", None)) + all( + callable(getattr(obj, attr, None)) + for attr in ("read", "write", "seek", "tell", "flush") + ) and (not readable or not hasattr(obj, "readable") or obj.readable()) and (not writable or not hasattr(obj, "writable") or obj.writable()) and (not seekable or not hasattr(obj, "seekable") or obj.seekable()) ) -def parse_version(version): +def parse_version(version: str): """ Converts a semver string into a Version object that can be compared with ``<``, ``>=``, etc. @@ -116,7 +118,7 @@ def parse_version(version): return packaging.version.parse(version) -def from_module(obj, module_name): +def from_module(obj, module_name: str) -> bool: """ Returns True if ``obj`` is an instance of a class from a module given by name. @@ -155,7 +157,7 @@ def _regularize_filter_regex_flags(flags): return flagsbyte -def no_filter(x): +def no_filter(x) -> bool: """ A filter that accepts anything (always returns True). """ @@ -285,10 +287,6 @@ def regularize_path(path): return path -_windows_drive_letter_ending = re.compile(r".*\b[A-Za-z]$") -_windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:[\\/]") -_windows_absolute_path_pattern_slash = re.compile(r"^[\\/][A-Za-z]:[\\/]") - # These schemes may not appear in fsspec if the corresponding libraries are not installed (e.g. s3fs) _remote_schemes = ["root", "s3", "http", "https"] _schemes = list({*_remote_schemes, *fsspec.available_protocols()}) @@ -324,87 +322,48 @@ def file_object_path_split(urlpath: str) -> tuple[str, str | None]: return urlpath, obj -def file_path_to_source_class(file_path, options): +def file_path_to_source_class( + file_path_or_object: str | Path | IO, options: dict +) -> tuple[type[uproot.source.chunk.Source], str | IO]: """ Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it. Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`. """ - import uproot.source.chunk - - file_path = regularize_path(file_path) + file_path_or_object: str | IO = regularize_path(file_path_or_object) source_cls = options["handler"] - if source_cls is not None: - if not ( - isinstance(source_cls, type) - and issubclass(source_cls, uproot.source.chunk.Source) + if source_cls is not None and not ( + isinstance(source_cls, type) + and issubclass(source_cls, uproot.source.chunk.Source) + ): + raise TypeError( + f"'handler' is not a class object inheriting from Source: {source_cls!r}" + ) + + # Infer the source class from the file path + if all( + callable(getattr(file_path_or_object, attr, None)) for attr in ("read", "seek") + ): + # need a very soft object check for ubuntu python3.8 pyroot ci tests, cannot use uproot._util.is_file_like + if ( + source_cls is not None + and source_cls is not uproot.source.object.ObjectSource ): raise TypeError( - f"'handler' is not a class object inheriting from Source: {source_cls!r}" + f"'handler' is not ObjectSource for a file-like object: {source_cls!r}" ) - return source_cls, file_path - - if ( - not isinstance(file_path, str) - and hasattr(file_path, "read") - and hasattr(file_path, "seek") - ): - source_cls = uproot.source.object.ObjectSource - return source_cls, file_path - - windows_absolute_path = None - if win and _windows_absolute_path_pattern.match(file_path) is not None: - windows_absolute_path = file_path - - parsed_url = urlparse(file_path) - if parsed_url.scheme.lower() == "file": - parsed_url_path = unquote(parsed_url.path) + return uproot.source.object.ObjectSource, file_path_or_object + elif isinstance(file_path_or_object, str): + source_cls = ( + uproot.source.fsspec.FSSpecSource if source_cls is None else source_cls + ) + return source_cls, file_path_or_object else: - parsed_url_path = parsed_url.path - - if win and windows_absolute_path is None: - if _windows_absolute_path_pattern.match(parsed_url_path) is not None: - windows_absolute_path = parsed_url_path - elif _windows_absolute_path_pattern_slash.match(parsed_url_path) is not None: - windows_absolute_path = parsed_url_path[1:] - - scheme = parsed_url.scheme.lower() - if ( - scheme == "file" - or len(parsed_url.scheme) == 0 - or windows_absolute_path is not None - ): - if windows_absolute_path is None: - if parsed_url.netloc.lower() == "localhost": - file_path = parsed_url_path - else: - file_path = parsed_url.netloc + parsed_url_path - else: - file_path = windows_absolute_path - - # uproot.source.file.MemmapSource - source_cls = uproot.source.fsspec.FSSpecSource - - return source_cls, os.path.expanduser(file_path) - - elif scheme == "root": - # uproot.source.xrootd.XRootDSource - source_cls = uproot.source.fsspec.FSSpecSource - return source_cls, file_path - - elif scheme == "s3": - # uproot.source.s3.S3Source - source_cls = uproot.source.fsspec.FSSpecSource - return source_cls, file_path - - elif scheme in ("http", "https"): - # uproot.source.http.HTTPSource - source_cls = uproot.source.fsspec.FSSpecSource - return source_cls, file_path - - return uproot.source.fsspec.FSSpecSource, file_path + raise TypeError( + f"file_path is not a string or file-like object: {file_path_or_object!r}" + ) if isinstance(__builtins__, dict): @@ -448,7 +407,7 @@ def _file_not_found(files, message=None): ) -def memory_size(data, error_message=None): +def memory_size(data, error_message=None) -> int: """ Regularizes strings like '## kB' and plain integer number of bytes to an integer number of bytes. @@ -739,7 +698,7 @@ def damerau_levenshtein(a, b, ratio=False): # Modified Damerau-Levenshtein distance. Adds a middling penalty # for capitalization. # https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance - M = [[0] * (len(b) + 1) for i in range(len(a) + 1)] + M = [[0] * (len(b) + 1) for _ in range(len(a) + 1)] for i in range(len(a) + 1): M[i][0] = i @@ -771,7 +730,7 @@ def damerau_levenshtein(a, b, ratio=False): # Transpose only M[i][j] = min(M[i][j], M[i - 2][j - 2] + 1) else: - # Traspose and capitalization + # Transpose and capitalization M[i][j] = min(M[i][j], M[i - 2][j - 2] + 1.5) if not ratio: diff --git a/src/uproot/reading.py b/src/uproot/reading.py index 935c6e646..e33b6fc3a 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -9,10 +9,14 @@ """ from __future__ import annotations +from __future__ import annotations + import struct import sys import uuid from collections.abc import Mapping, MutableMapping +from pathlib import Path +from typing import IO import uproot import uproot.behaviors.TBranch @@ -525,7 +529,7 @@ class ReadOnlyFile(CommonFileMethods): def __init__( self, - file_path, + file_path: str | Path | IO, *, object_cache=100, array_cache="100 MB", diff --git a/src/uproot/sink/file.py b/src/uproot/sink/file.py index 167c8da17..c631e7cc2 100644 --- a/src/uproot/sink/file.py +++ b/src/uproot/sink/file.py @@ -13,6 +13,9 @@ import numbers import os +from typing import IO + +import fsspec import uproot._util @@ -20,69 +23,84 @@ class FileSink: """ Args: - file_path (str): The filesystem path of the file to open. + urlpath_or_file_like (str, Path, or file-like object): If a string or Path, a + filesystem URL that specifies the file to open by fsspec. If a file-like object, it + must have ``read``, ``write``, ``seek``, ``tell``, and ``flush`` methods. + + An object that can write (and read) files on a local or remote filesystem. + It can be initialized from a file-like object (already opened) or a filesystem URL. + If initialized from a filesystem URL, fsspec is used to open the file. + In this case the file is opened in the first read or write operation. + """ - An object that can write (and read) files on a local filesystem, which either opens - a new file from a ``file_path`` in ``"r+b"`` mode or wraps a file-like object - with the :ref:`uproot.sink.file.FileSink.from_object` constructor. + def __init__(self, urlpath_or_file_like: str | IO, **storage_options): + self._open_file = None + self._file = None - With the ``file_path``-based constructor, the file is opened upon first read or - write. - """ + if uproot._util.is_file_like( + urlpath_or_file_like, readable=False, writable=False, seekable=False + ): + self._file = urlpath_or_file_like + + if not uproot._util.is_file_like( + self._file, readable=True, writable=True, seekable=True + ): + raise TypeError( + """writable file can only be created from a file path or an object that supports reading and writing""" + ) + else: + if not self._file_exists(urlpath_or_file_like, **storage_options): + self._truncate_file(urlpath_or_file_like, **storage_options) + + self._open_file = fsspec.open( + urlpath_or_file_like, mode="r+b", **storage_options + ) @classmethod - def from_object(cls, obj) -> FileSink: + def _file_exists(cls, urlpath: str, **storage_options) -> bool: """ Args: - obj (file-like object): An object with ``read``, ``write``, ``seek``, - ``tell``, and ``flush`` methods. + urlpath (str): A filesystem URL that specifies the file to check by fsspec. - Creates a :doc:`uproot.sink.file.FileSink` from a file-like object, such - as ``io.BytesIO``. The object must be readable, writable, and seekable - with ``"r+b"`` mode semantics. + Returns True if the file exists; False otherwise. """ - if uproot._util.is_file_like(obj, readable=True, writable=True, seekable=True): - self = cls(None) - self._file = obj - else: - raise TypeError( - """writable file can only be created from a file path or an object - - * that has 'read', 'write', 'seek', and 'tell' methods - * is 'readable() and writable() and seekable()'""" - ) - return self + fs, local_path = fsspec.core.url_to_fs(urlpath, **storage_options) + return fs.exists(local_path) @classmethod - def from_fsspec(cls, open_file) -> FileSink: - import fsspec + def _truncate_file(cls, urlpath: str, **storage_options) -> None: + """ + Args: + urlpath (str): A filesystem URL that specifies the file to truncate by fsspec. - if not isinstance(open_file, fsspec.core.OpenFile): - raise TypeError("""argument should be of type fsspec.core.OpenFile""") - self = cls(None) - self._fsspec_open_file = open_file - return self + Truncates the file to zero bytes. Creates parent directories if necessary. + """ + fs, local_path = fsspec.core.url_to_fs(urlpath, **storage_options) + parent_directory = fs.sep.join(local_path.split(fs.sep)[:-1]) + fs.mkdirs(parent_directory, exist_ok=True) + fs.touch(local_path, truncate=True) - def __init__(self, file_path: str | None): - self._file_path = file_path - self._file = None - self._fsspec_open_file = None + @property + def from_object(self) -> bool: + """ + True if constructed with a file-like object; False otherwise. + """ + return self._open_file is None @property def file_path(self) -> str | None: """ A path to the file, which is None if constructed with a file-like object. """ - return self._file_path + return self._open_file.path if self._open_file else None def _ensure(self): - if self._file: - return - - if self._fsspec_open_file: - self._file = self._fsspec_open_file.open() - else: - self._file = open(self._file_path, "r+b") + """ + Opens the file if it is not already open. + Sets the file's position to the beginning. + """ + if not self._file: + self._file = self._open_file.open() self._file.seek(0) @@ -95,14 +113,14 @@ def __setstate__(self, state): self.__dict__ = state self._file = None - def tell(self): + def tell(self) -> int: """ Calls the file or file-like object's ``tell`` method. """ self._ensure() return self._file.tell() - def flush(self): + def flush(self) -> None: """ Calls the file or file-like object's ``flush`` method. """ @@ -134,12 +152,9 @@ def __exit__(self, exception_type, exception_value, traceback): @property def in_path(self) -> str: - if self._file_path is None: - return "" - else: - return "\n\nin path: " + self._file_path + return f"\n\nin path: {self.file_path}" if self.file_path is not None else "" - def write(self, location, serialization): + def write(self, location: int, serialization) -> int: """ Args: location (int): Position in the file to write. @@ -150,23 +165,21 @@ def write(self, location, serialization): """ self._ensure() self._file.seek(location) - self._file.write(serialization) + return self._file.write(serialization) - def set_file_length(self, length): + def set_file_length(self, length: int): """ Sets the file's length to ``length``, truncating with zeros if necessary. Calls ``seek``, ``tell``, and possibly ``write``. """ self._ensure() - # self._file.truncate(length) - self._file.seek(0, os.SEEK_END) missing = length - self._file.tell() if missing > 0: self._file.write(b"\x00" * missing) - def read(self, location, num_bytes, insist=True) -> bytes: + def read(self, location: int, num_bytes: int, insist: bool = True) -> bytes: """ Args: location (int): Position in the file to read. diff --git a/src/uproot/version.py b/src/uproot/version.py index dfdb7f2ea..363472158 100644 --- a/src/uproot/version.py +++ b/src/uproot/version.py @@ -12,7 +12,7 @@ import re -__version__ = "5.2.0rc1" +__version__ = "5.2.0rc2" version = __version__ version_info = tuple(re.split(r"[-\.]", __version__)) diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 870e50669..4d1d4b448 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -20,14 +20,12 @@ import datetime import itertools -import os import queue import sys import uuid from collections.abc import Mapping, MutableMapping from pathlib import Path from typing import IO -from urllib.parse import urlparse import uproot._util import uproot.compression @@ -40,7 +38,7 @@ from uproot._util import no_filter, no_rename -def create(file_path: str | IO, **options): +def create(file_path: str | Path | IO, **options): """ Args: file_path (str, ``pathlib.Path`` or file-like object): The filesystem path of the @@ -48,7 +46,7 @@ def create(file_path: str | IO, **options): options: See below. Opens a local file for writing. Like ROOT's ``"CREATE"`` option, this function - raises an error (``OSError``) if a file already exists at ``file_path``. + raises an error (``FileExistsError``) if a file already exists at ``file_path``. Returns a :doc:`uproot.writing.writable.WritableDirectory`. @@ -62,61 +60,23 @@ def create(file_path: str | IO, **options): the :doc:`uproot.writing.writable.WritableFile`. Default is ``uproot.ZLIB(1)``. See :doc:`uproot.writing.writable.WritableFile` for details on these options. + + Additional options are passed to as ``storage_options`` to the fsspec filesystem """ file_path = uproot._util.regularize_path(file_path) - if isinstance(file_path, str) and os.path.exists(file_path): - raise OSError( + storage_options = { + key: value for key, value in options.items() if key not in create.defaults + } + if isinstance(file_path, str) and uproot.sink.file.FileSink._file_exists( + file_path, **storage_options + ): + raise FileExistsError( "path exists and refusing to overwrite (use 'uproot.recreate' to " f"overwrite)\n\nfor path {file_path}" ) return recreate(file_path, **options) -def _sink_from_path( - file_path_or_object: str | Path | IO, **storage_options -) -> uproot.sink.file.FileSink: - if uproot._util.is_file_like(file_path_or_object): - return uproot.sink.file.FileSink.from_object(file_path_or_object) - - file_path = uproot._util.regularize_path(file_path_or_object) - file_path, obj = uproot._util.file_object_path_split(file_path) - if obj is not None: - raise ValueError(f"file path '{file_path}' cannot contain an object: {obj}") - - parsed_url = urlparse(file_path) - scheme = parsed_url.scheme - - if not scheme: - # no scheme, assume local file - if not os.path.exists(file_path): - # truncate the file - Path(file_path).parent.mkdir(parents=True, exist_ok=True) - with open(file_path, mode="wb"): - pass - return uproot.sink.file.FileSink(file_path) - - # use fsspec to open the file - try: - # TODO: remove try/except block when fsspec becomes a dependency - import fsspec - - # truncate the file if it doesn't exist (also create parent directories) - fs, local_path = fsspec.core.url_to_fs(file_path, **storage_options) - if not fs.exists(local_path): - parent_directory = fs.sep.join(local_path.split(fs.sep)[:-1]) - fs.mkdirs(parent_directory, exist_ok=True) - fs.touch(local_path, truncate=True) - - open_file = fsspec.open(file_path, mode="r+b", **storage_options) - return uproot.sink.file.FileSink.from_fsspec(open_file) - - except ImportError: - raise ImportError( - f"cannot open file path '{file_path}' with scheme '{scheme}' because " - "the fsspec package is not installed." - ) from None - - def recreate(file_path: str | Path | IO, **options): """ Args: @@ -139,13 +99,15 @@ def recreate(file_path: str | Path | IO, **options): the :doc:`uproot.writing.writable.WritableFile`. Default is ``uproot.ZLIB(1)``. See :doc:`uproot.writing.writable.WritableFile` for details on these options. + + Additional options are passed to as ``storage_options`` to the fsspec filesystem. """ + file_path = uproot._util.regularize_path(file_path) storage_options = { key: value for key, value in options.items() if key not in recreate.defaults } - sink = _sink_from_path(file_path, **storage_options) - + sink = uproot.sink.file.FileSink(file_path, **storage_options) compression = options.pop("compression", create.defaults["compression"]) initial_directory_bytes = options.pop( @@ -192,12 +154,15 @@ def update(file_path: str | Path | IO, **options): * uuid_function (callable; ``uuid.uuid1``) See :doc:`uproot.writing.writable.WritableFile` for details on these options. + + Additional options are passed to as ``storage_options`` to the fsspec filesystem """ + file_path = uproot._util.regularize_path(file_path) storage_options = { key: value for key, value in options.items() if key not in update.defaults } - sink = _sink_from_path(file_path, **storage_options) + sink = uproot.sink.file.FileSink(file_path, **storage_options) initial_directory_bytes = options.pop( "initial_directory_bytes", create.defaults["initial_directory_bytes"] diff --git a/tests/test_0325_fix_windows_file_uris.py b/tests/test_0325_fix_windows_file_uris.py deleted file mode 100644 index 39ddfeac2..000000000 --- a/tests/test_0325_fix_windows_file_uris.py +++ /dev/null @@ -1,91 +0,0 @@ -# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE - -import numpy as np -import pytest - -import uproot._util -import uproot.reading - - -@pytest.mark.skipif( - not uproot._util.win, reason="Drive letters only parsed on Windows." -) -def test_windows_drive_letters(): - assert ( - uproot._util.file_path_to_source_class( - "file:///g:/mydir/file.root", uproot.reading.open.defaults - )[1] - == "g:/mydir/file.root" - ) - - assert ( - uproot._util.file_path_to_source_class( - "file:/g:/mydir/file.root", uproot.reading.open.defaults - )[1] - == "g:/mydir/file.root" - ) - - assert ( - uproot._util.file_path_to_source_class( - "file:g:/mydir/file.root", uproot.reading.open.defaults - )[1] - == "g:/mydir/file.root" - ) - - assert ( - uproot._util.file_path_to_source_class( - "/g:/mydir/file.root", uproot.reading.open.defaults - )[1] - == "g:/mydir/file.root" - ) - - assert ( - uproot._util.file_path_to_source_class( - r"\g:/mydir/file.root", uproot.reading.open.defaults - )[1] - == "g:/mydir/file.root" - ) - - assert ( - uproot._util.file_path_to_source_class( - r"g:\mydir\file.root", uproot.reading.open.defaults - )[1] - == r"g:\mydir\file.root" - ) - - assert ( - uproot._util.file_path_to_source_class( - r"\g:\mydir\file.root", uproot.reading.open.defaults - )[1] - == r"g:\mydir\file.root" - ) - - -def test_escaped_uri_codes(): - # If they're file:// paths, yes we should unquote the % signs. - assert ( - uproot._util.file_path_to_source_class( - "file:///my%20file.root", uproot.reading.open.defaults - )[1] - == "/my file.root" - ) - assert ( - uproot._util.file_path_to_source_class( - "file:///my%E2%80%92file.root", uproot.reading.open.defaults - )[1] - == "/my\u2012file.root" - ) - - # Otherwise, no we should not. - assert ( - uproot._util.file_path_to_source_class( - "/my%20file.root", uproot.reading.open.defaults - )[1] - == "/my%20file.root" - ) - assert ( - uproot._util.file_path_to_source_class( - "/my%E2%80%92file.root", uproot.reading.open.defaults - )[1] - == "/my%E2%80%92file.root" - ) diff --git a/tests/test_0692_fsspec_reading.py b/tests/test_0692_fsspec_reading.py index 052d916c6..b24921d80 100644 --- a/tests/test_0692_fsspec_reading.py +++ b/tests/test_0692_fsspec_reading.py @@ -7,6 +7,7 @@ import uproot.source.xrootd import uproot.source.s3 +from typing import BinaryIO import skhep_testdata import queue import fsspec @@ -15,6 +16,35 @@ import sys +@pytest.mark.parametrize( + "urlpath, source_class", + [ + ("file.root", uproot.source.fsspec.FSSpecSource), + ("s3://path/file.root", uproot.source.fsspec.FSSpecSource), + (r"C:\path\file.root", uproot.source.fsspec.FSSpecSource), + (r"file://C:\path\file.root", uproot.source.fsspec.FSSpecSource), + ("root://file.root", uproot.source.fsspec.FSSpecSource), + (BinaryIO(), uproot.source.object.ObjectSource), + ], +) +def test_default_source(urlpath, source_class): + assert uproot._util.file_path_to_source_class( + urlpath, options=uproot.reading.open.defaults + ) == (source_class, urlpath) + + +@pytest.mark.parametrize( + "to_open, handler", + [ + ("file.root", "invalid_handler"), + (BinaryIO(), uproot.source.fsspec.FSSpecSource), + ], +) +def test_invalid_handler(to_open, handler): + with pytest.raises(TypeError): + uproot.open(to_open, handler=handler) + + def test_open_fsspec_http(server): pytest.importorskip("aiohttp") diff --git a/tests/test_0692_fsspec_writing.py b/tests/test_0692_fsspec_writing.py index 5cb73b1ef..3fac30c2f 100644 --- a/tests/test_0692_fsspec_writing.py +++ b/tests/test_0692_fsspec_writing.py @@ -31,6 +31,52 @@ def test_fsspec_writing_local(tmp_path, scheme): assert f["tree"]["x"].array().tolist() == [1, 2, 3] +# https://github.com/scikit-hep/uproot5/issues/325 +@pytest.mark.parametrize( + "scheme", + [ + "", + "file:", # this is not a valid schema, but we should support it + "file://", + "simplecache::file://", + ], +) +@pytest.mark.parametrize( + "filename", ["file.root", "file%2Eroot", "my%E2%80%92file.root", "my%20file.root"] +) +@pytest.mark.parametrize( + "slash_prefix", + ["", "/", "\\"], +) +def test_fsspec_writing_local_uri(tmp_path, scheme, slash_prefix, filename): + uri = scheme + slash_prefix + os.path.join(tmp_path, "some", "path", filename) + print(uri) + with uproot.create(uri) as f: + f["tree"] = {"x": np.array([1, 2, 3])} + with uproot.open(uri) as f: + assert f["tree"]["x"].array().tolist() == [1, 2, 3] + + +@pytest.mark.parametrize( + "scheme", + [ + "", + "file://", + "simplecache::file://", + # "memory://", # uncomment when https://github.com/fsspec/filesystem_spec/pull/1426 is available in pypi + "simplecache::memory://", + ], +) +def test_fsspec_writing_create(tmp_path, scheme): + uri = scheme + os.path.join(tmp_path, "some", "path", "file.root") + with uproot.create(uri) as f: + f["tree"] = {"x": np.array([1, 2, 3])} + + with pytest.raises(FileExistsError): + with uproot.create(uri): + pass + + def test_issue_1029(tmp_path): # https://github.com/scikit-hep/uproot5/issues/1029 urlpath = os.path.join(tmp_path, "some", "path", "file.root") diff --git a/tests/test_0976_path_object_split.py b/tests/test_0976_path_object_split.py index 84043f8c7..be6100e01 100644 --- a/tests/test_0976_path_object_split.py +++ b/tests/test_0976_path_object_split.py @@ -162,6 +162,13 @@ None, ), ), + ( + r"C:\tmp\test\dir\my%20file.root:Dir/Test", + ( + r"C:\tmp\test\dir\my%20file.root", + "Dir/Test", + ), + ), ], ) def test_url_split(input_value, expected_output):