"""Reading logic for ome-zarr.
Primary entry point is the :func:`~ome_zarr.io.parse_url` method.
"""
import logging
from pathlib import Path
from urllib.parse import urljoin
import dask.array as da
import zarr
from zarr.storage import FsspecStore, LocalStore, StoreLike
from .format import CurrentFormat, Format, detect_format
from .types import JSONDict
LOGGER = logging.getLogger("ome_zarr.io")
[docs]
class ZarrLocation:
    """
    IO primitive for reading and writing Zarr data. Uses a store for all
    data access.

    No assumptions about the existence of the given path string are made.
    The group attributes of the location are loaded on construction and
    cached internally.
    """

    def __init__(
        self,
        path: StoreLike,
        mode: str = "r",
        fmt: Format = CurrentFormat(),
    ) -> None:
        """
        Open (or, in ``"w"`` mode, create) the Zarr group at ``path``.

        :param path: A :class:`pathlib.Path`, a string, an ``FsspecStore`` or
            a ``LocalStore``. A ``Path`` is resolved to an absolute path.
        :param mode: Access mode, e.g. ``"r"`` or ``"w"``.
        :param fmt: Requested format; ``None`` falls back to
            ``CurrentFormat()``.
        :raises TypeError: If ``path`` is none of the supported types.
        """
        # Resolve the fallback first: the format is dereferenced immediately
        # below, so a later ``None`` check would never be reached.
        if fmt is None:
            fmt = CurrentFormat()
        LOGGER.debug("ZarrLocation.__init__ path: %s, fmt: %s", path, fmt.version)

        self.__fmt = fmt
        self.__mode = mode

        if isinstance(path, Path):
            self.__path = str(path.resolve())
        elif isinstance(path, str):
            self.__path = path
        elif isinstance(path, FsspecStore):
            self.__path = path.path
        elif isinstance(path, LocalStore):
            self.__path = str(path.root)
        else:
            raise TypeError(f"not expecting: {type(path)}")

        # A ready-made store is used as-is; otherwise the format builds one.
        # The attribute may therefore hold a LocalStore as well.
        self.__store: FsspecStore = (
            path
            if isinstance(path, (FsspecStore, LocalStore))
            else fmt.init_store(self.__path, mode)
        )

        self.__init_metadata()
        detected = detect_format(self.__metadata, fmt)
        LOGGER.debug("ZarrLocation.__init__ %s detected: %s", path, detected)
        if detected != fmt:
            LOGGER.warning(
                "version mismatch: detected: %s, requested: %s", detected, fmt
            )
            self.__fmt = detected
            # NOTE(review): this rebuilds the store from the path string even
            # when the caller passed a store object -- confirm init_store()
            # yields an equivalent store in that case.
            self.__store = detected.init_store(self.__path, mode)
            self.__init_metadata()

    def __init_metadata(self) -> None:
        """
        Load the Zarr group metadata for the given location.

        Resets the cached metadata, then tries to open the root group
        read-only. If that fails and the location was opened in ``"w"`` mode
        the group is created; otherwise the location is marked as missing.
        """
        self.zgroup: JSONDict = {}
        # Reset here but not populated by any code in this class.
        self.zarray: JSONDict = {}
        self.__metadata: JSONDict = {}
        self.__exists: bool = True

        try:
            # This group is used to get the group metadata
            # (used for info, download, Spec.match() via root_attrs() etc.)
            # and to check if the group exists for reading. Only "r" mode is
            # needed for this, and no explicit zarr_format is required.
            group = zarr.open_group(
                store=self.__store, path="/", mode="r", zarr_format=None
            )
            self.zgroup = group.attrs.asdict()
            # For zarr v3, everything is under the "ome" namespace
            if "ome" in self.zgroup:
                self.zgroup = self.zgroup["ome"]
            self.__metadata = self.zgroup
        except (ValueError, FileNotFoundError):
            # Group doesn't exist. If we are in "w" mode, we need to create it.
            if self.__mode == "w":
                # Creating a new group requires an explicit zarr_format.
                zarr.open_group(
                    store=self.__store,
                    path="/",
                    mode="w",
                    zarr_format=self.__fmt.zarr_format,
                )
            else:
                self.__exists = False

    def __repr__(self) -> str:
        """Print the path as well as whether this is a group or an array."""
        suffix = ""
        if self.zgroup:
            suffix += " [zgroup]"
        if self.zarray:
            suffix += " [zarray]"
        return f"{self.subpath('')}{suffix}"

    def exists(self) -> bool:
        """Return whether the group could be opened (or was created)."""
        return self.__exists

    @property
    def fmt(self) -> Format:
        """Return the format in use (the detected one after a mismatch)."""
        return self.__fmt

    @property
    def mode(self) -> str:
        """Return the mode this location was opened with."""
        return self.__mode

    @property
    def version(self) -> str:
        """Return the version of the OME-NGFF spec used for this location."""
        return self.__fmt.version

    @property
    def path(self) -> str:
        """Return the location as a string."""
        return self.__path

    @property
    def store(self) -> FsspecStore:
        """Return the initialized store for this location"""
        assert self.__store is not None
        return self.__store

    @property
    def root_attrs(self) -> JSONDict:
        """Return a shallow copy of the cached group attributes.

        When the attributes contain an ``"ome"`` key, this is the content of
        that key.
        """
        return dict(self.__metadata)

    def load(self, subpath: str = "") -> da.core.Array:
        """Use dask.array.from_zarr to load the subpath."""
        return da.from_zarr(self.__store, subpath)

    def __eq__(self, rhs: object) -> bool:
        """Two locations are equal if same type and same ``subpath()``."""
        if type(self) is not type(rhs):
            return False
        if not isinstance(rhs, ZarrLocation):
            return False
        return self.subpath() == rhs.subpath()

    def __hash__(self) -> int:
        """Hash on ``subpath()`` so that equal locations hash equally."""
        # Defining __eq__ alone would leave instances unhashable.
        return hash(self.subpath())

    def basename(self) -> str:
        """Return the last element of the underlying location.

        >>> ZarrLocation("/tmp/foo").basename()
        'foo'
        >>> ZarrLocation("https://example.com/bar").basename()
        'bar'
        >>> ZarrLocation("https://example.com/baz/").basename()
        'baz'
        """
        # Drop at most one trailing slash, then take the last "/" component.
        return self.__path.removesuffix("/").split("/")[-1]

    # TODO: update to from __future__ import annotations with 3.7+
    def create(self, path: str) -> "ZarrLocation":
        """Create a new Zarr location for the given path."""
        subpath = self.subpath(path)
        LOGGER.debug("open(%s(%s))", self.__class__.__name__, subpath)
        return self.__class__(subpath, mode=self.__mode, fmt=self.__fmt)

    def parts(self) -> list[str]:
        """Split the location into its components.

        Local stores use :attr:`pathlib.PurePath.parts`; everything else is
        split on ``"/"``.
        """
        if self._isfile():
            return list(Path(self.__path).parts)
        return self.__path.split("/")

    def subpath(self, subpath: str = "") -> str:
        """Join ``subpath`` onto this location and return it as a string.

        Local paths are resolved, HTTP(S) locations are joined with
        :func:`urllib.parse.urljoin`, and anything else is joined with a
        single ``"/"``.
        """
        if self._isfile():
            return str((Path(self.__path) / subpath).resolve())
        if self._ishttp():
            url = str(self.__path)
            # urljoin would replace the last segment without a trailing slash.
            if not url.endswith("/"):
                url = f"{url}/"
            return urljoin(url, subpath)
        # Might require a warning
        if self.__path.endswith("/"):
            return f"{self.__path}{subpath}"
        return f"{self.__path}/{subpath}"

    def _isfile(self) -> bool:
        """
        Return whether the current underlying implementation
        points to a local file or not.
        """
        return isinstance(self.__store, LocalStore)

    def _ishttp(self) -> bool:
        """
        Return whether the current underlying implementation
        points to a URL
        """
        # A store without a filesystem object cannot be an HTTP(S) location;
        # answer False instead of raising AttributeError.
        fs = getattr(self.__store, "fs", None)
        if fs is None:
            return False
        protocol = fs.protocol
        if isinstance(protocol, tuple):
            return any(proto in ("http", "https") for proto in protocol)
        return protocol in ("http", "https")
[docs]
def parse_url(
    path: Path | str, mode: str = "r", fmt: Format = CurrentFormat()
) -> ZarrLocation | None:
    """Build a :class:`ZarrLocation` from a path string or URL.

    :param path: Path to parse.
    :param mode: Mode to open in.
    :param fmt: Version of the OME-NGFF spec to open path with.
    :return: The `ZarrLocation`, or None when ``mode`` contains ``'r'`` and
        the location reports that it does not exist.

    >>> parse_url('does-not-exist')
    """
    location = ZarrLocation(path, mode=mode, fmt=fmt)
    # Only read modes treat a missing group as "nothing to return".
    missing_for_read = "r" in mode and not location.exists()
    return None if missing_for_read else location