# Source code for ome_zarr.format
"""Hierarchy of format OME-Zarr implementations."""
import logging
from abc import ABC, abstractmethod
from collections.abc import Iterator, Mapping
from typing import Any
from zarr.storage import FsspecStore, LocalStore
# Logger used by the format implementations in this module for debug output.
LOGGER = logging.getLogger("ome_zarr.format")
def format_from_version(version: str | float) -> "Format":
    """
    Return the format implementation whose ``version`` equals ``version``.

    :param version: Version identifier such as ``"0.4"``. A float such as
        ``0.2`` is also accepted and is converted with ``str()`` first.
    :return: A new instance of the matching format implementation.
    :raises ValueError: If no implementation reports that version.
    """
    # Support floating-point versions like `0.2`. The conversion does not
    # depend on the loop variable, so do it once before searching.
    if isinstance(version, float):
        version = str(version)

    for fmt in format_implementations():
        if fmt.version == version:
            return fmt
    raise ValueError(f"Version {version} not recognized")
def format_implementations() -> Iterator["Format"]:
    """
    Yield a fresh instance of every format implementation, newest first.
    """
    newest_to_oldest = (FormatV05, FormatV04, FormatV03, FormatV02, FormatV01)
    for implementation in newest_to_oldest:
        # Instantiate lazily so a consumer that stops early builds no extras.
        yield implementation()
def detect_format(metadata: dict, default: "Format") -> "Format":
    """
    Pick the format implementation that claims the given metadata.

    Implementations are asked in the order produced by
    ``format_implementations()``; the first whose ``matches()`` returns a
    truthy value wins. ``default`` is returned when the metadata is empty
    or when no implementation matches.
    """
    if not metadata:
        return default

    candidates = (
        fmt for fmt in format_implementations() if fmt.matches(metadata)
    )
    return next(candidates, default)
class Format(ABC):
    """
    Abstract base class for format implementations.

    Subclasses provide the version string, the Zarr format number, the chunk
    key encoding, store creation, and the helpers that generate and validate
    well dicts and coordinate transformations.

    Two instances compare equal (and hash equally) when they are of exactly
    the same class.
    """

    @property
    @abstractmethod
    def version(self) -> str:  # pragma: no cover
        """Version string of this format, e.g. ``"0.4"``."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def zarr_format(self) -> int:  # pragma: no cover
        """Zarr format number used by this format version."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def chunk_key_encoding(self) -> dict[str, str]:  # pragma: no cover
        """Chunk key encoding description as a ``str -> str`` dict."""
        raise NotImplementedError()

    @abstractmethod
    def matches(self, metadata: dict) -> bool:  # pragma: no cover
        """Return whether this format takes ownership of ``metadata``."""
        raise NotImplementedError()

    @abstractmethod
    def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore:
        """Create the store object used to access ``path`` in ``mode``."""
        raise NotImplementedError()

    # Deliberately not decorated with @abstractmethod: none of the concrete
    # formats in this module override it, so making it abstract would make
    # every one of them impossible to instantiate.
    def init_channels(self) -> None:  # pragma: no cover
        raise NotImplementedError()

    def _get_metadata_version(self, metadata: dict) -> str | None:
        """
        Return the version recorded in a metadata dict, if any.

        The first entry of ``multiscales`` is consulted when that list is
        non-empty. Otherwise the ``plate``, ``well`` and ``image-label``
        objects are tried in that order, and the first non-empty one decides
        the result.

        :param metadata: Metadata dict to inspect.
        :return: The ``version`` value of the first object found, or ``None``
            if no object is present or it carries no ``version`` key.
        """
        multiscales = metadata.get("multiscales", [])
        if multiscales:
            return multiscales[0].get("version", None)

        for name in ("plate", "well", "image-label"):
            obj = metadata.get(name)
            if obj:
                return obj.get("version", None)
        return None

    def __repr__(self) -> str:
        return self.__class__.__name__

    def __eq__(self, other: object) -> bool:
        return self.__class__ == other.__class__

    def __hash__(self) -> int:
        # Overriding __eq__ without __hash__ sets __hash__ to None and makes
        # instances unhashable. Hash on the class so that objects which
        # compare equal also hash equally.
        return hash(self.__class__)

    @abstractmethod
    def generate_well_dict(
        self, well: str, rows: list[str], columns: list[str]
    ) -> dict:  # pragma: no cover
        """Build the metadata dict describing the well at path ``well``."""
        raise NotImplementedError()

    @abstractmethod
    def validate_well_dict(
        self, well: dict, rows: list[str], columns: list[str]
    ) -> None:  # pragma: no cover
        """Check a well metadata dict against the plate rows and columns."""
        raise NotImplementedError()

    @abstractmethod
    def generate_coordinate_transformations(
        self, shapes: list[tuple]
    ) -> list[list[dict[str, Any]]] | None:  # pragma: no cover
        """Derive coordinate transformations from the given level shapes."""
        raise NotImplementedError()

    @abstractmethod
    def validate_coordinate_transformations(
        self,
        ndim: int,
        nlevels: int,
        coordinate_transformations: list[list[dict[str, Any]]] | None = None,
    ) -> list[list[dict[str, Any]]] | None:  # pragma: no cover
        """Check coordinate transformations against ``ndim`` and ``nlevels``."""
        raise NotImplementedError()
class FormatV01(Format):
    """
    Initial format. (2020)

    Uses Zarr format 2 with "." as the chunk key separator. Well dicts only
    need a string ``path``; coordinate transformations are not used.
    """

    # Keys every plate well dict must contain, mapped to the required type.
    REQUIRED_PLATE_WELL_KEYS: Mapping[str, type] = {"path": str}

    @property
    def version(self) -> str:
        return "0.1"

    @property
    def zarr_format(self) -> int:
        return 2

    @property
    def chunk_key_encoding(self) -> dict[str, str]:
        return {"name": "v2", "separator": "."}

    def matches(self, metadata: dict) -> bool:
        """Return True if the version found in ``metadata`` is this version."""
        version = self._get_metadata_version(metadata)
        LOGGER.debug("%s matches %s?", self.version, version)
        return version == self.version

    def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore:
        """
        Create the store for ``path``.

        Not ideal. Stores should remain hidden.
        "dimension_separator" is specified at array creation time.

        :param path: Location of the data. Paths starting with ``http`` or
            ``s3`` are opened with ``FsspecStore.from_url``; anything else is
            opened as a ``LocalStore``.
        :param mode: The store is read-only exactly when this is ``"r"``.
        :return: The created store.
        """
        read_only = mode == "r"

        # NOTE(review): this is a plain prefix test, so a local path whose
        # name merely begins with "http" or "s3" is also treated as remote.
        store: FsspecStore | LocalStore
        if path.startswith(("http", "s3")):
            store = FsspecStore.from_url(
                path,
                storage_options=None,
                read_only=read_only,
            )
        else:
            # No other kwargs supported
            store = LocalStore(path, read_only=read_only)

        # Log the class actually created rather than always "FsspecStore".
        LOGGER.debug("Created %s(%s, %s)", type(store).__name__, path, mode)
        return store

    def generate_well_dict(
        self, well: str, rows: list[str], columns: list[str]
    ) -> dict:
        """Return ``{"path": well}``; rows and columns are not used here."""
        return {"path": str(well)}

    def validate_well_dict(
        self, well: dict, rows: list[str], columns: list[str]
    ) -> None:
        """
        Check that ``well`` has every required key with the required type.

        Keys outside ``REQUIRED_PLATE_WELL_KEYS`` are tolerated and only
        reported at debug level.

        :raises ValueError: If a required key is missing or has a value of
            the wrong type.
        """
        if any(e not in self.REQUIRED_PLATE_WELL_KEYS for e in well):
            LOGGER.debug("%s contains unspecified keys", well)

        for key, key_type in self.REQUIRED_PLATE_WELL_KEYS.items():
            # Exceptions do not interpolate logging-style "%s" arguments, so
            # the messages are built with f-strings.
            if key not in well:
                raise ValueError(
                    f"{well} must contain a {key} key of type {key_type}"
                )
            if not isinstance(well[key], key_type):
                raise ValueError(f"{well} {key} must be of {key_type} type")

    def generate_coordinate_transformations(
        self, shapes: list[tuple]
    ) -> list[list[dict[str, Any]]] | None:
        """Return None: this version generates no coordinate transformations."""
        return None

    def validate_coordinate_transformations(
        self,
        ndim: int,
        nlevels: int,
        coordinate_transformations: list[list[dict[str, Any]]] | None = None,
    ) -> None:
        """Accept anything: this version performs no validation."""
        return None
class FormatV02(FormatV01):
    """
    Version 0.2 of the format.

    Changelog: move to nested storage (April 2021)
    """

    @property
    def version(self) -> str:
        """Version identifier reported by this implementation."""
        return "0.2"

    @property
    def chunk_key_encoding(self) -> dict[str, str]:
        """Chunk key encoding: same name as 0.1, but "/" as separator."""
        return {
            "name": "v2",
            "separator": "/",
        }
class FormatV03(FormatV02):  # inherits from V02 to avoid code duplication
    """
    Version 0.3 of the format.

    Changelog: variable number of dimensions (up to 5),
    introduce axes field in multiscales (June 2021)
    """

    @property
    def version(self) -> str:
        """Version identifier reported by this implementation."""
        return "0.3"
class FormatV04(FormatV03):
    """
    Version 0.4 of the format.

    Changelog: axes is list of dicts,
    introduce coordinate_transformations in multiscales (Nov 2021)
    """

    # Keys every plate well dict must contain, mapped to the required type.
    REQUIRED_PLATE_WELL_KEYS: Mapping[str, type] = {
        "path": str,
        "rowIndex": int,
        "columnIndex": int,
    }

    @property
    def version(self) -> str:
        return "0.4"

    def generate_well_dict(
        self, well: str, rows: list[str], columns: list[str]
    ) -> dict:
        """
        Build the well dict for a ``"row/column"`` path.

        :param well: Well path made of a row name and a column name joined
            by ``/``.
        :param rows: Row names of the plate.
        :param columns: Column names of the plate.
        :return: Dict with ``path``, ``rowIndex`` and ``columnIndex``; the
            indices are the positions of the names in ``rows``/``columns``.
        :raises ValueError: If ``well`` does not split into exactly two parts
            or a part is not listed in ``rows``/``columns``.
        """
        row, column = well.split("/")
        if row not in rows:
            raise ValueError(f"{row} is not defined in the list of rows")
        if column not in columns:
            raise ValueError(f"{column} is not defined in the list of columns")
        return {
            "path": str(well),
            "rowIndex": rows.index(row),
            "columnIndex": columns.index(column),
        }

    def validate_well_dict(
        self, well: dict, rows: list[str], columns: list[str]
    ) -> None:
        """
        Check the required keys, then the path and index consistency.

        :raises ValueError: If a required key is missing or mistyped, the
            path is not of the form ``row/column``, the row or column is not
            part of the plate, or a stored index does not match the position
            of the name in ``rows``/``columns``.
        """
        super().validate_well_dict(well, rows, columns)

        parts = well["path"].split("/")
        if len(parts) != 2:
            raise ValueError(f"{well} path must exactly be composed of 2 groups")
        row, column = parts

        if row not in rows:
            raise ValueError(f"{row} is not defined in the plate rows")
        if well["rowIndex"] != rows.index(row):
            raise ValueError(f"Mismatching row index for {well}")
        if column not in columns:
            raise ValueError(f"{column} is not defined in the plate columns")
        if well["columnIndex"] != columns.index(column):
            raise ValueError(f"Mismatching column index for {well}")

    def generate_coordinate_transformations(
        self, shapes: list[tuple]
    ) -> list[list[dict[str, Any]]] | None:
        """
        Compute a minimal ``scale`` transformation for every pyramid level.

        :param shapes: Shape of each level; the first entry is taken as the
            full-resolution shape.
        :return: One single-element list per level, holding a ``scale``
            transformation whose factors are the full-resolution extent
            divided by that level's extent along each dimension.
        """
        data_shape = shapes[0]
        coordinate_transformations: list[list[dict[str, Any]]] = []
        # calculate minimal 'scale' transform based on pyramid dims
        for shape in shapes:
            # Kept as an assert so the exception type seen by callers is
            # unchanged.
            assert len(shape) == len(data_shape)
            scale = [full / level for full, level in zip(data_shape, shape)]
            coordinate_transformations.append([{"type": "scale", "scale": scale}])
        return coordinate_transformations

    def validate_coordinate_transformations(
        self,
        ndim: int,
        nlevels: int,
        coordinate_transformations: list[list[dict[str, Any]]] | None = None,
    ) -> None:
        """
        Validate the coordinate transformations of every level.

        Each level must supply a list of dicts that starts with exactly one
        ``scale`` transformation and contains at most one ``translation``.
        Both must list one number per image dimension.

        :param ndim: Number of image dimensions.
        :param nlevels: Number of datasets (pyramid levels) expected.
        :param coordinate_transformations: One list of transformation dicts
            per level.
        :raises ValueError: If the argument is missing, the number of levels
            does not match, or any level breaks the rules above.
        """
        if coordinate_transformations is None:
            raise ValueError("coordinate_transformations must be provided")

        ct_count = len(coordinate_transformations)
        if ct_count != nlevels:
            raise ValueError(
                f"coordinate_transformations count: {ct_count} must match "
                f"datasets {nlevels}"
            )

        for transformations in coordinate_transformations:
            # Kept as an assert so the exception type seen by callers is
            # unchanged.
            assert isinstance(transformations, list)
            types = [t.get("type", None) for t in transformations]
            if any(t is None for t in types):
                raise ValueError(f"Missing type in: {transformations}")

            # validate scales...
            if types.count("scale") != 1:
                raise ValueError(
                    "Must supply 1 'scale' item in coordinate_transformations"
                )
            # first transformation must be scale
            if types[0] != "scale":
                raise ValueError("First coordinate_transformations must be 'scale'")
            first = transformations[0]
            if "scale" not in first:
                raise ValueError(f"Missing scale argument in: {first}")
            scale = first["scale"]
            if len(scale) != ndim:
                raise ValueError(
                    f"'scale' list {scale} must match "
                    f"number of image dimensions: {ndim}"
                )
            for value in scale:
                if not isinstance(value, (float, int)):
                    raise ValueError(f"'scale' values must all be numbers: {scale}")

            # validate translations...
            translation_count = types.count("translation")
            if translation_count > 1:
                raise ValueError(
                    "Must supply 0 or 1 'translation' item in "
                    "coordinate_transformations"
                )
            if translation_count == 1:
                transformation = transformations[types.index("translation")]
                if "translation" not in transformation:
                    # Report the translation dict, not the scale one.
                    raise ValueError(
                        f"Missing translation argument in: {transformation}"
                    )
                translation = transformation["translation"]
                if len(translation) != ndim:
                    raise ValueError(
                        f"'translation' list {translation} must match "
                        f"image dimensions count: {ndim}"
                    )
                for value in translation:
                    if not isinstance(value, (float, int)):
                        raise ValueError(
                            f"'translation' values must all be numbers: {translation}"
                        )
class FormatV05(FormatV04):
    """
    Version 0.5 of the format.

    Changelog: added FormatV05 (May 2025): writing not supported yet
    """

    @property
    def version(self) -> str:
        """Version identifier reported by this implementation."""
        return "0.5"

    @property
    def zarr_format(self) -> int:
        """Zarr format number used by this version."""
        return 3

    @property
    def chunk_key_encoding(self) -> dict[str, str]:
        """Chunk key encoding used by this version."""
        # this is default for Zarr v3. Could return None?
        return {
            "name": "default",
            "separator": "/",
        }
# Alias for FormatV05, the newest implementation listed by
# format_implementations().
CurrentFormat = FormatV05