# Source code for ddm.data_handling.dask_image

# Copyright (c) 2017-2018, dask-image Developers (see AUTHORS.rst for details)
# All rights reserved.

import glob
import threading
import numbers
import warnings

from typing import Any

import dask.array as da
import numpy as np
import pims
from tifffile import natural_sorted


def read_data_into_dask(fname, nframes: int = 1, *, experiment: int = 0):
    """Read image data into a Dask Array.

    Provides a simple, fast mechanism to ingest image data into a Dask Array.

    Parameters
    ----------
    fname : str or pathlib.Path
        A glob like string that may match one or multiple filenames.
        Where multiple filenames match, they are sorted using
        natural (as opposed to alphabetical) sort.
    nframes : int, optional
        Number of the frames to include in each chunk (default: 1).
        ``-1`` selects all frames, i.e. a single chunk.
    experiment : int, optional
        Select experiment if image stack contains multiple measurement
        series (default: 0). It is not validated here; it is only
        forwarded to the per-chunk reader.

    Returns
    -------
    array : dask.array.Array
        A Dask Array representing the contents of all image files.

    Raises
    ------
    ValueError
        If `nframes` is not an integer, or is neither ``-1`` nor positive.

    Warns
    -----
    RuntimeWarning
        If `nframes` exceeds the number of frames, or does not evenly
        divide it.
    """
    sfname = str(fname)

    # Validate before touching the file system.
    if not isinstance(nframes, numbers.Integral):
        raise ValueError("`nframes` must be an integer.")
    if (nframes != -1) and not (nframes > 0):
        raise ValueError("`nframes` must be greater than zero.")

    arrayfunc = np.asanyarray

    # Open once up front only to learn the overall shape and pixel type.
    with pims.open(sfname) as imgs:
        shape = (len(imgs),) + imgs.frame_shape
        dtype = np.dtype(imgs.pixel_type)

    if nframes == -1:
        nframes = shape[0]

    if nframes > shape[0]:
        # `nframes` itself is left unchanged here.
        # NOTE(review): the truncation announced below presumably happens in
        # dask's chunk normalization -- confirm.
        warnings.warn(
            "`nframes` larger than number of frames in file."
            " Will truncate to number of frames in file.",
            RuntimeWarning,
        )
    elif shape[0] % nframes != 0:
        warnings.warn(
            "`nframes` does not nicely divide number of frames in file."
            " Last chunk will contain the remainder.",
            RuntimeWarning,
        )

    # place source filenames into dask array after sorting
    filenames = natural_sorted(glob.glob(sfname))
    if len(filenames) > 1:
        ar = da.from_array(filenames, chunks=(nframes,))
        multiple_files = True
    else:
        # Single file: repeat its name once per frame so that the filename
        # array has one entry per frame along the first axis.
        ar = da.from_array(filenames * shape[0], chunks=(nframes,))
        multiple_files = False

    # read in data using encoded filenames
    dask_arr = ar.map_blocks(
        _map_read_frame,
        chunks=da.core.normalize_chunks((nframes,) + shape[1:], shape),
        multiple_files=multiple_files,
        new_axis=list(range(1, len(shape))),
        experiment=experiment,
        arrayfunc=arrayfunc,
        meta=arrayfunc([]).astype(dtype),  # meta overwrites `dtype` argument
    )
    return dask_arr
# One lock shared by every call to `_map_read_frame` in this process. A lock
# created inside the function would be private to each call and therefore
# never contended, so it could not serialize anything.
_READ_LOCK = threading.RLock()


def _map_read_frame(x, multiple_files, block_info=None, **kwargs):
    """Read the frames belonging to one block of the filename array.

    Parameters
    ----------
    x : array of str
        One block of the filename array that this function is mapped over.
    multiple_files : bool
        If True, every entry of `x` is a separate file and its first frame
        is read. If False, all entries name the same file and the frame
        range is taken from `block_info`.
    block_info : dict, optional
        Block information supplied by ``map_blocks``; only
        ``block_info[None]["array-location"][0]`` is used, as the
        ``(start, stop)`` frame range of the output block.
    **kwargs
        Forwarded unchanged to `_read_frame`.

    Returns
    -------
    The frames of this block, stacked along the first axis.
    """
    if multiple_files:
        # Read one frame per filename in the block. Reading only `x[0]`
        # would return a single frame even when the block holds several
        # filenames (chunks of more than one frame).
        with _READ_LOCK:
            frames = [
                _read_frame(fn=fn, i=slice(0, 1), **kwargs) for fn in x
            ]
        if len(frames) == 1:
            return frames[0]
        return np.concatenate(frames, axis=0)

    # Single file: the position of the output block along the first axis
    # gives the frame range to read.
    i, j = block_info[None]["array-location"][0]
    with _READ_LOCK:
        return _read_frame(fn=x[0], i=slice(i, j), **kwargs)


def _read_frame(fn, i, *, arrayfunc=np.asanyarray, experiment=0):
    """Open `fn` with pims and return the frames selected by `i`.

    Parameters
    ----------
    fn : str
        Path of the file to open.
    i : slice
        Frames to read from the opened reader.
    arrayfunc : callable, optional
        Converts the selected frames into an array
        (default: ``numpy.asanyarray``).
    experiment : int, optional
        Accepted for interface compatibility but currently unused: files are
        opened with ``pims.open`` and no series is selected.

    Returns
    -------
    A copy of the selected frames as produced by `arrayfunc`.
    """
    # NOTE: selecting a measurement series would need a reader that supports
    # it, e.g. pims.Bioformats(fn, series=experiment); that path is disabled.
    with pims.open(fn) as imgs:
        data = arrayfunc(imgs[i])
        # Copy while the reader is still open -- presumably so the result
        # does not reference reader-owned buffers after the file is closed.
        data = data.copy()
    return data