From 8d458b5039150d4da402caf912703329e0d84efc Mon Sep 17 00:00:00 2001 From: Henri Payno Date: Mon, 12 Sep 2022 14:19:15 +0200 Subject: [PATCH] NXtomo: improvement saving data as a virtual dataset /close #118 --- nxtomomill/nexus/nxobject.py | 76 +++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/nxtomomill/nexus/nxobject.py b/nxtomomill/nexus/nxobject.py index 3d34d23..d115ea6 100644 --- a/nxtomomill/nexus/nxobject.py +++ b/nxtomomill/nexus/nxobject.py @@ -33,10 +33,14 @@ from typing import Optional from silx.io.url import DataUrl from tomoscan.esrf.scan.utils import cwd_context from tomoscan.unitsystem import Unit +from tomoscan.io import HDF5File from tomoscan.nexus.paths.nxtomo import LATEST_VERSION as LATEST_NXTOMO_VERSION from silx.io.dictdump import dicttonx import h5py import os +from nxtomomill.utils.file_path import to_target_rel_path + +from nxtomomill.utils.h5pyutils import from_data_url_to_virtual_source class ElementWithUnit: @@ -228,27 +232,63 @@ class NXobject: dataset_path, v_sources_or_data_urls, ) in datasets_to_handle_in_postprocessing.items(): - for v_source_or_data_url in v_sources_or_data_urls: + data_type = None + vs_shape = None + n_frames = 0 + + v_sources_to_handle_in_postprocessing = [] + # convert DataUrl to VirtualSource + dataset_keys = v_sources_or_data_urls + for v_source_or_data_url in dataset_keys: if isinstance(v_source_or_data_url, DataUrl): - data = DataUrl( - file_path=v_source_or_data_url.file_path(), - data_path=v_source_or_data_url.data_path(), - scheme=v_source_or_data_url.scheme(), - data_slice=v_source_or_data_url.data_slice(), - ) + vs = from_data_url_to_virtual_source(v_source_or_data_url)[0] else: - data = v_source_or_data_url - from nxtomomill.utils.frameappender import ( - FrameAppender, - ) # avoid cyclic import - - frame_appender = FrameAppender( - data=data, - file_path=file_path, - data_path="/".join([data_path, dataset_path]), - where="end", + assert isinstance( + v_source_or_data_url, h5py.VirtualSource + ), "v_source_or_data_url is not a DataUrl or a VirtualSource" + vs = v_source_or_data_url + + if data_type is None: + data_type = vs.dtype + elif vs.dtype != data_type: + raise TypeError( + f"Virtual sources have incoherent data types (found {data_type} and {vs.dtype})" + ) + + if not len(vs.maxshape) == 3: + raise ValueError( + f"Virtual sources are expected to be 3D. {len(vs.maxshape)} found" + ) + if vs_shape is None: + vs_shape = vs.maxshape[1:] + elif vs_shape != vs.maxshape[1:]: + raise ValueError( + f"Virtual sources are expected to have same frame dimensions. found {vs_shape} and {vs.maxshape[1:]}" + ) + n_frames += vs.maxshape[0] + vs.path = to_target_rel_path(vs.path, file_path) + v_sources_to_handle_in_postprocessing.append(vs) + + if n_frames == 0: + # in the case there is no frame to be saved + return + + vs_shape = [ + n_frames, + ] + list(vs_shape) + layout = h5py.VirtualLayout(shape=tuple(vs_shape), dtype=data_type) + # fill virtual dataset + loc_pointer = 0 + for v_source in v_sources_to_handle_in_postprocessing: + layout[ + loc_pointer : (loc_pointer + v_source.maxshape[0]) + ] = v_source + loc_pointer += v_source.maxshape[0] + + with HDF5File(file_path, mode="a") as h5s: + h5s.create_virtual_dataset( + "/".join([data_path, dataset_path]), layout ) - frame_appender.process() # write attributes of dataset defined from a list of DataUrl or VirtualSource assert os.path.exists(file_path) -- GitLab