diff --git a/nabu/io/utils.py b/nabu/io/utils.py
index 03bce08d9bc272c66f5bf12c7219bd5be386b23b..ef367d47fa5cb7a659b3a0d616b82d4a9aa4ffc5 100644
--- a/nabu/io/utils.py
+++ b/nabu/io/utils.py
@@ -1,5 +1,6 @@
 import numpy as np
 from silx.io.url import DataUrl
+from tomoscan.io import HDF5File
 
 
 def get_compacted_dataslices(urls, subsampling=None):
@@ -96,3 +97,9 @@ def get_compacted_dataslices(urls, subsampling=None):
             next_pos = abs(-n_imgs % subsampling)
 
     return res
+
+
+def get_first_hdf5_entry(fname):
+    with HDF5File(fname, "r") as fid:
+        entry = list(fid.keys())[0]
+    return entry
diff --git a/nabu/resources/cli/cli_configs.py b/nabu/resources/cli/cli_configs.py
index 86bb5c0ed56ea37659b69efadaeda108f805343f..646b7e732a246b1d22d96078943905599a70e7f3 100644
--- a/nabu/resources/cli/cli_configs.py
+++ b/nabu/resources/cli/cli_configs.py
@@ -3,7 +3,6 @@
 #
 
 # Default configuration for "bootstrap" command
-
 BootstrapConfig = {
     "bootstrap": {
         "help": "Bootstrap a configuration file from scratch.",
@@ -35,7 +34,6 @@ BootstrapConfig = {
 
 
 # Default configuration for "validate" command
-
 ValidateConfig = {
     "input_file": {
         "help": "Nabu input file",
@@ -44,8 +42,38 @@ ValidateConfig = {
         "mandatory": True,
     },
 }
 
-# Default configuration for "reconstruct" command
+# Default configuration for "zsplit" command
+ZSplitConfig = {
+    "input_file": {
+        "help": "Input HDF5-Nexus file",
+        "mandatory": True,
+    },
+    "output_directory": {
+        "help": "Output directory to write split files.",
+        "mandatory": True,
+    },
+    "loglevel": {
+        "help": "Logging level. Can be 'debug', 'info', 'warning', 'error'. Default is 'info'.",
+        "default": "info",
+    },
+    "entry": {
+        "help": "HDF5 entry to take in the input file. By default, the first entry is taken.",
+        "default": "",
+    },
+    "n_stages": {
+        "help": "Number of expected stages (i.e. different 'Z' values). By default it is inferred from the dataset.",
+        "default": -1,
+        "type": int,
+    },
+    "use_virtual_dataset": {
+        "help": "Whether to use virtual datasets for the output file. Not using a virtual dataset duplicates data and thus results in big files! However, virtual datasets currently have performance issues. Default is 0 (False).",
+        "default": 0,
+        "type": int,
+    },
+}
+
+# Default configuration for "reconstruct" command
 ReconstructConfig = {
     "input_file": {
         "help": "Nabu input file",
@@ -104,4 +132,3 @@ ReconstructConfig = {
         "type": int,
     },
 }
-
diff --git a/nabu/resources/cli/nx_z_splitter.py b/nabu/resources/cli/nx_z_splitter.py
new file mode 100644
index 0000000000000000000000000000000000000000..3799f1333f141e09fbadfc80794de5af1d3b234a
--- /dev/null
+++ b/nabu/resources/cli/nx_z_splitter.py
@@ -0,0 +1,133 @@
+import warnings
+from shutil import copy as copy_file
+from os import path
+from h5py import VirtualSource, VirtualLayout
+from tomoscan.io import HDF5File
+from ..logger import Logger, LoggerOrPrint
+from .cli_configs import ZSplitConfig
+from .utils import parse_params_values
+from ...io.utils import get_first_hdf5_entry
+
+warnings.warn(
+    "This command-line utility is intended as a temporary solution. Please do not rely too much on it.",
+    Warning
+)
+
+
+def _get_z_translations(fname, entry):
+    z_path = path.join(entry, "sample", "z_translation")
+    with HDF5File(fname, "r") as fid:
+        z_transl = fid[z_path][:]
+    return z_transl
+
+
+class NXZSplitter:
+    def __init__(self, fname, output_dir, n_stages=None, entry=None, logger=None, use_virtual_dataset=False):
+        self.fname = fname
+        self._ext = path.splitext(fname)[-1]
+        self.output_dir = output_dir
+        self.n_stages = n_stages
+        if entry is None:
+            entry = get_first_hdf5_entry(fname)
+        self.entry = entry
+        self.logger = LoggerOrPrint(logger)
+        self.use_virtual_dataset = use_virtual_dataset
+
+    def _patch_nx_file(self, fname, mask):
+        orig_fname = self.fname
+        detector_path = path.join(self.entry, "instrument", "detector")
+        sample_path = path.join(self.entry, "sample")
+        with HDF5File(fname, "a") as fid:
+            def patch_nx_entry(name):
+                newval = fid[name][mask]
+                del fid[name]
+                fid[name] = newval
+
+            detector_entries = [
+                path.join(detector_path, what)
+                for what in ["count_time", "image_key", "image_key_control"]
+            ]
+            sample_entries = [
+                path.join(sample_path, what)
+                for what in ["rotation_angle", "x_translation", "y_translation", "z_translation"]
+            ]
+            for what in detector_entries + sample_entries:
+                self.logger.debug("Patching %s" % what)
+                patch_nx_entry(what)
+            # Patch "data", either with a virtual dataset or by copying the array
+            self.logger.debug("Patching data")
+            data_path = path.join(detector_path, "data")
+            if self.use_virtual_dataset:
+                data_shape = fid[data_path].shape
+                data_dtype = fid[data_path].dtype
+                new_data_shape = (int(mask.sum()),) + data_shape[1:]
+                vlayout = VirtualLayout(shape=new_data_shape, dtype=data_dtype)
+                vsource = VirtualSource(orig_fname, name=data_path, shape=data_shape, dtype=data_dtype)
+                vlayout[:] = vsource[mask, :, :]
+                del fid[data_path]
+                fid[detector_path].create_virtual_dataset("data", vlayout)
+        if not self.use_virtual_dataset:
+            data_path = path.join(self.entry, "instrument", "detector", "data")
+            with HDF5File(orig_fname, "r") as fid:
+                data_arr = fid[data_path][mask, :, :]  # Actually load data. Heavy!
+            with HDF5File(fname, "a") as fid:
+                del fid[data_path]
+                fid[data_path] = data_arr
+
+    def z_split(self):
+        """
+        Split a HDF5-NX file into several files, one per z_translation value.
+        """
+        z_transl = _get_z_translations(self.fname, self.entry)
+        different_z = sorted(set(z_transl))
+        n_z = len(different_z)
+        self.logger.info("Detected %d different z values: %s" % (n_z, str(different_z)))
+        if n_z <= 1:
+            raise ValueError("Detected only %d z-value(s). Stopping." % n_z)
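+        # The number of stages is always inferred from the z_translation values;
+        # self.n_stages, when provided, only serves as a consistency check.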
+        if self.n_stages is not None and self.n_stages != n_z:
+            raise ValueError(
+                "Expected %d different stages, but detected %d"
+                % (self.n_stages, n_z)
+            )
+        masks = [(z_transl == z) for z in different_z]
+        for i_z, mask in enumerate(masks):
+            fname_curr_z = path.join(
+                self.output_dir,
+                path.splitext(path.basename(self.fname))[0] + ("_%04d" % i_z) + self._ext
+            )
+            self.logger.info("Creating %s" % fname_curr_z)
+            copy_file(self.fname, fname_curr_z)
+            self._patch_nx_file(fname_curr_z, mask)
+
+
+def zsplit():
+    # Parse arguments
+    args = parse_params_values(
+        ZSplitConfig,
+        parser_description="Split a HDF5-Nexus file according to z translation (z-series)"
+    )
+    # Sanitize arguments
+    fname = args["input_file"]
+    output_dir = args["output_directory"]
+    loglevel = args["loglevel"].upper()
+    entry = args["entry"]
+    if len(entry) == 0:
+        entry = None
+    n_stages = args["n_stages"]
+    if n_stages < 0:
+        n_stages = None
+    use_virtual_dataset = bool(args["use_virtual_dataset"])
+    # Instantiate and execute
+    logger = Logger("NX_z-splitter", level=loglevel, logfile="nxzsplit.log")
+    nx_splitter = NXZSplitter(
+        fname, output_dir,
+        n_stages=n_stages, entry=entry, logger=logger,
+        use_virtual_dataset=use_virtual_dataset
+    )
+    nx_splitter.z_split()
+
+
+if __name__ == "__main__":
+    zsplit()
diff --git a/nabu/resources/utils.py b/nabu/resources/utils.py
index 5461b759207efda1b27744dce77b640d8be1162f..17b95eb3f827407269cfb7ce38c1cb8738b3be1a 100644
--- a/nabu/resources/utils.py
+++ b/nabu/resources/utils.py
@@ -82,3 +82,4 @@ def get_threads_per_node(max_threads, is_percentage=True):
 
 def is_hdf5_extension(ext):
     return FileFormat.from_value(files_formats[ext]) == FileFormat.HDF5
+
diff --git a/setup.py b/setup.py
index 442dd3b5a3e0153af6191653415ce3a60d8d7e46..7daf9bb096559ff89e443c3bbc0d701c2f21e8cf 100644
--- a/setup.py
+++ b/setup.py
@@ -72,6 +72,7 @@ def setup_package():
         'console_scripts': [
             "nabu-test=nabu.tests:nabu_test",
            "nabu-config=nabu.resources.cli.bootstrap:bootstrap",
+            "nabu-zsplit=nabu.resources.cli.nx_z_splitter:zsplit",
             "nabu=nabu.resources.cli.reconstruct:main",
         ],
     },
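A minimal usage sketch of the new splitter, driven from Python rather than through the `nabu-zsplit` console script (the input file name and output directory below are placeholders, not part of the patch):

```python
from nabu.resources.cli.nx_z_splitter import NXZSplitter

# Split "my_scan.nx" into one NX file per distinct z_translation value.
# The output directory must already exist; with use_virtual_dataset=False,
# the detector data is duplicated into each output file.
splitter = NXZSplitter(
    "my_scan.nx",       # input HDF5-NeXus file
    "/tmp/zsplit_out",  # output directory
    n_stages=None,      # infer the number of stages from the dataset
    entry=None,         # take the first HDF5 entry
    use_virtual_dataset=False,
)
splitter.z_split()      # writes my_scan_0000.nx, my_scan_0001.nx, ...
```

The console script exposes the same options via `ZSplitConfig`, with `input_file` and `output_directory` mandatory.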