From ab74951814a2494774f855450838d0ca547c9be3 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Thu, 24 Sep 2020 10:06:38 +0200 Subject: [PATCH 1/4] Add ZSplitter --- nabu/resources/cli/nx_z_splitter.py | 153 ++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 nabu/resources/cli/nx_z_splitter.py diff --git a/nabu/resources/cli/nx_z_splitter.py b/nabu/resources/cli/nx_z_splitter.py new file mode 100644 index 00000000..a24fa8ab --- /dev/null +++ b/nabu/resources/cli/nx_z_splitter.py @@ -0,0 +1,153 @@ +import warnings +from shutil import copy as copy_file +from os import path +from h5py import VirtualSource, VirtualLayout +from tomoscan.io import HDF5File +from nabu.resources.logger import Logger, LoggerOrPrint +from .cli_configs import ZSplitConfig +from .utils import parse_params_values + + +warnings.warn( + "This command-line utility is intended as a temporary solution. Please do not rely too much on it.", + Warning +) + + +def _get_first_entry(fname): + with HDF5File(fname, "r") as fid: + entry = list(fid.keys())[0] + return entry + + +def _get_z_translations(fname, entry): + z_path = path.join(entry, "sample", "z_translation") + with HDF5File(fname, "r") as fid: + z_transl = fid[z_path][:] + return z_transl + + +class NXZSplitter: + def __init__(self, fname, output_dir, n_stages=None, entry=None, logger=None, use_virtual_dataset=False): + self.fname = fname + self._ext = path.splitext(fname)[-1] + self.output_dir = output_dir + self.n_stages = n_stages + if entry is None: + entry = _get_first_entry(fname) + self.entry = entry + self.logger = LoggerOrPrint(logger) + self.use_virtual_dataset = use_virtual_dataset + + + def _patch_nx_file(self, fname, mask): + orig_fname = self.fname + detector_path = path.join(self.entry, "instrument", "detector") + sample_path = path.join(self.entry, "sample") + with HDF5File(fname, "a") as fid: + def patch_nx_entry(name): + newval = fid[name][mask] + del fid[name] + fid[name] = newval + 
detector_entries = [ + path.join(detector_path, what) + for what in [ + "count_time", "image_key", "image_key_control" + ] + ] + sample_entries = [ + path.join(sample_path, what) + for what in [ + "rotation_angle", "x_translation", "y_translation", "z_translation" + ] + ] + for what in detector_entries + sample_entries: + self.logger.debug("Patching %s" % what) + patch_nx_entry(what) + # Patch "data" using a virtual dataset + self.logger.debug("Patching data") + data_path = path.join(detector_path, "data") + if self.use_virtual_dataset: + data_shape = fid[data_path].shape + data_dtype = fid[data_path].dtype + new_data_shape = (int(mask.sum()), ) + data_shape[1:] + vlayout = VirtualLayout( + shape=new_data_shape, dtype=data_dtype + ) + vsource = VirtualSource( + orig_fname, name=data_path, shape=data_shape, dtype=data_dtype + ) + vlayout[:] = vsource[mask, :, :] + del fid[data_path] + fid[detector_path].create_virtual_dataset("data", vlayout) + + if not(self.use_virtual_dataset): + data_path = path.join(self.entry, "instrument", "detector", "data") + with HDF5File(orig_fname, "r") as fid: + data_arr = fid[data_path][mask, :, :] # Actually load data. Heavy ! + with HDF5File(fname, "a") as fid: + del fid[data_path] + fid[data_path] = data_arr + + + def z_split(self): + """ + Split a HDF5-NX file according to different z_translation. + + Parameters + ---------- + entry: str, optional + HDF5 entry. By default, the first entry is taken. + n_stages: int, optional + Number of expected different "z". 
+ """ + z_transl = _get_z_translations(self.fname, self.entry) + different_z = set(z_transl) + self.logger.info( + "Detected %d different z values: %s" % (len(different_z), str(different_z)) + ) + if self.n_stages is not None and n_stages != len(different_z): + raise ValueError( + "Expected %d different stages, but I detected %d" + % (n_stages, len(different_z)) + ) + masks = [(z_transl == z) for z in different_z] + for i_z, mask in enumerate(masks): + fname_curr_z = path.join( + self.output_dir, + path.splitext(path.basename(self.fname))[0] + str("_%04d" % i_z) + self._ext + ) + self.logger.info("Creating %s" % fname_curr_z) + copy_file(self.fname, fname_curr_z) + self._patch_nx_file(fname_curr_z, mask) + + +def zsplit(): + # Parse arguments + args = parse_params_values( + ZSplitConfig, + parser_description="Split a HDF5-Nexus file according to z translation (z-series)" + ) + # Sanitize arguments + fname = args["input_file"] + output_dir = args["output_directory"] + loglevel = args["loglevel"].upper() + entry = args["entry"] + if len(entry) == 0: + entry = None + n_stages = args["n_stages"] + if n_stages < 0: + n_stages = None + use_virtual_dataset = bool(args["use_virtual_dataset"]) + # Instantiate and execute + logger = Logger("NX_z-splitter", level=loglevel, logfile="nxzsplit.log") + nx_splitter = NXZSplitter( + fname, output_dir, + n_stages=n_stages, entry=entry, logger=logger, + use_virtual_dataset=use_virtual_dataset + ) + nx_splitter.z_split() + + +if __name__ == "__main__": + zsplit() -- GitLab From cb8b472b225c708a91856dfcd4ddbcb1647f8a98 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Thu, 24 Sep 2020 10:07:01 +0200 Subject: [PATCH 2/4] Add Z splitter CLI tool --- nabu/resources/cli/cli_configs.py | 35 +++++++++++++++++++++++++++---- setup.py | 1 + 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/nabu/resources/cli/cli_configs.py b/nabu/resources/cli/cli_configs.py index 86bb5c0e..646b7e73 100644 --- a/nabu/resources/cli/cli_configs.py 
+++ b/nabu/resources/cli/cli_configs.py @@ -3,7 +3,6 @@ # # Default configuration for "bootstrap" command - BootstrapConfig = { "bootstrap": { "help": "Bootstrap a configuration file from scratch.", @@ -35,7 +34,6 @@ BootstrapConfig = { # Default configuration for "validate" command - ValidateConfig = { "input_file": { "help": "Nabu input file", @@ -44,8 +42,38 @@ ValidateConfig = { } -# Default configuration for "reconstruct" command +# Default configuration for "zsplit" command +ZSplitConfig = { + "input_file": { + "help": "Input HDF5-Nexus file", + "mandatory": True, + }, + "output_directory": { + "help": "Output directory to write split files.", + "mandatory": True, + }, + "loglevel": { + "help": "Logging level. Can be 'debug', 'info', 'warning', 'error'. Default is 'info'.", + "default": "info", + }, + "entry": { + "help": "HDF5 entry to take in the input file. By default, the first entry is taken.", + "default": "", + }, + "n_stages": { + "help": "Number of expected stages (i.e different 'Z' values). By default it is inferred from the dataset.", + "default": -1, + "type": int, + }, + "use_virtual_dataset": { + "help": "Whether to use virtual datasets for output file. Not using a virtual dataset duplicates data and thus results in big files ! However virtual datasets currently have performance issues. 
Default is False", + "default": 0, + "type": int, + }, +} + +# Default configuration for "reconstruct" command ReconstructConfig = { "input_file": { "help": "Nabu input file", @@ -104,4 +132,3 @@ ReconstructConfig = { "type": int, }, } - diff --git a/setup.py b/setup.py index 442dd3b5..7daf9bb0 100644 --- a/setup.py +++ b/setup.py @@ -72,6 +72,7 @@ def setup_package(): 'console_scripts': [ "nabu-test=nabu.tests:nabu_test", "nabu-config=nabu.resources.cli.bootstrap:bootstrap", + "nabu-zsplit=nabu.resources.cli.nx_z_splitter:zsplit", "nabu=nabu.resources.cli.reconstruct:main", ], }, -- GitLab From 87962ce4c33874e11fa13c710359ca5e82b97473 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Thu, 24 Sep 2020 13:34:55 +0200 Subject: [PATCH 3/4] Move get_first_entry to io.utils --- nabu/io/utils.py | 7 +++++++ nabu/resources/cli/nx_z_splitter.py | 12 +++--------- nabu/resources/utils.py | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/nabu/io/utils.py b/nabu/io/utils.py index 03bce08d..ef367d47 100644 --- a/nabu/io/utils.py +++ b/nabu/io/utils.py @@ -1,5 +1,6 @@ import numpy as np from silx.io.url import DataUrl +from tomoscan.io import HDF5File def get_compacted_dataslices(urls, subsampling=None): @@ -96,3 +97,9 @@ def get_compacted_dataslices(urls, subsampling=None): next_pos = abs(-n_imgs % subsampling) return res + + +def get_first_hdf5_entry(fname): + with HDF5File(fname, "r") as fid: + entry = list(fid.keys())[0] + return entry diff --git a/nabu/resources/cli/nx_z_splitter.py b/nabu/resources/cli/nx_z_splitter.py index a24fa8ab..3cc83451 100644 --- a/nabu/resources/cli/nx_z_splitter.py +++ b/nabu/resources/cli/nx_z_splitter.py @@ -3,10 +3,10 @@ from shutil import copy as copy_file from os import path from h5py import VirtualSource, VirtualLayout from tomoscan.io import HDF5File -from nabu.resources.logger import Logger, LoggerOrPrint +from ..logger import Logger, LoggerOrPrint from .cli_configs import ZSplitConfig from .utils import 
parse_params_values - +from ...io.utils import get_first_hdf5_entry warnings.warn( "This command-line utility is intended as a temporary solution. Please do not rely too much on it.", @@ -14,12 +14,6 @@ warnings.warn( ) -def _get_first_entry(fname): - with HDF5File(fname, "r") as fid: - entry = list(fid.keys())[0] - return entry - - def _get_z_translations(fname, entry): z_path = path.join(entry, "sample", "z_translation") with HDF5File(fname, "r") as fid: @@ -34,7 +28,7 @@ class NXZSplitter: self.output_dir = output_dir self.n_stages = n_stages if entry is None: - entry = _get_first_entry(fname) + entry = get_first_hdf5_entry(fname) self.entry = entry self.logger = LoggerOrPrint(logger) self.use_virtual_dataset = use_virtual_dataset diff --git a/nabu/resources/utils.py b/nabu/resources/utils.py index 5461b759..17b95eb3 100644 --- a/nabu/resources/utils.py +++ b/nabu/resources/utils.py @@ -82,3 +82,4 @@ def get_threads_per_node(max_threads, is_percentage=True): def is_hdf5_extension(ext): return FileFormat.from_value(files_formats[ext]) == FileFormat.HDF5 + -- GitLab From 93cf6b885d1c72fdab98eb615f8b5a6b13e2daa7 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Thu, 24 Sep 2020 13:35:10 +0200 Subject: [PATCH 4/4] Abort splitting if n_z <= 1 --- nabu/resources/cli/nx_z_splitter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nabu/resources/cli/nx_z_splitter.py b/nabu/resources/cli/nx_z_splitter.py index 3cc83451..3799f133 100644 --- a/nabu/resources/cli/nx_z_splitter.py +++ b/nabu/resources/cli/nx_z_splitter.py @@ -97,13 +97,16 @@ class NXZSplitter: """ z_transl = _get_z_translations(self.fname, self.entry) different_z = set(z_transl) + n_z = len(different_z) self.logger.info( - "Detected %d different z values: %s" % (len(different_z), str(different_z)) + "Detected %d different z values: %s" % (n_z, str(different_z)) ) - if self.n_stages is not None and n_stages != len(different_z): + if n_z <= 1: + raise ValueError("Detected only %d 
z-value. Stopping." % n_z) + if self.n_stages is not None and self.n_stages != n_z: raise ValueError( "Expected %d different stages, but I detected %d" - % (n_stages, len(different_z)) + % (self.n_stages, n_z) ) masks = [(z_transl == z) for z in different_z] for i_z, mask in enumerate(masks): -- GitLab