diff --git a/nabu/app/fullfield.py b/nabu/app/fullfield.py index 5191dd26367d42e4b99ea4d2364dcf4a497b9162..3a56120c0223ca6892cfcbdd5a01a93afa84caaf 100644 --- a/nabu/app/fullfield.py +++ b/nabu/app/fullfield.py @@ -10,8 +10,9 @@ from ..preproc.double_flatfield import DoubleFlatField from ..preproc.phase import PaganinPhaseRetrieval from ..preproc.sinogram import SinoProcessing from ..misc.unsharp import UnsharpMask +from ..misc.histogram import PartialHistogram, hist_as_2Darray from ..resources.utils import is_hdf5_extension -# ~ from ..io.writer import Writers + class FullFieldPipeline: """ @@ -30,6 +31,7 @@ class FullFieldPipeline: SinoProcessingClass = SinoProcessing MLogClass = Log FBPClass = None # For now we don't have a plain python/numpy backend for reconstruction + HistogramClass = PartialHistogram def __init__(self, process_config, sub_region, logger=None, extra_options=None, phase_margin=None): """ @@ -180,9 +182,13 @@ class FullFieldPipeline: return n_recs - def _get_process_name(self): - # TODO support "incomplete" processing pipeline - return "reconstruction" + def _get_process_name(self, kind="reconstruction"): + # In the future, might be something like "reconstruction-" + if kind == "reconstruction": + return "reconstruction" + elif kind == "histogram": + return "histogram" + return kind def register_callback(self, step_name, callback): @@ -310,6 +316,7 @@ class FullFieldPipeline: self._init_sino_builder() self._prepare_reconstruction() self._init_reconstruction() + self._init_histogram() self._init_writer() @use_options("read_chunk", "chunk_reader") @@ -468,6 +475,13 @@ class FullFieldPipeline: if options["fbp_filter_type"] == "none": self.reconstruction.fbp = self.reconstruction.backproj + @use_options("histogram", "histogram") + def _init_histogram(self): + options = self.processing_options["histogram"] + self.histogram = PartialHistogram( + method="fixed_bins_number", num_bins=options["histogram_bins"] + ) + @use_options("save", "writer") def _init_writer(self): options = self.processing_options["save"] @@ -480,7 +494,8 @@ class FullFieldPipeline: ) nx_info = None - if is_hdf5_extension(options["file_format"]): + self._hdf5_output = is_hdf5_extension(options["file_format"]) + if self._hdf5_output: fname_start_index = None options["file_prefix"] = self._orig_file_prefix + str( "_%04d" % (self.z_min + self._phase_margin_up) @@ -574,7 +589,6 @@ class FullFieldPipeline: def _take_log(self): self.mlog.take_logarithm(self.radios) - @pipeline_step("radios_movements", "Applying radios movements") def _radios_movements(self, radios=None): if radios is None: @@ -604,6 +618,12 @@ class FullFieldPipeline: sinos[i], output=self.recs[i] ) + @pipeline_step("histogram", "Computing histogram") + def _compute_histogram(self, data=None): + if data is None: + data = self.recs + self.recs_histogram = self.histogram.compute_histogram(data.ravel()) + @pipeline_step("writer", "Saving data") def _write_data(self, data=None): if data is None: @@ -611,6 +631,17 @@ class FullFieldPipeline: self.writer.write(data, *self._writer_exec_args, **self._writer_exec_kwargs) self.logger.info("Wrote %s" % self.writer.get_filename()) self.processing_options["save"]["file_prefix"] = self._orig_file_prefix + if "histogram" in self.processing_steps and self._hdf5_output: + self.logger.info("Saving histogram") + self.writer.write( + hist_as_2Darray(self.recs_histogram), + self._get_process_name(kind="histogram"), + processing_index=self._writer_exec_kwargs["processing_index"]+1, + config={ + "file": path.basename(self.writer.get_filename()), + "bins": self.processing_options["histogram"]["histogram_bins"], + } + ) def _process_chunk(self): @@ -623,6 +654,7 @@ class FullFieldPipeline: self._radios_movements() self._build_sino() self._reconstruct() + self._compute_histogram() self._write_data() self._process_finalize() diff --git a/nabu/app/fullfield_cuda.py b/nabu/app/fullfield_cuda.py index 297e74d72d35e75adf412597d2311a1f4e1b5457..7a7e31fa6d988db4f0d4a62c89a9127b5efbb8ed 100644 --- a/nabu/app/fullfield_cuda.py +++ b/nabu/app/fullfield_cuda.py @@ -9,6 +9,7 @@ from ..preproc.phase_cuda import CudaPaganinPhaseRetrieval from ..preproc.sinogram_cuda import CudaSinoProcessing from ..preproc.sinogram import SinoProcessing from ..misc.unsharp_cuda import CudaUnsharpMask +from ..misc.histogram import PartialHistogram from ..reconstruction.fbp import Backprojector from ..cuda.utils import get_cuda_context, __has_pycuda__, __pycuda_error_msg__, copy_big_gpuarray, replace_array_memory from .fullfield import FullFieldPipeline @@ -31,6 +32,7 @@ class CudaFullFieldPipeline(FullFieldPipeline): SinoProcessingClass = CudaSinoProcessing MLogClass = CudaLog FBPClass = Backprojector + HistogramClass = PartialHistogram def __init__(self, process_config, sub_region, logger=None, extra_options=None, phase_margin=None, cuda_options=None): self._init_cuda(cuda_options) @@ -442,6 +444,7 @@ class CudaFullFieldPipelineLimitedMemory(CudaFullFieldPipeline): self._reconstruct(sinos=self._d_sinos) # Copy D2H self._d_recs[:transfer_size].get(ary=self._h_recs[start_idx:end_idx]) + self._compute_histogram(data=self._h_recs) self.logger.debug("End of processing steps on sinos") self._h_radios = self._orig_h_radios # Write diff --git a/nabu/app/local_reconstruction.py b/nabu/app/local_reconstruction.py index 3feaee6aab96d6aebbf1a3fa464ef8c61292ff7a..f74fd6f8761abc86fde48d4e0fc347aad8584742 100644 --- a/nabu/app/local_reconstruction.py +++ b/nabu/app/local_reconstruction.py @@ -2,13 +2,15 @@ from os.path import join, isfile, basename from math import ceil import gc from psutil import virtual_memory -from nabu.resources.logger import LoggerOrPrint -from ..io.writer import merge_hdf5_files +from silx.io import get_data +from silx.io.url import DataUrl +from ..resources.logger import LoggerOrPrint +from ..io.writer import merge_hdf5_files, NXProcessWriter from ..cuda.utils import collect_cuda_gpus from ..resources.computations import estimate_chunk_size from ..preproc.phase import compute_paganin_margin from ..app.fullfield_cuda import CudaFullFieldPipeline, CudaFullFieldPipelineLimitedMemory - +from ..misc.histogram import PartialHistogram, add_last_bin, hist_as_2Darray def get_gpus_ids(resources_cfg): gpus_ids = resources_cfg["gpu_id"] @@ -307,22 +309,76 @@ class LocalReconstruction: self.logger.error("No files to merge") return files.sort() - files = [join(out_cfg["file_prefix"], basename(fname)) for fname in files] + self._local_files = [ + join(out_cfg["file_prefix"], basename(fname)) + for fname in files + ] entry = getattr(self.process_config.dataset_infos.dataset_scanner, "entry", "entry") + # TODO "reconstruction" is hard-coded, might be problematic in the future h5_path = join(entry, *["reconstruction", "results", "data"]) process_name = "reconstruction" + # self.logger.info("Merging reconstructions to %s" % output_file) merge_hdf5_files( - files, h5_path, output_file, process_name, + self._local_files, h5_path, output_file, process_name, output_entry=entry, output_filemode="a", processing_index=0, config={ "reconstruction_stages": { - str(k): v for k, v in zip(self.results.keys(), files) + str(k): v for k, v in zip(self.results.keys(), self._local_files) }, "nabu_config": self.process_config.nabu_config }, base_dir=out_cfg["location"], overwrite=out_cfg["overwrite_results"] ) + # TODO dont hard-code process name + self.merge_histograms(entry, "histogram", output_file, entry, process_name) + + + def merge_histograms( + self, masterfile_entry, masterfile_process_name, + output_file, output_entry, output_process_name + ): + """ + Merge the partial histograms + """ + if not(self.process_config.nabu_config["postproc"]["output_histogram"]): + return + self.logger.info("Merging histograms") + + # + h5_path = join(masterfile_entry, *[masterfile_process_name, "results", "data"]) + # + files = sorted(self.results.values()) + data_urls = [] + for fname in files: + url = DataUrl( + file_path=fname, data_path=h5_path, data_slice=None, scheme="silx" + ) + data_urls.append(url) + histograms = [] + for data_url in data_urls: + h2D = get_data(data_url) + histograms.append( + (h2D[0], add_last_bin(h2D[1])) + ) + histograms_merger = PartialHistogram( # TODO configurable + method="fixed_bins_number", num_bins=histograms[0][0].size + ) + merged_hist = histograms_merger.merge_histograms(histograms) + + writer = NXProcessWriter( + output_file, entry=output_entry, filemode="a", overwrite=True + ) + writer.write( + hist_as_2Darray(merged_hist), + "histogram", # TODO don't hard-code + processing_index=1, + config={ + "files": self._local_files, + "bins": self.process_config.nabu_config["postproc"]["histogram_bins"], + } + ) + diff --git a/nabu/io/config.py b/nabu/io/config.py index ec98f95fd84c91ae66c0c7b52e08b0f1a1e225f3..ec8b5fbfe4528f2e6c99ce8565d0d40d8a226993 100644 --- a/nabu/io/config.py +++ b/nabu/io/config.py @@ -161,6 +161,12 @@ def validate_nabu_config(config): raise ValueError("Unknown option '%s' in section [%s]" % (key, section)) validator = nabu_config[section][key]["validator"] res_config[section][key] = validator(section, key, value) + # Handle sections missing in config + for section in (set(nabu_config.keys()) - set(res_config.keys())): + res_config[section] = _extract_nabuconfig_section(section) + for key, value in res_config[section].items(): + validator = nabu_config[section][key]["validator"] + res_config[section][key] = validator(section, key, value) return res_config diff --git a/nabu/io/reader.py b/nabu/io/reader.py index 1b7f1c148ba630c158a25aec527a9aeadad2ad4d..6310ae3928094e5f4bd447409fc2ea038dac0ebd 100644 --- a/nabu/io/reader.py +++ b/nabu/io/reader.py @@ -60,9 +60,6 @@ class Reader(object): pass - - - class NPReader(Reader): multi_load = True @@ -148,7 +145,6 @@ class EDFReader(Reader): return self.read(data_url.file_path()) - class HDF5Reader(Reader): multi_load = True @@ -190,7 +186,6 @@ class HDF5Reader(Reader): self.release() - Readers = { "edf": EDFReader, "hdf5": HDF5Reader, @@ -201,7 +196,6 @@ Readers = { } - class ChunkReader(object): """ A reader of chunk of images. @@ -450,4 +444,3 @@ class ChunkReader(object): self._loaded = True - diff --git a/nabu/io/writer.py b/nabu/io/writer.py index 1ed84e3fb7b63b541cd3b8f6664dbb60ccdc8ba3..2cd40d34b400e031c5304fd70ff4232b74461765 100644 --- a/nabu/io/writer.py +++ b/nabu/io/writer.py @@ -77,8 +77,9 @@ class NXProcessWriter(Writer): Dictionary containing the configuration. """ with HDF5File(self.fname, self._filemode, swmr=True) as fid: - if self.overwrite and self.data_path in fid: - del fid[self.data_path] + results_path = path.join(self.data_path, process_name) + if self.overwrite and results_path in fid: + del fid[results_path] nx_entry = fid.require_group(self.data_path) if "NX_class" not in nx_entry.attrs: nx_entry.attrs["NX_class"] = "NXentry" diff --git a/nabu/misc/histogram.py b/nabu/misc/histogram.py new file mode 100644 index 0000000000000000000000000000000000000000..6093d22e241d01865c962e6a4856c8e772c8052a --- /dev/null +++ b/nabu/misc/histogram.py @@ -0,0 +1,204 @@ +from math import log2 +import numpy as np +from ..utils import check_supported + + +class PartialHistogram: + """ + A class for computing histogram progressively. + + In certain cases, it is cumbersome to compute a histogram directly on a big chunk of + data (ex. data not fitting in memory, disk access too slow) while some parts of the + data are readily available in-memory. + """ + + histogram_methods = [ + "fixed_bins_width", + "fixed_bins_number" + ] + + bin_width_policies = [ + "uint16", + ] + + + def __init__(self, method="fixed_bins_width", bin_width="uint16", num_bins=None, min_bins=None): + """ + Initialize a PartialHistogram class. + + Parameters + ---------- + method: str, optional + Partial histogram computing method. Available are: + - `fixed_bins_width`: all the histograms are computed with the same bin + width. The class adapts to the data range and computes the number of + bins accordingly. + - `fixed_bins_number`: all the histograms are computed with the same + number of bins. The class adapts to the data range and computes the + bin width accordingly. + Default is "fixed_bins_width" + bin_width: str or float, optional + Policy for histogram bins when method="fixed_bins_width". Available are: + - "uint16": The bin width is computed so that floating-point elements + `f1` and `f2` satisfying `|f1 - f2| < bin_width` implies + `f1_converted - f2_converted < 1` once cast to uint16. + - A number: all the bins have this fixed width. + Default is "uint16" + num_bins: int, optional + Number of bins when method = 'fixed_bins_number'. + min_bins: int, optional + Minimum number of bins when method = 'fixed_bins_width'. + """ + check_supported(method, self.histogram_methods, "histogram computing method") + self.method = method + self._set_bin_width(bin_width) + self._set_num_bins(num_bins) + self.min_bins = min_bins + self._set_histogram_methods() + + + def _set_bin_width(self, bin_width): + if self.method == "fixed_bins_number": + self.bin_width = None + return + if isinstance(bin_width, str): + check_supported(bin_width, self.bin_width_policies, "bin width policy") + self._fixed_bw = False + else: + bin_width = float(bin_width) + self._fixed_bw = True + self.bin_width = bin_width + + + def _set_num_bins(self, num_bins): + if self.method == "fixed_bins_width": + self.num_bins = None + return + if self.method == "fixed_bins_number" and num_bins is None: + raise ValueError("Need to specify num_bins for method='fixed_bins_number'") + self.num_bins = int(num_bins) + + + def _set_histogram_methods(self): + self._histogram_methods = { + "fixed_bins_number": { + "compute": self._compute_histogram_fixed_nbins, + "merge": self._merge_histograms_fixed_nbins, + }, + "fixed_bins_width": { + "compute": self._compute_histogram_fixed_bw, + "merge": self._merge_histograms_fixed_bw, + }, + } + assert set(self._histogram_methods.keys()) == set(self.histogram_methods) + + + @staticmethod + def _get_histograms_and_bins(histograms, center=False): + histos = [h[0] for h in histograms] + if center: + bins = [0.5 * (h[1][1:] + h[1][:-1]) for h in histograms] + else: + bins = [h[1][:-1] for h in histograms] + return histos, bins + + # + # Histogram with fixed number of bins + # + + def _compute_histogram_fixed_nbins(self, data, data_range=None): + dmin, dmax = data.min(), data.max() if data_range is None else data_range + res = np.histogram(data, bins=self.num_bins) + return res + + + def _merge_histograms_fixed_nbins(self, histograms): + histos, bins = self._get_histograms_and_bins(histograms) + res = np.histogram( + np.hstack(bins), + weights=np.hstack(histos), + bins=self.num_bins, + ) + return res + + # + # Histogram with fixed bin width + # + + def _bin_width_u16(self, dmin, dmax): + return (dmax - dmin) / 65535. + + + def _bin_width_fixed(self, dmin, dmax): + return self.bin_width + + + def get_bin_width(self, dmin, dmax): + if self._fixed_bw: + return self._bin_width_fixed(dmin, dmax) + elif self.bin_width == "uint16": + return self._bin_width_u16(dmin, dmax) + else: + raise ValueError() + + + def _compute_histogram_fixed_bw(self, data, data_range=None): + dmin, dmax = data.min(), data.max() if data_range is None else data_range + min_bins = self.min_bins or 1 + bw_max = self.get_bin_width(dmin, dmax) + nbins = 0 + bw_factor = 1 + while nbins < min_bins: + bw = 2**round(log2(bw_max)) / bw_factor + nbins = int((dmax - dmin)/bw) + bw_factor *= 2 + res = np.histogram(data, bins=nbins) + return res + + + def _merge_histograms_fixed_bw(self, histograms): + histos, bins = self._get_histograms_and_bins(histograms, center=False) + dmax = max([b[-1] for b in bins]) + dmin = min([b[0] for b in bins]) + bw_max = max([b[1] - b[0] for b in bins]) + res = np.histogram( + np.hstack(bins), + weights=np.hstack(histos), + bins=int((dmax - dmin)/bw_max) + ) + return res + + # + # Dispatch methods + # + + def compute_histogram(self, data, data_range=None): + compute_hist_func = self._histogram_methods[self.method]["compute"] + return compute_hist_func(data, data_range=data_range) + + + def merge_histograms(self, histograms): + merge_hist_func = self._histogram_methods[self.method]["merge"] + return merge_hist_func(histograms) + + +def hist_as_2Darray(hist, center=True, dtype="f"): + hist, bins = hist + if center: + bins = 0.5 * (bins[1:] + bins[:-1]) + else: + bins = bins[:-1] + res = np.zeros((2, hist.size), dtype=dtype) + res[0] = hist + res[1] = bins.astype(dtype) + return res + + +def add_last_bin(histo_bins): + """ + Add the last bin (max value) to a list of bin edges. + """ + res = np.zeros(histo_bins.size + 1, dtype=histo_bins.dtype) + res[:-1] = histo_bins[:] + res[-1] = res[-2] + (res[1] - res[0]) + return res diff --git a/nabu/misc/tests/test_histogram.py b/nabu/misc/tests/test_histogram.py new file mode 100644 index 0000000000000000000000000000000000000000..b9666c4580c2cb53645b3ba4c1c3deb47e730678 --- /dev/null +++ b/nabu/misc/tests/test_histogram.py @@ -0,0 +1,37 @@ +from os import path +from tempfile import mkdtemp +import pytest +import numpy as np +from nabu.testutils import get_data +from nabu.misc.histogram import PartialHistogram + + +@pytest.fixture(scope='class') +def bootstrap(request): + cls = request.cls + cls.data = get_data("mri_rec_astra.npz")["data"] + cls.data /= 10 + cls.data[:100] *= 10 + cls.data_part_1 = cls.data[:100].ravel() + cls.data_part_2 = cls.data[100:].ravel() + cls.data0 = cls.data.ravel() + cls.bin_tol = 1e-5 * (cls.data0.max() - cls.data0.min()) + cls.hist_rtol = 1.5e-3 + + +@pytest.mark.usefixtures('bootstrap') +class TestPartialHistogram: + + def compare_histograms(self, hist1, hist2): + errmax_bins = np.max(np.abs(hist1[1] - hist2[1])) + assert errmax_bins < self.bin_tol + errmax_hist = np.max(np.abs(hist1[0] - hist2[0])/hist2[0].max()) + assert errmax_hist/hist2[0].max() < self.hist_rtol + + def test_fixed_nbins(self): + partial_hist = PartialHistogram(method="fixed_bins_number", num_bins=1e6) + hist1 = partial_hist.compute_histogram(self.data_part_1) + hist2 = partial_hist.compute_histogram(self.data_part_2) + hist = partial_hist.merge_histograms([hist1, hist2]) + ref = np.histogram(self.data0, bins=partial_hist.num_bins) + self.compare_histograms(hist, ref) diff --git a/nabu/resources/nabu_config.py b/nabu/resources/nabu_config.py index 00bf9406d9c8883aac04da7de9d284a5b6b1fad9..3089dff3ecdbb5301f95ad48005cb4a9eb7eca3a 100644 --- a/nabu/resources/nabu_config.py +++ b/nabu/resources/nabu_config.py @@ -278,6 +278,20 @@ nabu_config = { "type": "required", }, }, + "postproc": { + "output_histogram": { + "default": "0", + "help": "Whether to compute a histogram of the volume.", + "validator": boolean_validator, + "type": "optional", + }, + "histogram_bins": { + "default": "1000000", + "help": "Number of bins for the output histogram. Default is one million. ", + "validator": nonnegative_integer_validator, + "type": "advanced", + }, + }, "resources": { "method": { "default": "local", diff --git a/nabu/resources/processconfig.py b/nabu/resources/processconfig.py index 8ffa6bfaf414ffe3a02d83f1499b180008c7be89..d3e559316689b6551228d2fe399f976ad97b2419 100644 --- a/nabu/resources/processconfig.py +++ b/nabu/resources/processconfig.py @@ -1,6 +1,7 @@ import os from ..utils import PlaceHolder, DataPlaceHolder, copy_dict_items from ..io.config import NabuConfigParser, validate_nabu_config +from .utils import is_hdf5_extension from .dataset_analyzer import analyze_dataset, EDFDatasetAnalyzer, HDF5DatasetAnalyzer from .dataset_validator import NabuValidator from .cor import CORFinder @@ -209,7 +210,22 @@ class ProcessConfig: # New key rec_options["cor_estimated_auto"] = (nabu_config["reconstruction"]["rotation_axis_position"] == "auto") + # + # Histogram + # + if nabu_config["postproc"]["output_histogram"]: + # Current limitation + if not(is_hdf5_extension(nabu_config["output"]["file_format"])): + raise ValueError("Histograms are only supported when output is HDF5") + # + tasks.append("histogram") + options["histogram"] = copy_dict_items( + nabu_config["postproc"], ["histogram_bins"] + ) + # + # Save + # if nabu_config["output"]["location"] is not None: tasks.append("save") options["save"] = copy_dict_items( diff --git a/nabu/resources/tests/test_nxflatfield.py b/nabu/resources/tests/test_nxflatfield.py index 5aed695bd970052d5a9dafc89b4c8505496506c2..174142664fc358c0b531587336251fbcffad81b2 100644 --- a/nabu/resources/tests/test_nxflatfield.py +++ b/nabu/resources/tests/test_nxflatfield.py @@ -2,10 +2,9 @@ from os import path from tempfile import mkdtemp import pytest import numpy as np -from silx.io import get_data from silx.io.url import DataUrl from nabu.io.config import import_h5_to_dict, export_dict_to_h5 -from nabu.testutils import get_data, NXDatasetMock +from nabu.testutils import NXDatasetMock from nabu.resources.nxflatfield import NXFlatField, val_to_nxkey