From 9cd4ca6ea18293f78dceb488abcbed725c48c753 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Mon, 7 Sep 2020 17:12:01 +0200 Subject: [PATCH 01/24] Start PartialHistogram class --- nabu/misc/histogram.py | 101 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 nabu/misc/histogram.py diff --git a/nabu/misc/histogram.py b/nabu/misc/histogram.py new file mode 100644 index 00000000..f693682c --- /dev/null +++ b/nabu/misc/histogram.py @@ -0,0 +1,101 @@ +from math import log2 +import numpy as np +from ..utils import check_supported + +histogram_methods = [ + "fixed_bins_width", + "fixed_bins_number" +] + +bin_width_policies = [ + "uint16", +] + +class PartialHistogram: + """ + A class for computing histogram progressively. + + In certain cases, it is cumbersome to compute a histogram directly on a big chunk of + data (ex. data not fitting in memory, disk access too slow) while some parts of the + data are readily available in-memory. + """ + + def __init__(self, method="fixed_bins_width", bin_width="uint16"): + """ + Initialize a PartialHistogram class. + + Parameters + ---------- + method: str, optional + Partial histogram computing method. Available are: + - `fixed_bins_width`: all the histograms are computed with the same bin + width. The class adapts to the data range and computes the number of + bins accordingly. + - `fixed_bins_number`: all the histograms are computed with the same + number of bins. The class adapts to the data range and computes the + bin width accordingly. + Default is "fixed_bins_width" + bin_width: str or float, optional + Policy for histogram bins when method="fixed_bins_width". Available are: + - "uint16": The bin width is computed so that floating-point elements + `f1` and `f2` satisfying `|f1 - f2| < bin_width` implies + `f1_converted - f2_converted < 1` once cast to uint16. + - A number: all the bins have this fixed width. + Default is "uint16" + """ + check_supported(method, histogram_methods, "histogram computing method") + self.method = method + if isinstance(bin_width, str): + check_supported(bin_width, bin_width_policies, "bin width policy") + self._fixed_bw = False + else: + bin_width = float(bin_width) + self._fixed_bw = True + self.bin_width = bin_width + + + def _bin_width_u16(self, dmin, dmax): + return (dmax - dmin) / 65535. + + + def _bin_width_fixed(self, dmin, dmax): + return self.bin_width + + + def get_bin_width(self, dmin, dmax): + if self._fixed_bw: + return self._bin_width_fixed(dmin, dmax) + elif self.bin_width == "uint16": + return self._bin_width_u16(dmin, dmax) + else: + raise ValueError() + + + def _compute_histogram_fixed_bw(self, data, min_bins=None, data_range=None): + dmin, dmax = data.min(), data.max() if data_range is None else data_range + min_bins = min_bins or 1 + bw_max = self.get_bin_width(dmin, dmax) + nbins = 0 + bw_factor = 1 + + while nbins < min_bins: + bw = 2**round(log2(bw_max)) / bw_factor + nbins = int((dmax - dmin)/bw) + bw_factor *= 2 + + + res = np.histogram(data, bins=nbins) + return res + + + def _merge_histograms_fixed_bw(self, histograms): + histos = [h[0] for h in histograms] + bins = [h[1][:-1] for h in histograms] # 0.5 * (h[1][1:] + h[1][:-1]) ? + res = np.histogram( + np.hstack(bins), + weights=np.hstack(histos), + bins=min([h.size for h in histos]) + ) + return res + + -- GitLab From 6fd78e0d0d6bf4b366b5859e48d3d83be710cf62 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 11:29:20 +0200 Subject: [PATCH 02/24] PartialHistogram: create dispatch methods --- nabu/misc/histogram.py | 122 ++++++++++++++++++++++++++++++++++------- 1 file changed, 102 insertions(+), 20 deletions(-) diff --git a/nabu/misc/histogram.py b/nabu/misc/histogram.py index f693682c..44b61bd0 100644 --- a/nabu/misc/histogram.py +++ b/nabu/misc/histogram.py @@ -2,14 +2,6 @@ from math import log2 import numpy as np from ..utils import check_supported -histogram_methods = [ - "fixed_bins_width", - "fixed_bins_number" -] - -bin_width_policies = [ - "uint16", -] class PartialHistogram: """ @@ -20,7 +12,17 @@ class PartialHistogram: data are readily available in-memory. """ - def __init__(self, method="fixed_bins_width", bin_width="uint16"): + histogram_methods = [ + "fixed_bins_width", + "fixed_bins_number" + ] + + bin_width_policies = [ + "uint16", + ] + + + def __init__(self, method="fixed_bins_width", bin_width="uint16", num_bins=None, min_bins=None): """ Initialize a PartialHistogram class. @@ -42,11 +44,25 @@ class PartialHistogram: `f1_converted - f2_converted < 1` once cast to uint16. - A number: all the bins have this fixed width. Default is "uint16" + num_bins: int, optional + Number of bins when method = 'fixed_bins_number'. + min_bins: int, optional + Minimum number of bins when method = 'fixed_bins_width'. """ - check_supported(method, histogram_methods, "histogram computing method") + check_supported(method, self.histogram_methods, "histogram computing method") self.method = method + self._set_bin_width(bin_width) + self._set_num_bins(num_bins) + self.min_bins = min_bins + self._set_histogram_methods() + + + def _set_bin_width(self, bin_width): + if self.method == "fixed_bins_number": + self.bin_width = None + return if isinstance(bin_width, str): - check_supported(bin_width, bin_width_policies, "bin width policy") + check_supported(bin_width, self.bin_width_policies, "bin width policy") self._fixed_bw = False else: bin_width = float(bin_width) @@ -54,6 +70,61 @@ class PartialHistogram: self.bin_width = bin_width + def _set_num_bins(self, num_bins): + if self.method == "fixed_bins_width": + self.num_bins = None + return + if self.method == "fixed_bins_number" and num_bins is None: + raise ValueError("Need to specify num_bins for method='fixed_bins_number'") + self.num_bins = int(num_bins) + + + def _set_histogram_methods(self): + self._histogram_methods = { + "fixed_bins_number": { + "compute": self._compute_histogram_fixed_nbins, + "merge": self._merge_histograms_fixed_nbins, + }, + "fixed_bins_width": { + "compute": self._compute_histogram_fixed_bw, + "merge": self._merge_histograms_fixed_bw, + }, + } + assert set(self._histogram_methods.keys()) == set(self.histogram_methods) + + + @staticmethod + def _get_histograms_and_bins(histograms, center=False): + histos = [h[0] for h in histograms] + if center: + bins = [0.5 * (h[1][1:] + h[1][:-1]) for h in histograms] + else: + bins = [h[1][:-1] for h in histograms] + return histos, bins + + # + # Histogram with fixed number of bins + # + + def _compute_histogram_fixed_nbins(self, data, data_range=None): + dmin, dmax = data.min(), data.max() if data_range is None else data_range + res = np.histogram(data, bins=self.num_bins) + return res + + + def _merge_histograms_fixed_nbins(self, histograms): + histos, bins = self._get_histograms_and_bins(histograms) + res = np.histogram( + np.hstack(bins), + weights=np.hstack(histos), + bins=self.num_bins, + ) + return res + + # + # Histogram with fixed bin width + # + def _bin_width_u16(self, dmin, dmax): return (dmax - dmin) / 65535. @@ -71,31 +142,42 @@ class PartialHistogram: raise ValueError() - def _compute_histogram_fixed_bw(self, data, min_bins=None, data_range=None): + def _compute_histogram_fixed_bw(self, data, data_range=None): dmin, dmax = data.min(), data.max() if data_range is None else data_range - min_bins = min_bins or 1 + min_bins = self.min_bins or 1 bw_max = self.get_bin_width(dmin, dmax) nbins = 0 bw_factor = 1 - while nbins < min_bins: bw = 2**round(log2(bw_max)) / bw_factor nbins = int((dmax - dmin)/bw) bw_factor *= 2 - - res = np.histogram(data, bins=nbins) return res def _merge_histograms_fixed_bw(self, histograms): - histos = [h[0] for h in histograms] - bins = [h[1][:-1] for h in histograms] # 0.5 * (h[1][1:] + h[1][:-1]) ? - res = np.histogram( + histos, bins = self._get_histograms_and_bins(histograms, center=False) + dmax = max([b[-1] for b in bins]) + dmin = min([b[0] for b in bins]) + bw_max = max([b[1] - b[0] for b in bins]) + res = np.histogram( np.hstack(bins), weights=np.hstack(histos), - bins=min([h.size for h in histos]) + bins=int((dmax - dmin)/bw_max) ) return res + # + # Dispatch methods + # + + def compute_histogram(self, data, data_range=None): + compute_hist_func = self._histogram_methods[self.method]["compute"] + return compute_hist_func(data, data_range=data_range) + + + def merge_histograms(self, histograms): + merge_hist_func = self._histogram_methods[self.method]["merge"] + return merge_hist_func(histograms) -- GitLab From 65de400d69a295a3f4f9ab503e4f4c5a4f225f5f Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 11:48:28 +0200 Subject: [PATCH 03/24] Integrate histogram in nabu config --- nabu/resources/nabu_config.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nabu/resources/nabu_config.py b/nabu/resources/nabu_config.py index 00bf9406..b331cefb 100644 --- a/nabu/resources/nabu_config.py +++ b/nabu/resources/nabu_config.py @@ -221,6 +221,18 @@ nabu_config = { "validator": integer_validator, "type": "optional", }, + "output_histogram": { + "default": "0", + "help": "Whether to compute a histogram of the volume.", + "validator": boolean_validator, + "type": "optional", + }, + "histogram_bins": { + "default": "1000000", + "help": "Number of bins for the output histogram. Default is one million. ", + "validator": nonnegative_integer_validator, + "type": "advanced", + }, "iterations": { "default": "200", "help": "\nParameters for iterative algorithms\n------------------------------------\nNumber of iterations", -- GitLab From 64ec2617d7ba088ea37e54bc8d704c4da02d74ac Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 13:34:20 +0200 Subject: [PATCH 04/24] nabu_config: create a new dedicated section postproc --- nabu/resources/nabu_config.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/nabu/resources/nabu_config.py b/nabu/resources/nabu_config.py index b331cefb..3089dff3 100644 --- a/nabu/resources/nabu_config.py +++ b/nabu/resources/nabu_config.py @@ -221,18 +221,6 @@ nabu_config = { "validator": integer_validator, "type": "optional", }, - "output_histogram": { - "default": "0", - "help": "Whether to compute a histogram of the volume.", - "validator": boolean_validator, - "type": "optional", - }, - "histogram_bins": { - "default": "1000000", - "help": "Number of bins for the output histogram. Default is one million. ", - "validator": nonnegative_integer_validator, - "type": "advanced", - }, "iterations": { "default": "200", "help": "\nParameters for iterative algorithms\n------------------------------------\nNumber of iterations", @@ -290,6 +278,20 @@ nabu_config = { "type": "required", }, }, + "postproc": { + "output_histogram": { + "default": "0", + "help": "Whether to compute a histogram of the volume.", + "validator": boolean_validator, + "type": "optional", + }, + "histogram_bins": { + "default": "1000000", + "help": "Number of bins for the output histogram. Default is one million. ", + "validator": nonnegative_integer_validator, + "type": "advanced", + }, + }, "resources": { "method": { "default": "local", -- GitLab From 2b13db1cd2fcc0ff46ca24fa0ca1d34c71dc5731 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 13:34:32 +0200 Subject: [PATCH 05/24] io.config: handle new sections --- nabu/io/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nabu/io/config.py b/nabu/io/config.py index ec98f95f..38dbb471 100644 --- a/nabu/io/config.py +++ b/nabu/io/config.py @@ -161,6 +161,9 @@ def validate_nabu_config(config): raise ValueError("Unknown option '%s' in section [%s]" % (key, section)) validator = nabu_config[section][key]["validator"] res_config[section][key] = validator(section, key, value) + # Handle sections missing in config + for section in (set(nabu_config.keys()) - set(res_config.keys())): + res_config[section] = _extract_nabuconfig_section(section) return res_config -- GitLab From d38416b96dd2f878a264f6b06e26fd82efcbb69d Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 15:16:10 +0200 Subject: [PATCH 06/24] Fix unused imports --- nabu/resources/tests/test_nxflatfield.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nabu/resources/tests/test_nxflatfield.py b/nabu/resources/tests/test_nxflatfield.py index 5aed695b..17414266 100644 --- a/nabu/resources/tests/test_nxflatfield.py +++ b/nabu/resources/tests/test_nxflatfield.py @@ -2,10 +2,9 @@ from os import path from tempfile import mkdtemp import pytest import numpy as np -from silx.io import get_data from silx.io.url import DataUrl from nabu.io.config import import_h5_to_dict, export_dict_to_h5 -from nabu.testutils import get_data, NXDatasetMock +from nabu.testutils import NXDatasetMock from nabu.resources.nxflatfield import NXFlatField, val_to_nxkey -- GitLab From 00863c659fe4be9b79685920cae0b427a852e789 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 15:16:28 +0200 Subject: [PATCH 07/24] Add unit test --- nabu/misc/tests/test_histogram.py | 37 +++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 nabu/misc/tests/test_histogram.py diff --git a/nabu/misc/tests/test_histogram.py b/nabu/misc/tests/test_histogram.py new file mode 100644 index 00000000..b9666c45 --- /dev/null +++ b/nabu/misc/tests/test_histogram.py @@ -0,0 +1,37 @@ +from os import path +from tempfile import mkdtemp +import pytest +import numpy as np +from nabu.testutils import get_data +from nabu.misc.histogram import PartialHistogram + + +@pytest.fixture(scope='class') +def bootstrap(request): + cls = request.cls + cls.data = get_data("mri_rec_astra.npz")["data"] + cls.data /= 10 + cls.data[:100] *= 10 + cls.data_part_1 = cls.data[:100].ravel() + cls.data_part_2 = cls.data[100:].ravel() + cls.data0 = cls.data.ravel() + cls.bin_tol = 1e-5 * (cls.data0.max() - cls.data0.min()) + cls.hist_rtol = 1.5e-3 + + +@pytest.mark.usefixtures('bootstrap') +class TestPartialHistogram: + + def compare_histograms(self, hist1, hist2): + errmax_bins = np.max(np.abs(hist1[1] - hist2[1])) + assert errmax_bins < self.bin_tol + errmax_hist = np.max(np.abs(hist1[0] - hist2[0])/hist2[0].max()) + assert errmax_hist/hist2[0].max() < self.hist_rtol + + def test_fixed_nbins(self): + partial_hist = PartialHistogram(method="fixed_bins_number", num_bins=1e6) + hist1 = partial_hist.compute_histogram(self.data_part_1) + hist2 = partial_hist.compute_histogram(self.data_part_2) + hist = partial_hist.merge_histograms([hist1, hist2]) + ref = np.histogram(self.data0, bins=partial_hist.num_bins) + self.compare_histograms(hist, ref) -- GitLab From b8b7b2de00ba3c08cd9388a228de267646853dfb Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 15:44:26 +0200 Subject: [PATCH 08/24] config: validate new section missing from user config file --- nabu/io/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nabu/io/config.py b/nabu/io/config.py index 38dbb471..ec8b5fbf 100644 --- a/nabu/io/config.py +++ b/nabu/io/config.py @@ -164,6 +164,9 @@ def validate_nabu_config(config): # Handle sections missing in config for section in (set(nabu_config.keys()) - set(res_config.keys())): res_config[section] = _extract_nabuconfig_section(section) + for key, value in res_config[section].items(): + validator = nabu_config[section][key]["validator"] + res_config[section][key] = validator(section, key, value) return res_config -- GitLab From 66a625abca541490eb1c89febbf7d47080a8bd13 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 15:44:49 +0200 Subject: [PATCH 09/24] app.FullFieldPipeline: start integrating histogram --- nabu/app/fullfield.py | 8 ++++++++ nabu/app/fullfield_cuda.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/nabu/app/fullfield.py b/nabu/app/fullfield.py index 5191dd26..ed0154f0 100644 --- a/nabu/app/fullfield.py +++ b/nabu/app/fullfield.py @@ -10,6 +10,7 @@ from ..preproc.double_flatfield import DoubleFlatField from ..preproc.phase import PaganinPhaseRetrieval from ..preproc.sinogram import SinoProcessing from ..misc.unsharp import UnsharpMask +from ..misc.histogram import PartialHistogram from ..resources.utils import is_hdf5_extension # ~ from ..io.writer import Writers @@ -30,6 +31,7 @@ class FullFieldPipeline: SinoProcessingClass = SinoProcessing MLogClass = Log FBPClass = None # For now we don't have a plain python/numpy backend for reconstruction + HistogramClass = PartialHistogram def __init__(self, process_config, sub_region, logger=None, extra_options=None, phase_margin=None): """ @@ -310,6 +312,7 @@ class FullFieldPipeline: self._init_sino_builder() self._prepare_reconstruction() self._init_reconstruction() + self._init_histogram() self._init_writer() @use_options("read_chunk", "chunk_reader") @@ -468,6 +471,11 @@ class FullFieldPipeline: if options["fbp_filter_type"] == "none": self.reconstruction.fbp = self.reconstruction.backproj + @use_options("histogram", "histogram") + def _init_histogram(self): + pass + + @use_options("save", "writer") def _init_writer(self): options = self.processing_options["save"] diff --git a/nabu/app/fullfield_cuda.py b/nabu/app/fullfield_cuda.py index 297e74d7..78ff6b5b 100644 --- a/nabu/app/fullfield_cuda.py +++ b/nabu/app/fullfield_cuda.py @@ -9,6 +9,7 @@ from ..preproc.phase_cuda import CudaPaganinPhaseRetrieval from ..preproc.sinogram_cuda import CudaSinoProcessing from ..preproc.sinogram import SinoProcessing from ..misc.unsharp_cuda import CudaUnsharpMask +from ..misc.histogram import PartialHistogram from ..reconstruction.fbp import Backprojector from ..cuda.utils import get_cuda_context, __has_pycuda__, __pycuda_error_msg__, copy_big_gpuarray, replace_array_memory from .fullfield import FullFieldPipeline @@ -31,6 +32,7 @@ class CudaFullFieldPipeline(FullFieldPipeline): SinoProcessingClass = CudaSinoProcessing MLogClass = CudaLog FBPClass = Backprojector + HistogramClass = PartialHistogram def __init__(self, process_config, sub_region, logger=None, extra_options=None, phase_margin=None, cuda_options=None): self._init_cuda(cuda_options) -- GitLab From 74d001b984e7cfb0e314efd92ebe4fce913e6b74 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 15:45:00 +0200 Subject: [PATCH 10/24] processconfig: integrate histogram --- nabu/resources/processconfig.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/nabu/resources/processconfig.py b/nabu/resources/processconfig.py index 8ffa6bfa..4698c6ff 100644 --- a/nabu/resources/processconfig.py +++ b/nabu/resources/processconfig.py @@ -209,7 +209,18 @@ class ProcessConfig: # New key rec_options["cor_estimated_auto"] = (nabu_config["reconstruction"]["rotation_axis_position"] == "auto") + # + # Histogram + # + if nabu_config["postproc"]["output_histogram"]: + tasks.append("histogram") + options["histogram"] = copy_dict_items( + nabu_config["postproc"], ["histogram_bins"] + ) + # + # Save + # if nabu_config["output"]["location"] is not None: tasks.append("save") options["save"] = copy_dict_items( -- GitLab From b15aea37df043ac0797b3a713b95f430131ade5e Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 16:05:58 +0200 Subject: [PATCH 11/24] Integrate histogram in (Cuda)FullFieldPipeline --- nabu/app/fullfield.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/nabu/app/fullfield.py b/nabu/app/fullfield.py index ed0154f0..5e919655 100644 --- a/nabu/app/fullfield.py +++ b/nabu/app/fullfield.py @@ -12,7 +12,7 @@ from ..preproc.sinogram import SinoProcessing from ..misc.unsharp import UnsharpMask from ..misc.histogram import PartialHistogram from ..resources.utils import is_hdf5_extension -# ~ from ..io.writer import Writers + class FullFieldPipeline: """ @@ -473,7 +473,10 @@ class FullFieldPipeline: @use_options("histogram", "histogram") def _init_histogram(self): - pass + options = self.processing_options["histogram"] + self.histogram = PartialHistogram( + method="fixed_bins_number", num_bins=options["histogram_bins"] + ) @use_options("save", "writer") @@ -582,7 +585,6 @@ class FullFieldPipeline: def _take_log(self): self.mlog.take_logarithm(self.radios) - @pipeline_step("radios_movements", "Applying radios movements") def _radios_movements(self, radios=None): if radios is None: @@ -612,6 +614,10 @@ class FullFieldPipeline: sinos[i], output=self.recs[i] ) + @pipeline_step("histogram", "Computing histogram") + def _compute_histogram(self): + self.recs_histogram = self.histogram.compute_histogram(self.recs.ravel()) + @pipeline_step("writer", "Saving data") def _write_data(self, data=None): if data is None: @@ -631,6 +637,7 @@ class FullFieldPipeline: self._radios_movements() self._build_sino() self._reconstruct() + self._compute_histogram() self._write_data() self._process_finalize() -- GitLab From 22786a7edeca75ff38c180fdb78ae766dfa08281 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 17:42:21 +0200 Subject: [PATCH 12/24] utils: add hist_as_2Darray --- nabu/utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nabu/utils.py b/nabu/utils.py index 63760e8e..eb0a6f5e 100644 --- a/nabu/utils.py +++ b/nabu/utils.py @@ -291,6 +291,18 @@ def array_tostring(arr, show_dtype=False): return "%s(%s)" % (name, attrs) +def hist_as_2Darray(hist, center=True, dtype="f"): + hist, bins = hist + if center: + bins = 0.5 * (bins[1:] + bins[:-1]) + else: + bins = bins[:-1] + res = np.zeros((2, hist.size), dtype=dtype) + res[0] = hist + res[1] = bins.astype(dtype) + return res + + # ------------------------------------------------------------------------------ # ------------------------ Image (move elsewhere ?) ---------------------------- # ------------------------------------------------------------------------------ -- GitLab From e6ee4d60fe8516a97590a0b5c251f33638eb8c64 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 17:43:01 +0200 Subject: [PATCH 13/24] Integrate histogram in LimitedMemory --- nabu/app/fullfield_cuda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nabu/app/fullfield_cuda.py b/nabu/app/fullfield_cuda.py index 78ff6b5b..7a7e31fa 100644 --- a/nabu/app/fullfield_cuda.py +++ b/nabu/app/fullfield_cuda.py @@ -444,6 +444,7 @@ class CudaFullFieldPipelineLimitedMemory(CudaFullFieldPipeline): self._reconstruct(sinos=self._d_sinos) # Copy D2H self._d_recs[:transfer_size].get(ary=self._h_recs[start_idx:end_idx]) + self._compute_histogram(data=self._h_recs) self.logger.debug("End of processing steps on sinos") self._h_radios = self._orig_h_radios # Write -- GitLab From 0b953206afc183762cf8b10a42ebd6c6f942d585 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 17:43:36 +0200 Subject: [PATCH 14/24] app.FullField: write histogram in HDF5 file --- nabu/app/fullfield.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/nabu/app/fullfield.py b/nabu/app/fullfield.py index 5e919655..8b573612 100644 --- a/nabu/app/fullfield.py +++ b/nabu/app/fullfield.py @@ -2,7 +2,7 @@ from os import path, mkdir import numpy as np from ..resources.logger import LoggerOrPrint from .utils import use_options, pipeline_step, WriterConfigurator -from ..utils import check_supported +from ..utils import check_supported, hist_as_2Darray from ..io.reader import ChunkReader from ..preproc.ccd import FlatField, Log, CCDCorrection from ..preproc.shift import VerticalShift @@ -478,7 +478,6 @@ class FullFieldPipeline: method="fixed_bins_number", num_bins=options["histogram_bins"] ) - @use_options("save", "writer") def _init_writer(self): options = self.processing_options["save"] @@ -615,8 +614,10 @@ class FullFieldPipeline: ) @pipeline_step("histogram", "Computing histogram") - def _compute_histogram(self): - self.recs_histogram = self.histogram.compute_histogram(self.recs.ravel()) + def _compute_histogram(self, data=None): + if data is None: + data = self.recs + self.recs_histogram = self.histogram.compute_histogram(data.ravel()) @pipeline_step("writer", "Saving data") def _write_data(self, data=None): @@ -625,6 +626,18 @@ class FullFieldPipeline: self.writer.write(data, *self._writer_exec_args, **self._writer_exec_kwargs) self.logger.info("Wrote %s" % self.writer.get_filename()) self.processing_options["save"]["file_prefix"] = self._orig_file_prefix + if "histogram" in self.processing_steps: + self.logger.info("Saving histogram") + self.writer.write( + hist_as_2Darray(self.recs_histogram), + "histogram", + processing_index=self._writer_exec_kwargs["processing_index"]+1, + config={ + "file": path.basename(self.writer.get_filename()), + "bins": self.processing_options["histogram"]["histogram_bins"], + } + ) + def _process_chunk(self): -- GitLab From 92f9a12aaf535738d12ab4ede43e775d587f3c06 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 17:46:24 +0200 Subject: [PATCH 15/24] Write histogram only for HDF5 output --- nabu/app/fullfield.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nabu/app/fullfield.py b/nabu/app/fullfield.py index 8b573612..dfabc882 100644 --- a/nabu/app/fullfield.py +++ b/nabu/app/fullfield.py @@ -490,7 +490,8 @@ class FullFieldPipeline: ) nx_info = None - if is_hdf5_extension(options["file_format"]): + self._hdf5_output = is_hdf5_extension(options["file_format"]) + if self._hdf5_output: fname_start_index = None options["file_prefix"] = self._orig_file_prefix + str( "_%04d" % (self.z_min + self._phase_margin_up) @@ -626,7 +627,7 @@ class FullFieldPipeline: self.writer.write(data, *self._writer_exec_args, **self._writer_exec_kwargs) self.logger.info("Wrote %s" % self.writer.get_filename()) self.processing_options["save"]["file_prefix"] = self._orig_file_prefix - if "histogram" in self.processing_steps: + if "histogram" in self.processing_steps and self._hdf5_output: self.logger.info("Saving histogram") self.writer.write( hist_as_2Darray(self.recs_histogram), -- GitLab From 91db5d1e8f76fd23c4ddafb6bc0473e70551e2e6 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Tue, 8 Sep 2020 17:46:43 +0200 Subject: [PATCH 16/24] io.NXProcessWriter: dont delete whole entry, only NXProcess --- nabu/io/writer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nabu/io/writer.py b/nabu/io/writer.py index 1ed84e3f..2cd40d34 100644 --- a/nabu/io/writer.py +++ b/nabu/io/writer.py @@ -77,8 +77,9 @@ class NXProcessWriter(Writer): Dictionary containing the configuration. """ with HDF5File(self.fname, self._filemode, swmr=True) as fid: - if self.overwrite and self.data_path in fid: - del fid[self.data_path] + results_path = path.join(self.data_path, process_name) + if self.overwrite and results_path in fid: + del fid[results_path] nx_entry = fid.require_group(self.data_path) if "NX_class" not in nx_entry.attrs: nx_entry.attrs["NX_class"] = "NXentry" -- GitLab From 099de777923617b1d26c52a6bdbcc4533e26b8a2 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Wed, 9 Sep 2020 12:29:31 +0200 Subject: [PATCH 17/24] ProcessConfig: do histogram only if HDF5 output --- nabu/resources/processconfig.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nabu/resources/processconfig.py b/nabu/resources/processconfig.py index 4698c6ff..d3e55931 100644 --- a/nabu/resources/processconfig.py +++ b/nabu/resources/processconfig.py @@ -1,6 +1,7 @@ import os from ..utils import PlaceHolder, DataPlaceHolder, copy_dict_items from ..io.config import NabuConfigParser, validate_nabu_config +from .utils import is_hdf5_extension from .dataset_analyzer import analyze_dataset, EDFDatasetAnalyzer, HDF5DatasetAnalyzer from .dataset_validator import NabuValidator from .cor import CORFinder @@ -213,6 +214,10 @@ class ProcessConfig: # Histogram # if nabu_config["postproc"]["output_histogram"]: + # Current limitation + if not(is_hdf5_extension(nabu_config["output"]["file_format"])): + raise ValueError("Histograms are only supported when output is HDF5") + # tasks.append("histogram") options["histogram"] = copy_dict_items( nabu_config["postproc"], ["histogram_bins"] -- GitLab From e14a878a61894ef8b9e9c646fc29d31ad9381a45 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Wed, 9 Sep 2020 12:29:43 +0200 Subject: [PATCH 18/24] histogram: add_last_bin --- nabu/misc/histogram.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/nabu/misc/histogram.py b/nabu/misc/histogram.py index 44b61bd0..8101f132 100644 --- a/nabu/misc/histogram.py +++ b/nabu/misc/histogram.py @@ -181,3 +181,16 @@ class PartialHistogram: merge_hist_func = self._histogram_methods[self.method]["merge"] return merge_hist_func(histograms) + + +def add_last_bin(histo_bins): + """ + Add the last bin (max value) to a list of bin edges. + """ + res = np.zeros(histo_bins.size + 1, dtype=histo_bins.dtype) + res[:-1] = histo_bins[:] + res[-1] = res[-2] + (res[1] - res[0]) + return res + + + -- GitLab From 3bd563030e3bfddfca915150c2344ac0714801e9 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Wed, 9 Sep 2020 12:29:52 +0200 Subject: [PATCH 19/24] io.reader: formatting --- nabu/io/reader.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/nabu/io/reader.py b/nabu/io/reader.py index 1b7f1c14..6310ae39 100644 --- a/nabu/io/reader.py +++ b/nabu/io/reader.py @@ -60,9 +60,6 @@ class Reader(object): pass - - - class NPReader(Reader): multi_load = True @@ -148,7 +145,6 @@ class EDFReader(Reader): return self.read(data_url.file_path()) - class HDF5Reader(Reader): multi_load = True @@ -190,7 +186,6 @@ class HDF5Reader(Reader): self.release() - Readers = { "edf": EDFReader, "hdf5": HDF5Reader, @@ -201,7 +196,6 @@ Readers = { } - class ChunkReader(object): """ A reader of chunk of images. @@ -450,4 +444,3 @@ class ChunkReader(object): self._loaded = True - -- GitLab From 908b93bd65035dbed1566a34c69b79410765219c Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Wed, 9 Sep 2020 12:30:14 +0200 Subject: [PATCH 20/24] LocalReconstruction: WIP add merge_histograms --- nabu/app/local_reconstruction.py | 57 ++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/nabu/app/local_reconstruction.py b/nabu/app/local_reconstruction.py index 3feaee6a..b858bd83 100644 --- a/nabu/app/local_reconstruction.py +++ b/nabu/app/local_reconstruction.py @@ -2,13 +2,15 @@ from os.path import join, isfile, basename from math import ceil import gc from psutil import virtual_memory -from nabu.resources.logger import LoggerOrPrint -from ..io.writer import merge_hdf5_files +from silx.io import get_data +from silx.io.url import DataUrl +from ..resources.logger import LoggerOrPrint +from ..io.writer import merge_hdf5_files, NXProcessWriter from ..cuda.utils import collect_cuda_gpus from ..resources.computations import estimate_chunk_size from ..preproc.phase import compute_paganin_margin from ..app.fullfield_cuda import CudaFullFieldPipeline, CudaFullFieldPipelineLimitedMemory - +from ..misc.histogram import PartialHistogram, add_last_bin def get_gpus_ids(resources_cfg): gpus_ids = resources_cfg["gpu_id"] @@ -309,8 +311,10 @@ class LocalReconstruction: files.sort() files = [join(out_cfg["file_prefix"], basename(fname)) for fname in files] entry = getattr(self.process_config.dataset_infos.dataset_scanner, "entry", "entry") + # TODO "reconstruction" is hard-coded, might be problematic in the future h5_path = join(entry, *["reconstruction", "results", "data"]) process_name = "reconstruction" + # self.logger.info("Merging reconstructions to %s" % output_file) merge_hdf5_files( files, h5_path, output_file, process_name, @@ -326,3 +330,50 @@ class LocalReconstruction: base_dir=out_cfg["location"], overwrite=out_cfg["overwrite_results"] ) + # TODO dont hard-code process name + self.merge_histograms(entry, "histogram", output_file, entry, process_name) + + + def merge_histograms(self, masterfile_entry, masterfile_process_name, output_file, output_entry, output_process_name): + """ + Merge the partial histograms + """ + if not(self.process_config.nabu_config["postproc"]["output_histogram"]): + return + self.logger.info("Merging histograms") + + # + h5_path = join(masterfile_entry, *[masterfile_process_name, "results", "data"]) + # + files = sorted(self.results.values()) + data_urls = [] + for fname in files: + url = DataUrl( + file_path=fname, data_path=h5_path, data_slice=None, scheme="silx" + ) + data_urls.append(url) + histograms = [] + for data_url in data_urls: + h2D = get_data(data_url) + histograms.append( + (h2D[0], add_last_bin(h2D[1])) + ) + histograms_merger = PartialHistogram( # TODO configurable + method="fixed_bins_number", num_bins=histograms[0][0].size + ) + merged_hist = histograms_merger.merge_histograms(histograms) + print(merged_hist) + + + + # ~ writer = NXProcessWriter... + # ~ writer.write( + # ~ hist_as_2Darray(self.recs_histogram), + # ~ self._get_process_name(kind="histogram"), + # ~ processing_index=self._writer_exec_kwargs["processing_index"]+1, + # ~ config={ + # ~ "file": path.basename(self.writer.get_filename()), + # ~ "bins": self.processing_options["histogram"]["histogram_bins"], + # ~ } + # ~ ) + -- GitLab From aa5afb2ee2da3f8e212c3dc483c5fb010b076ca5 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Wed, 9 Sep 2020 12:30:34 +0200 Subject: [PATCH 21/24] app.FullField: more flexible get_process_name --- nabu/app/fullfield.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/nabu/app/fullfield.py b/nabu/app/fullfield.py index dfabc882..68bd7b4d 100644 --- a/nabu/app/fullfield.py +++ b/nabu/app/fullfield.py @@ -182,9 +182,13 @@ class FullFieldPipeline: return n_recs - def _get_process_name(self): - # TODO support "incomplete" processing pipeline - return "reconstruction" + def _get_process_name(self, kind="reconstruction"): + # In the future, might be something like "reconstruction-" + if kind == "reconstruction": + return "reconstruction" + elif kind == "histogram": + return "histogram" + return kind def register_callback(self, step_name, callback): @@ -631,7 +635,7 @@ class FullFieldPipeline: self.logger.info("Saving histogram") self.writer.write( hist_as_2Darray(self.recs_histogram), - "histogram", + self._get_process_name(kind="histogram"), processing_index=self._writer_exec_kwargs["processing_index"]+1, config={ "file": path.basename(self.writer.get_filename()), @@ -640,7 +644,6 @@ class FullFieldPipeline: ) - def _process_chunk(self): self._flatfield() self._double_flatfield() -- GitLab From aa13ac96d098938559f4f48352f8940a7012dd8e Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Wed, 9 Sep 2020 15:09:27 +0200 Subject: [PATCH 22/24] Move hist_as_2Darray from utils to misc.histogram --- nabu/app/fullfield.py | 4 ++-- nabu/app/local_reconstruction.py | 2 +- nabu/misc/histogram.py | 14 +++++++++++--- nabu/utils.py | 12 ------------ 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/nabu/app/fullfield.py b/nabu/app/fullfield.py index 68bd7b4d..3a56120c 100644 --- a/nabu/app/fullfield.py +++ b/nabu/app/fullfield.py @@ -2,7 +2,7 @@ from os import path, mkdir import numpy as np from ..resources.logger import LoggerOrPrint from .utils import use_options, pipeline_step, WriterConfigurator -from ..utils import check_supported, hist_as_2Darray +from ..utils import check_supported from ..io.reader import ChunkReader from ..preproc.ccd import FlatField, Log, CCDCorrection from ..preproc.shift import VerticalShift @@ -10,7 +10,7 @@ from ..preproc.double_flatfield import DoubleFlatField from ..preproc.phase import PaganinPhaseRetrieval from ..preproc.sinogram import SinoProcessing from ..misc.unsharp import UnsharpMask -from ..misc.histogram import PartialHistogram +from ..misc.histogram import PartialHistogram, hist_as_2Darray from ..resources.utils import is_hdf5_extension diff --git a/nabu/app/local_reconstruction.py b/nabu/app/local_reconstruction.py index b858bd83..34383595 100644 --- a/nabu/app/local_reconstruction.py +++ b/nabu/app/local_reconstruction.py @@ -10,7 +10,7 @@ from ..cuda.utils import collect_cuda_gpus from ..resources.computations import estimate_chunk_size from ..preproc.phase import compute_paganin_margin from ..app.fullfield_cuda import CudaFullFieldPipeline, CudaFullFieldPipelineLimitedMemory -from ..misc.histogram import PartialHistogram, add_last_bin +from ..misc.histogram import PartialHistogram, add_last_bin, hist_as_2Darray def get_gpus_ids(resources_cfg): gpus_ids = resources_cfg["gpu_id"] diff --git a/nabu/misc/histogram.py b/nabu/misc/histogram.py index 8101f132..6093d22e 100644 --- a/nabu/misc/histogram.py +++ b/nabu/misc/histogram.py @@ -182,6 +182,17 @@ class PartialHistogram: return merge_hist_func(histograms) +def hist_as_2Darray(hist, center=True, dtype="f"): + hist, bins = hist + if center: + bins = 0.5 * (bins[1:] + bins[:-1]) + else: + bins = bins[:-1] + res = np.zeros((2, hist.size), dtype=dtype) + res[0] = hist + res[1] = bins.astype(dtype) + return res + def add_last_bin(histo_bins): """ @@ -191,6 +202,3 @@ def add_last_bin(histo_bins): res[:-1] = histo_bins[:] res[-1] = res[-2] + (res[1] - res[0]) return res - - - diff --git a/nabu/utils.py b/nabu/utils.py index eb0a6f5e..63760e8e 100644 --- a/nabu/utils.py +++ b/nabu/utils.py @@ -291,18 +291,6 @@ def array_tostring(arr, show_dtype=False): return "%s(%s)" % (name, attrs) -def hist_as_2Darray(hist, center=True, dtype="f"): - hist, bins = hist - if center: - bins = 0.5 * (bins[1:] + bins[:-1]) - else: - bins = bins[:-1] - res = np.zeros((2, hist.size), dtype=dtype) - res[0] = hist - res[1] = bins.astype(dtype) - return res - - # ------------------------------------------------------------------------------ # ------------------------ Image (move elsewhere ?) ---------------------------- # ------------------------------------------------------------------------------ -- GitLab From e146381925117c9c08679265772da5e08ab95ede Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Wed, 9 Sep 2020 15:17:00 +0200 Subject: [PATCH 23/24] Merge histograms in HDF5 master file --- nabu/app/local_reconstruction.py | 35 ++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/nabu/app/local_reconstruction.py b/nabu/app/local_reconstruction.py index 34383595..48e8b6cd 100644 --- a/nabu/app/local_reconstruction.py +++ b/nabu/app/local_reconstruction.py @@ -309,7 +309,10 @@ class LocalReconstruction: self.logger.error("No files to merge") return files.sort() - files = [join(out_cfg["file_prefix"], basename(fname)) for fname in files] + self._local_files = [ + join(out_cfg["file_prefix"], basename(fname)) + for fname in files + ] entry = getattr(self.process_config.dataset_infos.dataset_scanner, "entry", "entry") # TODO "reconstruction" is hard-coded, might be problematic in the future h5_path = join(entry, *["reconstruction", "results", "data"]) @@ -334,7 +337,10 @@ class LocalReconstruction: self.merge_histograms(entry, "histogram", output_file, entry, process_name) - def merge_histograms(self, masterfile_entry, masterfile_process_name, output_file, output_entry, output_process_name): + def merge_histograms( + self, masterfile_entry, masterfile_process_name, + output_file, output_entry, output_process_name + ): """ Merge the partial histograms """ @@ -362,18 +368,17 @@ class LocalReconstruction: method="fixed_bins_number", num_bins=histograms[0][0].size ) merged_hist = histograms_merger.merge_histograms(histograms) - print(merged_hist) - - - # ~ writer = NXProcessWriter... - # ~ writer.write( - # ~ hist_as_2Darray(self.recs_histogram), - # ~ self._get_process_name(kind="histogram"), - # ~ processing_index=self._writer_exec_kwargs["processing_index"]+1, - # ~ config={ - # ~ "file": path.basename(self.writer.get_filename()), - # ~ "bins": self.processing_options["histogram"]["histogram_bins"], - # ~ } - # ~ ) + writer = NXProcessWriter( + output_file, entry=output_entry, filemode="a", overwrite=True + ) + writer.write( + hist_as_2Darray(merged_hist), + "histogram", # TODO don't hard-code + processing_index=1, + config={ + "files": self._local_files, + "bins": self.process_config.nabu_config["postproc"]["histogram_bins"], + } + ) -- GitLab From ebf4ed65fba6e82e541897eff3a47b0f12ec6df5 Mon Sep 17 00:00:00 2001 From: Pierre Paleo Date: Thu, 10 Sep 2020 13:48:19 +0200 Subject: [PATCH 24/24] Bugfix: use relative file paths --- nabu/app/local_reconstruction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nabu/app/local_reconstruction.py b/nabu/app/local_reconstruction.py index 48e8b6cd..f74fd6f8 100644 --- a/nabu/app/local_reconstruction.py +++ b/nabu/app/local_reconstruction.py @@ -320,13 +320,13 @@ class LocalReconstruction: # self.logger.info("Merging reconstructions to %s" % output_file) merge_hdf5_files( - files, h5_path, output_file, process_name, + self._local_files, h5_path, output_file, process_name, output_entry=entry, output_filemode="a", processing_index=0, config={ "reconstruction_stages": { - str(k): v for k, v in zip(self.results.keys(), files) + str(k): v for k, v in zip(self.results.keys(), self._local_files) }, "nabu_config": self.process_config.nabu_config }, -- GitLab