dataset_analyzer.py 12.5 KB
Newer Older
Pierre Paleo's avatar
Pierre Paleo committed
1
import os
Pierre Paleo's avatar
Pierre Paleo committed
2
import numpy as np
3
4
from silx.io.url import DataUrl
from tomoscan.io import HDF5File
5
from tomoscan.esrf.edfscan import EDFTomoScan
6
7
from tomoscan.esrf.hdf5scan import HDF5TomoScan, ImageKey
from ..utils import check_supported
8
from ..thirdparty.tomwer_load_flats_darks import get_flats_frm_process_file, get_darks_frm_process_file
9
from .logger import LoggerOrPrint
10

11
12
13
14
15
# Map this module's image-type names ("flats", "darks", "projections")
# to the corresponding NXtomo "image_key" integer values (see Nexus
# Format specification, NXtomo, "image_key").
val_to_nxkey = {
    "flats": ImageKey.FLAT_FIELD.value,
    "darks": ImageKey.DARK_FIELD.value,
    "projections": ImageKey.PROJECTION.value,
}
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Template of the metadata describing a dataset (counts, frames, geometry).
# NOTE(review): this dict is not referenced by any code visible in this
# file — presumably consumed by other modules; confirm before removing.
dataset_infos = {
    "num_radios": None,
    "num_darks": None,
    "num_flats": None,
    "radios": None,
    "darks": None,
    "flats": None,
    "frame_dims": None,
    "energy_kev": None,
    "distance_m": None,
    "pixel_size_microns": None,
}


class DatasetAnalyzer(object):
    """
    Base class for datasets analyzers.
    """
    def __init__(self, location):
        """
        Initialize a DatasetAnalyzer.

        Parameters
        ----------
        location: str
            Dataset location (a directory for EDF scans, a master file
            for HDF5 scans).
        """
        self.location = location


    def _init_dataset_scan(self, filetype, **kwargs):
        """
        Instantiate the tomoscan scanner matching `filetype` and cache the
        dataset information it exposes on this object.

        Parameters
        ----------
        filetype: str
            Type of the dataset. Can be "edf" or "hdf5".

        Raises
        ------
        ValueError
            If `filetype` has no associated scanner.
        """
        scanners = {
            "edf": EDFTomoScan,
            "hdf5": HDF5TomoScan,
        }
        if filetype not in scanners:
            raise ValueError("No scanner for file type: %s" % filetype)
        scanner = scanners[filetype]
        self.dataset_scanner = scanner(self.location, **kwargs)
        self.projections = self.dataset_scanner.projections
        self.flats = self.dataset_scanner.flats
        self.darks = self.dataset_scanner.darks
        self.n_angles = self.dataset_scanner.tomo_n
        self.radio_dims = (self.dataset_scanner.dim_1, self.dataset_scanner.dim_2)
        self._binning = (1, 1)
        self.translations = None
        self.axis_position = None
        # Keep the original (unbinned) dimensions around, since radio_dims
        # may be updated when a binning is applied.
        self._radio_dims_notbinned = self.radio_dims


    @property
    def energy(self):
        """
        Return the energy in kev.
        """
        return self.dataset_scanner.energy


    @property
    def distance(self):
        """
        Return the sample-detector distance in meters.
        """
        return abs(self.dataset_scanner.distance)


    @property
    def pixel_size(self):
        """
        Return the pixel size in microns.
        """
        # tomoscan provides the pixel size in meters
        return self.dataset_scanner.pixel_size * 1e6 # TODO X and Y pixel size


    @property
    def binning(self):
        """
        Return the binning in (x, y)
        """
        return self._binning


    @property
    def rotation_angles(self):
        """
        Return the rotation angles in radians.
        """
        return self._get_rotation_angles()


    def remove_unused_radios(self):
        """
        Remove "unused" radios.
        This is used for legacy ESRF scans.

        Returns
        -------
        radios_not_used: list of int or None
            Indices of the radios removed from `self.projections`,
            or None if the dataset is not an EDF scan.
        """
        # This should only be used with EDF datasets
        if self.dataset_scanner.type != "edf":
            return
        # Extraneous projections are assumed to be on the end
        projs_indices = sorted(self.projections.keys())
        used_radios_range = range(projs_indices[0], self.n_angles)
        radios_not_used = [
            idx for idx in self.projections.keys()
            if idx not in used_radios_range
        ]
        for idx in radios_not_used:
            self.projections.pop(idx)
        return radios_not_used



114
115
116
117
class EDFDatasetAnalyzer(DatasetAnalyzer):
    """
    EDF Dataset analyzer for legacy ESRF acquisitions
    """
    def __init__(self, location, n_frames=1):
        """
        Build an analyzer for a folder of EDF files.

        Parameters
        -----------
        location: str
            Location of the folder containing EDF files
        """
        super().__init__(location)
        if not os.path.isdir(location):
            raise ValueError("%s is not a directory" % location)
        self._init_dataset_scan("edf", n_frames=n_frames)


    def _get_rotation_angles(self):
        # Legacy EDF datasets do not store the rotation angles
        return None

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152

class HDF5DatasetAnalyzer(DatasetAnalyzer):
    """
    HDF5 dataset analyzer
    """
    def __init__(self, location):
        """
        HDF5 Dataset analyzer.

        Parameters
        -----------
        location: str
            Location of the HDF5 master file

        Raises
        ------
        ValueError
            If `location` is not an existing file.
        """
        super().__init__(location)
        if not(os.path.isfile(location)):
            raise ValueError("%s is not a file" % location)
        self._init_dataset_scan("hdf5")
        self._get_flats_darks()
        # Rotation angles are computed lazily, see _get_rotation_angles()
        self._rot_angles = None


    def _load_flats_from_tomwer(self, tomwer_processes_fname=None):
        """
        Load the reduced flats/darks computed by tomwer.

        Parameters
        ----------
        tomwer_processes_fname: str, optional
            Name of the tomwer processes file, relative to the dataset
            directory. Default is "tomwer_processes.h5".
        """
        tomwer_processes_fname = tomwer_processes_fname or "tomwer_processes.h5"
        # BUG FIX: the provided file name was previously ignored, and
        # "tomwer_processes.h5" was always used regardless of the argument.
        tomwer_processes_file = os.path.join(self.dataset_scanner.path, tomwer_processes_fname)
        if not(os.path.isfile(tomwer_processes_file)):
            raise NotImplementedError(
                "Flat-fielding with raw flats/darks is not implemented yet - Expected to find %s"
                % tomwer_processes_file
            )
        print("Loading darks and refs from %s" % tomwer_processes_file)
        new_flats = get_flats_frm_process_file(
            tomwer_processes_file, self.dataset_scanner.entry
        )
        new_darks = get_darks_frm_process_file(
            tomwer_processes_file, self.dataset_scanner.entry
        )
        self.flats = new_flats
        self.darks = new_darks


    def _get_flats_darks(self):
        # More than two flats presumably means the scan holds raw flats
        # series: load the tomwer-reduced ones instead — TODO confirm
        if len(self.flats) > 2:
            self._load_flats_from_tomwer()


    def _get_rotation_angles(self):
        """
        Return the rotation angles of the projections, in radians (cached).
        """
        if self._rot_angles is None:
            angles = np.array(self.dataset_scanner.rotation_angle)
            projs_idx = np.array(list(self.projections.keys()))
            # Keep only the angles corresponding to actual projections
            angles = angles[projs_idx]
            self._rot_angles = np.deg2rad(angles)
        return self._rot_angles

Pierre Paleo's avatar
Pierre Paleo committed
190
191
192
193
194


def analyze_dataset(dataset_path):
    """
    Create the dataset analyzer matching `dataset_path`: a directory is
    handled as a legacy EDF scan, a HDF5 file as a NX/HDF5 scan.
    """
    if os.path.isdir(dataset_path):
        # directory -> assuming EDF
        analyzer_cls = EDFDatasetAnalyzer
    else:
        if not os.path.isfile(dataset_path):
            raise ValueError("Error: %s no such file or directory" % dataset_path)
        ext = os.path.splitext(dataset_path)[-1]
        if ext not in (".hdf5", ".h5", ".nx"):
            raise ValueError("Error: expected a HDF5 file")
        analyzer_cls = HDF5DatasetAnalyzer
    return analyzer_cls(dataset_path)

204
205
206
207
208
209
210
211
212
213
214
215

class FlatFieldLoader:
    """
    A helper class to load flats and darks, or to compute the final ones.

    At ESRF, several darks/flats series are acquired during a scan.
    Each series undergoes a reduction (mean, median, ...) to get a "final" image.
    For example, there is one series of flats in the beginning, and one in the end ;
    thus there are two corresponding "final" flats.
    """

    def __init__(
        self, data_url, image_keys, image_type,
        reduction_method="mean", lookup_files=None, logger=None
    ):
        """
        Initialize a FlatFieldLoader helper.

        Parameters
        -----------
        data_url: silx.io.url.DataUrl
            A DataUrl object containing the URL to the volume data.
        image_keys: list of int
            List of keys corresponding to each image of the data volume.
            See Nexus Format specification, NXtomo, "image_key".
        image_type: str
            Which image type to load. Can be "flats" or "darks".
        reduction_method: str, optional
            Which reduction method to use. Can be "mean", "median" or "sum".
            Default is "mean".
        lookup_files: list of DataUrl, optional
            List of paths (DataUrl) to inspect to load existing "final" flats/darks.
        logger: Logger object, optional
            Logging object
        """
        self.data_url = data_url
        self.image_keys = image_keys
        self._set_image_type(image_type)
        self._set_reduction_method(reduction_method)
        self.lookup_files = lookup_files or []
        self.logger = LoggerOrPrint(logger)


    def _set_image_type(self, image_type):
        # Map the accepted spellings and NXtomo image-key values
        # to this class' canonical names
        img_types = {
            "flats": "flats",
            "flat": "flats",
            ImageKey.FLAT_FIELD.value: "flats",
            "darks": "darks",
            "dark": "darks",
            ImageKey.DARK_FIELD.value: "darks",
        }
        check_supported(image_type, img_types, "Image type")
        self.image_type = img_types[image_type]


    def _set_reduction_method(self, reduction_method):
        # Resolve the reduction name to the numpy function applied on the series
        red_methods = {
            "mean": np.mean,
            "median": np.median,
            "sum": np.sum
        }
        check_supported(reduction_method, red_methods, "reduction method")
        self.reduction_method = reduction_method
        self.reduction_function = red_methods[reduction_method]


    def browse_existing_flatfield_results(self):
        """
        Attempt at loading already computed "final" darks and flats from existing files.

        Returns
        -------
        existing_data: dict or None
            The first successfully loaded flats/darks, or None if no lookup
            file could provide them.
        """
        existing_data = None
        for fpath in self.lookup_files:
            existing_data = self.load_existing_flatfield(fpath)
            if existing_data is not None:
                self.logger.info("Loaded %s from %s" % (self.image_type, fpath.file_path()))
                break
        if existing_data is None:
            self.logger.debug("%s not loaded from any file" % self.image_type)
        return existing_data


    def load_existing_flatfield(self, file_url):
        """
        Attempt at loading already computed "final" darks and flats from an existing file.

        Parameters
        -----------
        file_url: DataUrl
            URL of the flat/dark to load.

        Returns
        -------
        res: dict or None
            Mapping of dataset name to image data, or None if loading failed.
        """
        results_path = os.path.join(file_url.data_path(), "results") # NXProcess
        res = {}
        try:
            with HDF5File(file_url.file_path(), "r", swmr=True) as f:
                results = f[results_path]
                for key, val in results[self.image_type].items():
                    res[key] = val[:]
        except Exception as exc:
            # Best-effort: a missing/garbled file is not fatal, the raw
            # flats/darks can still be reduced from the data volume
            self.logger.error(
                "Could not load %s from %s: %s"
                % (self.image_type, file_url.file_path(), str(exc))
            )
            res = None
        return res


    @staticmethod
    def get_data_slices(image_keys, key):
        """
        Return indices in the data volume where images correspond to a certain key.

        Parameters
        ----------
        image_keys: list of int
            List of keys corresponding to each image of the data volume.
            See Nexus Format specification, NXtomo, "image_key".
        key: int
            Key to search in the `image_keys` list.

        Returns
        --------
        slices: list of slice
            A list where each item is a slice.
        """
        mask = np.asarray(image_keys) == key
        if mask.size == 0:
            return []
        # Pad with False on both sides so that runs of `key` touching the
        # volume boundaries are detected as well.
        # BUG FIX: the previous implementation (np.diff on the raw mask +
        # an assert on parity) raised AssertionError when a series started
        # at index 0 or ended at the last frame.
        transitions = np.diff(np.concatenate(([False], mask, [False])).astype(np.int8))
        starts = np.nonzero(transitions == 1)[0]
        ends = np.nonzero(transitions == -1)[0]
        return [slice(start, stop) for start, stop in zip(starts, ends)]


    def get_data_chunk(self, data_slice):
        """
        Get a data chunk from the data volume whose path was specified at class instantiation.
        """
        with HDF5File(self.data_url.file_path(), "r", swmr=True) as f:
            data_raw = f[self.data_url.data_path()][data_slice]
        return data_raw


    def apply_reduction(self, volume):
        """
        Apply a reduction function on a data volume.
        The data is assumed to be three dimensional, a "stack of images".

        Parameters
        -----------
        volume: numpy.ndarray
            Data volume
        """
        return self.reduction_function(volume, axis=0)


    def get_final_images(self):
        """
        Main function of this class.

        This function first attempts at loading already-computed flat/dark.
        If nothing is found, then the final flats/darks are computed from raw ones.

        Returns
        -------
        imgs: dict
            Dictionary where the key is the index of the final flats/darks,
            and the values are the corresponding reduced data.
        """
        existing_imgs = self.browse_existing_flatfield_results()
        if existing_imgs is not None:
            return existing_imgs
        img_slices = self.get_data_slices(self.image_keys, val_to_nxkey[self.image_type])
        if not img_slices:
            self.logger.error("No %s found in %s" % (self.image_type, self.data_url.file_path()))
            return None
        res = {}
        for data_slice in img_slices:
            data_chunk = self.get_data_chunk(data_slice)
            img = self.apply_reduction(data_chunk)
            # Each reduced image is keyed by the start index of its series
            res[data_slice.start] = img
        return res