dataset_analyzer.py 17 KB
Newer Older
Pierre Paleo's avatar
Pierre Paleo committed
1
import os
2
3
import posixpath
from tempfile import mkdtemp
Pierre Paleo's avatar
Pierre Paleo committed
4
import numpy as np
5
from silx.io import get_data
6
from silx.io.url import DataUrl
7
from tomoscan.esrf.edfscan import EDFTomoScan
Pierre Paleo's avatar
Pierre Paleo committed
8
from tomoscan.esrf.hdf5scan import HDF5TomoScan
9
from ..thirdparty.tomwer_load_flats_darks import get_flats_frm_process_file, get_darks_frm_process_file
10
from .nxflatfield import NXFlatField
11
from ..utils import is_writeable
12
from .utils import is_hdf5_extension, get_values_from_file
Pierre Paleo's avatar
Pierre Paleo committed
13
14
from .logger import LoggerOrPrint

Pierre Paleo's avatar
Pierre Paleo committed
15

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Template data structure describing an analyzed dataset.
# Every field starts as None ("not determined yet") and is filled at analysis time.
dataset_infos = dict.fromkeys([
    "num_radios",
    "num_darks",
    "num_flats",
    "radios",
    "darks",
    "flats",
    "frame_dims",
    "energy_kev",
    "distance_m",
    "pixel_size_microns",
])


class DatasetAnalyzer(object):
    """
    Base class for datasets analyzers.

    Subclasses are expected to provide `_get_rotation_angles`, `_is_halftomo`,
    `_get_tomwer_process_file_name` and the `pixel_size` property.
    """

    def __init__(self, location, processes_file=None, extra_options=None, logger=None):
        """
        Initialize a Dataset analyzer.

        Parameters
        ----------
        location: str
            Dataset location (directory or file name)
        processes_file: str, optional
            Processes file providing supplementary information (ex. pre-processed data)
        extra_options: dict, optional
            Extra options on how to interpret the dataset.
            Available options are the following:
              - force_flatfield
              - output_dir
        logger: logging object, optional
            Logger. If not set, messages will just be printed in stdout.
        """
        self.logger = LoggerOrPrint(logger)
        self.location = location
        self.processes_file = processes_file
        self._set_extra_options(extra_options)
        self._get_excluded_projections()
        self._set_other_default_dataset_values()

    def _set_extra_options(self, extra_options):
        # Merge user-provided options into the defaults below
        if extra_options is None:
            extra_options = {}
        advanced_options = {
            "force_flatfield": False,
            "output_dir": None,
            "exclude_projections": None,
            "hdf5_entry": None,
        }
        advanced_options.update(extra_options)
        self.extra_options = advanced_options

    def _get_excluded_projections(self):
        # Replace the "exclude_projections" file name with the list of
        # projection indices read from that file.
        excluded_projs = self.extra_options["exclude_projections"]
        if excluded_projs is None:
            return
        projs_idx = get_values_from_file(excluded_projs, any_size=True).astype(np.int32).tolist()
        self.logger.info("Ignoring projections: %s" % (str(projs_idx)))
        self.extra_options["exclude_projections"] = projs_idx

    def _init_dataset_scan(self, filetype, **kwargs):
        # Instantiate the tomoscan scanner for this dataset and cache its
        # main attributes (projections, flats, darks, dimensions).
        scanners = {
            "edf": EDFTomoScan,
            "hdf5": HDF5TomoScan,
        }
        if filetype not in scanners:
            raise ValueError("No scanner for file type: %s" % filetype)
        if filetype == "hdf5" and self.extra_options["hdf5_entry"] is not None:
            kwargs["entry"] = self.extra_options["hdf5_entry"]
        scanner = scanners[filetype]
        self.dataset_scanner = scanner(
            self.location,
            ignore_projections=self.extra_options["exclude_projections"],
            **kwargs
        )
        self.projections = self.dataset_scanner.projections
        self.flats = self.dataset_scanner.flats
        self.darks = self.dataset_scanner.darks
        self.n_angles = len(self.dataset_scanner.projections)
        self.radio_dims = (self.dataset_scanner.dim_1, self.dataset_scanner.dim_2)
        self._binning = (1, 1)
        self.translations = None
        self.ctf_translations = None
        self.axis_position = None
        self._radio_dims_notbinned = self.radio_dims

    def _set_other_default_dataset_values(self):
        self._detector_tilt = None

    def _get_tomwer_process_file_name(self):
        raise NotImplementedError("Base class")

    @property
    def energy(self):
        """
        Return the energy in kev.
        """
        return self.dataset_scanner.energy

    @property
    def distance(self):
        """
        Return the sample-detector distance in meters.
        """
        return abs(self.dataset_scanner.distance)

    @property
    def pixel_size(self):
        """
        Return the pixel size in microns.
        """
        # Fixed: was 'raise ValueError'. NotImplementedError is the conventional
        # way to mark an abstract member, consistent with
        # _get_tomwer_process_file_name above.
        raise NotImplementedError("Must be implemented by inheriting class")

    @property
    def binning(self):
        """
        Return the binning in (x, y)
        """
        return self._binning

    @property
    def rotation_angles(self):
        """
        Return the rotation angles in radians.
        """
        return self._get_rotation_angles()

    @property
    def is_halftomo(self):
        """
        Indicates whether the current dataset was performed with half acquisition.
        """
        return self._is_halftomo()

    def remove_unused_radios(self):
        """
        Remove "unused" radios.
        This is used for legacy ESRF scans.

        Returns
        -------
        radios_not_used: list or None
            Indices of the removed radios, or None if the dataset is not EDF.
        """
        # This should only be used with EDF datasets
        if self.dataset_scanner.type != "edf":
            return
        # Extraneous projections are assumed to be on the end
        projs_indices = sorted(self.projections.keys())
        used_radios_range = range(projs_indices[0], len(self.projections))
        radios_not_used = [
            idx for idx in self.projections.keys() if idx not in used_radios_range
        ]
        for idx in radios_not_used:
            self.projections.pop(idx)
        return radios_not_used

    @property
    def detector_tilt(self):
        """
        Return the detector tilt in degrees
        """
        return self._detector_tilt

    @detector_tilt.setter
    def detector_tilt(self, tilt):
        self._detector_tilt = tilt


193
194
195
196
class EDFDatasetAnalyzer(DatasetAnalyzer):
    """
    EDF Dataset analyzer for legacy ESRF acquisitions
    """

    def __init__(self, location, n_frames=1, processes_file=None, extra_options=None, logger=None):
        """
        EDF Dataset analyzer.

        Parameters
        -----------
        location: str
            Location of the folder containing EDF files
        """
        super().__init__(
            location, processes_file=processes_file, extra_options=extra_options, logger=logger
        )
        if not os.path.isdir(location):
            raise ValueError("%s is not a directory" % location)
        self._init_dataset_scan("edf", n_frames=n_frames)

    def _get_rotation_angles(self):
        # Rotation angle information is not available in EDF datasets
        return None

    def _get_tomwer_process_file_name(self):
        # "<scan dir basename>_tomwer_processes.h5", located inside the scan directory
        scan_path = self.dataset_scanner.path
        if scan_path is None:
            return None
        fname = "_".join((os.path.basename(scan_path), "tomwer_processes.h5"))
        return os.path.join(scan_path, fname)

    @property
    def pixel_size(self):
        """
        Return the pixel size in microns.
        """
        # TODO X and Y pixel size
        return self.dataset_scanner.pixel_size * 1e6

    @property
    def hdf5_entry(self):
        """
        Return the HDF5 entry of the current dataset.
        Not applicable for EDF (return None)
        """
        return None

    def _is_halftomo(self):
        # Cannot be determined from an EDF dataset
        return None


247
248
249
250
251

class HDF5DatasetAnalyzer(DatasetAnalyzer):
    """
    HDF5 dataset analyzer
    """

    def __init__(self, location, processes_file=None, extra_options=None, logger=None):
        """
        HDF5 Dataset analyzer.

        Parameters
        -----------
        location: str
            Location of the HDF5 master file
        """
        super().__init__(
            location, processes_file=processes_file, extra_options=extra_options, logger=logger
        )
        if not(os.path.isfile(location)):
            raise ValueError("%s is not a file" % location)
        self._init_dataset_scan("hdf5")
        self._get_flats_darks()
        # Cache for rotation angles (computed lazily in _get_rotation_angles)
        self._rot_angles = None

    def _get_flats_darks(self):
        # Decide where the reduced flats/darks come from:
        # tomwer processes file, nabu processes file, or computed from raw frames.
        if len(self.flats) == 0 and not(self.extra_options["force_flatfield"]):
            # No flats at all in the dataset. Do nothing.
            return
        if self.processes_file is None and self._load_flats_from_tomwer():
            # Loaded from [XXX]_tomwer_processes.h5
            return
        # Otherwise load or compute flats/darks with nabu
        self._compute_or_load_flats()

    def _load_flats_from_tomwer(self):
        """
        Try to load reduced flats/darks from a tomwer processes file.

        Returns
        -------
        bool
            True if both flats and darks could be loaded, False otherwise.
        """
        tomwer_processes_file = self._get_tomwer_process_file_name()
        if tomwer_processes_file is None or not(os.path.isfile(tomwer_processes_file)):
            # if not found try with the old (legacy) file name
            if self.dataset_scanner.path is None:
                # Guard: os.path.join(None, ...) would raise TypeError
                return False
            tomwer_processes_file = os.path.join(self.dataset_scanner.path,
                                                 "tomwer_processes.h5")
        if not os.path.isfile(tomwer_processes_file):
            return False
        new_flats = get_flats_frm_process_file(
            tomwer_processes_file, self.dataset_scanner.entry
        )
        new_darks = get_darks_frm_process_file(
            tomwer_processes_file, self.dataset_scanner.entry
        )
        if not (len(new_flats) > 0 and len(new_darks) > 0):
            return False
        self.logger.info("Loading darks and refs from %s" % tomwer_processes_file)
        self.flats = new_flats
        self.darks = new_darks
        return True

    def _get_results_file(self):
        """
        Return a writeable HDF5 file path for storing reduced flats/darks.

        Tries, in order: the user "processes file", the directory of the NX
        master file, the configured "output_dir", and finally a temp directory.
        """
        if self.processes_file not in (None, ""):
            if is_writeable(self.processes_file):
                return self.processes_file
            else:
                self.logger.error("Cannot write to %s" % self.processes_file)
        results_relfname = os.path.splitext(
            os.path.basename(self.dataset_scanner.master_file)
        )[0] + "_nabu_processes.hdf5"
        # Attempt 1: write in the same dir as the master NX file
        results_dir = os.path.dirname(self.dataset_scanner.master_file)
        if is_writeable(results_dir):
            return os.path.join(results_dir, results_relfname)
        # Attempt 2: write in the "output" dir, if specified
        out_dir = self.extra_options["output_dir"]
        if out_dir is not None and is_writeable(out_dir):
            return os.path.join(out_dir, results_relfname)
        # Last attempt: write in a temporary directory
        tempdir = mkdtemp(prefix="nabu_flatfield_")
        return os.path.join(tempdir, results_relfname)

    def _compute_or_load_flats(self):
        """
        Load (or compute and save) the reduced flats/darks with NXFlatField,
        then replace self.flats / self.darks with dicts of DataUrl.
        """
        h5_entry = self.hdf5_entry or "entry"
        h5_path = posixpath.join(h5_entry, "flat_field_images")
        # In this case, we always want to return a dict of DataUrl,
        # so we have to write the final flats/darks somewhere.
        # Therefore results_url cannot be None when instantiating NXFlatField.
        # It will be ignored if data can be loaded from self.processes_file
        results_file = self._get_results_file()
        results_url = DataUrl(file_path=results_file, data_path=h5_path)

        lookup_files = None
        if self.processes_file is not None:
            lookup_files = [DataUrl(file_path=self.processes_file, data_path=h5_path)]
        else:
            if os.path.isfile(results_file):
                # We use an existing "results file"  as lookup file.
                # NXFlatField wont overwrite it if it has the good configuration
                lookup_files = [DataUrl(file_path=results_file, data_path=h5_path)]

        self._nxflatfield = NXFlatField(
            self._get_dataset_hdf5_url(),
            self.dataset_scanner.image_key,
            lookup_files=lookup_files,
            results_url=results_url,
            force_load_existing_results=self.extra_options["force_flatfield"],
            flats_reduction="median",
            darks_reduction="mean",
            logger=self.logger
        )
        res = self._nxflatfield.get_final_urls()
        self.flats = res["flats"]
        self.darks = res["darks"]

    def _get_tomwer_process_file_name(self):
        # NOTE: a second, dead definition taking a "scan" argument used to shadow
        # this one; it has been removed. The file name is
        # "<master file basename>_tomwer_processes.h5", next to the master file.
        if self.dataset_scanner.path is not None:
            basename, _ = os.path.splitext(self.dataset_scanner.master_file)
            basename = os.path.basename(basename)
            basename = "_".join((basename, "tomwer_processes.h5"))
            return os.path.join(self.dataset_scanner.path, basename)
        else:
            return None

    def _get_rotation_angles(self):
        # Read angles once, keep only those of actual projections,
        # convert to radians, and cache the result.
        if self._rot_angles is None:
            angles = np.array(self.dataset_scanner.rotation_angle)
            projs_idx = np.array(list(self.projections.keys()))
            angles = angles[projs_idx]
            self._rot_angles = np.deg2rad(angles)
        return self._rot_angles

    def _get_dataset_hdf5_url(self):
        # Build a DataUrl pointing to the whole projections dataset
        # (data_slice=None), based on the first projection's URL.
        first_proj_idx = sorted(self.projections.keys())[0]
        first_proj_url = self.projections[first_proj_idx]
        return DataUrl(
            file_path=first_proj_url.file_path(),
            data_path=first_proj_url.data_path(),
            data_slice=None
        )

    @property
    def dataset_hdf5_url(self):
        return self._get_dataset_hdf5_url()

    @property
    def pixel_size(self):
        """
        Return the pixel size in microns.
        """
        # TODO X and Y pixel size
        try:
            x_size, _y_size = self.dataset_scanner._get_x_y_magnified_pixel_values()
            ps = x_size * 1e6
        except Exception:
            # Narrowed from a bare 'except:'. Fall back to the plain pixel size
            # when magnified values are unavailable.
            ps = self.dataset_scanner.pixel_size * 1e6
        return ps

    @property
    def hdf5_entry(self):
        """
        Return the HDF5 entry of the current dataset
        """
        return self.dataset_scanner.entry

    def _is_halftomo(self):
        # Returns True/False when the field of view information is available,
        # None when it cannot be determined.
        try:
            is_halftomo = self.dataset_scanner.field_of_view.value.lower() == "half"
        except Exception:
            # Narrowed from a bare 'except:'
            is_halftomo = None
        return is_halftomo
Pierre Paleo's avatar
Pierre Paleo committed
432
433


Pierre Paleo's avatar
Pierre Paleo committed
434
def analyze_dataset(dataset_path, processes_file=None, extra_options=None, logger=None):
    """
    Create the dataset analyzer suited to the dataset at `dataset_path`.

    A directory is analyzed as an EDF dataset; a file must be a HDF5 file.

    Raises
    ------
    ValueError
        If the path does not exist, or if a file does not have a HDF5 extension.
    """
    if os.path.isdir(dataset_path):
        # directory -> assuming EDF
        analyzer_cls = EDFDatasetAnalyzer
    else:
        if not os.path.isfile(dataset_path):
            raise ValueError("Error: %s no such file or directory" % dataset_path)
        ext = os.path.splitext(dataset_path)[-1].replace(".", "")
        if not is_hdf5_extension(ext):
            raise ValueError("Error: expected a HDF5 file")
        analyzer_cls = HDF5DatasetAnalyzer
    return analyzer_cls(
        dataset_path,
        processes_file=processes_file,
        extra_options=extra_options,
        logger=logger
    )
Pierre Paleo's avatar
Pierre Paleo committed
450

451
452


453
def get_0_180_radios(dataset_info, angles=None, return_indices=False):
    """
    Get the radios at 0 degres and 180 degrees.

    Parameters
    ----------
    dataset_info: `dataset_infos`
        Data structure with the dataset information
    angles: array, optional
        Array with the rotation angles. If provided, it overwrites the information from 'dataset_info', if any.
    return_indices: bool, optional
        Whether to return radios indices along with the radios array.

    Returns
    -------
    res: array or tuple
        If return_indices is True, return a tuple (radios, indices).
        Otherwise, return an array with the radios.
    """
    if angles is None:
        angles = dataset_info.rotation_angles
    sorted_indices = sorted(dataset_info.projections.keys())
    # Take angles 0 and 180 degrees. It might not work if there is an offset
    # (removed a dead 'radios_indices = []' assignment and an unused
    # '_min_indices' list from the original implementation)
    i_0 = np.argmin(np.abs(angles))
    i_180 = np.argmin(np.abs(angles - np.pi))
    radios_indices = [
        sorted_indices[i_0],
        sorted_indices[i_180]
    ]
    n_radios = 2
    radios = np.zeros((n_radios, ) + dataset_info.radio_dims[::-1], "f")
    for i in range(n_radios):
        radio_idx = radios_indices[i]
        radios[i] = get_data(dataset_info.projections[radio_idx]).astype("f")
    if return_indices:
        return radios, radios_indices
    else:
        return radios
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529


def get_angles(dataset_info, logger=None):
    """
    Get rotation angles (in radians) from a dataset_info data structure.
    If no such information is available, generates an array with standard angles.

    Parameters
    ----------
    dataset_info: `dataset_infos`
        Data structure with the dataset information
    logger: Logger
        Logger object
    """
    logger = LoggerOrPrint(logger)
    angles_from_dataset = dataset_info.rotation_angles
    if angles_from_dataset is not None:
        return angles_from_dataset
    # No angle information available: fall back on a default angular range.
    # Half-tomography scans span [0, 360] (endpoint included),
    # regular scans span [0, 180[ (endpoint excluded).
    if dataset_info.is_halftomo:
        theta_max = 2 * np.pi
        endpoint = True
        range_txt = "[0, 360]"
    else:
        theta_max = np.pi
        endpoint = False
        range_txt = "[0, 180["
    logger.warning(
        "no information on angles was found for this dataset. Using default range " + range_txt
    )
    return np.linspace(
        0, theta_max, len(dataset_info.projections),
        endpoint=endpoint
    )