import os
import numpy as np
from silx.io.url import DataUrl
from tomoscan.esrf.edfscan import EDFTomoScan
from tomoscan.esrf.hdf5scan import HDF5TomoScan
from ..thirdparty.tomwer_load_flats_darks import get_flats_frm_process_file, get_darks_frm_process_file
from .nxflatfield import NXFlatField
from .utils import is_hdf5_extension

dataset_infos = {
    "num_radios": None,
    "num_darks": None,
    "num_flats": None,
    "radios": None,
    "darks": None,
    "flats": None,
    "frame_dims": None,
    "energy_kev": None,
    "distance_m": None,
    "pixel_size_microns": None,
}


class DatasetAnalyzer(object):
    """
    Base class for dataset analyzers.
    """
    def __init__(self, location, processes_file=None, extra_options=None):
        """
        Initialize a Dataset analyzer.

        Parameters
        ----------
        location: str
            Dataset location (directory or file name)
        processes_file: str, optional
            Processes file providing supplementary information (e.g. pre-processed data)
        extra_options: dict, optional
            Extra options on how to interpret the dataset.
            Available options are the following:
              - force_flatfield
        """
        self.location = location
        self.processes_file = processes_file
        self._set_extra_options(extra_options)


    def _set_extra_options(self, extra_options):
        if extra_options is None:
            extra_options = {}
        advanced_options = {
            "force_flatfield": False,
        }
        advanced_options.update(extra_options)
        self.extra_options = advanced_options
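
    # Illustrative sketch of how extra_options is meant to be passed, with a
    # hypothetical dataset path (HDF5DatasetAnalyzer is defined below):
    #
    #   analyzer = HDF5DatasetAnalyzer(
    #       "/path/to/scan.nx",
    #       extra_options={"force_flatfield": True},
    #   )
    #
    # Keys not listed in the defaults above are kept as-is, since the defaults
    # dict is simply updated with the user-provided options.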


    def _init_dataset_scan(self, filetype, **kwargs):
        scanners = {
            "edf": EDFTomoScan,
            "hdf5": HDF5TomoScan,
        }
        if filetype not in scanners:
            raise ValueError("No scanner for file type: %s" % filetype)
        scanner = scanners[filetype]
        self.dataset_scanner = scanner(self.location, **kwargs)
        self.projections = self.dataset_scanner.projections
        self.flats = self.dataset_scanner.flats
        self.darks = self.dataset_scanner.darks
        self.n_angles = self.dataset_scanner.tomo_n
        self.radio_dims = (self.dataset_scanner.dim_1, self.dataset_scanner.dim_2)
        self._binning = (1, 1)
        self.translations = None
        self.axis_position = None
        self._radio_dims_notbinned = self.radio_dims

    @property
    def energy(self):
        """
        Return the energy in keV.
        """
        return self.dataset_scanner.energy

    @property
    def distance(self):
        """
        Return the sample-detector distance in meters.
        """
        return abs(self.dataset_scanner.distance)

    @property
    def pixel_size(self):
        """
        Return the pixel size in microns.
        """
        return self.dataset_scanner.pixel_size * 1e6 # TODO: handle X and Y pixel sizes

    @property
    def binning(self):
        """
        Return the binning in (x, y)
        """
        return self._binning

    @property
    def rotation_angles(self):
        """
        Return the rotation angles in radians.
        """
        return self._get_rotation_angles()

    def remove_unused_radios(self):
        """
        Remove "unused" radios.
        This is used for legacy ESRF scans.
        """
        # This should only be used with EDF datasets
        if self.dataset_scanner.type != "edf":
            return
        # Extraneous projections are assumed to be at the end
        projs_indices = sorted(self.projections.keys())
        used_radios_range = range(projs_indices[0], self.n_angles)
        radios_not_used = []
        for idx in self.projections.keys():
            if idx not in used_radios_range:
                radios_not_used.append(idx)
        for idx in radios_not_used:
            self.projections.pop(idx)
        return radios_not_used


class EDFDatasetAnalyzer(DatasetAnalyzer):
    """
    EDF Dataset analyzer for legacy ESRF acquisitions
    """
    def __init__(self, location, n_frames=1, processes_file=None, extra_options=None):
        """
        EDF Dataset analyzer.

        Parameters
        -----------
        location: str
            Location of the folder containing EDF files
        """
        super().__init__(location, processes_file=processes_file, extra_options=extra_options)
        if not(os.path.isdir(location)):
            raise ValueError("%s is not a directory" % location)
        self._init_dataset_scan("edf", n_frames=n_frames)

    def _get_rotation_angles(self):
        # Not available in EDF dataset
        return None


class HDF5DatasetAnalyzer(DatasetAnalyzer):
    """
    HDF5 dataset analyzer
    """
    def __init__(self, location, processes_file=None, extra_options=None):
        """
        HDF5 Dataset analyzer.

        Parameters
        -----------
        location: str
            Location of the HDF5 master file
        """
        super().__init__(location, processes_file=processes_file, extra_options=extra_options)
        if not(os.path.isfile(location)):
            raise ValueError("%s is not a file" % location)
        self._init_dataset_scan("hdf5")
        self._get_flats_darks()
        self._rot_angles = None


    def _get_flats_darks(self):
        if len(self.flats) == 0 and not(self.extra_options["force_flatfield"]):
            # No flats at all in the dataset. Do nothing.
            return
        if self.processes_file is None and self._load_flats_from_tomwer():
            # Loaded from tomwer_processes.h5
            return
        # Otherwise load or compute flats/darks with nabu
        self._compute_or_load_flats()


    def _load_flats_from_tomwer(self, tomwer_processes_fname=None):
        tomwer_processes_fname = tomwer_processes_fname or "tomwer_processes.h5"
        tomwer_processes_file = os.path.join(self.dataset_scanner.path, tomwer_processes_fname)
        if not(os.path.isfile(tomwer_processes_file)):
            return False
        print("Loading darks and refs from %s" % tomwer_processes_file)
        new_flats = get_flats_frm_process_file(
            tomwer_processes_file, self.dataset_scanner.entry
        )
        new_darks = get_darks_frm_process_file(
            tomwer_processes_file, self.dataset_scanner.entry
        )
        self.flats = new_flats
        self.darks = new_darks
        return True


    def _compute_or_load_flats(self):
        processes_file = self.processes_file
        if processes_file is None:
            processes_file = os.path.join(self.dataset_scanner.path, "nabu_processes.h5")
        # By default, write in processes_file. Not sure it is a good idea
        results_file = processes_file
        # Don't write in processes_file if flatfield is forced
        if self.extra_options["force_flatfield"]:
            results_file = None
        lookup_files = [
            DataUrl(
                file_path=processes_file,
                data_path=os.path.join(self.dataset_scanner.entry, "flat_field_images")
            )
        ]
        self._nxflatfield = NXFlatField(
            self._get_dataset_hdf5_url(),
            self.dataset_scanner.image_key,
            lookup_files=lookup_files,
            results_file=results_file, # None if flatfield is forced (do not write results)
            flats_reduction="median",
            darks_reduction="mean"
        )
        res = self._nxflatfield.get_final_urls()
        self.flats = res["flats"]
        self.darks = res["darks"]


    def _get_rotation_angles(self):
        if self._rot_angles is None:
            angles = np.array(self.dataset_scanner.rotation_angle)
            projs_idx = np.array(list(self.projections.keys()))
            # Keep only the angles of the actual projections (darks/flats excluded),
            # and convert them from degrees to radians
            angles = angles[projs_idx]
            self._rot_angles = np.deg2rad(angles)
        return self._rot_angles


    def _get_dataset_hdf5_url(self):
        # Point to the whole 3D dataset by taking the URL of the first
        # projection and dropping its data slice
        first_proj_idx = sorted(self.projections.keys())[0]
        first_proj_url = self.projections[first_proj_idx]
        return DataUrl(
            file_path=first_proj_url.file_path(),
            data_path=first_proj_url.data_path(),
            data_slice=None
        )


    @property
    def dataset_hdf5_url(self):
        return self._get_dataset_hdf5_url()


def analyze_dataset(dataset_path, processes_file=None, extra_options=None):
    """
    Analyze a dataset and return the corresponding DatasetAnalyzer instance.

    Parameters
    ----------
    dataset_path: str
        Path to the dataset: either a directory of EDF files, or a HDF5 master file.
    processes_file: str, optional
        Processes file providing supplementary information (e.g. pre-processed data)
    extra_options: dict, optional
        Extra options on how to interpret the dataset (see DatasetAnalyzer).
    """
    if not(os.path.isdir(dataset_path)):
        if not(os.path.isfile(dataset_path)):
            raise ValueError("Error: %s no such file or directory" % dataset_path)
        if not(is_hdf5_extension(os.path.splitext(dataset_path)[-1].replace(".", ""))):
            raise ValueError("Error: expected a HDF5 file")
        dataset_analyzer_class = HDF5DatasetAnalyzer
    else: # directory -> assuming EDF
        dataset_analyzer_class = EDFDatasetAnalyzer
    dataset_structure = dataset_analyzer_class(
        dataset_path, processes_file=processes_file, extra_options=extra_options
    )
    return dataset_structure
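

# Example usage (illustrative sketch, with a hypothetical dataset path):
#
#   dataset_info = analyze_dataset("/data/sample/scan_0001.nx")
#   print(dataset_info.n_angles, dataset_info.radio_dims)
#   angles = dataset_info.rotation_angles # in radians; None for EDF datasets
#   flats, darks = dataset_info.flats, dataset_info.darks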