dataset.py 20.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016-2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/


__authors__ = ["J. Garriga"]
__license__ = "MIT"
29
__date__ = "16/12/2019"
30

31
import copy
32
import numpy
33
import logging
34
import cv2
35
36
37

import fabio

38
from silx.io import utils
39
from silx.io import fabioh5
40
from silx.io.url import DataUrl
41
from silx.gui import qt
42

43
44
45
46
47
48
49
50
51
52
53
# Use the module name (not the file path) so the logger participates in the
# standard package logger hierarchy and responds to logging configuration.
_logger = logging.getLogger(__name__)

# Metadata kinds understood by the silx fabio bridge.
DEFAULT_METADATA = fabioh5.FabioReader.DEFAULT
COUNTER_METADATA = fabioh5.FabioReader.COUNTER
POSITIONER_METADATA = fabioh5.FabioReader.POSITIONER

# Map from the user-facing metadata-type name to its silx.io.fabioh5 id.
_METADATA_TYPES = {'default': DEFAULT_METADATA,
                   'counter': COUNTER_METADATA,
                   'positioner': POSITIONER_METADATA}

# Inverse mapping: retrieve the metadata name (str) for a silx.io.fabioh5 id.
_METADATA_TYPES_I = {fabio_id: name for name, fabio_id in _METADATA_TYPES.items()}
# Equal sizes guarantee the forward mapping had no duplicate ids.
assert len(_METADATA_TYPES_I) == len(_METADATA_TYPES)

61

62
class Dataset(qt.QObject):
    """Class to define a dataset from a series of raw data and dark data.

    Frames are opened as a fabio file series; pixel data is loaded lazily
    through silx DataUrls while per-frame metadata is kept in fabio readers.

    :param raw_filename: first filename of raw data to iterate from.
    :param dark_filename: first filename of dark data to iterate from.
    :param Union[Generator,Iterator,List] filenames: Ordered list of filenames
        to process as a file series.
    :param bool filter_data: If True, frames are classified into high / low
        intensity sets via :meth:`compute_intensity_threshold`.
    :param Union[Bool, None] load_data: If False, the data is not computed
        after the creation of the file_series. It waits until the load_data()
        method is called.
    """

    # Emits a progress percentage (int, 0..100) during long-running loads.
    signalProgress = qt.Signal(int)

    def __init__(self, raw_filename=None, dark_filename=None, filenames=None,
                 filter_data=False, load_data=True):
        qt.QObject.__init__(self)

        self._data = None              # lazily-loaded stack of raw frames
        self._reshaped_data = None     # data reshaped to the found dimensions
        self._filter_data = filter_data
        self.dark_frames = []
        self.metadata = []             # one fabioh5 reader per frame
        self.raw_filename = raw_filename
        self.dark_filename = dark_filename
        self.filenames = filenames
        self.__dims = AcquisitionDims()
        self._dimensions_values = {}   # cache for get_dimensions_values()

        # Initialize data
        self._file_series = fabio.open_series(first_filename=raw_filename,
                                              filenames=filenames)

        if load_data:
            self.load_data()

        if dark_filename is not None:
            with fabio.open_series(first_filename=dark_filename) as series:
                for dark_frame in series.frames():
                    # TODO: save only data, headers??
                    self.dark_frames.append(dark_frame.data)
        self.dark_frames = numpy.array(self.dark_frames)

    def load_data(self, percentage=1):
        """
        Function that saves the data of the frames into DataUrls and the metadata
        into fabio Readers.

        :param percentage: fraction of the overall progress bar this step
            accounts for (progress is emitted through ``signalProgress``).
        """
        data_urls = []
        self.metadata = []

        for iFrame in numpy.arange(start=0, stop=self._file_series.nframes):
            frame = self._file_series.getframe(iFrame).file_container
            data_urls.append(DataUrl(file_path=frame.filename,
                                     scheme='fabio').path())
            self.metadata.append(fabioh5.EdfFabioReader(fabio_image=frame))
            self.signalProgress.emit(int(percentage * iFrame / self._file_series.nframes * 100))

        self.data_urls = numpy.array(data_urls)

    def compute_intensity_threshold(self, percentage=1, start=0):
        """
        Function that computes the data from the set of urls.

        If the filter_data flag is activated it filters the data following the next:
        -- First, it computes the intensity for each frame, by calculating the variance after
        passing a gaussian filter.
        -- Second, computes the histogram of the intensity.
        -- Finally, saves the data of the frames with an intensity bigger than a threshold.
        The threshold is set to be the second bin of the histogram.

        :param percentage: fraction of the progress bar for this step.
        :param start: progress offset to start emitting from.
        """
        # TODO: enter the number of bins per parameter??
        intensity = []
        for i, frame in enumerate(self.data):
            # Variance of the blurred frame as an intensity proxy.
            # NOTE(review): sigma=20 with a 3x3 kernel — confirm intended.
            intensity += [cv2.GaussianBlur(frame, (3, 3), 20).var()]
            self.signalProgress.emit(int(start + percentage * i / len(self.data) * 100))
        # One histogram bin per frame; the threshold is the second bin edge.
        values, bins = numpy.histogram(intensity, int(self._data.shape[0]))
        # Boolean mask over frames: True = high intensity.
        self.threshold = numpy.array(intensity) >= bins[1]

    @property
    def data(self):
        """
        If data has not been computed, it reads the data from the urls.
        If flag for filter data is activated, computes the threshold.

        :returns: numpy.ndarray
        """
        if self._data is None:
            data = []
            for i, url in enumerate(self.data_urls):
                data += [utils.get_data(url)]
            self._data = numpy.array(data)
            if self._filter_data:
                self.compute_intensity_threshold()

        return self._data

    def get_data(self, percentage=1, start=0):
        """
        If data has not been computed, it reads the data from the urls.
        If flag for filter data is activated, computes the threshold.

        Same as the ``data`` property but reports progress while loading.

        :param percentage: fraction of the progress bar for this step.
        :param start: progress offset to start emitting from.
        :returns: numpy.ndarray
        """
        if self._data is None:
            data = []
            if self._filter_data:
                # Half the budget goes to loading, half to thresholding.
                percentage /= 2
            for i, url in enumerate(self.data_urls):
                data += [utils.get_data(url)]
                self.signalProgress.emit(int(start + ((percentage * i) / len(self.data_urls) * 100)))
            self._data = numpy.array(data)
            if self._filter_data:
                # NOTE(review): percentage was already halved above, so this
                # passes percentage/4 of the original budget — confirm intended.
                self.compute_intensity_threshold(percentage / 2, start=start + (percentage * 100))

        return self._data

    @data.setter
    def data(self, data):
        """
        Sets data and reshapes the data in case reshaping has been done before.
        """
        self._data = data
        if self._reshaped_data is not None:
            self.reshape_data()

    @property
    def hi_data(self):
        """
        :returns: The high intensity data if filter flag is activated, else data.
        """
        return self.data[self.threshold] if self._filter_data else self.data

    @hi_data.setter
    def hi_data(self, data):
        """
        Sets high intensity data, if filter data flag is active, else sets data.

        :param array_like data: data to set.
        """
        if self._filter_data:
            self.data[self.threshold] = data
        else:
            self.data = data

    @property
    def li_data(self):
        """
        :returns: The low intensity data. The filter flag has to be activated.
        """
        if self._filter_data:
            return self.data[~self.threshold]
        else:
            return None

    @li_data.setter
    def li_data(self, li_data):
        """
        Sets low intensity data. Expects filter data flag to be active.
        """
        assert self._filter_data, "Empty frames only exist when the flag filter_data \
                                  is activated"
        self.data[~self.threshold] = li_data

    def reshape_data(self):
        """
        Function that reshapes the data to fit the dimensions.

        :raises ValueError: if fewer than two dimensions are defined or the
            data cannot be reshaped to the dimensions' shape.
        """
        if self.__dims.ndim > 1:
            try:
                # Dimension shape plus the 2D frame shape (rows, columns).
                shape = list(self.__dims.shape)
                shape.append(self.data.shape[-2])
                shape.append(self.data.shape[-1])
                self._reshaped_data = self.data.reshape(shape)
            except Exception:
                raise ValueError("Failed to reshape data into dimensions {} \
                                  Try using another tolerance value.".format(' '.join(self.__dims.get_names())))
        else:
            raise ValueError("Not enough dimensions where found")

    @property
    def reshaped_data(self):
        # Reshaped view of the data, or None until reshape_data() succeeds.
        return self._reshaped_data

    def set_reshaped_data(self, data, axis, index):
        """
        Function to substitute data in a certain dimension.

        :param ndarray data: data to be inserted.
        :param int axis: axis of the data.
        :param int index: index of the data.
        """
        self.data = self.data.astype(numpy.float32, copy=False)
        if self._filter_data:
            # Only replace the high-intensity frames at this slice.
            threshold = numpy.take(self.threshold.view().reshape(self.__dims.shape), index, axis=axis)
            numpy.swapaxes(self._reshaped_data, 0, axis)[index, :][threshold] = data
        else:
            numpy.swapaxes(self._reshaped_data, 0, axis)[index, :] = data

    def get_reshaped_data(self, axis, index):
        """
        Returns reshaped data.
        If axis is given returns data at a certain index along the axis.
        If not returns all the reshaped data.

        :param int axis: axis of the data.
        :param int index: index of the data.
        """
        if self._filter_data:
            # Only return the high-intensity frames at this slice.
            threshold = numpy.take(self.threshold.reshape(self.__dims.shape), index, axis=axis)
            return numpy.take(self._reshaped_data, index, axis=axis)[threshold]
        return numpy.take(self._reshaped_data, index, axis=axis)

    @property
    def nframes(self):
        # Total number of frames (0 when no data is available).
        if self.data is None:
            return 0
        else:
            return self.data.shape[0]

    @property
    def dims(self):
        # The AcquisitionDims describing the dataset's reshaping dimensions.
        return self.__dims

    @dims.setter
    def dims(self, _dims):
        assert isinstance(_dims, AcquisitionDims), "Dimensions dictionary has " \
            "to be of class `AcquisitionDims`"
        self.__dims = _dims

    def clear_dims(self):
        # Drop all registered dimensions.
        self.__dims = AcquisitionDims()

    def add_dim(self, axis, dim):
        """
        Adds a dimension to the dimension's dictionary.

        :param int axis: axis of the dimension.
        :param :class:`Dimension` dim: dimension to be added.
        """
        self.__dims.add_dim(axis, dim)

    def remove_dim(self, axis):
        """
        Removes a dimension from the dimension's dictionary.

        :param int axis: axis of the dimension.
        """
        self.__dims.remove_dim(axis)

    def find_dimensions(self, kind, tolerance=1e-9):
        """
        Goes over all the headers from a given kind and finds the dimensions
        that move (have more than one value) along the data.

        Note: Before, only the dimensions that could fit where shown, now it
        shows all the dimensions and let the user choose the valid ones.

        :param int kind: Type of metadata to find the dimensions.
        :param float tolerance: Tolerance that will be used to compute the
        unique values.
        """
        self.__dims.clear()
        self._dimensions_values = {}

        keys = numpy.array(list(self.metadata[0].get_keys(kind)))
        # values[i, j] = value of key i in the metadata of frame j.
        values = numpy.array([[data.get_value(kind=kind, name=key)[0] for data
                             in self.metadata] for key in keys])
        # Unique values for each key.
        unique_values = [numpy.unique(value, return_counts=True) for value in values]
        dimensions = []
        dataset_size = len(self.metadata)
        # For every key that has more than one different value, creates a new Dimension.
        for i, value in enumerate(unique_values):
            if value[1][0] != dataset_size:
                dimension = Dimension(kind, keys[i], tolerance=tolerance)
                dimension.setUniqueValues(numpy.unique(value[0]))
                # Value that tells when does the change of value occur. It is used to know the order
                # of the reshaping.
                dimension.changing_value = numpy.unique(values[i, :int(dataset_size / value[1][0])],
                                                        return_counts=True)[1][0]
                dimensions.append(dimension)

        # Fastest-changing dimensions first, so reshape order matches the scan.
        for dimension in sorted(dimensions, key=lambda x: x.changing_value):
            self.__dims.add_dim(axis=self.__dims.ndim, dim=dimension)
            _logger.info("Dimension {} of size {} has been added for reshaping"
                         .format(dimension.name, dimension.size))

    def get_dimensions_values(self):
        """
        Returns all the metadata values of the dimensions.
        The values are assumed to be numbers.

        :returns: array_like
        """
        if not self._dimensions_values:
            data = self.metadata
            for dimension in self.__dims:
                values = numpy.empty((len(data)))
                for row, metadata_frame in enumerate(data):
                    values[row] = (metadata_frame.get_value(kind=dimension[1].kind,
                                   name=dimension[1].name)[0])
                self._dimensions_values[dimension[1].name] = values
        return self._dimensions_values

    def __deepcopy__(self, memo):
        """
        Create copy of the dataset. The data numpy array is also copied using
        deep copy. The rest of the attributes are the same.
        """
        # load_data=False avoids re-reading frames; arrays are copied below.
        dataset = type(self)(self.raw_filename, self.dark_filename, self.filenames,
                             filter_data=self._filter_data, load_data=False)
        dataset.data_urls = self.data_urls
        dataset.metadata = self.metadata
        dataset.data = copy.deepcopy(self.data, memo)
        if self._filter_data:
            dataset.threshold = copy.deepcopy(self.threshold, memo)
        dataset.dark_frames = copy.deepcopy(self.dark_frames, memo)
        dataset.dims = copy.deepcopy(self.__dims, memo)
        if self._reshaped_data is not None:
            dataset.reshape_data()
        return dataset
387

Julia Garriga Ferrer's avatar
[flake]    
Julia Garriga Ferrer committed
388

389
390
391
392
393
394
395
396
397
398
399
class AcquisitionDims(object):
    """
    Define the view of the data which has to be made.

    Holds a mapping from integer axis -> :class:`Dimension`.
    """
    def __init__(self):
        self.__dims = {}

    def add_dim(self, axis, dim):
        """
        Register a dimension at the given axis.

        :param int axis: axis of the dimension.
        :param Dimension dim: dimension to register.
        """
        assert isinstance(dim, Dimension)
        self.__dims[axis] = dim

    def remove_dim(self, axis):
        """Remove the dimension registered at *axis*, if any."""
        if axis in self.__dims:
            del self.__dims[axis]

    def clear(self):
        """Forget every registered dimension."""
        self.__dims = {}

    @property
    def ndim(self):
        """Number of registered dimensions."""
        return len(self.__dims)

    def get(self, axis):
        """
        Get Dimension at certain axis.

        :param int axis: axis of the dimension.
        :return: the requested dimension if it exists, else None.
        """
        assert isinstance(axis, int)
        # dict.get already returns None for missing axes.
        return self.__dims.get(axis)

    def get_names(self):
        """
        Get list with all the names of the dimensions.

        :return: array_like of strings
        """
        return [dim.name for dim in self.__dims.values()]

    @property
    def shape(self):
        """
        :return: shape of the currently defined dims. Missing axes contribute
            1; a dimension without a size contributes -1 (numpy wildcard).
        """
        shape = []
        for iDim in range(self.ndim):
            if iDim not in self.__dims:
                shape.append(1)
            else:
                shape.append(self.__dims[iDim].size or -1)
        return tuple(shape)

    def set_size(self, axis, size):
        """
        Recreate the dimension at *axis* with a new size, same name and kind.

        :param int axis: axis of the dimension
        :param int size: new size for the dimension
        """
        if axis not in self.__dims:
            # Lazy %-style args: message is only formatted if actually emitted.
            _logger.error('axis %s is not defined yet, cannot define a size '
                          'for it', axis)
        else:
            self.__dims[axis] = Dimension(name=self.__dims[axis].name,
                                          kind=self.__dims[axis].kind,
                                          size=size)

    def __iter__(self):
        # Yield (axis, Dimension) pairs.
        for iAxis, dim in self.__dims.items():
            yield (iAxis, dim)


class Dimension(object):
    """
    Define a dimension used during the dataset.

    :param int or str kind: metadata type in fabioh5 mapping
    :param str name: name of the dimension (should fit the fabioh5 mapping
                     for now)
    :param int or None size: length of the dimension.
    :param float tolerance: relative tolerance used to merge nearly-equal
                            values when finding unique values.
    """
    def __init__(self, kind, name, size=None, tolerance=1e-09):
        if type(kind) is str:
            # Translate the user-facing name into the fabioh5 id.
            assert kind in _METADATA_TYPES
            self.__kind = _METADATA_TYPES[kind]
        else:
            self.__kind = kind
        self.__name = name
        self._size = size
        self._tolerance = tolerance
        self.__unique_values = []
        # Position of the first change of value along the frame order; used
        # to sort dimensions for reshaping ("ordered values through the
        # dimension").
        self.changing_value = 0

    @property
    def kind(self):
        return self.__kind

    def _setKind(self, kind):
        self.__kind = kind

    @property
    def name(self):
        return self.__name

    def _setName(self, name):
        self.__name = name

    @property
    def size(self):
        return self._size

    def _setSize(self, size):
        self._size = size

    @property
    def tolerance(self):
        return self._tolerance

    def _setTolerance(self, tolerance):
        assert isinstance(tolerance, float), "Tolerance has to be float number"
        self._tolerance = tolerance

    @property
    def unique_values(self):
        return self.__unique_values

    def _find_unique_values(self, values):
        """
        Function that compares the values passed as parameter and returns only the unique
        ones given the dimension's tolerance.

        :param array_like values: list of values to compare.
        :returns: list of unique values, or *values* unchanged when they are
                  not all real numbers (tolerance comparison is meaningless).
        """
        import math

        if not numpy.all(numpy.isreal(values)):
            return values

        unique_values = []
        for val in values:
            # Keep val only if it is not close to any value already kept.
            if not any(math.isclose(known, val, rel_tol=self.tolerance)
                       for known in unique_values):
                unique_values.append(val)
        return unique_values

    def setUniqueValues(self, values):
        """
        Sets the unique values of the dimension. If the size of the dimension is fixed,
        it automatically sets the first size values, else it finds the unique values.

        :param array_like values: list of values.
        """
        if self.size:
            self.__unique_values = values[:self.size]
        else:
            self.__unique_values = self._find_unique_values(values)
        self._setSize(len(self.__unique_values))

    def __str__(self):
        return " ".join((str(self.kind), str(self.name), 'size:', str(self.size)))

    def to_dict(self):
        """Translate the current Dimension to a dictionary"""
        return {
            'name': self.name,
            'kind': self.kind,
            'size': self.size,
            'tolerance': self.tolerance
        }

    @staticmethod
    def from_dict(_dict):
        """
        :param dict _dict: dict defining the dimension. Should contain the
                           following keys: name, kind, size, tolerance.
                           Unique values are not stored into it because it
                           depends on the metadata and should be obtained from a
                           fit / set_dims
        :return: Dimension corresponding to the dict given
        :rtype: :class:`Dimension`
        :raises ValueError: if any required key is missing.
        """
        assert type(_dict) is dict
        # Bug fix: the original appended the list itself (missing_keys) instead
        # of the missing key name, and never interpolated the %s placeholder
        # (the two adjacent literals also concatenated to "createa valid Dim").
        missing_keys = [key for key in ('name', 'kind', 'size', 'tolerance')
                        if key not in _dict]
        if missing_keys:
            raise ValueError('There are some missing keys (%s), unable to '
                             'create a valid Dim' % ', '.join(missing_keys))
        return Dimension(name=_dict['name'],
                         kind=_dict['kind'],
                         size=_dict['size'],
                         tolerance=_dict['tolerance'])