# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016-2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/


__authors__ = ["J. Garriga"]
__license__ = "MIT"
__date__ = "09/07/2021"

import copy
import glob
import logging
import multiprocessing
import os
import threading
from enum import IntEnum
from functools import partial
from multiprocessing import Pool

import fabio
import h5py
import numpy
from silx.io import fabioh5
from silx.io import utils
from silx.io.url import DataUrl

from darfix.core.dimension import AcquisitionDims, Dimension, POSITIONER_METADATA
from darfix.core.imageOperations import (Method, background_subtraction,
                                         background_subtraction_2D, chunk_image,
                                         hot_pixel_removal_2D, hot_pixel_removal_3D,
                                         img2img_mean, threshold_removal)
from darfix.core.imageRegistration import shift_detection, apply_shift
from darfix.core.mapping import fit_data, compute_moments, compute_rsm, compute_magnification
from darfix.core.roi import apply_2D_ROI, apply_3D_ROI
from darfix.core.utils import wrapTo2pi
from darfix.decomposition.ipca import IPCA
from darfix.decomposition.nmf import NMF
from darfix.decomposition.nica import NICA
from darfix.io import utils as io_utils

_logger = logging.getLogger(__file__)

class Operation(IntEnum):
    """
    Flags for different operations in Dataset
    """
    PARTITION = 0
    BS = 1
    HP = 2
    THRESHOLD = 3
    SHIFT = 4
    ROI = 5
    MOMENTS = 6
    FIT = 7


class Dataset():
    """Class to define a dataset from a series of data files.

    :param _dir: Global directory to use and save all the data in the different
        operations.
    :type _dir: str
    :param data: If not None, sets the Data array with the data of the dataset
        to use, defaults to None
    :type data: :class:`Data`, optional
    :param first_filename: Name of the first file of the series, defaults to None.
    :type first_filename: Union[None, str], optional
    :param filenames: Ordered list of filenames, defaults to None.
    :type filenames: Union[Generator,Iterator,List], optional
    :param dims: Dimensions of the dataset, defaults to None.
    :type dims: Union[None, `AcquisitionDims`], optional
    :param transformation: Transformation to apply to the data axes, defaults to None.
    :type transformation: Union[None, array_like], optional
    :param in_memory: If True, data is loaded into memory, else, data is read in
        chunks depending on the algorithm to apply, defaults to True.
    :type in_memory: bool, optional
    :param copy_files: If True, the data of the different operations is saved
        into a `treated` subdirectory created inside `_dir`, defaults to False.
    :type copy_files: bool, optional
    """
    def __init__(self, _dir, data=None, first_filename=None, filenames=None,
                 dims=None, transformation=None, in_memory=True, copy_files=False):

        self._data = None
        self._frames_intensity = []
        self._lock = threading.Lock()
        self.running_data = None
        self.moments_dims = {}
        self.operations_state = numpy.zeros(len(Operation))
        self._dir = _dir
        self._transformation = transformation

        if copy_files:
            self._dir += "/treated"
            if not os.path.isdir(self._dir):
                try:
                    os.mkdir(self._dir)
                except PermissionError:
                    raise PermissionError("Add a directory in the Treated Data tab with WRITE permission")

        if dims is None:
            self.__dims = AcquisitionDims()
        else:
            assert isinstance(dims, AcquisitionDims), "Attribute dims has to be of class AcquisitionDims"
            self.__dims = dims
        # Keys: dimensions names, values: dimensions values
        self._dimensions_values = {}
        self._in_memory = in_memory

        if data is not None:
            self._data = data
        else:
            if filenames is None and first_filename is None:
                filenames = sorted([x for x in glob.glob(_dir + "/*") if os.path.isfile(x)])
            self.filenames = filenames
            self.first_filename = first_filename

            metadata = []
            data_urls = []

            with fabio.open_series(filenames=filenames, first_filename=first_filename) as series:
                for frame in series.frames():
                    filename = frame.file_container.filename
                    data_urls.append(DataUrl(file_path=filename,
                                             scheme='fabio'))
                    fabio_reader = fabioh5.EdfFabioReader(file_name=filename)
                    metadata.append(fabio_reader)
                    fabio_reader.close()

            self._data = Data(numpy.array(data_urls), metadata=metadata, in_memory=self._in_memory)

    def stop_operation(self, operation):
        """
        Method used for cases where threads are created to apply functions to the dataset.
        When this method is called, the flag of the given operation is set to 0 so that,
        if the operation is running in another thread, it knows to stop.

        :param operation: operation to stop
        :type operation: Union[int, `Operation`]
        """
        if self.operations_state[operation]:
            self._lock.acquire()
            self.operations_state[operation] = 0
            self._lock.release()
        if self.running_data is not None:
            self.running_data.stop_operation(operation)
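
    # Illustrative sketch: operations that poll `operations_state` can be
    # aborted from another thread (`dataset` is an assumed, already-built
    # Dataset instance).
    #
    #     import threading
    #     t = threading.Thread(target=dataset.compute_frames_intensity)
    #     t.start()
    #     dataset.stop_operation(Operation.PARTITION)  # the loop returns early
    #     t.join()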

    @property
    def transformation(self):
        return self._transformation

    @transformation.setter
    def transformation(self, value):
        self._transformation = value

    @property
    def dir(self):
        return self._dir

    @property
    def treated(self):
        return self._treated

    def compute_frames_intensity(self, kernel=(3, 3), sigma=20):
        """
        Returns for every image a number representing its intensity. This number
        is obtained by first blurring the image and then computing its variance.
        """
        import cv2

        _logger.info("Computing intensity per frame")
        io_utils.advancement_display(0, self.nframes, "Computing intensity")
        frames_intensity = []

        self._lock.acquire()
        self.operations_state[Operation.PARTITION] = 1
        self._lock.release()

        for i in range(self.nframes):
            if not self.operations_state[Operation.PARTITION]:
                return
            frames_intensity.append(cv2.GaussianBlur(self.get_data(i), kernel, sigma).var())
            io_utils.advancement_display(i + 1, self.nframes, "Computing intensity")
        self._frames_intensity = frames_intensity

        self._lock.acquire()
        self.operations_state[Operation.PARTITION] = 0
        self._lock.release()

        return self._frames_intensity

    def partition_by_intensity(self, bins=None, num_bins=1):
        """
        Partitions the frames into high- and low-intensity sets:
        -- First, computes the intensity of each frame, by calculating the variance
        after passing a gaussian filter.
        -- Second, computes the histogram of the intensities.
        -- Finally, returns the indices of the frames with an intensity higher than
        the threshold, together with the indices of the remaining frames.
        The threshold is set to be the value of bin `num_bins` of the histogram.

        :param bins: Bins to use for the histogram. If None, `self.nframes` bins are used.
        :type bins: Union[None, int, array_like]
        :param int num_bins: Index of the histogram bin to use as threshold.
        :returns: Tuple of two arrays with the indices of the high-intensity and
            low-intensity frames.
        """
        frames_intensity = self._frames_intensity if self._frames_intensity else self.compute_frames_intensity()
        if frames_intensity is None:
            return
        values, bins = numpy.histogram(frames_intensity, self.nframes if bins is None else bins)
        threshold = numpy.array(frames_intensity) >= bins[num_bins]
        return numpy.flatnonzero(threshold), numpy.flatnonzero(~threshold)
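
    # Illustrative sketch: split the frames into high- and low-intensity sets
    # and keep only the bright frames (`dataset` is an assumed instance).
    #
    #     high_idx, low_idx = dataset.partition_by_intensity(num_bins=1)
    #     bright = dataset.get_data(indices=high_idx)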

    @property
    def in_memory(self):
        return self._in_memory

    @in_memory.setter
    def in_memory(self, in_memory):
        """
        Sets whether the data has to be loaded into memory or read from disk.
        When the value changes, the data is recreated with the new mode.
        """
        if self._in_memory is not in_memory:
            self._in_memory = in_memory
            self._data = Data(self.data.urls, self.data.metadata, self._in_memory)

    @property
    def data(self):
        return self._data

    def get_data(self, indices=None, dimension=None, return_indices=False):
        """
        Returns the data corresponding to certain indices and given some dimension values.
        The data is always flattened to be a stack of images.

        :param array_like indices: If not None, data is filtered using this array.
        :param array_like dimension: If not None, return only the data corresponding to
            the given dimension. Dimension is a 2d vector, where the first component is
            a list of the axes and the second is a list of the indices of the values to
            extract. The dimension and value lists can have length up to the number of
            dimensions - 1. The call get_data(dimension=[[1, 2], [2, 3]]) is equivalent
            to data[:, 2, 3] when data is in memory. The axes of the dimensions are such
            that the lower the axis, the faster the dimension (the more often its value
            changes along the dataset).
        :return: Array with the new data.
        """
        if dimension is not None and len(self._data.shape) > 3:
            # Make sure dimension and value are lists
            if type(dimension[0]) is int:
                dimension[0] = [dimension[0]]
                dimension[1] = [dimension[1]]
            data = self.data
            indices = numpy.arange(self.nframes) if indices is None else indices
            # Init list of bool indices
            bool_indices = numpy.zeros(self.data.flatten().shape[:-2], dtype=bool)
            bool_indices[indices] = True
            bool_indices = bool_indices.reshape(self.data.shape[:-2])
            indx = numpy.arange(self.nframes).reshape(self.data.shape[:-2])
            # For every axis, get corresponding elements
            for i, dim in enumerate(sorted(dimension[0])):
                # Flip axis to be consistent with the data shape
                axis = self.dims.ndim - dim - 1
                data = data.take(indices=dimension[1][i], axis=axis)
                bool_indices = bool_indices.take(indices=dimension[1][i], axis=axis)
                indx = indx.take(indices=dimension[1][i], axis=axis)
            data = data[bool_indices]
            indx = indx[bool_indices]
            if return_indices:
                return data.flatten(), indx.flatten()
            return data.flatten()
        else:
            data = self.data.flatten()
            if return_indices:
                return (data, numpy.arange(len(data))) if indices is None else (data[indices], indices)
            return data if indices is None else data[indices]
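
    # Illustrative sketch: with a dataset reshaped to two dimensions, select
    # the frames at value index 2 of dimension 1 together with their flat
    # frame indices (`dataset` is an assumed instance).
    #
    #     frames, flat_idx = dataset.get_data(dimension=[1, 2], return_indices=True)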
    @property
    def nframes(self):
        """
        Return number of frames
        """
        if self.data is None:
            return 0
        else:
            return len(self.data.flatten())

    def to_memory(self, indices):
        """
        Method to load only part of the data into memory.
        Returns a new dataset with the data corresponding to the given indices
        loaded into memory. If all the data has to be loaded into memory, set
        `in_memory` to True instead; this way no new dataset is created.

        :param array_like indices: Indices of the new dataset.
        """
        if not self._in_memory:
            data = self.get_data(indices)
            new_data = Data(data.urls, data.metadata, True)
        else:
            new_data = self.get_data(indices)
        return Dataset(_dir=self._dir, data=new_data, dims=self.__dims, in_memory=True)

    @property
    def dims(self):
        return self.__dims

    @dims.setter
    def dims(self, _dims):
        assert isinstance(_dims, AcquisitionDims), "Dimensions dictionary has " \
            "to be of class `AcquisitionDims`"
        self.__dims = _dims

    def clear_dims(self):
        self.__dims = AcquisitionDims()

    def add_dim(self, axis, dim):
        """
        Adds a dimension to the dimensions dictionary.

        :param int axis: axis of the dimension.
        :param `Dimension` dim: dimension to be added.
        """
        self.__dims.add_dim(axis, dim)

    def remove_dim(self, axis):
        """
        Removes a dimension from the dimensions dictionary.

        :param int axis: axis of the dimension.
        """
        self.__dims.remove_dim(axis)

    def zsum(self, indices=None, dimension=None):
        """
        Sum of the selected frames along the stack axis.
        """
        data = self.get_data(indices, dimension)
        return data.sum(axis=0)

    def reshape_data(self):
        """
        Function that reshapes the data to fit the dimensions.
        """
        if self.__dims.ndim == 1 and self.__dims.get(0).size == self.nframes:
            return self
        elif self.__dims.ndim > 1:
            try:
                shape = list(self.__dims.shape)
                shape.append(self.get_data().shape[-2])
                shape.append(self.get_data().shape[-1])
                data = self.get_data().reshape(shape)
                return Dataset(_dir=self.dir, data=data, dims=self.__dims, in_memory=self._in_memory)
            except Exception:
                raise ValueError("Failed to reshape data into dimensions {}. "
                                 "Try using another tolerance value.".format(" ".join(self.__dims.get_names())))
        else:
            raise ValueError("Not enough dimensions were found")

    def find_dimensions(self, kind, tolerance=1e-9):
        """
        Goes over all the headers from a given kind and finds the dimensions
        that move (have more than one value) along the data.

        Note: Before, only the dimensions that could fit were shown; now all the
        dimensions are shown and the user chooses the valid ones.

        :param int kind: Kind of metadata in which to find the dimensions.
        :param float tolerance: Tolerance that will be used to compute the
            unique values.
        """
        self.__dims.clear()
        self._dimensions_values = {}

        keys = numpy.array(list(self.data.metadata[0].get_keys(kind)))
        values = numpy.array([[data.get_value(kind=kind, name=key)[0] for data
                             in self.data.metadata] for key in keys])
        # Unique values for each key.
        unique_values = [numpy.unique(value, return_counts=True) for value in values]
        dimensions = []
        dataset_size = len(self.data.metadata)
        # For every key that has more than one different value, create a new Dimension.
        for i, value in enumerate(unique_values):
            if value[1][0] != dataset_size:
                dimension = Dimension(kind, keys[i], tolerance=tolerance)
                dimension.set_unique_values(numpy.unique(value[0]))
                # The changing value tells how fast the dimension changes along
                # the dataset. It is used to know the order of the reshaping.
                dimension.changing_value = self.__compute_changing_value(values[i])
                dimensions.append(dimension)

        for dimension in sorted(dimensions, key=lambda x: x.changing_value, reverse=True):
            self.__dims.add_dim(axis=self.__dims.ndim, dim=dimension)
            _logger.info("Dimension {} of size {} has been added for reshaping"
                         .format(dimension.name, dimension.size))

    def get_metadata_values(self, kind, key):
        return numpy.unique([data.get_value(kind=kind, name=key)[0] for data
                             in self.get_data().metadata])

    def __compute_changing_value(self, values, changing_value=1):
        """
        Recursive method used to calculate how fast a dimension changes. The
        faster the dimension changes its value while moving through the dataset,
        the higher its changing value.
        """
        if len(numpy.unique(values)) > 1:
            return self.__compute_changing_value(values[:int(len(values) / 2)], changing_value + 1)
        else:
            return changing_value
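
    # Worked example: for values [0, 0, 1, 1] the first half [0, 0] holds a
    # single unique value, so the recursion stops after one halving and returns
    # 2; for [0, 1, 0, 1] the halving goes one level deeper and returns 3.
    # Faster-changing dimensions therefore get higher changing values.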

    def get_dimensions_values(self, indices=None):
        """
        Returns all the metadata values of the dimensions.
        The values are assumed to be numbers.

        :returns: array_like
        """
        if not self._dimensions_values:
            data = self.get_data(indices).metadata
            for dimension in self.__dims:
                values = numpy.empty((len(data)))
                for row, metadata_frame in enumerate(data):
                    values[row] = (metadata_frame.get_value(kind=dimension[1].kind,
                                   name=dimension[1].name)[0])
                self._dimensions_values[dimension[1].name] = values
        return self._dimensions_values

    def compute_mosaicity_colorkey(self, scale=100):
        """
        Computes a mosaicity colorkey from the dimensions, and returns also the
        orientation distribution image.
        """
        ori_dist = self.get_data().sum(axis=1)

        if self.__dims:
            new_shape = []
            dims_list = []
            for axis, dim in self.__dims:
                dims_list += [numpy.linspace(-1, 1, (dim.size - 1) * scale)]
                new_shape += [dim.size]

            dims_list.reverse()
            mesh = numpy.meshgrid(*dims_list)
            if len(mesh) == 2:
                data = numpy.arctan2(mesh[0], mesh[1])
                normalized_data = wrapTo2pi(data) / numpy.pi / 2
                sqr = numpy.sqrt(numpy.power(
                    mesh[0], 2) + numpy.power(mesh[1], 2)) / numpy.sqrt(2)
                hsv_key = numpy.stack((normalized_data, sqr, numpy.ones(
                    (len(dims_list[1]), len(dims_list[0])))), axis=2)
                return ori_dist.reshape(numpy.flip(new_shape)).T, hsv_key
        return None, None
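
    # Illustrative sketch: display the colorkey by converting the returned HSV
    # image to RGB (matplotlib is an assumed extra dependency here).
    #
    #     from matplotlib.colors import hsv_to_rgb
    #     ori_dist, hsv_key = dataset.compute_mosaicity_colorkey()
    #     rgb_key = hsv_to_rgb(hsv_key)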
    def apply_background_subtraction(self, background=None, method="median", indices=None,
                                     step=None, chunk_shape=[100, 100], _dir=None):
        """
        Applies background subtraction to the data and saves the new data
        to disk.

        :param background: Data to be used as background. If None, data with indices `indices` is used.
            If Dataset, data of the dataset is used. If array, use data with indices in the array.
        :type background: Union[None, array_like, Dataset]
        :param method: Method to use to compute the background.
        :type method: Method
        :param indices: Indices of the images to apply background subtraction to.
            If None, the background subtraction is applied to all the data.
        :type indices: Union[None, array_like]
        :param int step: Distance between images to be used when computing the median.
            Parameter used only when flag in_memory is False and method is `Method.median`.
            If `step` is not None, all images with distance of `step`, starting at 0,
            will be loaded into memory for computing the median. If the data loading
            raises a MemoryError, the median computation is retried with `step += 1`.
        :param chunk_shape: Shape of the chunk image to use per iteration.
            Parameter used only when flag in_memory is False and method is `Method.median`.
        :type chunk_shape: array_like
        :param _dir: Directory path for the new dataset. If None, `self.dir` is used.
        :type _dir: Union[None, str]
        :returns: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        :rtype: Dataset
        """
        _dir = self.dir if _dir is None else _dir

        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        method = Method.from_value(method)
        self.running_data = self.get_data(indices)

        self._lock.acquire()
        self.operations_state[Operation.BS] = 1
        self._lock.release()

        if background is None:
            bg_data = self.running_data
            if indices is None:
                _logger.info("Computing background from " + method.name + " of raw data")
            else:
                _logger.info("Computing background from " + method.name + " of high intensity data")
        elif isinstance(background, Dataset):
            bg_data = background.data
            _logger.info("Computing background from " + method.name + " of `background` set")
        else:
            bg_data = self.get_data(background)
            _logger.info("Computing background from " + method.name + " of low intensity data")

        if self._in_memory:
            new_data = background_subtraction(self.running_data, bg_data, method).view(Data)
            new_data.save(_dir + "/data.hdf5")
            urls = new_data.urls
        else:
            bg = numpy.zeros(self.running_data[0].shape, self.running_data.dtype)
            if method == Method.mean:
                if bg_data.in_memory:
                    numpy.mean(bg_data, out=bg, axis=0)
                else:
                    io_utils.advancement_display(0, len(bg_data), "Computing mean image")
                    for i in range(len(bg_data)):
                        if not self.operations_state[Operation.BS]:
                            return
                        bg = img2img_mean(bg_data[i], bg, i)
                        io_utils.advancement_display(i + 1, len(bg_data), "Computing mean image")
            elif method == Method.median:
                if bg_data.in_memory:
                    numpy.median(bg_data, out=bg, axis=0)
                else:
                    if step is not None:
                        bg_indices = numpy.arange(0, len(bg_data), step)
                        try:
                            numpy.median(
                                Data(bg_data.urls[bg_indices], bg_data.metadata[bg_indices]),
                                out=bg, axis=0)
                        except MemoryError:
                            if not self.operations_state[Operation.BS]:
                                return
                            _logger.warning("MemoryError, trying with step {}".format(step + 1))
                            return self.apply_background_subtraction(background, method, indices, step + 1)
                    else:
                        start = [0, 0]
                        img = self.running_data[0]
                        bg = numpy.empty(img.shape, img.dtype)
                        chunks_1 = int(numpy.ceil(img.shape[0] / chunk_shape[0]))
                        chunks_2 = int(numpy.ceil(img.shape[1] / chunk_shape[1]))
                        io_utils.advancement_display(0, chunks_1 * chunks_2, "Computing median image")
                        for i in range(chunks_1):
                            for j in range(chunks_2):
                                if not self.operations_state[Operation.BS]:
                                    return
                                cpus = multiprocessing.cpu_count()
                                with Pool(cpus - 1) as p:
                                    c_images = p.map(partial(chunk_image, start, chunk_shape), bg_data)
                                io_utils.advancement_display(
                                    i * chunks_2 + j + 1,
                                    chunks_1 * chunks_2,
                                    "Computing median image")
                                numpy.median(c_images, out=bg[start[0]: start[0] + chunk_shape[0],
                                             start[1]:start[1] + chunk_shape[1]], axis=0)
                                start[1] = chunk_shape[1] * (j + 1)
                            start[0] = chunk_shape[0] * (i + 1)
                            start[1] = 0

            if not self.operations_state[Operation.BS]:
                return

            urls = self.running_data.apply_funcs([(background_subtraction_2D, [bg])],
                                                 save=_dir + "/data.hdf5", text="Applying background subtraction",
                                                 operation=Operation.BS)
            if urls is None:
                return

        self._lock.acquire()
        self.operations_state[Operation.BS] = 0
        self._lock.release()

        # Set urls to the shape and dimensions of the original urls.
        if indices is not None:
            new_urls = numpy.array(self.get_data().urls, dtype=object)
            new_urls[indices] = urls
            new_data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)
        else:
            new_data = Data(urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)

        return Dataset(_dir=_dir, data=new_data, dims=self.__dims, transformation=self.transformation,
                       in_memory=self._in_memory)
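
    # Illustrative sketch: use the low-intensity frames as background for the
    # high-intensity ones (`dataset` is an assumed instance).
    #
    #     high_idx, low_idx = dataset.partition_by_intensity()
    #     dataset = dataset.apply_background_subtraction(background=low_idx,
    #                                                    method="median",
    #                                                    indices=high_idx)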
    def apply_hot_pixel_removal(self, kernel=3, indices=None, _dir=None):
        """
        Applies hot pixel removal to Data, and saves the new data
        to disk.

        :param int kernel: size of the kernel used to find the hot
            pixels.
        :param indices: Indices of the images to apply hot pixel removal to.
            If None, the hot pixel removal is applied to all the data.
        :type indices: Union[None, array_like]
        :return: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        :rtype: Dataset
        """

        self.running_data = self.get_data(indices)

        _dir = self.dir if _dir is None else _dir

        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        if self._in_memory:
            new_data = hot_pixel_removal_3D(self.running_data, kernel).view(Data)
            new_data.save(_dir + "/data.hdf5")
            urls = new_data.urls
        else:
            urls = self.running_data.apply_funcs([(hot_pixel_removal_2D, [kernel])],
                                                 save=_dir + "/data.hdf5", text="Applying hot pixel removal",
                                                 operation=Operation.HP)
            if urls is None:
                return

        if indices is not None:
            new_urls = numpy.array(self.get_data().urls, dtype=object)
            new_urls[indices] = urls
            new_data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)
        else:
            new_data = Data(urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)

        return Dataset(_dir=_dir, data=new_data, dims=self.__dims, transformation=self.transformation,
                       in_memory=self._in_memory)
    def apply_threshold_removal(self, bottom=None, top=None, indices=None, _dir=None):
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
        """
        Applies bottom threshold to Data, and saves the new data
        into disk.

        :param int bottom: bottom threshold to apply.
        :param int top: top threshold to apply.
        :param indices: Indices of the images to apply background subtraction.
            If None, the hot pixel removal is applied to all the data.
        :type indices: Union[None, array_like]
        :return: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        :rtype: Dataset
        """

        self.running_data = self.get_data(indices)

        _dir = self.dir if _dir is None else _dir

        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        if self._in_memory:
            new_data = threshold_removal(self.running_data, bottom, top).view(Data)
            new_data.save(_dir + "/data.hdf5")
            urls = new_data.urls
        else:
            urls = self.running_data.apply_funcs([(threshold_removal, [bottom, top])],
                                                 save=_dir + "/data.hdf5", text="Applying threshold",
                                                 operation=Operation.THRESHOLD)
            if urls is None:
                return

        if indices is not None:
            new_urls = numpy.array(self.get_data().urls, dtype=object)
            new_urls[indices] = urls
            new_data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)
        else:
            new_data = Data(urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)

        return Dataset(_dir=_dir, data=new_data, dims=self.__dims, transformation=self.transformation,
                       in_memory=self._in_memory)
    def apply_roi(self, origin=None, size=None, center=None, indices=None, roi_dir=None):
        """
        Applies a region of interest to the data.

        :param origin: Origin of the roi
        :param size: [Height, Width] of the roi.
        :param center: Center of the roi
        :type origin: Union[2d vector, None]
        :type center: Union[2d vector, None]
        :type size: Union[2d vector, None]
        :param indices: Indices of the images to apply the roi to.
            If None, the roi is applied to all the data.
        :type indices: Union[None, array_like]
        :param roi_dir: Directory path for the new dataset
        :type roi_dir: str
        :returns: dataset with the roi applied to its data.
            Note: To preserve consistency of shape between images, if `indices`
            is not None, only the modified data is returned.
        :rtype: Dataset
        """

        roi_dir = self.dir if roi_dir is None else roi_dir
        if not os.path.isdir(roi_dir):
            os.mkdir(roi_dir)

        self.running_data = self.get_data(indices)
        if self._in_memory:
            new_data = apply_3D_ROI(self.running_data, origin, size, center).view(Data)
            new_data.save(roi_dir + "/data.hdf5", new_shape=new_data.shape)
        else:
            shape = numpy.append([self.running_data.shape[0]], apply_2D_ROI(self.running_data[0], origin, size, center).shape)
            urls = self.running_data.apply_funcs([(apply_2D_ROI, [origin, size, center])],
                                                 save=roi_dir + "/data.hdf5", text="Applying roi",
                                                 operation=Operation.ROI,
                                                 new_shape=shape)
            if urls is None:
                return
            new_data = Data(urls, self.running_data.metadata, self._in_memory)

        transformation = ([apply_2D_ROI(axis, origin, size, center) for axis in self.transformation]
                          if self.transformation else None)

        if indices is None:
            shape = list(self.data.shape)[:-2]
            shape.append(new_data.shape[-2])
            shape.append(new_data.shape[-1])
            new_data = new_data.reshape(shape)
        return Dataset(_dir=roi_dir, data=new_data, dims=self.__dims, transformation=transformation,
                       in_memory=self._in_memory)
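
    # Illustrative sketch: crop every frame to a 100x100 region anchored at the
    # top-left corner of the detector images (`dataset` is an assumed instance).
    #
    #     cropped = dataset.apply_roi(origin=[0, 0], size=[100, 100])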
    def find_shift(self, dimension=None, steps=50, indices=None):
        """
        Find shift of the data or part of it.

        :param dimension: Parameters with the position of the data in the reshaped
            array.
        :type dimension: Union[None, tuple, array_like]
        :param int steps: See `core.imageRegistration.shift_detection`.
        :param indices: Boolean index list with True in the images to apply the shift to.
            If None, the shift detection is applied to all the data.
        :type indices: Union[None, array_like]
        :returns: Array with shift per frame.
        """
        return shift_detection(self.get_data(indices, dimension), steps)
    def find_shift_along_dimension(self, dimension, steps=50, indices=None):
        """
        Find the shift for each value of a given dimension.

        :param dimension: Axis of the dimension to iterate over, given as `[axis]`.
        :param int steps: See `find_shift`.
        :param indices: See `find_shift`.
        :returns: Array with one shift entry per value of the dimension.
        """
        shift = []
        for value in range(self.dims.get(dimension[0]).size):
            s = self.find_shift([dimension[0], value], steps, indices)
            shift.append(s)

        return numpy.array(shift)

    def apply_shift_along_dimension(self, shift, dimension, shift_approach="fft", indices=None,
                                    callback=None, _dir=None):
        """
        Apply, for each value of a given dimension, the corresponding shift.

        :param array_like shift: Shift to apply per value of the dimension.
        :param dimension: Axis of the dimension to iterate over, given as `[axis]`.
        :param Union['fft', 'linear'] shift_approach: Method to use to apply the shift.
        :param indices: See `apply_shift`.
        :param Union[function, None] callback: Callback
        :returns: Dataset with the shift applied.
        """
        dataset = self
        for value in range(self.dims.get(dimension[0]).size):
            data, rindices = self.get_data(indices=indices, dimension=[dimension[0], value],
                                           return_indices=True)
            frames = numpy.arange(self.get_data(indices=indices,
                                  dimension=[dimension[0], value]).shape[0])
            dataset = dataset.apply_shift(numpy.outer(shift[value], frames), [dimension[0], value],
                                          shift_approach, indices, callback, _dir)

        return dataset
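
    # Illustrative sketch: detect and correct the shift along dimension 0, one
    # value of the dimension at a time (`dataset` is an assumed instance).
    #
    #     shift = dataset.find_shift_along_dimension([0], steps=50)
    #     dataset = dataset.apply_shift_along_dimension(shift, [0])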

    def apply_shift(self, shift, dimension=None, shift_approach="fft", indices=None,
                    callback=None, _dir=None):
        """
        Apply shift of the data or part of it and save new data to disk.

        :param array_like shift: Shift per frame.
        :param dimension: Parameters with the position of the data in the reshaped
            array.
        :type dimension: Union[None, tuple, array_like]
        :param Union['fft', 'linear'] shift_approach: Method to use to apply the shift.
        :param indices: Boolean index list with True in the images to apply the shift to.
            If None, the shift is applied to all the data.
        :type indices: Union[None, array_like]
        :param Union[function, None] callback: Callback
        :returns: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        """
        assert len(shift) > 0, "Shift list can't be empty"

        if not numpy.any(shift):
            return self

        _dir = self.dir if _dir is None else _dir

        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        data, rindices = self.get_data(indices, dimension, return_indices=True)

        self._lock.acquire()
        self.operations_state[Operation.SHIFT] = 1
        self._lock.release()

        _file = h5py.File(_dir + '/data.hdf5', 'a')
        dataset_name = "dataset"
        if "dataset" in _file:
            _file.create_dataset("update_dataset", data=_file["dataset"])
            dataset_name = "update_dataset"
        else:
            _file.create_dataset("dataset", self.get_data().shape, dtype=self.data.dtype)

        io_utils.advancement_display(0, len(data), "Applying shift")
        if dimension is not None:
            # Convert dimension and value into lists
            if type(dimension[0]) is int:
                dimension[0] = [dimension[0]]
                dimension[1] = [dimension[1]]
            urls = []
            for i, idx in enumerate(rindices):
                if not self.operations_state[Operation.SHIFT]:
                    del _file["update_dataset"]
                    return
                img = apply_shift(data[i], shift[:, i], shift_approach)
                # Switch to the linear approach once every component of the shift exceeds one pixel
                if (shift[:, i] > 1).all():
                    shift_approach = "linear"
                _file[dataset_name][idx] = img
                urls.append(DataUrl(file_path=_dir + '/data.hdf5', data_path="/dataset", data_slice=idx, scheme='silx'))
                io_utils.advancement_display(i + 1, len(rindices), "Applying shift")

            # Replace the specific urls that correspond to the modified data
            new_urls = numpy.array(self.data.urls, dtype=object)
            copy_urls = new_urls
            if indices is not None:
                # Create an array of booleans to know which indices we have
                bool_indices = numpy.zeros(self.get_data().shape[:-2], dtype=bool)
                bool_indices[indices] = True
                bool_indices = bool_indices.reshape(self.data.shape[:-2])