dataset.py 71.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016-2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/


__authors__ = ["J. Garriga"]
__license__ = "MIT"
29
__date__ = "05/07/2021"
30

31
import copy
32
import glob
33
import logging
34
35
import os
import threading
36
37
38
import multiprocessing
from functools import partial
from multiprocessing import Pool
39
40
41
42
from enum import IntEnum

import h5py
import numpy
43

44
import fabio
45
from silx.io import utils
46
from silx.io import fabioh5
47
48
from silx.io.url import DataUrl

49
from darfix.core.dimension import AcquisitionDims, Dimension, POSITIONER_METADATA
50
from darfix.core.imageOperations import img2img_mean, chunk_image
51
52
from darfix.core.imageOperations import background_subtraction, background_subtraction_2D
from darfix.core.imageOperations import hot_pixel_removal_3D, hot_pixel_removal_2D
53
from darfix.core.imageOperations import threshold_removal
54
from darfix.core.imageOperations import Method
55
from darfix.core.imageRegistration import shift_detection, apply_shift
56
from darfix.core.mapping import fit_data, compute_moments, compute_rsm, compute_magnification
57
from darfix.core.roi import apply_2D_ROI, apply_3D_ROI
58
from darfix.core.utils import wrapTo2pi
59
from darfix.decomposition.ipca import IPCA
60
from darfix.decomposition.nmf import NMF
61
from darfix.decomposition.nica import NICA
62
from darfix.io import utils as io_utils
63

64
65
_logger = logging.getLogger(__file__)

Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
66

67
68
class Operation(IntEnum):
    """
    Flags for different operations in Dataset.

    Each member indexes one slot of ``Dataset.operations_state``: the slot is
    set to 1 while the corresponding operation is running and cleared to 0
    (see ``Dataset.stop_operation``) to request the running operation to stop.
    """
    PARTITION = 0
    BS = 1
    HP = 2
    THRESHOLD = 3
    SHIFT = 4
    ROI = 5
    MOMENTS = 6
    FIT = 7
79

80

81
class Dataset():
    """Class to define a dataset from a series of data files.

    :param _dir: Global directory to use and save all the data in the different
        operations.
    :type _dir: str
    :param data: If not None, sets the Data array with the data of the dataset
        to use, default None
    :type data: :class:`Data`, optional
    :param first_filename: First file of a series to open with fabio, used
        when `data` and `filenames` are not given, defaults to None.
    :type first_filename: Union[None, str], optional
    :param filenames: Ordered list of filenames, defaults to None.
    :type filenames: Union[Generator,Iterator,List], optional
    :param dims: Dimensions of the dataset, defaults to None.
    :type dims: Union[None, AcquisitionDims], optional
    :param transformation: Transformation kept with the dataset and exposed
        through the `transformation` property, defaults to None.
    :param in_memory: If True, data is loaded into memory, else, data is read in
        chunks depending on the algorithm to apply, defaults to True.
    :type in_memory: bool, optional
    :param copy_files: If True, output is written into a "treated" subfolder
        of `_dir` (created if missing), defaults to False.
    :type copy_files: bool, optional
    """
101

102
    def __init__(self, _dir, data=None, first_filename=None, filenames=None,
                 dims=None, transformation=None, in_memory=True, copy_files=False):
        """See the class docstring for the description of the parameters."""
        self._data = None
        self._frames_intensity = []
        # Lock protecting `operations_state` against concurrent worker threads.
        self._lock = threading.Lock()
        # Data currently being processed by a (possibly threaded) operation.
        self.running_data = None
        self.moments_dims = {}
        # One running/stopped flag per entry of the `Operation` enum.
        self.operations_state = numpy.zeros(len(Operation))
        self._dir = _dir
        self._transformation = transformation

        if copy_files:
            # Processed data is written into a "treated" subfolder of `_dir`.
            self._dir += "/treated"
            if not os.path.isdir(self._dir):
                try:
                    os.mkdir(self._dir)
                except PermissionError:
                    raise PermissionError("Add a directory in the Treated Data tab with WRITE permission")

        if dims is None:
            self.__dims = AcquisitionDims()
        else:
            assert isinstance(dims, AcquisitionDims), "Attribute dims has to be of class AcquisitionDims"
            self.__dims = dims
        # Keys: dimensions names, values: dimensions values
        self._dimensions_values = {}
        self._in_memory = in_memory

        if data is not None:
            self._data = data
        else:
            # No data given: build urls and metadata from files on disk.
            if filenames is None and first_filename is None:
                # Default to every regular file directly inside `_dir`.
                filenames = sorted([x for x in glob.glob(_dir + "/*") if os.path.isfile(x)])
            self.filenames = filenames
            self.first_filename = first_filename

            metadata = []
            data_urls = []

            with fabio.open_series(filenames=filenames, first_filename=first_filename) as series:
                for frame in series.frames():
                    filename = frame.file_container.filename
                    data_urls.append(DataUrl(file_path=filename,
                                             scheme='fabio'))
                    # Read each frame's header once and keep it as metadata.
                    fabio_reader = fabioh5.EdfFabioReader(file_name=filename)
                    metadata.append(fabio_reader)
                    fabio_reader.close()

            self._data = Data(numpy.array(data_urls), metadata=metadata, in_memory=self._in_memory)
151

Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
152
    def stop_operation(self, operation):
153
        """
Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
154
        Method used for cases where threads are created to apply functions to the dataset.
155
        If method is called, the flag concerning the stop is set to 0 so that if the concerned
Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
156
157
158
159
        operation is running in another thread it knows to stop.

        :param int operation: operation to stop
        :type int: Union[int, `Operation`]
160
        """
Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
161
        if self.operations_state[operation]:
162
            self._lock.acquire()
Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
163
            self.operations_state[operation] = 0
164
            self._lock.release()
165
166
        if self.running_data is not None:
            self.running_data.stop_operation(operation)
167

168
    @property
    def transformation(self):
        # Transformation associated with the dataset; propagated to the
        # datasets returned by the `apply_*` operations.
        return self._transformation

    @transformation.setter
    def transformation(self, value):
        self._transformation = value
175

176
177
178
179
180
181
182
183
    @property
    def dir(self):
        # Base directory used to save the output of the different operations.
        return self._dir

    @property
    def treated(self):
        # NOTE(review): `_treated` is not assigned anywhere in the visible
        # part of this file; accessing this property may raise
        # AttributeError unless it is set elsewhere — confirm before use.
        return self._treated

184
    def compute_frames_intensity(self, kernel=(3, 3), sigma=20):
        """
        Returns for every image a number representing its intensity. This number
        is obtained by first blurring the image and then computing its variance.

        :param kernel: Size of the Gaussian blur kernel, defaults to (3, 3).
        :param sigma: Standard deviation of the Gaussian blur, defaults to 20.
        :returns: List with one intensity value per frame, or None if the
            operation was stopped via `stop_operation`.
        """
        # Deferred import: cv2 is only needed for this operation. Hoisted out
        # of the per-frame loop so it is resolved once, not once per frame.
        import cv2

        _logger.info("Computing intensity per frame")
        # `nframes` flattens the data on every access; compute it once.
        nframes = self.nframes
        io_utils.advancement_display(0, nframes, "Computing intensity")
        frames_intensity = []
        # Flag this operation as running so it can be cancelled from outside.
        self._lock.acquire()
        self.operations_state[Operation.PARTITION] = 1
        self._lock.release()
        for i in range(nframes):
            # Bail out if stop_operation() cleared the flag in another thread.
            if not self.operations_state[Operation.PARTITION]:
                return
            frames_intensity.append(
                cv2.GaussianBlur(self.get_data(i), kernel, sigma).var())
            io_utils.advancement_display(i + 1, nframes, "Computing intensity")
        self._frames_intensity = frames_intensity
        self._lock.acquire()
        self.operations_state[Operation.PARTITION] = 0
        self._lock.release()
        return self._frames_intensity

209
    def partition_by_intensity(self, bins=None, num_bins=1):
210
        """
211
212
213
214
215
216
217
        Function that computes the data from the set of urls.
        If the filter_data flag is activated it filters the data following the next:
        -- First, it computes the intensity for each frame, by calculating the variance after
        passing a gaussian filter.
        -- Second, computes the histogram of the intensity.
        -- Finally, saves the data of the frames with an intensity bigger than a threshold.
        The threshold is set to be the second bin of the histogram.
218

219
        :param int num_bins: Number of bins to use as threshold.
220
        """
221
        frames_intensity = self._frames_intensity if self._frames_intensity else self.compute_frames_intensity()
222
223
        if frames_intensity is None:
            return
224
        values, bins = numpy.histogram(frames_intensity, self.nframes if bins is None else bins)
225
        threshold = numpy.array(frames_intensity) >= bins[num_bins]
226
        return numpy.flatnonzero(threshold), numpy.flatnonzero(~threshold)
227

228
229
230
231
232
    @property
    def in_memory(self):
        # True when frames are fully loaded in memory, False when they are
        # read from disk on demand.
        return self._in_memory

    @in_memory.setter
    def in_memory(self, in_memory):
        """
        Change whether the data is kept in memory or read from disk,
        rebuilding the `Data` wrapper only when the flag actually changes.
        """
        if self._in_memory is not in_memory:
            self._in_memory = in_memory
            # Same urls/metadata, new in-memory policy.
            self._data = Data(self.data.urls, self.data.metadata, self._in_memory)

241
242
243
    @property
    def data(self):
        # `Data` object (urls + metadata) holding the dataset frames.
        return self._data
244

245
    def get_data(self, indices=None, dimension=None, return_indices=False):
        """
        Returns the data corresponding to certain indices and given some dimensions values.
        The data is always flattened to be a stack of images.

        :param array_like indices: If not None, data is filtered using this array.
        :param array_like dimension: If not None, return only the data corresponding to
            the given dimension. Dimension is a 2d vector, where the first component is
            a list of the axis and the second is a list of the indices of the values to extract.
            The dimension and value list length can be up to the number of dimensions - 1.
            The call get_data(dimension=[[1, 2], [2, 3]]) is equivalent to data[:, 2, 3] when data
            is in memory.
            The axis of the dimension is so that lower the axis, fastest the dimension (higher changing
            value).
        :param bool return_indices: If True, also return the flat frame indices
            corresponding to the returned data.

        :return: Array with the new data.
        """
        if dimension is not None and len(self._data.shape) > 3:
            # Make sure dimension and value are lists
            if type(dimension[0]) is int:
                dimension[0] = [dimension[0]]
                dimension[1] = [dimension[1]]
            data = self.data
            indices = numpy.arange(self.nframes) if indices is None else indices
            # Init list of bool indices
            bool_indices = numpy.zeros(self.data.flatten().shape[:-2], dtype=bool)
            bool_indices[indices] = True
            bool_indices = bool_indices.reshape(self.data.shape[:-2])
            # Flat frame numbers, arranged with the same shape as the data axes.
            indx = numpy.arange(self.nframes).reshape(self.data.shape[:-2])
            # For every axis, get corresponding elements
            for i, dim in enumerate(sorted(dimension[0])):
                # Flip axis to be consistent with the data shape
                axis = self.dims.ndim - dim - 1
                # Narrow data, selection mask and frame numbers in lockstep.
                data = data.take(indices=dimension[1][i], axis=axis)
                bool_indices = bool_indices.take(indices=dimension[1][i], axis=axis)
                indx = indx.take(indices=dimension[1][i], axis=axis)
            data = data[bool_indices]
            indx = indx[bool_indices]
            if return_indices:
                return data.flatten(), indx.flatten()
            return data.flatten()
        else:
            # No dimension filtering: flatten to a stack of frames and
            # optionally select the requested indices.
            data = self.data.flatten()
            return data if indices is None else data[indices]
289

290
291
    @property
    def nframes(self):
Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
292
293
294
        """
        Return number of frames
        """
295
        if self.data is None:
296
297
            return 0
        else:
298
            return len(self.data.flatten())
299

300
    def to_memory(self, indices):
        """
        Method to load only part of the data into memory.
        Returns a new dataset with the data corresponding to given indices into memory.
        The new indices array has to be given, if all the data has to be set into
        memory please set `in_memory` to True instead, this way no new dataset will be
        created.

        :param array_like indices: Indices of the new dataset.
        :returns: New dataset with the selected frames loaded in memory.
        :rtype: Dataset
        """
        if not self._in_memory:
            # Disk-based data: wrap the selected urls in an in-memory Data.
            data = self.get_data(indices)
            new_data = Data(data.urls, data.metadata, True)
        else:
            # Already in memory: just take the selected frames.
            new_data = self.get_data(indices)
        return Dataset(_dir=self._dir, data=new_data, dims=self.__dims, in_memory=True)

317
318
319
320
    @property
    def dims(self):
        # `AcquisitionDims` describing the dataset's acquisition dimensions.
        return self.__dims

    @dims.setter
    def dims(self, _dims):
        assert isinstance(_dims, AcquisitionDims), "Dimensions dictionary has " \
            "to be of class `AcquisitionDims`"
        self.__dims = _dims

327
328
329
    def clear_dims(self):
        """Reset the dimensions to a new, empty `AcquisitionDims`."""
        self.__dims = AcquisitionDims()

330
331
332
333
334
    def add_dim(self, axis, dim):
        """
        Adds a dimension to the dimension's dictionary.

        :param int axis: axis of the dimension.
        :param `Dimension` dim: dimension to be added.
        """
        self.__dims.add_dim(axis, dim)

339
340
341
342
343
344
345
346
    def remove_dim(self, axis):
        """
        Removes a dimension from the dimension's dictionary.

        :param int axis: axis of the dimension.
        """
        self.__dims.remove_dim(axis)

347
    def zsum(self, indices=None, dimension=None):
348

349
350
        data = self.get_data(indices, dimension)
        return data.sum(axis=0)
351

352
353
354
355
    def reshape_data(self):
        """
        Function that reshapes the data to fit the dimensions.
        """
356
        if self.__dims.ndim == 1 and self.__dims.get(0).size == self.nframes:
Julia Garriga Ferrer's avatar
Julia Garriga Ferrer committed
357
            return self
358
        elif self.__dims.ndim > 1:
359
360
            try:
                shape = list(self.__dims.shape)
361
362
363
                shape.append(self.get_data().shape[-2])
                shape.append(self.get_data().shape[-1])
                data = self.get_data().reshape(shape)
364
                return Dataset(_dir=self.dir, data=data, dims=self.__dims, in_memory=self._in_memory)
365
366
367
368
369
370
            except Exception:
                raise ValueError("Failed to reshape data into dimensions {} \
                                  Try using another tolerance value.".format(' '.join(self.__dims.get_names())))
        else:
            raise ValueError("Not enough dimensions where found")

371
    def find_dimensions(self, kind, tolerance=1e-9):
        """
        Goes over all the headers from a given kind and finds the dimensions
        that move (have more than one value) along the data.

        Note: Before, only the dimensions that could fit where shown, now it
        shows all the dimensions and let the user choose the valid ones.

        :param int kind: Type of metadata to find the dimensions.
        :param float tolerance: Tolerance that will be used to compute the
            unique values.
        """
        # Start from a clean slate: previous dimensions and cached values.
        self.__dims.clear()
        self._dimensions_values = {}

        # One row per metadata key, one column per frame.
        keys = numpy.array(list(self.data.metadata[0].get_keys(kind)))
        values = numpy.array([[data.get_value(kind=kind, name=key)[0] for data
                             in self.data.metadata] for key in keys])
        # Unique values for each key.
        unique_values = [numpy.unique(value, return_counts=True) for value in values]
        dimensions = []
        dataset_size = len(self.data.metadata)
        # For every key that has more than one different value, creates a new Dimension.
        for i, value in enumerate(unique_values):
            if value[1][0] != dataset_size:
                dimension = Dimension(kind, keys[i], tolerance=tolerance)
                dimension.set_unique_values(numpy.unique(value[0]))
                # Value that tells when does the change of value occur. It is used to know the order
                # of the reshaping.
                dimension.changing_value = self.__compute_changing_value(values[i])
                dimensions.append(dimension)

        # Fastest-changing dimensions are added first (lowest axis).
        for dimension in sorted(dimensions, key=lambda x: x.changing_value, reverse=True):
            self.__dims.add_dim(axis=self.__dims.ndim, dim=dimension)
            _logger.info("Dimension {} of size {} has been added for reshaping"
                         .format(dimension.name, dimension.size))
407

408
409
    def get_metadata_values(self, kind, key):
        return numpy.unique([data.get_value(kind=kind, name=key)[0] for data
410
                             in self.get_data().metadata])
411

412
    def __compute_changing_value(self, values, changing_value=1):
413
414
415
416
        """
        Recursive method used to calculate how fast is a dimension. The speed of a dimension is the number of
        times the dimension changes of value while moving through the dataset.
        """
417
418
419
420
421
        if len(numpy.unique(values)) > 1:
            return self.__compute_changing_value(values[:int(len(values) / 2)], changing_value + 1)
        else:
            return changing_value

422
    def get_dimensions_values(self, indices=None):
        """
        Returns all the metadata values of the dimensions.
        The values are assumed to be numbers.

        :param indices: Optional frame indices to restrict the metadata to;
            only used the first time, before the cache is filled.
        :returns: array_like
        """
        if not self._dimensions_values:
            # Lazily fill the cache from the frames' metadata.
            data = self.get_data(indices).metadata
            for dimension in self.__dims:
                values = numpy.empty((len(data)))
                for row, metadata_frame in enumerate(data):
                    values[row] = (metadata_frame.get_value(kind=dimension[1].kind,
                                   name=dimension[1].name)[0])
                self._dimensions_values[dimension[1].name] = values
        return self._dimensions_values
438

439
    def compute_mosaicity_colorkey(self, scale=100):
        """
        Computes a mosaicity colorkey from the dimensions, and returns also the
        orientation distribution image.

        :param int scale: Number of colorkey samples per dimension step.
        :returns: (orientation distribution, HSV colorkey) — both None when
            there are no dimensions or not exactly two mesh axes.
        """
        # Orientation distribution: sum over the second axis of the stack.
        ori_dist = self.get_data().sum(axis=1)

        if self.__dims:
            new_shape = []
            dims_list = []
            for axis, dim in self.__dims:
                # One [-1, 1] axis per dimension, `scale` samples per step.
                dims_list += [numpy.linspace(-1, 1, (dim.size - 1) * scale)]
                new_shape += [dim.size]

            dims_list.reverse()
            mesh = numpy.meshgrid(*dims_list)
            if len(mesh) == 2:
                # Hue from the angle, saturation from the (normalized) radius.
                data = numpy.arctan2(mesh[0], mesh[1])
                normalized_data = wrapTo2pi(data) / numpy.pi / 2
                sqr = numpy.sqrt(numpy.power(
                    mesh[0], 2) + numpy.power(mesh[1], 2)) / numpy.sqrt(2)
                hsv_key = numpy.stack((normalized_data, sqr, numpy.ones(
                    (len(dims_list[1]), len(dims_list[0])))), axis=2)
                return ori_dist.reshape(numpy.flip(new_shape)).T, hsv_key
        return None, None
464

465
    def apply_background_subtraction(self, background=None, method="median", indices=None,
                                     step=None, chunk_shape=None, _dir=None):
        """
        Applies background subtraction to the data and saves the new data
        into disk.

        :param background: Data to be used as background. If None, data with indices `indices` is used.
            If Dataset, data of the dataset is used. If array, use data with indices in the array.
        :type background: Union[None, array_like, Dataset]
        :param method: Method to use to compute the background.
        :type method: Method
        :param indices: Indices of the images to apply background subtraction.
            If None, the background subtraction is applied to all the data.
        :type indices: Union[None, array_like]
        :param int step: Distance between images to be used when computing the median.
            Parameter used only when flag in_memory is False and method is `Method.median`.
            If `step` is not None, all images with distance of `step`, starting at 0,
            will be loaded into memory for computing the median, if the data loading throws
            a MemoryError, the median computation is tried with `step += 1`.
        :param chunk_shape: Shape of the chunk image to use per iteration,
            defaults to [100, 100].
            Parameter used only when flag in_memory is False and method is `Method.median`.
        :type chunk_shape: array_like
        :param _dir: Directory to save the treated data into, defaults to `self.dir`.
        :returns: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        :rtype: Dataset
        """
        # Avoid the mutable-default-argument pitfall: resolve the default here.
        if chunk_shape is None:
            chunk_shape = [100, 100]

        _dir = self.dir if _dir is None else _dir

        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        method = Method.from_value(method)

        self.running_data = self.get_data(indices)

        # Mark the operation as running so it can be cancelled concurrently.
        self._lock.acquire()
        self.operations_state[Operation.BS] = 1
        self._lock.release()

        if background is None:
            bg_data = self.running_data
            if indices is None:
                _logger.info("Computing background from " + method.name + " of raw data")
            else:
                _logger.info("Computing background from " + method.name + " of high intensity data")
        elif isinstance(background, Dataset):
            bg_data = background.data
            _logger.info("Computing background from " + method.name + " of `background` set")
        else:
            bg_data = self.get_data(background)
            _logger.info("Computing background from " + method.name + " of low intensity data")

        if self._in_memory:
            new_data = background_subtraction(self.running_data, bg_data, method).view(Data)
            new_data.save(_dir + "/data")
            urls = new_data.urls
        else:
            bg = numpy.zeros(self.running_data[0].shape, self.running_data.dtype)
            if method == Method.mean:
                if bg_data.in_memory:
                    numpy.mean(bg_data, out=bg, axis=0)
                else:
                    # Accumulate the mean image frame by frame to bound memory.
                    io_utils.advancement_display(0, len(bg_data), "Computing mean image")
                    for i in range(len(bg_data)):
                        if not self.operations_state[Operation.BS]:
                            return
                        bg = img2img_mean(bg_data[i], bg, i)
                        io_utils.advancement_display(i + 1, len(bg_data), "Computing mean image")
            elif method == Method.median:
                if bg_data.in_memory:
                    numpy.median(bg_data, out=bg, axis=0)
                else:
                    if step is not None:
                        # Subsample every `step`-th frame for the median.
                        bg_indices = numpy.arange(0, len(bg_data), step)
                        try:
                            numpy.median(
                                Data(bg_data.urls[bg_indices], bg_data.metadata[bg_indices]),
                                out=bg, axis=0)
                        except MemoryError:
                            if not self.operations_state[Operation.BS]:
                                return
                            print("MemoryError, trying with step {}".format(step + 1))
                            # Retry with a larger step; forward `chunk_shape`
                            # and `_dir` as well so the retry matches the
                            # original call (they were previously dropped).
                            return self.apply_background_subtraction(background, method, indices,
                                                                     step + 1, chunk_shape, _dir)
                    else:
                        # Chunked median: process the image tile by tile,
                        # loading matching tiles of every frame in parallel.
                        start = [0, 0]
                        img = self.running_data[0]
                        bg = numpy.empty(img.shape, img.dtype)
                        chunks_1 = int(numpy.ceil(img.shape[0] / chunk_shape[0]))
                        chunks_2 = int(numpy.ceil(img.shape[1] / chunk_shape[1]))
                        io_utils.advancement_display(0, chunks_1 * chunks_2 * len(bg_data), "Computing median image")
                        for i in range(chunks_1):
                            for j in range(chunks_2):
                                if not self.operations_state[Operation.BS]:
                                    return
                                c_images = []
                                cpus = multiprocessing.cpu_count()
                                with Pool(cpus - 1) as p:
                                    c_images = p.map(partial(chunk_image, start, chunk_shape), bg_data)
                                io_utils.advancement_display(
                                    i * chunks_2 + j + 1,
                                    chunks_1 * chunks_2,
                                    "Computing median image")
                                numpy.median(c_images, out=bg[start[0]: start[0] + chunk_shape[0],
                                             start[1]:start[1] + chunk_shape[1]], axis=0)
                                start[1] = chunk_shape[1] * (j + 1)
                            start[0] = chunk_shape[0] * (i + 1)
                            start[1] = 0

            if not self.operations_state[Operation.BS]:
                return

            urls = self.running_data.apply_funcs([(background_subtraction_2D, [bg])],
                                                 save=_dir + "/data", text="Applying background subtraction",
                                                 operation=Operation.BS)
            if urls is None:
                return

        self._lock.acquire()
        self.operations_state[Operation.BS] = 0
        self._lock.release()

        # Set urls as shape and dimension of original urls.
        if indices is not None:
            new_urls = numpy.array(self.get_data().urls, dtype=object)
            new_urls[indices] = urls
            new_data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)
        else:
            new_data = Data(urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)

        return Dataset(_dir=_dir, data=new_data, dims=self.__dims, transformation=self.transformation,
                       in_memory=self._in_memory)
600

601
    def apply_hot_pixel_removal(self, kernel=3, indices=None, _dir=None):
        """
        Applies hot pixel removal to Data, and saves the new data
        into disk.

        :param int kernel: size of the kernel used to find the hot
            pixels.
        :param indices: Indices of the images to apply hot pixel removal.
            If None, the hot pixel removal is applied to all the data.
        :type indices: Union[None, array_like]
        :param _dir: Directory to save the treated data into, defaults to `self.dir`.
        :return: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        :rtype: Dataset
        """
        self.running_data = self.get_data(indices)

        _dir = self.dir if _dir is None else _dir

        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        if self._in_memory:
            # In-memory path: process the whole stack, then persist it.
            new_data = hot_pixel_removal_3D(self.running_data, kernel).view(Data)
            new_data.save(_dir + "/data")
            urls = new_data.urls
        else:
            # On-disk path: apply the 2D filter frame by frame; returns None
            # when the operation was stopped.
            urls = self.running_data.apply_funcs([(hot_pixel_removal_2D, [kernel])],
                                                 save=_dir + "/data", text="Applying hot pixel removal",
                                                 operation=Operation.HP)
            if urls is None:
                return

        # Rebuild a Data of the original shape, swapping in the new urls.
        if indices is not None:
            new_urls = numpy.array(self.get_data().urls, dtype=object)
            new_urls[indices] = urls
            new_data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)
        else:
            new_data = Data(urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)

        return Dataset(_dir=_dir, data=new_data, dims=self.__dims, transformation=self.transformation,
                       in_memory=self._in_memory)
646

647
    def apply_threshold_removal(self, bottom=None, top=None, indices=None, _dir=None):
        """
        Applies bottom threshold to Data, and saves the new data
        into disk.

        :param int bottom: bottom threshold to apply.
        :param int top: top threshold to apply.
        :param indices: Indices of the images to apply the threshold to.
            If None, the threshold is applied to all the data.
        :type indices: Union[None, array_like]
        :param _dir: Directory to save the treated data into, defaults to `self.dir`.
        :return: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        :rtype: Dataset
        """
        self.running_data = self.get_data(indices)

        _dir = self.dir if _dir is None else _dir

        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        if self._in_memory:
            # In-memory path: process the whole stack, then persist it.
            new_data = threshold_removal(self.running_data, bottom, top).view(Data)
            new_data.save(_dir + "/data")
            urls = new_data.urls
        else:
            # On-disk path: apply the threshold frame by frame; returns None
            # when the operation was stopped.
            urls = self.running_data.apply_funcs([(threshold_removal, [bottom, top])],
                                                 save=_dir + "/data", text="Applying threshold",
                                                 operation=Operation.THRESHOLD)
            if urls is None:
                return

        # Rebuild a Data of the original shape, swapping in the new urls.
        if indices is not None:
            new_urls = numpy.array(self.get_data().urls, dtype=object)
            new_urls[indices] = urls
            new_data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)
        else:
            new_data = Data(urls.reshape(self.data.urls.shape), self.data.metadata,
                            self._in_memory)

        return Dataset(_dir=_dir, data=new_data, dims=self.__dims, transformation=self.transformation,
                       in_memory=self._in_memory)
692

693
    def apply_roi(self, origin=None, size=None, center=None, indices=None, roi_dir=None):
        """
        Apply a region of interest to the data.

        :param origin: Origin of the roi
        :param size: [Height, Width] of the roi.
        :param center: Center of the roi
        :type origin: Union[2d vector, None]
        :type center: Union[2d vector, None]
        :type size: Union[2d vector, None]
        :param indices: Indices of the images to apply the roi to.
            If None, the roi is applied to all the data.
        :type indices: Union[None, array_like]
        :param roi_dir: Directory path for the new dataset
        :type roi_dir: str
        :returns: dataset with data with roi applied.
            Note: To preserve consistence of shape between images, if `indices`
            is not None, only the data modified is returned.
        :rtype: Dataset
        """
        if roi_dir is None:
            roi_dir = self.dir
        if not os.path.isdir(roi_dir):
            os.mkdir(roi_dir)

        self.running_data = self.get_data(indices)
        if self._in_memory:
            # Crop the whole stack at once and persist it to disk.
            cropped = apply_3D_ROI(self.running_data, origin, size, center).view(Data)
            cropped.save(roi_dir + "/data")
        else:
            # Crop image per image, streaming the results to disk.
            result_urls = self.running_data.apply_funcs(
                [(apply_2D_ROI, [origin, size, center])],
                save=roi_dir + "/data", text="Applying roi",
                operation=Operation.ROI)
            if result_urls is None:
                # Operation was cancelled.
                return
            cropped = Data(result_urls, self.running_data.metadata, self._in_memory)

        # The transformation axes must be cropped consistently with the images.
        if self.transformation:
            transformation = [apply_2D_ROI(axis, origin, size, center)
                              for axis in self.transformation]
        else:
            transformation = None

        if indices is None:
            # Restore the scan shape, keeping the new (cropped) frame size.
            new_shape = list(self.data.shape)[:-2] + list(cropped.shape[-2:])
            cropped = cropped.reshape(new_shape)
        return Dataset(_dir=roi_dir, data=cropped, dims=self.__dims,
                       transformation=transformation,
                       in_memory=self._in_memory)
739

740
    def find_shift(self, dimension=None, steps=50, indices=None):
        """
        Find shift of the data or part of it.

        :param dimension: Parameters with the position of the data in the reshaped
            array.
        :type dimension: Union[None, tuple, array_like]
        :param int steps: Number of steps used to sample candidate shifts, see
            `core.imageRegistration.shift_detection`.
        :param indices: Boolean index list with True in the images to find the
            shift of. If None, the shift is computed from all the data.
        :type indices: Union[None, array_like]
        :returns: Array with shift per frame.
        """
        # Docstring fixed: it previously documented nonexistent `h_max`/`h_step`
        # parameters and described `indices` as "hot pixel removal" (copy-paste).
        return shift_detection(self.get_data(indices, dimension), steps)
755

756
757
    def apply_shift(self, shift, dimension=None, shift_approach="fft", indices=None,
                    callback=None, _dir=None):
        """
        Apply shift of the data or part of it and save new data into disk.

        :param array_like shift: Shift per frame, indexed as `shift[:, i]`
            (one shift vector per frame `i`).
        :param dimension: Parameters with the position of the data in the reshaped
            array.
        :type dimension: Union[None, tuple, array_like]
        :param Union['fft', 'linear'] shift_approach: Method to use to apply the shift.
        :param indices: Boolean index list with True in the images to apply the shift to.
            If None, the shift is applied to all the data.
        :type indices: Union[None, array_like]
        :param Union[function, None] callback: Callback
        :param _dir: Directory to save the new data into. Defaults to `self.dir`.
        :returns: dataset with data of same size as `self.data` but with the
            modified images. The urls of the modified images are replaced with
            the new urls.
        """
        assert len(shift) > 0, "Shift list can't be empty"

        # Nothing to do when every shift component is zero.
        if not numpy.any(shift):
            return self

        _dir = self.dir if _dir is None else _dir
        if not os.path.isdir(_dir):
            os.mkdir(_dir)

        data = self.get_data(indices, dimension)

        # Flag the operation as running so it can be aborted from elsewhere.
        self._lock.acquire()
        self.operations_state[Operation.SHIFT] = 1
        self._lock.release()

        io_utils.advancement_display(0, len(data), "Applying shift")
        if dimension is not None:
            # Convert dimension and value into list
            if type(dimension[0]) is int:
                dimension[0] = [dimension[0]]
                dimension[1] = [dimension[1]]
            urls = self._shift_and_save_frames(data, shift, shift_approach, _dir)
            if urls is None:
                # Operation was aborted from another thread.
                return
            # Replace specific urls that correspond to the modified data
            new_urls = numpy.array(self.data.urls, dtype=object)
            copy_urls = new_urls
            if indices is not None:
                # Create array of booleans to know which indices we have
                bool_indices = numpy.zeros(self.get_data().shape[:-2], dtype=bool)
                bool_indices[indices] = True
                bool_indices = bool_indices.reshape(self.data.shape[:-2])
                for i, dim in enumerate(sorted(dimension[0])):
                    # Flip axis to be consistent with the data shape
                    axis = self.dims.ndim - dim - 1
                    copy_urls = numpy.swapaxes(copy_urls, 0, axis)[dimension[1][i], :]
                    bool_indices = numpy.swapaxes(bool_indices, 0, axis)[dimension[1][i], :]
                copy_urls[bool_indices] = urls
            else:
                for i, dim in enumerate(sorted(dimension[0])):
                    # Flip axis to be consistent with the data shape
                    axis = self.dims.ndim - dim - 1
                    copy_urls = numpy.swapaxes(copy_urls, 0, axis)[dimension[1][i], :]
                copy_urls[:] = urls
        else:
            urls = self._shift_and_save_frames(data, shift, shift_approach, _dir)
            if urls is None:
                # Operation was aborted from another thread.
                return
            if indices is not None:
                new_urls = numpy.array(self.data.urls, dtype=object).flatten()
                numpy.put(new_urls, indices, urls)
            else:
                new_urls = numpy.array(urls)

        self._lock.acquire()
        self.operations_state[Operation.SHIFT] = 0
        self._lock.release()

        data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata, in_memory=self._in_memory)
        return Dataset(_dir=_dir, data=data, dims=self.__dims, transformation=self.transformation,
                       in_memory=self._in_memory)

    def _shift_and_save_frames(self, data, shift, shift_approach, _dir):
        """
        Shift every frame of `data` and save each result as a .npy file in `_dir`.

        Previously duplicated inline in both branches of `apply_shift`.

        :param data: Frames to shift (indexable stack of 2D images).
        :param array_like shift: Shift per frame, indexed as `shift[:, i]`.
        :param Union['fft', 'linear'] shift_approach: Method to use to apply the shift.
        :param str _dir: Existing directory to save the shifted frames into.
        :returns: list of DataUrl of the saved frames, or None if the operation
            was aborted via `self.operations_state`.
        """
        urls = []
        for i in range(len(data)):
            if not self.operations_state[Operation.SHIFT]:
                # Aborted from another thread.
                return None
            filename = _dir + "/data" + str(i).zfill(4) + ".npy"
            # BUGFIX: `shift[:, i]` is a vector, so the original
            # `if shift[:, i] > 1:` raised "truth value of an array is
            # ambiguous" whenever shift has more than one row. Switch to the
            # linear approach once any shift component exceeds one pixel
            # (it then stays linear for the remaining frames, as before).
            if numpy.any(shift[:, i] > 1):
                shift_approach = "linear"
            img = apply_shift(data[i], shift[:, i], shift_approach)
            numpy.save(filename, img)
            urls.append(DataUrl(file_path=filename, scheme='fabio'))
            io_utils.advancement_display(i + 1, len(data), "Applying shift")
        return urls
852

853
    def find_and_apply_shift(self, dimension=None, steps=100, shift_approach="fft",
                             indices=None, callback=None):
        """
        Find the shift of the data or part of it and apply it.

        :param dimension: Parameters with the position of the data in the reshaped
            array.
        :type dimension: Union[None, tuple, array_like]
        :param int steps: See `core.imageRegistration.shift_detection`
        :param Union['fft', 'linear'] shift_approach: Method to use to apply the shift.
        :param indices: Indices of the images to find and apply the shift to.
            If None, the shift is found and applied to all the data.
        :type indices: Union[None, array_like]
        :param Union[function, None] callback: Callback
        :returns: Dataset with the new data.
        """
        shift = self.find_shift(dimension, steps, indices=indices)
        # BUGFIX: `shift_approach` and `callback` were accepted but silently
        # dropped, so apply_shift always ran with its defaults. Forward them.
        return self.apply_shift(shift, dimension, shift_approach=shift_approach,
                                indices=indices, callback=callback)
872

873
    def _cascade_nmf(self, num_components, iterations, vstep=None, hstep=None