writer.py 14.3 KB
Newer Older
Pierre Paleo's avatar
Pierre Paleo committed
1
from glob import glob
Pierre Paleo's avatar
Pierre Paleo committed
2
from os import path, getcwd, chdir
3
4
from datetime import datetime
import numpy as np
5
6
from h5py import VirtualSource, VirtualLayout
from tomoscan.io import HDF5File
7
from silx.utils.enum import Enum
Pierre Paleo's avatar
Pierre Paleo committed
8
from silx.third_party.TiffIO import TiffIO
9
from .. import version
10
from ..misc.utils import rescale_data
Pierre Paleo's avatar
Pierre Paleo committed
11
from .config import export_dict_to_h5
12
13
from .utils import check_h5py_version # won't be necessary once h5py >= 3.0 required

Pierre Paleo's avatar
Pierre Paleo committed
14
15
16
17
try:
    from silx.third_party.EdfFile import EdfFile
except ImportError:
    EdfFile = None
Pierre Paleo's avatar
Pierre Paleo committed
18
19
20
21
22
try:
    from glymur import Jp2k
    __have_jp2k__ = True
except ImportError:
    __have_jp2k__ = False
23
24


Pierre Paleo's avatar
Pierre Paleo committed
25
26
27
28
29
30
31
def get_datetime():
    """
    Return the current date and time as an ISO 8601 string with a
    one-second resolution (no fractional part).

    Writers use it to time-stamp what they produce.
    """
    return datetime.now().isoformat(timespec="seconds")


32
33
34
35
36
37
38
39
class Writer:
    """
    Common ancestor of every writer: it only keeps track of the output
    file name.
    """
    def __init__(self, fname):
        # Stored as given, no normalization is done here
        self.fname = fname


    def get_filename(self):
        """
        Return the file name passed at construction.
        """
        return self.fname


44
class NXProcessWriter(Writer):
    """
    Writer storing a processing result in a HDF5 file, as a "NXprocess"
    group located in a "NXentry" group.
    """
    def __init__(self, fname, entry=None, filemode="a", overwrite=False):
        """
        Initialize a NXProcessWriter.

        Parameters
        -----------
        fname: str
            Path to the HDF5 file.
        entry: str, optional
            Entry in the HDF5 file. Default is "entry"
        filemode: str, optional
            Mode used to open the HDF5 file in `write()`. Default is "a".
        overwrite: bool, optional
            Whether to delete an already existing group of the same process
            name before writing. Default is False.
        """
        super().__init__(fname)
        self._set_entry(entry)
        self._filemode = filemode
        self.overwrite = overwrite
        check_h5py_version()


    def _set_entry(self, entry):
        """
        Store the entry name, and the corresponding absolute path
        (with a leading "/") inside the HDF5 file.
        """
        self.entry = entry or "entry"
        data_path = self.entry
        if not data_path.startswith("/"):
            data_path = "/" + data_path
        self.data_path = data_path


    def write(self, result, process_name, processing_index=0, config=None, is_frames_stack=True):
        """
        Write the result in the current NXProcess group.

        Parameters
        ----------
        result: numpy.ndarray
            Array containing the processing result.
            A dictionary is exported with `export_dict_to_h5`, and a
            h5py `VirtualLayout` is written as a virtual dataset.
        process_name: str
            Name of the processing
        processing_index: int
            Index of the processing (in a pipeline)
        config: dict, optional
            Dictionary containing the configuration.
        is_frames_stack: bool, optional
            If True (default) and `result` is not a dictionary, the attribute
            "interpretation" of the written dataset is set to "image".

        Returns
        -------
        results_path: str
            Path of the "results" group inside the HDF5 file.
        """
        with HDF5File(self.fname, self._filemode, swmr=True) as fid:
            # Paths inside a HDF5 file are always "/"-separated, whatever the
            # operating system: they must not be built with os.path.join.
            process_path = "/".join([self.data_path, process_name])
            if self.overwrite and process_path in fid:
                del fid[process_path]
            nx_entry = fid.require_group(self.data_path)
            if "NX_class" not in nx_entry.attrs:
                nx_entry.attrs["NX_class"] = "NXentry"

            nx_process = nx_entry.require_group(process_name)
            nx_process.attrs['NX_class'] = "NXprocess"

            nx_process['program'] = "nabu"
            nx_process['version'] = version
            nx_process['date'] = get_datetime()
            nx_process['sequence_index'] = np.int32(processing_index)

            if config is not None:
                export_dict_to_h5(
                    config,
                    self.fname,
                    '/'.join([nx_process.name, 'configuration']),
                    overwrite_data=True,
                    mode="a"
                )
                nx_process['configuration'].attrs['NX_class'] = "NXcollection"
            if isinstance(result, dict):
                results_path = '/'.join([nx_process.name, 'results'])
                export_dict_to_h5(
                    result,
                    self.fname,
                    results_path,
                    overwrite_data=True,
                    mode="a"
                )
            else:
                nx_data = nx_process.require_group('results')
                results_path = nx_data.name
                nx_data.attrs['NX_class'] = "NXdata"
                nx_data.attrs['signal'] = "data"
                if isinstance(result, VirtualLayout):
                    nx_data.create_virtual_dataset("data", result)
                else: # assuming array-like
                    nx_data['data'] = result
                if is_frames_stack:
                    nx_data['data'].attrs['interpretation'] = "image"

            # prepare the direct access plots
            nx_process.attrs['default'] = 'results'
            if "default" not in nx_entry.attrs:
                nx_entry.attrs["default"] = '/'.join([nx_process.name, 'results'])

            # Return the internal path to "results"
            return results_path
138
139


Pierre Paleo's avatar
Pierre Paleo committed
140
141
142
def merge_hdf5_files(
    files_or_pattern, h5_path, output_file, process_name,
    output_entry=None, output_filemode="a",
    processing_index=0, config=None, base_dir=None,
    overwrite=False
):
    """
    Merge datasets spread over several HDF5 files into one virtual dataset,
    by stacking them along their first axis. The result is written in
    `output_file` with a NXProcessWriter.

    Parameters
    -----------
    files_or_pattern: str or list
        A list of file names, or a wildcard pattern.
        If a list is provided, it will not be sorted! This will have to be
        done before calling this function.
    h5_path: str
        Path inside the HDF5 input file(s)
    output_file: str
        Path of the output file
    process_name: str
        Name of the process
    output_entry: str, optional
        Output HDF5 root entry (default is "/entry")
    output_filemode: str, optional
        File mode for output file. Default is "a" (append)
    processing_index: int, optional
        Processing index for the output file. Default is 0.
    config: dict, optional
        Dictionary describing the configuration needed to get the results.
    base_dir: str, optional
        Base directory when using relative file names.
        The current working directory is changed to `base_dir` during the
        call, and restored afterwards (even if an error occurs).
    overwrite: bool, optional
        Whether to overwrite already existing data in the final file.
        Default is False.

    Raises
    ------
    ValueError
        If no input file is found.
    """
    prev_cwd = None
    if base_dir is not None:
        prev_cwd = getcwd()
        chdir(base_dir)
    # From here on the working directory may have been changed: whatever
    # happens, it has to be restored before leaving the function.
    try:
        if isinstance(files_or_pattern, str):
            files_list = sorted(glob(files_or_pattern))
        else: # list
            files_list = files_or_pattern
        if len(files_list) == 0:
            raise ValueError("Nothing found as pattern %s" % str(files_or_pattern))

        # Collect the shape of each input dataset, and declare it as a virtual source
        virtual_sources = []
        shapes = []
        for fname in files_list:
            with HDF5File(fname, "r", swmr=True) as fid:
                shape = fid[h5_path].shape
            virtual_sources.append(VirtualSource(fname, name=h5_path, shape=shape))
            shapes.append(shape)

        # The merged dataset is the concatenation of all sources along axis 0.
        # The trailing dimensions are taken from the first source.
        n_images = sum(shape[0] for shape in shapes)
        virtual_layout = VirtualLayout(
            shape=(n_images, ) + shapes[0][1:],
            dtype='f'
        )
        start_idx = 0
        for vsource, shape in zip(virtual_sources, shapes):
            n_imgs = shape[0]
            virtual_layout[start_idx:start_idx + n_imgs] = vsource
            start_idx += n_imgs

        nx_file = NXProcessWriter(
            output_file,
            entry=output_entry, filemode=output_filemode, overwrite=overwrite
        )
        nx_file.write(
            virtual_layout,
            process_name,
            processing_index=processing_index,
            config=config,
            is_frames_stack=True
        )
    finally:
        if prev_cwd is not None:
            chdir(prev_cwd)
Pierre Paleo's avatar
Pierre Paleo committed
216

Pierre Paleo's avatar
Pierre Paleo committed
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266

class TIFFWriter(Writer):
    def __init__(self, fname, multiframe=False, start_index=0, filemode="wb"):
        """
        Writer for TIFF images.

        Parameters
        -----------
        fname: str
            Path to the output file name
        multiframe: bool, optional
            Whether to put all the images of a stack in one single file.
            Default is False.
        start_index: int, optional
            Index of the first file when a stack of images is written as
            one file per image (i.e. multiframe is False): the files are
            named `filename_0000.tif`, `filename_0001.tif`, etc. counting
            from this value.
            Not used when multiframe is True.
        filemode: str, optional
            Mode passed to TiffIO when opening a file. Default is "wb".

        Notes
        ------
        With multiframe=False (default), every image of a stack ends up in
        its own tiff file.
        """
        super().__init__(fname)
        self.multiframe = multiframe
        self.filemode = filemode
        self.start_index = start_index


    def _write_tiff(self, data, config=None, filename=None, filemode=None):
        """
        Write one image. `filename` and `filemode` fall back to the values
        given at construction when they are None.
        """
        target = self.fname if filename is None else filename
        mode = self.filemode if filemode is None else filemode
        tif = TiffIO(target, mode=mode)
        tif.writeImage(
            data,
            software=str("nabu %s" % version),
            info=config,
            date=get_datetime()
        )
        # Drop the reference to the TiffIO object once the image is written
        tif = None


    def write(self, data, *args, config=None, **kwargs):
        """
        Write an image, or a stack of images, with an optional `config`
        passed as image information.
        """
        # A stack made of one frame is handled as a plain image
        if (data.ndim == 3) and (data.shape[0] == 1):
            data = data[0]
        if data.ndim < 3:
            self._write_tiff(data, config=config)
            return

        n_frames = data.shape[0]
        if self.multiframe:
            # The first frame is written with the default file mode,
            # the following ones with the mode "rb+"
            for idx in range(n_frames):
                mode = None if idx == 0 else "rb+"
                self._write_tiff(data[idx], config=config, filemode=mode)
            return

        # One file per frame: <prefix>_<index on 4 digits><extension>
        dirname, rel_filename = path.split(self.fname)
        prefix, ext = path.splitext(rel_filename)
        for idx in range(n_frames):
            frame_name = "%s_%04d%s" % (prefix, self.start_index + idx, ext)
            self._write_tiff(data[idx], filename=path.join(dirname, frame_name), config=config)


    def get_filename(self):
        """
        Return the file name in multiframe mode, otherwise the directory
        containing the files.
        """
        return self.fname if self.multiframe else path.dirname(self.fname)
Pierre Paleo's avatar
Pierre Paleo committed
287
288


Pierre Paleo's avatar
Pierre Paleo committed
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
class EDFWriter(Writer):
    def __init__(self, fname, start_index=0, filemode="w"):
        """
        EDF (ESRF Data Format) writer.

        Parameters
        -----------
        fname: str
            Path to the output file name
        start_index: int, optional
            When writing a stack of images, each image is written in a dedicated file
            In this case, the output is a series of files `filename_0000.edf`,
            `filename_0001.edf`, etc. This parameter is the starting index for
            file names.
        filemode: str, optional
            File mode, stored as an attribute. Default is "w".
        """
        super().__init__(fname)
        self.filemode = filemode
        self.start_index = start_index

    def _write_edf(self, data, config=None, filename=None, filemode=None):
        """
        Write one image in a EDF file, with the content of `config`
        (plus the entries "software" and "date") as header.
        """
        if filename is None:
            filename = self.fname
        edf = EdfFile(filename)
        # Build the header from a copy: "software" and "date" must not be
        # added to the dictionary owned by the caller.
        header = dict(config) if config else {}
        header["software"] = str("nabu %s" % version)
        header["date"] = get_datetime()
        edf.WriteImage(
            header,
            data
        )
        edf = None


    def write(self, data, *args, config=None, **kwargs):
        """
        Write an image in the file given at construction, or a stack of
        images as a series of files (one per image).
        """
        if data.ndim < 3:
            self._write_edf(data, config=config)
            return
        if (data.ndim == 3) and (data.shape[0] == 1):
            self._write_edf(data[0], config=config)
            return
        dirname, rel_filename = path.split(self.fname)
        prefix, ext = path.splitext(rel_filename)
        for i in range(data.shape[0]):
            curr_rel_filename = prefix + str("_%04d" % (self.start_index + i)) + ext
            fname = path.join(dirname, curr_rel_filename)
            self._write_edf(data[i], filename=fname, config=config)

    def get_filename(self):
        """
        Return the directory containing the output file(s).
        """
        return path.dirname(self.fname)


Pierre Paleo's avatar
Pierre Paleo committed
340
class JP2Writer(Writer):
    def __init__(self, fname, start_index=0, filemode="wb", psnr=None, auto_convert=True):
        """
        Writer for JPEG2000 images. It needs the python package `glymur` and
        the library `libopenjp2`.

        Parameters
        -----------
        fname: str
            Path to the output file name
        start_index: int, optional
            Index of the first file when a stack of images is written:
            each image goes in a dedicated file, named after `fname` with a
            4-digits index inserted before the extension (`_0000`, `_0001`,
            etc.), counting from this value.
        filemode: str, optional
            File mode, stored as an attribute. Default is "wb".
        psnr: list of int, optional
            The PSNR (Peak Signal-to-Noise ratio) for each jpeg2000 layer.
            This defines a quality metric for lossy compression.
            The number "0" stands for lossless compression.
            A scalar is turned into a one-element list.
        auto_convert: bool, optional
            Whether to automatically rescale and cast data which is not
            uint16 to uint16. Default is True.
        """
        super().__init__(fname)
        if not(__have_jp2k__):
            raise ValueError("Need glymur python package and libopenjp2 library")
        self.filemode = filemode
        self.start_index = start_index
        self.auto_convert = auto_convert
        is_single_value = psnr is not None and np.isscalar(psnr)
        self.psnr = [psnr] if is_single_value else psnr
        # Bounds handed to rescale_data() when converting to uint16
        self._vmin = None
        self._vmax = None


    def _write_jp2k(self, data, filename=None):
        """
        Write one image. `filename` falls back to the name given at
        construction when it is None.
        """
        target = self.fname if filename is None else filename
        # TODO this will have to change in future versions
        if data.dtype != np.uint16 and self.auto_convert:
            rescaled = rescale_data(data, 0, 65535, data_min=self._vmin, data_max=self._vmax)
            data = rescaled.astype(np.uint16)
        #
        Jp2k(target, data=data, psnr=self.psnr)


    def write(self, data, *args, **kwargs):
        """
        Write an image, or a stack of images (one file per image).
        """
        # A stack made of one frame is handled as a plain image
        if (data.ndim == 3) and (data.shape[0] == 1):
            data = data[0]
        if data.ndim < 3:
            self._write_jp2k(data)
            return

        # One file per frame: <prefix>_<index on 4 digits><extension>
        dirname, rel_filename = path.split(self.fname)
        prefix, ext = path.splitext(rel_filename)
        for idx in range(data.shape[0]):
            frame_name = "%s_%04d%s" % (prefix, self.start_index + idx, ext)
            self._write_jp2k(data[idx], filename=path.join(dirname, frame_name))


    def get_filename(self):
        """
        Return the directory containing the output file(s).
        """
        return path.dirname(self.fname)
Pierre Paleo's avatar
Pierre Paleo committed
403
404


Pierre Paleo's avatar
Pierre Paleo committed
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
class NPYWriter(Writer):
    """
    Writer saving the result with `numpy.save`.
    """
    def __init__(self,  fname):
        super().__init__(fname)

    def write(self, result, *args, **kwargs):
        """
        Save `result` in the file given at construction.
        Extra arguments are accepted but ignored.
        """
        np.save(self.fname, arr=result)


class NPZWriter(Writer):
    """
    Writer saving the result, and optionally the configuration, with
    `numpy.savez`.
    """
    def __init__(self,  fname):
        super().__init__(fname)

    def write(self, result, *args, **kwargs):
        """
        Save `result` under the key "result". If a keyword argument `config`
        is given and is not None, it is saved under the key "configuration".
        """
        arrays = {"result": result}
        if kwargs.get("config") is not None:
            arrays["configuration"] = kwargs["config"]
        np.savez(self.fname, **arrays)


# Writer class associated with each supported output format name.
# Several names can designate the same writer.
Writers = {
    **dict.fromkeys(["h5", "hdf5", "nx", "nexus"], NXProcessWriter),
    "npy": NPYWriter,
    "npz": NPZWriter,
    **dict.fromkeys(["tif", "tiff"], TIFFWriter),
    **dict.fromkeys(["j2k", "jp2", "jp2k"], JP2Writer),
    "edf": EDFWriter,
}