Commit 15136040 authored by Julia Garriga Ferrer
Browse files

[core][dataset] Save data into hdf5 file instead of numpy file

parent 03ed965b
......@@ -518,7 +518,7 @@ class Dataset():
if self._in_memory:
new_data = background_subtraction(self.running_data, bg_data, method).view(Data)
new_data.save(_dir + "/data")
new_data.save(_dir + "/data.hdf5")
urls = new_data.urls
else:
bg = numpy.zeros(self.running_data[0].shape, self.running_data.dtype)
......@@ -576,7 +576,7 @@ class Dataset():
return
urls = self.running_data.apply_funcs([(background_subtraction_2D, [bg])],
save=_dir + "/data", text="Applying background subtraction",
save=_dir + "/data.hdf5", text="Applying background subtraction",
operation=Operation.BS)
if urls is None:
return
......@@ -623,11 +623,11 @@ class Dataset():
if self._in_memory:
new_data = hot_pixel_removal_3D(self.running_data, kernel).view(Data)
new_data.save(_dir + "/data")
new_data.save(_dir + "/data.hdf5")
urls = new_data.urls
else:
urls = self.running_data.apply_funcs([(hot_pixel_removal_2D, [kernel])],
save=_dir + "/data", text="Applying hot pixel removal",
save=_dir + "/data.hdf5", text="Applying hot pixel removal",
operation=Operation.HP)
if urls is None:
return
......@@ -669,11 +669,11 @@ class Dataset():
if self._in_memory:
new_data = threshold_removal(self.running_data, bottom, top).view(Data)
new_data.save(_dir + "/data")
new_data.save(_dir + "/data.hdf5")
urls = new_data.urls
else:
urls = self.running_data.apply_funcs([(threshold_removal, [bottom, top])],
save=_dir + "/data", text="Applying threshold",
save=_dir + "/data.hdf5", text="Applying threshold",
operation=Operation.THRESHOLD)
if urls is None:
return
......@@ -718,11 +718,13 @@ class Dataset():
self.running_data = self.get_data(indices)
if self._in_memory:
new_data = apply_3D_ROI(self.running_data, origin, size, center).view(Data)
new_data.save(roi_dir + "/data")
new_data.save(roi_dir + "/data.hdf5")
else:
shape = numpy.append([self.nframes], apply_2D_ROI(self.running_data[0], origin, size, center).shape)
urls = self.running_data.apply_funcs([(apply_2D_ROI, [origin, size, center])],
save=roi_dir + "/data", text="Applying roi",
operation=Operation.ROI)
save=roi_dir + "/data.hdf5", text="Applying roi",
operation=Operation.ROI,
new_shape=shape)
if urls is None:
return
new_data = Data(urls, self.running_data.metadata, self._in_memory)
......@@ -785,6 +787,13 @@ class Dataset():
self._lock.acquire()
self.operations_state[Operation.SHIFT] = 1
self._lock.release()
_file = h5py.File(_dir + '/data.hdf5', 'a')
dataset_name = "dataset"
if "dataset" in _file:
_file.create_dataset("update_dataset", data.shape)
dataset_name = "update_dataset"
else:
_file.create_dataset("dataset", data.shape)
io_utils.advancement_display(0, len(data), "Applying shift")
if dimension is not None:
......@@ -795,13 +804,13 @@ class Dataset():
urls = []
for i in range(len(data)):
if not self.operations_state[Operation.SHIFT]:
del _file["update_dataset"]
return
filename = _dir + "/data" + str(i).zfill(4) + ".npy"
img = apply_shift(data[i], shift[:, i], shift_approach)
if shift[:, i].all() > 1:
shift_approach = "linear"
img = apply_shift(data[i], shift[:, i], shift_approach)
numpy.save(filename, img)
urls.append(DataUrl(file_path=filename, scheme='fabio'))
_file[dataset_name][i] = img
urls.append(DataUrl(file_path=_dir + '/data.hdf5', data_path="/dataset", data_slice=i, scheme='silx'))
io_utils.advancement_display(i + 1, len(data), "Applying shift")
# Replace specific urls that correspond to the modified data
......@@ -828,13 +837,13 @@ class Dataset():
urls = []
for i in range(len(data)):
if not self.operations_state[Operation.SHIFT]:
del _file["update_dataset"]
return
filename = _dir + "/data" + str(i).zfill(4) + ".npy"
if shift[:, i].all() > 1:
shift_approach = "linear"
img = apply_shift(data[i], shift[:, i], shift_approach)
numpy.save(filename, img)
urls.append(DataUrl(file_path=filename, scheme='fabio'))
_file[dataset_name][i] = img
urls.append(DataUrl(file_path=_dir + '/data.hdf5', data_path="/dataset", data_slice=i, scheme='silx'))
io_utils.advancement_display(i + 1, len(data), "Applying shift")
if indices is not None:
new_urls = numpy.array(self.data.urls, dtype=object).flatten()
......@@ -846,6 +855,13 @@ class Dataset():
self.operations_state[Operation.SHIFT] = 0
self._lock.release()
if dataset_name == "update_dataset":
del _file["dataset"]
_file["dataset"] = _file["update_dataset"]
del _file["update_dataset"]
_file.close()
data = Data(new_urls.reshape(self.data.urls.shape), self.data.metadata, in_memory=self._in_memory)
return Dataset(_dir=_dir, data=data, dims=self.__dims, transformation=self.transformation,
in_memory=self._in_memory)
......@@ -1433,7 +1449,7 @@ class Data(numpy.ndarray):
else:
return super(Data, self).ndim + 2
def apply_funcs(self, funcs=[], indices=None, save=False, text="", operation=None):
def apply_funcs(self, funcs=[], indices=None, save=False, text="", operation=None, new_shape=None):
"""
Method that applies a series of functions into the data. It can save the images
into disk or return them.
......@@ -1454,6 +1470,8 @@ class Data(numpy.ndarray):
"""
if indices is None:
indices = range(len(self))
if new_shape is None:
new_shape = self.shape
if isinstance(indices, int):
indices = [indices]
urls = []
......@@ -1465,27 +1483,46 @@ class Data(numpy.ndarray):
self._lock.acquire()
self.operations[operation] = 1
self._lock.release()
_file = h5py.File(save, 'a')
dataset_name = "dataset"
if "dataset" in _file:
_file.create_dataset("update_dataset", new_shape)
dataset_name = "update_dataset"
else:
_file.create_dataset("dataset", new_shape)
for i in indices:
if operation is not None and not self.operations[operation]:
if save:
for j in indices:
if j != i:
filename = save + str(j).zfill(4) + ".npy"
os.remove(filename)
else:
break
del _file["update_dataset"]
return
# if save:
# for j in indices:
# if j != i:
# filename = save + str(j).zfill(4) + ".npy"
# os.remove(filename)
# else:
# break
# return
img = self[int(i)]
for f, args in funcs:
img = f(*([img] + args))
if save:
filename = save + str(i).zfill(4) + ".npy"
numpy.save(filename, img)
urls.append(DataUrl(file_path=filename, scheme='fabio'))
_file[dataset_name][i] = img
urls.append(DataUrl(file_path=save, data_path="/dataset", data_slice=i, scheme='silx'))
# filename = save + str(i).zfill(4) + ".npy"
# numpy.save(filename, img)
# urls.append(DataUrl(file_path=filename, scheme='fabio'))
io_utils.advancement_display(i + 1, len(self.urls.flatten()), text)
self._lock.acquire()
self.operations[operation] = 0
self._lock.release()
if dataset_name == "update_dataset":
del _file["dataset"]
_file["dataset"] = _file["update_dataset"]
del _file["update_dataset"]
_file.close()
return numpy.array(urls)
def save(self, path, indices=None):
......@@ -1504,10 +1541,19 @@ class Data(numpy.ndarray):
data = self
else:
data = self[indices]
_file = h5py.File(path, 'a')
if "dataset" in _file:
del _file["dataset"]
_file.create_dataset("dataset", self.shape)
for i, img in enumerate(data):
filename = path + str(i).zfill(4) + ".npy"
numpy.save(filename, img)
urls.append(DataUrl(file_path=filename, scheme='fabio'))
_file["dataset"][i] = img
urls.append(DataUrl(file_path=path, data_path="/dataset", data_slice=i, scheme='silx'))
# filename = path + str(i).zfill(4) + ".npy"
# numpy.save(filename, img)
# urls.append(DataUrl(file_path=filename, scheme='fabio'))
_file.close()
self.urls = numpy.asarray(urls)
def convert_to_hdf5(self, _dir):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment