Commit e7b92167 authored by payno

Revert "[add_dark_flat_nx_file] avoid reading the entire data"

This reverts commit 03cf6873.
parent 52f00e52
Pipeline #37051 failed in 7 minutes and 50 seconds
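
The change being reverted had replaced the call to silx's `get_data` (which loads the whole referenced array into memory) with a direct open of the HDF5 dataset node, so that only shape and dtype were read while building the virtual layout. A minimal sketch of the two access patterns, using plain `h5py` and a hypothetical `frames.h5::/entry/data` URL rather than the project's `HDF5File` wrapper:

```python
import h5py
from silx.io.url import DataUrl
from silx.io.utils import get_data

url = DataUrl(file_path="frames.h5", data_path="/entry/data", scheme="silx")

# eager: what the restored code does - the full stack ends up in memory
frames = get_data(url)                      # numpy.ndarray with every frame
print(frames.shape, frames.dtype)

# lazy: what the reverted commit did - only metadata is touched
with h5py.File(url.file_path(), mode="r") as h5f:
    node = h5f[url.data_path()]             # h5py.Dataset, nothing read yet
    print(node.shape, node.dtype)           # enough to build a VirtualSource
```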
@@ -440,84 +440,80 @@ def _insert_frame_data(data, file_path, data_path, where, logger=None):
                 )
             # store DataUrl in the current virtual dataset
             url = data
-            with HDF5File(data.data_file(), mode='r') as o_h5s:
-                if data.data_path() not in o_h5s:
-                    raise KeyError('{} cannot be found in {}'.format(data.data_path(),
-                                                                     data.data_path()))
-                data_frm_url_node = o_h5s[data.data_path()]
-                if not data_frm_url_node.ndim in (2, 3):
-                    raise ValueError(
-                        "{} should point to 2D or 3D dataset ".format(url.path())
-                    )
-                if data_frm_url_node.ndim is 2:
-                    new_shape = 1, data_frm_url_node.shape[0], data_frm_url_node.shape[1]
-                    if logger is not None:
-                        logger.info(
-                            "reshape provided data to 3D (from {} to {})"
-                            "".format(data_frm_url_node.shape, new_shape)
-                        )
-                    data_frm_url = data_frm_url_node.reshape(new_shape)
-                n_frames, dim_2, dim_1 = data_frm_url.shape
-
-                def recreate_vs(vs_info, vds_file):
-                    with cwd_context():
-                        os.chdir(os.path.dirname(vds_file))
-                        with HDF5File(vs_info.file_name, mode="r") as vs_node:
-                            vs_shape = vs_node[vs_info.dset_name].shape
-                            length = vs_shape[0]
-                            return (
-                                length,
-                                h5py.VirtualSource(
-                                    vs_info.file_name,
-                                    vs_info.dset_name,
-                                    shape=vs_shape,
-                                ),
-                            )
-
-                virtual_sources_len = []
-                virtual_sources = []
-                # we need to recreate the VirtualSource they are not
-                # store or available from the API
-                for vs_info in h5s[data_path].virtual_sources():
-                    length, vs = recreate_vs(vs_info, vds_file=file_path)
-                    virtual_sources.append(vs)
-                    virtual_sources_len.append(length)
-                vds_file_path = os.path.abspath(
-                    os.path.relpath(url.file_path(), os.getcwd())
-                )
-                vds_file_path = os.path.realpath(vds_file_path)
-                vds_file_path = os.path.relpath(
-                    vds_file_path, os.path.dirname(file_path)
-                )
-                new_virtual_source = h5py.VirtualSource(
-                    path_or_dataset=vds_file_path,
-                    name=url.data_path(),
-                    shape=data_frm_url.shape,
-                )
-                n_frames += h5s[data_path].shape[0]
-                data_type = h5s[data_path].dtype
-                if where == "start":
-                    virtual_sources.insert(0, new_virtual_source)
-                    virtual_sources_len.insert(0, data_frm_url.shape[0])
-                else:
-                    virtual_sources.append(new_virtual_source)
-                    virtual_sources_len.append(data_frm_url.shape[0])
-                # create the new virtual dataset
-                layout = h5py.VirtualLayout(
-                    shape=(n_frames, dim_2, dim_1), dtype=data_type
-                )
-                last = 0
-                for v_source, vs_len in zip(virtual_sources, virtual_sources_len):
-                    layout[last : vs_len + last] = v_source
-                    last += vs_len
-                if data_path in h5s:
-                    del h5s[data_path]
-                h5s.create_virtual_dataset(data_path, layout)
+            data_frm_url = get_data(data)
+            if not data_frm_url.ndim in (2, 3):
+                raise ValueError(
+                    "{} should point to 2D or 3D dataset ".format(url.path())
+                )
+            if data_frm_url.ndim is 2:
+                new_shape = 1, data_frm_url.shape[0], data_frm_url.shape[1]
+                if logger is not None:
+                    logger.info(
+                        "reshape provided data to 3D (from {} to {})"
+                        "".format(data_frm_url.shape, new_shape)
+                    )
+                data_frm_url = data_frm_url.reshape(new_shape)
+            n_frames, dim_2, dim_1 = data_frm_url.shape
+
+            def recreate_vs(vs_info, vds_file):
+                with cwd_context():
+                    os.chdir(os.path.dirname(vds_file))
+                    with HDF5File(vs_info.file_name, mode="r") as vs_node:
+                        vs_shape = vs_node[vs_info.dset_name].shape
+                        length = vs_shape[0]
+                        return (
+                            length,
+                            h5py.VirtualSource(
+                                vs_info.file_name,
+                                vs_info.dset_name,
+                                shape=vs_shape,
+                            ),
+                        )
+
+            virtual_sources_len = []
+            virtual_sources = []
+            # we need to recreate the VirtualSource they are not
+            # store or available from the API
+            for vs_info in h5s[data_path].virtual_sources():
+                length, vs = recreate_vs(vs_info, vds_file=file_path)
+                virtual_sources.append(vs)
+                virtual_sources_len.append(length)
+            vds_file_path = os.path.abspath(
+                os.path.relpath(url.file_path(), os.getcwd())
+            )
+            vds_file_path = os.path.realpath(vds_file_path)
+            vds_file_path = os.path.relpath(
+                vds_file_path, os.path.dirname(file_path)
+            )
+            new_virtual_source = h5py.VirtualSource(
+                path_or_dataset=vds_file_path,
+                name=url.data_path(),
+                shape=data_frm_url.shape,
+            )
+            n_frames += h5s[data_path].shape[0]
+            data_type = h5s[data_path].dtype
+            if where == "start":
+                virtual_sources.insert(0, new_virtual_source)
+                virtual_sources_len.insert(0, data_frm_url.shape[0])
+            else:
+                virtual_sources.append(new_virtual_source)
+                virtual_sources_len.append(data_frm_url.shape[0])
+            # create the new virtual dataset
+            layout = h5py.VirtualLayout(
+                shape=(n_frames, dim_2, dim_1), dtype=data_type
+            )
+            last = 0
+            for v_source, vs_len in zip(virtual_sources, virtual_sources_len):
+                layout[last : vs_len + last] = v_source
+                last += vs_len
+            if data_path in h5s:
+                del h5s[data_path]
+            h5s.create_virtual_dataset(data_path, layout)
         else:
             # for now we always duplicate data if provided from a DataUrl
             # we could create a virtual dataset as well
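
For the unchanged part of the hunk: h5py does not hand back the `VirtualSource` objects of an existing virtual dataset, so the code concatenates rebuilt sources plus the new one into a fresh `VirtualLayout` and rewrites the dataset. A self-contained sketch of that concatenation with hypothetical file names (`a.h5`, `b.h5`, `stack.h5`), not the NXtomo paths used by nxtomomill:

```python
import h5py
import numpy as np

# two hypothetical source files, each holding a small 3D stack of frames
for name, n_frames in (("a.h5", 2), ("b.h5", 3)):
    with h5py.File(name, "w") as h5f:
        h5f["data"] = np.full((n_frames, 4, 5), n_frames, dtype="uint16")

sources = [
    h5py.VirtualSource("a.h5", "data", shape=(2, 4, 5)),
    h5py.VirtualSource("b.h5", "data", shape=(3, 4, 5)),
]
lengths = [2, 3]
# where == "start" would be an insert(0, ...) instead of an append here

# map every source into one contiguous layout, frame axis first
layout = h5py.VirtualLayout(shape=(sum(lengths), 4, 5), dtype="uint16")
last = 0
for source, length in zip(sources, lengths):
    layout[last : last + length] = source
    last += length

with h5py.File("stack.h5", "w") as h5f:
    h5f.create_virtual_dataset("data", layout)  # no frame data is copied
    print(h5f["data"].shape)                    # (5, 4, 5)
```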
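
The `recreate_vs` helper exists because `h5py.Dataset.virtual_sources()` only returns `VDSmap` tuples (`vspace`, `file_name`, `dset_name`, `src_space`), not ready-to-use `VirtualSource` objects. A sketch of rebuilding them, reusing the hypothetical `stack.h5` from the previous example; as in the hunk, the source files must be reachable from the directory of the virtual dataset because the stored paths may be relative:

```python
import h5py

with h5py.File("stack.h5", "r") as h5f:
    rebuilt = []
    for vs_info in h5f["data"].virtual_sources():  # list of VDSmap tuples
        # re-open the source file to recover the dataset shape, which the
        # mapping does not expose directly
        with h5py.File(vs_info.file_name, "r") as src:
            shape = src[vs_info.dset_name].shape
        rebuilt.append(
            h5py.VirtualSource(vs_info.file_name, vs_info.dset_name, shape=shape)
        )
    print([source.shape for source in rebuilt])    # [(2, 4, 5), (3, 4, 5)]
```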
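
Finally, the `vds_file_path` lines of the hunk turn the path of the newly added source into a path relative to the directory of the file holding the virtual dataset, so both files can be moved together without breaking the mapping. A small sketch of that computation with hypothetical paths:

```python
import os

vds_file = "/data/scan/dark_flat.nx"      # file that will hold the virtual dataset
source_file = "/data/scan/raw/darks.h5"   # file the new frames come from

path = os.path.abspath(os.path.relpath(source_file, os.getcwd()))
path = os.path.realpath(path)             # resolve symlinks if any
path = os.path.relpath(path, os.path.dirname(vds_file))
print(path)                               # raw/darks.h5 (assuming no symlinks)
```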