Commit 1678abef authored by Henri Payno's avatar Henri Payno
Browse files

Fix entry indexes.

The NXtomo refactoring broke a feature that is very important for data automation:
- get / deduce the entry name according to the INIT-ACQUISITION entry.
  This ensured the consistency of the created NXtomo and avoided naming conflicts between files and entries.

This commit intends to fix this regression and even improves the mechanism by adding the `bam_single_file` option, which enforces the creation of one file per entry.

Now each acquisition has a `start_index` value telling it from which index it can create entries and files.
Each acquisition must also tell the converter how many entries it will create (at most) through the `get_expected_nx_tomo` function.

For the ZSeriesBaseAcquisition and the PCOTomoAcquisition, this number cannot be provided when the initialization step is discovered.

* The ZSeriesBaseAcquisition increases the start_index for the next acquisition each time it registers a new url
* The PCOTomoAcquisition postpones this until the url of a projection is parsed, which is where this information is registered

It would be beneficial to refactor the `build_acquisition_classes_frm_titles` and `build_acquisition_classes_frm_urls` functions.
parent 28ab68e7
Pipeline #70070 passed with stages
in 3 minutes and 4 seconds
......@@ -109,6 +109,7 @@ class BaseAcquisition:
root_url: typing.Union[DataUrl, None],
configuration: TomoHDF5Config,
detector_sel_callback,
start_index: int,
):
self._root_url = root_url
self._detector_sel_callback = detector_sel_callback
......@@ -124,17 +125,23 @@ class BaseAcquisition:
self._plugins_pos_resources = {}
self._plugins_instr_resources = {}
self._configuration = configuration
self._start_index = start_index
@property
def configuration(self):
    """Return the configuration object given to the constructor (``self._configuration``)."""
    return self._configuration
@property
def start_index(self) -> int:
    """Return the start index given to the constructor (``self._start_index``)."""
    # the converter hands this value to `write_as_nxtomo` as `shift_entry`
    return self._start_index
def write_as_nxtomo(
self,
shift_entry: int,
input_file_path: str,
request_input: bool,
plugins,
divide_into_sub_files,
input_callback=None,
) -> tuple:
self.set_plugins(plugins)
......@@ -153,10 +160,10 @@ class BaseAcquisition:
output_file_basename.rstrip(possible_extension)
file_extension_ = possible_extension
def get_file_name_and_entry(index):
def get_file_name_and_entry(index, divide_sub_files):
entry = "entry" + str(index).zfill(4)
if self.configuration.single_file:
if self.configuration.single_file or not divide_sub_files:
en_output_file = self.configuration.output_file
else:
ext = file_extension_ or self.configuration.file_extension
......@@ -186,7 +193,9 @@ class BaseAcquisition:
result = []
for i_nx_tomo, nx_tomo in enumerate(nx_tomos):
output_file, data_path = get_file_name_and_entry((shift_entry + i_nx_tomo))
output_file, data_path = get_file_name_and_entry(
(shift_entry + i_nx_tomo), divide_sub_files=divide_into_sub_files
)
output_file = os.path.abspath(os.path.relpath(output_file, os.getcwd()))
output_file = os.path.realpath(output_file)
......@@ -277,6 +286,13 @@ class BaseAcquisition:
def root_url(self):
return self._root_url
def get_expected_nx_tomo(self):
    """
    Return the expected number of NXtomo created for this acquisition.

    This is required to get consistent entry and file names, at least for
    automation.

    :raises NotImplementedError: always, the base class provides no value
    """
    raise NotImplementedError("Base class")
def read_entry(self):
return EntryReader(self._root_url)
......
......@@ -73,11 +73,13 @@ class PCOTomoAcquisition(StandardAcquisition):
root_url: Union[DataUrl, None],
configuration: TomoHDF5Config,
detector_sel_callback,
start_index,
):
super().__init__(
root_url=root_url,
configuration=configuration,
detector_sel_callback=detector_sel_callback,
start_index=start_index,
)
self._nb_loop = None
self._nb_tomo = None
......@@ -94,7 +96,7 @@ class PCOTomoAcquisition(StandardAcquisition):
super()._preprocess_registered_entry(entry_url=entry_url, type_=type_)
if type_ is AcquisitionStep.PROJECTION:
# nb loop parameter must be present only on projection entries
nb_loop = self._get_nb_loop(entry_url)
nb_loop = self.get_nb_loop(entry_url)
if (
nb_loop is not None
): # at this moment 02/2022 nb_loop is only defined on projection type
......@@ -105,7 +107,7 @@ class PCOTomoAcquisition(StandardAcquisition):
_logger.error(
f"Found entries with a different number of nb_loop: {entry_url.path()}"
)
nb_tomo = self._get_nb_tomo(entry_url)
nb_tomo = self.get_nb_tomo(entry_url)
if (
nb_tomo is not None
): # at this moment 02/2022 nb_loop is only defined on projection type
......@@ -117,18 +119,23 @@ class PCOTomoAcquisition(StandardAcquisition):
f"Found entries with a different number of _nb_tomo: {entry_url.path()}"
)
def _get_nb_loop(self, url) -> Optional[int]:
def get_nb_loop(self, url) -> Optional[int]:
    """
    Read the value stored under ``self._NB_LOOP_PATH`` in the entry at `url`.

    :param url: location of the entry, handed to ``EntryReader``
    :return: the stored value, or None when the entry has no such path
    """
    with EntryReader(url) as h5_entry:
        has_loop_path = self._NB_LOOP_PATH in h5_entry
        value = h5_entry[self._NB_LOOP_PATH][()] if has_loop_path else None
    return value
def _get_nb_tomo(self, url) -> Optional[int]:
def get_nb_tomo(self, url) -> Optional[int]:
    """
    Read the value stored under ``self._NB_TOMO_PATH`` in the entry at `url`.

    :param url: location of the entry, handed to ``EntryReader``
    :return: the stored value, or None when the entry has no such path
    """
    with EntryReader(url) as entry:
        # test the path that is actually read. The previous check looked for
        # ``_NB_LOOP_PATH``: the read below could then target a missing
        # dataset, and an entry holding only the nb_tomo path was ignored.
        if self._NB_TOMO_PATH in entry:
            return entry[self._NB_TOMO_PATH][()]
    return None
def get_expected_nx_tomo(self):
    """Return 0: the real number is not known when the acquisition is created."""
    # the number of expected NXtomo is saved with the projections
    # and not with the init title. This is why it must be computed later
    return 0
@docstring(StandardAcquisition)
def to_NXtomos(self, request_input, input_callback, check_tomo_n: bool) -> tuple:
nx_tomos = super().to_NXtomos(request_input, input_callback, check_tomo_n=False)
......
......@@ -81,11 +81,13 @@ class StandardAcquisition(BaseAcquisition):
root_url: Union[DataUrl, None],
configuration: TomoHDF5Config,
detector_sel_callback,
start_index,
):
super().__init__(
root_url=root_url,
configuration=configuration,
detector_sel_callback=detector_sel_callback,
start_index=start_index,
)
self._nx_tomos = [NXtomo("/")]
self._image_key_control = None
......@@ -107,6 +109,9 @@ class StandardAcquisition(BaseAcquisition):
"register dataset copied. Key if the original location as" "DataUrl.path. Value is the DataUrl it has been moved to"
# self._current_scan_n_frame = None
def get_expected_nx_tomo(self):
    """Return the expected number of NXtomo created for this acquisition: always 1."""
    return 1
@property
def image_key_control(self):
return self._image_key_control
......
......@@ -49,7 +49,10 @@ def test_BaseAquisition():
url = DataUrl(file_path=file_path, data_path="/data/toto", scheme="silx")
std_acq = BaseAcquisition(
root_url=url, configuration=TomoHDF5Config(), detector_sel_callback=None
root_url=url,
configuration=TomoHDF5Config(),
detector_sel_callback=None,
start_index=0,
)
with std_acq.read_entry() as entry:
assert "dataset" in entry
......
......@@ -76,6 +76,7 @@ class XRDCTAcquisition(StandardAcquisition):
root_url: DataUrl,
configuration: TomoHDF5Config,
detector_sel_callback,
start_index,
copy_frames: bool = False,
):
"""
......@@ -95,6 +96,7 @@ class XRDCTAcquisition(StandardAcquisition):
root_url=root_url,
configuration=configuration,
detector_sel_callback=detector_sel_callback,
start_index=start_index,
)
# for XRD-CT data is contained in the 'acquisition' sequence
# and we only have projections
......
......@@ -111,6 +111,9 @@ class ZSeriesBaseAcquisition(BaseAcquisition):
self._acquisitions = {}
"""key is z value and value is _StandardAcquisition"""
def get_expected_nx_tomo(self):
    """Return the expected number of NXtomo known at creation time: always 1."""
    # NOTE(review): the converter appears to add one more index each time a
    # new z value is registered - confirm against the converter loops
    return 1
def get_standard_sub_acquisitions(self) -> tuple:
"""
Return the tuple of all :class:`.StandardAcquisition` composing
......@@ -150,6 +153,7 @@ class ZSeriesBaseAcquisition(BaseAcquisition):
root_url=url,
configuration=self.configuration,
detector_sel_callback=self._detector_sel_callback,
start_index=self.start_index + len(self._acquisitions),
)
self._acquisitions[z].register_step(
url=url, entry_type=entry_type, copy_frames=copy_frames
......
......@@ -35,6 +35,7 @@ __date__ = "27/11/2020"
from nxtomomill.converter.hdf5.acquisition.pcotomoacquisition import PCOTomoAcquisition
from nxtomomill.utils.hdf5 import EntryReader
from .acquisition.utils import get_entry_type
from nxtomomill.io.acquisitionstep import AcquisitionStep
from .acquisition.standardacquisition import StandardAcquisition
......@@ -267,9 +268,16 @@ class _H5ToNxConverter(BaseConverter):
# step 2: treat FrameGroups
root_acquisition = None
start_index = 0
require_pcotomo_expected_nx_tomo = False
for frame_grp in data_frame_grps:
if frame_grp.frame_type is AcquisitionStep.INITIALIZATION:
current_format = self.configuration.format
if require_pcotomo_expected_nx_tomo is True:
_logger.warning(
f"Fail to retrieve expected number of nxtomo for {root_acquisition}"
)
require_pcotomo_expected_nx_tomo = False
if current_format is Format.STANDARD:
from nxtomomill.io.framegroup import filter_acqui_frame_type
......@@ -289,32 +297,44 @@ class _H5ToNxConverter(BaseConverter):
root_url=frame_grp.url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
elif is_pcotomo_frm_titles(acqui_projs_urls, self.configuration):
root_acquisition = PCOTomoAcquisition(
root_url=frame_grp.url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += 0
# this will be defined with the projections
self._require_pcotomo_expected_nx_tomo = True
else:
root_acquisition = StandardAcquisition(
root_url=frame_grp.url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += root_acquisition.get_expected_nx_tomo()
elif current_format is Format.XRD_CT:
root_acquisition = XRDCTAcquisition(
root_url=frame_grp.url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
copy_frames=frame_grp.copy,
start_index=start_index,
)
start_index += root_acquisition.get_expected_nx_tomo()
elif current_format is Format.XRD_3D:
root_acquisition = XRD3DAcquisition(
root_url=frame_grp.url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += root_acquisition.get_expected_nx_tomo()
else:
raise ValueError("Format {} is not handled".format(current_format))
self.acquisitions.append(root_acquisition)
......@@ -328,6 +348,22 @@ class _H5ToNxConverter(BaseConverter):
copy_frames=frame_grp.copy,
)
# in case of z we append an index according to if
# is already registered or not
if isinstance(root_acquisition, ZSeriesBaseAcquisition):
with EntryReader(frame_grp.url) as entry:
z = root_acquisition.get_z(entry)
if z not in self._acquisitions:
start_index += 1
if require_pcotomo_expected_nx_tomo:
if frame_grp.frame_type is AcquisitionStep.PROJECTION:
nb_loop = root_acquisition.get_nb_loop(frame_grp.url)
nb_tomo = root_acquisition.get_nb_tomo(frame_grp.url)
if nb_loop is not None and nb_tomo is not None:
start_index += int(nb_loop) * int(nb_tomo)
require_pcotomo_expected_nx_tomo = False
def build_acquisition_classes_frm_titles(self):
"""
Build Acquisition classes that will be used for conversion.
......@@ -369,6 +405,8 @@ class _H5ToNxConverter(BaseConverter):
# list of acquisitions. Once process each of those acquisition will
# create one 'scan'
current_acquisition = None
start_index = 0
require_pcotomo_expected_nx_tomo = False
for group_name in groups:
_logger.debug("parse {}".format(group_name))
entry = h5d[group_name]
......@@ -393,6 +431,11 @@ class _H5ToNxConverter(BaseConverter):
self.configuration.format = "standard"
entry_type = get_entry_type(url=url, configuration=self.configuration)
if entry_type is AcquisitionStep.INITIALIZATION:
if require_pcotomo_expected_nx_tomo is True:
_logger.warning(
f"Fail to retrieve expected number of nxtomo for {current_acquisition}"
)
# Handle XRD-CT dataset
if self.configuration.is_xrdc_ct:
......@@ -415,8 +458,11 @@ class _H5ToNxConverter(BaseConverter):
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += current_acquisition.get_expected_nx_tomo()
acquisitions.append(current_acquisition)
elif self._ignore_entry_frm_titles(group_name):
current_acquisition = None
elif not self._ignore_sub_entry(url):
......@@ -445,7 +491,9 @@ class _H5ToNxConverter(BaseConverter):
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += current_acquisition.get_expected_nx_tomo()
acquisitions.append(current_acquisition)
current_acquisition.register_step(
url=url,
......@@ -464,33 +512,46 @@ class _H5ToNxConverter(BaseConverter):
_logger.warning("ignore entry {}".format(entry))
# Handle "standard" tomo dataset
elif entry_type is AcquisitionStep.INITIALIZATION:
try:
if is_z_series_frm_titles(
entry=entry, configuration=self.configuration
):
current_acquisition = ZSeriesBaseAcquisition(
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += current_acquisition.get_expected_nx_tomo()
elif is_pcotomo_frm_titles(
entry=entry, configuration=self.configuration
):
current_acquisition = PCOTomoAcquisition(
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += 0
# this will be defined with the projections
self._require_pcotomo_expected_nx_tomo = True
else:
current_acquisition = StandardAcquisition(
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
start_index=start_index,
)
start_index += current_acquisition.get_expected_nx_tomo()
except Exception as e:
if self._ignore_entry_frm_titles(group_name):
continue
else:
raise e
if self._ignore_entry_frm_titles(group_name):
current_acquisition = None
continue
if is_z_series_frm_titles(
entry=entry, configuration=self.configuration
):
current_acquisition = ZSeriesBaseAcquisition(
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
)
elif is_pcotomo_frm_titles(
entry=entry, configuration=self.configuration
):
current_acquisition = PCOTomoAcquisition(
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
)
else:
current_acquisition = StandardAcquisition(
root_url=url,
configuration=self.configuration,
detector_sel_callback=self.detector_sel_callback,
)
acquisitions.append(current_acquisition)
# continue "standard" tomo dataset handling
elif current_acquisition is not None and not self._ignore_sub_entry(
......@@ -501,6 +562,22 @@ class _H5ToNxConverter(BaseConverter):
entry_type=entry_type,
copy_frames=self.configuration.default_copy_behavior,
)
# in case of z we append an index according to if
# is already registered or not
if isinstance(current_acquisition, ZSeriesBaseAcquisition):
with EntryReader(url) as entry:
z = current_acquisition.get_z(entry)
if z not in self._acquisitions:
start_index += start_index
if require_pcotomo_expected_nx_tomo:
if entry_type is AcquisitionStep.PROJECTION:
nb_loop = current_acquisition.get_nb_loop(url)
nb_tomo = current_acquisition.get_nb_tomo(url)
if nb_loop is not None and nb_tomo is not None:
start_index += int(nb_loop) * int(nb_tomo)
require_pcotomo_expected_nx_tomo = False
else:
_logger.info("ignore entry {}".format(entry))
if self.progress is not None:
......@@ -555,25 +632,29 @@ class _H5ToNxConverter(BaseConverter):
"init (zserie) titles have been found. You can "
"provide more."
)
# step 2: check validity of all the acquisition sequence (consistency)
# or write output
if self.progress is not None:
self.progress.set_name("write sequences")
self.progress.set_max_advancement(len(self.acquisitions))
shift_entry = 0
# write nx_tomo per acquisition
divide_into_sub_files = self.configuration.bam_single_file or not (
self.configuration.single_file is False and len(self.acquisitions) == 1
)
for acquisition in self.acquisitions:
if self._ignore_sub_entry(acquisition.root_url):
continue
try:
new_entries = acquisition.write_as_nxtomo(
shift_entry=shift_entry,
shift_entry=acquisition.start_index,
input_file_path=self.configuration.input_file,
request_input=self.configuration.request_input,
input_callback=self.input_callback,
plugins=self.plugins,
divide_into_sub_files=divide_into_sub_files,
)
shift_entry += len(new_entries)
except Exception as e:
if self.configuration.raises_error:
raise e
......@@ -588,7 +669,7 @@ class _H5ToNxConverter(BaseConverter):
self.progress.increase_advancement()
# if we created one file per entry then create a master file with link to those entries
if self.configuration.single_file is False:
if self.configuration.single_file is False and divide_into_sub_files:
_logger.info("create link in %s" % self.configuration.output_file)
for (en_output_file, entry) in res:
with HDF5File(self.configuration.output_file, "a") as master_file:
......
......@@ -581,6 +581,7 @@ class TestStandardAcqConversionWithExternalUrls(unittest.TestCase):
proj_url_4 = DataUrl(file_path=file_2, data_path="/2.1", scheme="silx")
self.config.default_copy_behavior = True
self.config.bam_single_file = True
self.config.data_frame_grps = (
FrameGroup(frame_type="dark", url=dark_url_1),
FrameGroup(frame_type="flat", url=flat_url_1),
......
......@@ -322,6 +322,9 @@ class TomoHDF5Config:
self._no_input = False
self._format = Format.STANDARD
self._single_file = False
self._bam_single_file = False
# a single file is created by default if there is only one entry per file,
# but we can enforce multi-file writing
self._field_of_view = None
# information regarding keys and paths
......@@ -476,6 +479,17 @@ class TomoHDF5Config:
else:
self._single_file = single_file
@property
def bam_single_file(self):
    """Return the boolean flag stored in ``self._bam_single_file``."""
    return self._bam_single_file

@bam_single_file.setter
def bam_single_file(self, bam: bool):
    """
    Store `bam` as the new flag value.

    :param bam: new value, must be a real ``bool``
    :raises TypeError: if `bam` is not a ``bool``
    """
    if isinstance(bam, bool):
        self._bam_single_file = bam
        return
    raise TypeError("'bam' should be a boolean")
@property
def field_of_view(self) -> Union[None, FieldOfView]:
return self._field_of_view
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment