Commit 200d9e6b authored by Wout De Nolf's avatar Wout De Nolf
Browse files

[writer] save groups and node/scan metadata

parent b763a482
Pipeline #19745 passed with stages
in 35 minutes and 40 seconds
......@@ -19,13 +19,13 @@ device:
session: test_session
```
Alternatively you can register the device manually or by using this helper script, which ensures that there is only one writer listening per BLISS session
The device class should always be __NexusWriter__ and the __session__ property should be the BLISS session name. If you want to register the device manually with the TANGO database, you can use a helper function to avoid mistakes (correct class name and session property, only one TANGO device per session)
```bash
$ python -m nexus_writer_service.nexus_register_writer test_session --domain id00 --instance nexuswriters
```
In this example we registered a writer for BLISS session __test_session__ which runs under domain __id00__ in TANGO server instance __nexuswriters__. The device family is __bliss_nxwriter__ by default and the device name is equal to the session name. Running multiple session writers in on TANGO server instance (i.e. one process) is allowed but not recommended if the associated BLISS sessions may produce lots of data simultaneously.
In this example we registered a writer for BLISS session __test_session__ which runs under domain __id00__ in TANGO server instance __nexuswriters__. By default the device family is __bliss_nxwriter__ and the device name is equal to the session name. Running multiple session writers in one TANGO server instance (i.e. one process) is allowed but not recommended if the associated BLISS sessions may produce lots of data simultaneously.
### Start the Tango server
......
......@@ -68,7 +68,7 @@ def close_files(*fds):
pass
else:
raise
except Exception as e:
except BaseException as e:
exceptions.append(e)
if exceptions:
raise Exception(exceptions)
......@@ -92,6 +92,39 @@ def asNxChar(s, raiseExtended=True):
return numpy.array(s, dtype=nxcharUnicode)
def isString(data):
    """
    Check whether `data` is a string from the Nexus point of view.

    :param data:
    :returns bool:
    """
    return isinstance(data, unicode) or isinstance(data, bytes)
def asNxType(data):
    """
    Convert data to a Nexus-compatible data type.

    :param data:
    :returns: converted data (unchanged when no conversion applies)
    """
    if isString(data):
        return asNxChar(data)
    if isinstance(data, (list, tuple)) and data:
        # TODO: does not handle numpy string arrays
        if all(isString(item) for item in data):
            return asNxChar(data)
    return data
def createNxValidate(createkws):
    """
    Validate and normalize `create_dataset` keyword arguments so the
    resulting dataset is Nexus compliant.

    :param dict createkws: modified in place ("data" converted with
                           `asNxType`, string dtypes mapped to the
                           Nexus character dtypes)
    """
    if "data" in createkws:
        createkws["data"] = asNxType(createkws["data"])
    dtype = createkws.get("dtype")
    if dtype is not None:
        if dtype is unicode:
            createkws["dtype"] = nxcharUnicode
            # fillvalue is not supported for variable-length strings;
            # pop with a default so a missing key does not raise KeyError
            createkws.pop("fillvalue", None)
        elif dtype is bytes:
            createkws["dtype"] = nxcharBytes
            createkws.pop("fillvalue", None)
class LocalTZinfo(datetime.tzinfo):
_offset = datetime.timedelta(seconds=-time.altzone)
......@@ -122,7 +155,7 @@ def datetime_to_nexus(tm):
"""
if tm.tzinfo is None:
tm = LocalTZinfo().localize(tm)
return asNxChar(tm.isoformat())
return asNxType(tm.isoformat())
def timestamp():
......@@ -353,7 +386,7 @@ def uriContains(uri, datasets=None, attributes=None):
for attr in attributes:
if attr not in attrs:
return False
except Exception:
except BaseException:
return False
return True
......@@ -502,7 +535,7 @@ def nxClassInstantiate(parent, name, nxclass, raise_on_exists=False):
try:
group = parent.create_group(name)
exists = False
except Exception as e:
except BaseException as e:
exists = True
group = parent[name]
if raise_on_exists:
......@@ -548,12 +581,10 @@ def updateDataset(parent, name, data):
:param str name:
:param data:
"""
if isinstance(data, (unicode, bytes)):
data = asNxChar(data)
if name in parent:
parent[name][()] = data
parent[name][()] = asNxType(data)
else:
parent[name] = data
parent[name] = asNxType(data)
def nxClassInit(
......@@ -823,7 +854,7 @@ class File(h5py.File):
# Try setting writing in SWMR mode
try:
self.swmr_mode = True
except Exception:
except BaseException:
pass
except OSError as e:
# errno.EAGAIN: file is locked
......@@ -1187,7 +1218,7 @@ def createLink(h5group, name, destination):
:param str or h5py.Dataset destination:
:returns: h5py link object
"""
if not isinstance(destination, (unicode, bytes)):
if not isString(destination):
destination = getUri(destination)
if "::" in destination:
destination = splitUri(destination)
......@@ -1246,9 +1277,8 @@ def nxCreateDataSet(h5group, name, value, attrs, stringasuri=False):
# link when attributes match the existing dataset
if not merge:
value = data
elif isinstance(data, (unicode, bytes)):
# dataset of string type
value["data"] = asNxChar(data)
else:
value["data"] = asNxType(data)
if value is None:
# dataset exists already or will be created elsewhere
pass
......@@ -1278,17 +1308,12 @@ def nxCreateDataSet(h5group, name, value, attrs, stringasuri=False):
# value['external'] = [(os.path.relpath(tpl[0], dirname),) + tpl[1:]
# for tpl in external]
logger.debug("Create HDF5 dataset {}/{}".format(getUri(h5group), name))
createNxValidate(value)
h5group.create_dataset(name, **value)
else:
# create dataset (internal) without extra options
if isinstance(value, (unicode, bytes)):
value = asNxChar(value)
elif isinstance(value, (list, tuple)):
if value:
if isinstance(value[0], (unicode, bytes)):
value = asNxChar(value)
logger.debug("Create HDF5 dataset {}/{}".format(getUri(h5group), name))
h5group[name] = value
h5group[name] = asNxType(value)
dset = h5group.get(name, None)
if attrs and dset is not None:
attrs = {k: v for k, v in attrs.items() if v is not None}
......@@ -1528,3 +1553,189 @@ def getDefaultUri(filename, signal=True):
return filename + "::" + path
else:
return None
def _delete_attributes(destination):
for k in list(destination.attrs.keys()):
del destination.attrs[k]
def _delete_children(destination):
for k in list(destination.keys()):
del destination[k]
def _update_attributes(destination, attrs):
for k, v in attrs.items():
destination.attrs[k] = v
def _dicttonx_create_attr(destination, name, value, update=False):
    """
    Create a group or dataset attribute (value must be a string).

    :param h5py.Group or h5py.Dataset destination:
    :param str name: an optional leading "@" is stripped
    :param str value: skipped when None
    :param bool update: overwrite the value when the attribute exists
    :raises ValueError: when `value` is not a string
    """
    if value is None:
        return
    attr_name = name[1:] if name.startswith("@") else name
    if attr_name in destination.attrs and not update:
        return
    if not isString(value):
        raise ValueError(
            "Attribute {} of {} must be a string".format(
                repr(attr_name), repr(destination.name)
            )
        )
    destination.attrs[attr_name] = asNxChar(value)
def _dicttonx_create_dataset(destination, name, value, overwrite=False, update=False):
    """
    Create a dataset (optionally replacing or updating an existing one).

    :param h5py.Group destination:
    :param str name:
    :param value: dataset value (converted with `asNxType`)
    :param bool overwrite: replace the value; existing attributes are deleted
    :param bool update: replace the value; existing attributes are preserved
    :returns: the dataset at `destination[name]`
    """
    if name in destination:
        if overwrite:
            value = asNxType(value)
            try:
                # Preserve dataset when possible
                destination[name][()] = value
            except BaseException:
                # In-place write failed (e.g. shape/dtype mismatch):
                # recreate the dataset (attributes are lost with it)
                del destination[name]
                destination[name] = value
            else:
                # In-place write succeeded: drop the old attributes
                _delete_attributes(destination[name])
        elif update:
            value = asNxType(value)
            try:
                # Preserve dataset when possible
                destination[name][()] = value
            except BaseException:
                # Recreate the dataset but carry the old attributes over
                attrs = dict(destination[name].attrs)
                del destination[name]
                destination[name] = value
                _update_attributes(destination[name], attrs)
        # neither overwrite nor update: keep the existing dataset untouched
    else:
        destination[name] = asNxType(value)
    return destination[name]
def _dicttonx_create_group(destination, name, overwrite=False):
    """
    Create a group (or reuse/reset an existing one).

    :param h5py.Group destination:
    :param str name:
    :param bool overwrite: existing datasets and attributes are deleted
    :returns h5py.Group: the group at `destination[name]`
    :raises ValueError: when `name` exists, is not a group and `not overwrite`
    """
    if name in destination:
        if overwrite:
            if not isinstance(destination[name], h5py.Group):
                # Replace a non-group node by a fresh, empty group
                del destination[name]
                destination.create_group(name)
            else:
                # Keep the group node but empty it completely
                _delete_attributes(destination[name])
                _delete_children(destination[name])
        elif not isinstance(destination[name], h5py.Group):
            raise ValueError(
                "{} already exists and is not a group".format(
                    repr(destination[name].name)
                )
            )
    else:
        destination.create_group(name)
    return destination[name]
def dicttonx(treedict, destination, overwrite=False, update=False):
    """
    Write a nested dictionary as a Nexus structure in HDF5.
    Attributes are key-value pairs where the key starts with "@"
    (attribute values need to be strings). A dictionary with only
    attribute keys (starting with "@") and "@data" is treated as
    a dataset.

    :param dict treedict:
    :param h5py.Group or h5py.Dataset destination:
    :param bool overwrite: existing datasets/attributes may
                           disappear or be modified
    :param bool update: existing datasets/attributes do not
                        disappear but may be modified
    :raises ValueError: when an NX_class attribute is given for a dataset
    """
    if isinstance(destination, h5py.Dataset):
        # treedict: dataset attributes only
        if "NX_class" in treedict or "@NX_class" in treedict:
            # Fixed message: was "... attribute to allowed for datasets"
            raise ValueError(
                "{}: '@NX_class' attribute not allowed for datasets".format(
                    repr(destination.name)
                )
            )
        for key, value in treedict.items():
            _dicttonx_create_attr(destination, key, value, update=update)
        return
    treedict = treedict.copy()
    # Normalize "NX_class" to the attribute form "@NX_class"
    if "NX_class" in treedict:
        treedict.setdefault("@NX_class", treedict.pop("NX_class"))
    if "NX_class" not in destination.attrs:
        treedict.setdefault("@NX_class", "NXcollection")
    for key, value in treedict.items():
        if isinstance(value, dict):
            nattrs = sum(k.startswith("@") for k in value.keys())
            if nattrs == len(value) and "@data" in value:
                # Only "@..." keys including "@data": this is a dataset
                value = value.copy()
                rdestination = _dicttonx_create_dataset(
                    destination,
                    key,
                    value.pop("@data"),
                    overwrite=overwrite,
                    update=update,
                )
                dicttonx(value, rdestination, overwrite=overwrite, update=update)
            else:
                # Nested dictionary: this is a group
                rdestination = _dicttonx_create_group(
                    destination, key, overwrite=overwrite
                )
                dicttonx(value, rdestination, overwrite=overwrite, update=update)
        elif value is None:
            pass
        elif key.startswith("@"):
            _dicttonx_create_attr(destination, key, value, update=update)
        else:
            _dicttonx_create_dataset(
                destination, key, value, overwrite=overwrite, update=update
            )
def nxtodict(node):
    """
    Read a Nexus structure as a nested dictionary.
    Attributes become "@"-prefixed keys; a dataset with attributes
    becomes a dictionary with its value under "@data".

    :param h5py.Group or h5py.Dataset node:
    :returns dict:
    """
    if isinstance(node, h5py.Dataset):
        out = {"@" + aname: aval for aname, aval in node.attrs.items()}
        out["@data"] = node[()]
        return out
    out = {}
    for child_name, child in node.items():
        if isinstance(child, h5py.Group):
            out[child_name] = nxtodict(child)
        elif child.attrs:
            entry = {"@" + aname: aval for aname, aval in child.attrs.items()}
            entry["@data"] = child[()]
            out[child_name] = entry
        else:
            out[child_name] = child[()]
    for aname, aval in node.attrs.items():
        out["@" + aname] = aval
    return out
......@@ -28,14 +28,36 @@ def register_all_metadata_generators(force=False):
:param bool force: re-initialize when already done
"""
kwargs = {k: True for k in GENERATORS}
register_metadata_generators(force=force, **kwargs)
register_metadata_categories(force=force, **kwargs)
register_metadata_generators(**kwargs)
def register_metadata_generators(force=False, **kwargs):
def register_metadata_generators(**kwargs):
    """
    Register metadata generators in a bliss session for
    the scan writers (currently only one).

    :param **kwargs: any key of `GENERATORS`
    """
    # Generators are called at the start of the scan:
    #   bliss.scanning.scan.Scan.__init__
    # and at the end of the scan:
    #   bliss.scanning.scan.Scan.run (cleanup section)
    #
    # The generator 'instrument.positioners' is an exception.
    # It is only called at the beginning of the scan by
    # removing it before calling the generators a second time.
    user_meta = scan_meta.get_user_scan_meta()
    for key in GENERATORS:
        if kwargs.get(key, False):
            GENERATORS[key].register_metadata_generators(user_meta)
def register_metadata_categories(force=False, **kwargs):
"""
Register metadata categories in a bliss session for
the scan writers (currently only one).
:param bool force: re-initialize when already done
:param **kwargs: any key of `GENERATORS`
"""
......@@ -54,16 +76,4 @@ def register_metadata_generators(force=False, **kwargs):
scan_meta.CATEGORIES = enum.Enum(
scan_meta.CATEGORIES.__name__, list(categories)
)
generators = scan_meta.scan_meta()
scan_meta.USER_SCAN_META = generators
# Generators are called at the start of the scan:
# bliss.scanning.scan.Scan.__init__
# and at the end of the scan
# run bliss.scanning.scan.Scan.run (cleanup section)
#
# The generator 'instrument.positioners' is an exception.
# It is only called at the beginning of the scan by
# removing it before calling the generators a second time.
for k, mod in GENERATORS.items():
if kwargs.get(k, False):
mod.register_metadata_generators(generators)
scan_meta.USER_SCAN_META = scan_meta.scan_meta()
......@@ -106,7 +106,7 @@ def ensure_existence(
db = Database()
if not member:
member = session_name
if not domain():
if not domain:
domain = beamline()
dev_name = "/".join([domain, family, member])
if use_existing:
......
# -*- coding: utf-8 -*-
#
# This file is part of the nexus writer service of the BLISS project.
#
# Code is maintained by the ESRF Data Analysis Unit.
#
# Original author: Wout de Nolf
#
# Copyright (c) 2015-2019 ESRF
# Distributed under the GNU LGPLv3. See LICENSE for more info.
import os
import abc
import logging
import numpy
from contextlib import contextmanager
from ..utils.logging_utils import CustomLogger
from ..io import nexus
logger = logging.getLogger(__name__)
class BaseProxy(abc.ABC):
"""
Wraps HDF5 creation and growth.
"""
def __init__(self, filename=None, parent=None, filecontext=None, parentlogger=None):
"""
:param str filename: HDF5 file name
:param str filecontext: HDF5 open context manager
:param str parent: path in the HDF5 file
:param parentlogger:
"""
if filecontext is None:
filecontext = self._filecontext
self.filename = filename
self.filecontext = filecontext
self.parent = parent
if parentlogger is None:
parentlogger = logger
self.logger = CustomLogger(parentlogger, self)
self.npoints = 0
def __repr__(self):
if self.name:
return self.path
else:
return os.path.splitext(os.path.basename(self.filename))[0]
@property
def path(self):
if self.name:
return "/".join([self.parent, self.name])
else:
return self.parent
@property
def uri(self):
return self.filename + "::" + self.path
@abc.abstractproperty
def name(self):
pass
    @contextmanager
    def _filecontext(self):
        # Default file context: open the HDF5 file in append mode
        # (created when missing) and yield the NX root node.
        with nexus.nxRoot(self.filename, mode="a") as nxroot:
            yield nxroot
    def ensure_existance(self):
        """
        Create the HDF5 node when it does not exist yet.

        NOTE(review): "existance" is a typo for "existence" but the
        name is part of the public API (see `open`), so it is kept.
        """
        with self.filecontext() as nxroot:
            if self.exists:
                return
            self._create(nxroot)
    @abc.abstractmethod
    def _create(self, nxroot):
        """Create the HDF5 node under `nxroot` (implemented by subclasses)."""
        pass
    @property
    def exists(self):
        """
        Whether the HDF5 node exists in the file.

        :returns bool:
        """
        with self.filecontext() as nxroot:
            return self.path in nxroot
    @contextmanager
    def open(self, ensure_existance=False):
        """
        Open the file and yield the HDF5 node (None when missing).

        :param bool ensure_existance: create the node when it does not exist
        :yields h5py.Dataset or None:
        """
        with self.filecontext() as nxroot:
            if ensure_existance:
                self.ensure_existance()
            if self.path in nxroot:
                yield nxroot[self.path]
            else:
                # Caller gets None; warn so missing data can be traced
                self.logger.warning(repr(self.uri) + " does not exist")
                yield None
    def add(self, newdata):
        """
        Add data to the underlying HDF5 node, creating it when needed.
        `npoints` is increased by the number of inserted points.

        :param sequence newdata:
        :raises TypeError: re-raised after logging (from `_insert_data`)
        """
        with self.open(ensure_existance=True) as destination:
            try:
                self.npoints += self._insert_data(destination, newdata)
            except TypeError as e:
                self.logger.error(e)
                raise
    @abc.abstractmethod
    def _insert_data(self, destination, newdata):
        """
        Insert new data (implemented by subclasses).

        :param h5py.Dataset or h5py.Group destination:
        :param sequence newdata:
        :returns int: number of added points
        """
        pass
    @property
    def npoints_expected(self):
        """Expected number of points (0 means unknown/variable length)."""
        return 0
@property
def complete(self):
"""
Variable length scans are marked complete when we have some data
"""
n, nall = self.npoints, self.npoints_expected
return n and n >= nall
@property
def progress(self):
if self.npoints_expected:
return self.npoints / self.npoints_expected
else:
if self.npoints:
return numpy.nan
else:
return 0
@property
def progress_string(self):
if self.npoints_expected:
sortkey = self.npoints / self.npoints_expected
s = "{:.0f}%".format(sortkey * 100)
else:
sortkey = self.npoints
s = "{:d}pts".format(sortkey)
return s, sortkey
    @property
    def _progress_log_suffix(self):
        """Extra text appended to progress log messages (subclass hook)."""
        return ""
def log_progress(self, expect_complete=False):
"""
:param bool expect_complete:
:returns int, bool, str:
"""
npoints_expected = self.npoints_expected
npoints_current = self.npoints
complete = self.complete
if expect_complete:
if complete:
msg = "{}/{} points published{}".format(
npoints_current, npoints_expected, self._progress_log_suffix
)
self.logger.debug(msg)
else:
msg = "only {}/{} points published{}".format(
npoints_current, npoints_expected, self._progress_log_suffix
)
self.logger.warning(msg)
else:
msg = "progress {}/{}{}".format(
npoints_current, npoints_expected, self._progress_log_suffix
)
self.logger.debug(msg)
return complete