Skip to content
Snippets Groups Projects
Commit 4a27a736 authored by casagran's avatar casagran
Browse files

implement zwise median widget

parent da81e553
No related branches found
No related tags found
1 merge request!6Draft: Resolve "py-bkg-rm"
"""
Open 3D data from h5 files and return numpy array.
Open 3D _data from h5 files and return numpy array.
"""
import logging
......@@ -11,8 +11,7 @@ from Orange.widgets.settings import Setting
from Orange.widgets.widget import Output, OWWidget
from pydct import common
from pydct.common import Bad3DDataError, BadDataUrlError
from PyQt5.QtWidgets import QSizePolicy
from silx.gui.qt import QFileDialog, QPushButton, QThread
from silx.gui.qt import QFileDialog, QThread
from silx.io import get_data
from silx.io.url import DataUrl
......@@ -20,7 +19,7 @@ _logger = logging.getLogger("orangecontrib.pydct")
class LoadDataThread(QThread):
"""load the data using silx's `get_data`"""
"""load the _data using silx's `get_data`"""
def __init__(self, parent, url):
super().__init__(parent=parent)
......@@ -28,7 +27,7 @@ class LoadDataThread(QThread):
self.data = None
def run(self):
"""load the data using silx's `get_data`"""
"""load the _data using silx's `get_data`"""
try:
data = common.catch_silx_io_exceptions(get_data)(self.url)
......@@ -39,16 +38,16 @@ class LoadDataThread(QThread):
except Bad3DDataError as ex:
_logger.exception(ex)
# _logger.warning("data is not a 3D dataset?")
# _logger.warning("_data is not a 3D dataset?")
else:
self.data = data
class LoadDataFromDataUrl(OWWidget):
"""Build a silx.io.url.DataUrl and load the 3D data pointed by it."""
"""Build a silx.io.url.DataUrl and load the 3D _data pointed by it."""
name = "Load data from URL"
name = "Load _data from URL"
description = "Load a 3D dataset from an h5 url (scheme + file path + group link) as in silx.io.url"
icon = "icons/url.svg"
......@@ -85,8 +84,8 @@ class LoadDataFromDataUrl(OWWidget):
callback=self._select_filename,
)
# ========================= data link =========================
data_link_box = gui.widgetBox(self.controlArea, "data link")
# ========================= _data link =========================
data_link_box = gui.widgetBox(self.controlArea, "_data link")
self._data_link_line = gui.lineEdit(
data_link_box,
......@@ -142,13 +141,13 @@ class LoadDataFromDataUrl(OWWidget):
def _load_onclick(self):
if self._loading:
_logger.warning("already loading data")
_logger.warning("already loading _data")
return
self._url_label.setText(self.url_display)
# if self.url is None:
# self.error("Pick an h5 file and filling the data path!")
# self.error("Pick an h5 file and filling the _data path!")
# return
dataurl = DataUrl(self.url)
......@@ -158,10 +157,10 @@ class LoadDataFromDataUrl(OWWidget):
except BadDataUrlError as ex:
_logger.exception(ex)
self.error("Invalid data url, please verify the inputs!")
self.error("Invalid _data url, please verify the inputs!")
return
_logger.info("loading data from a data url")
_logger.info("loading _data from a _data url")
self._data_link_line.setDisabled(True)
self._select_file_button.setDisabled(True)
......@@ -180,10 +179,10 @@ class LoadDataFromDataUrl(OWWidget):
self._data_link_line.setDisabled(False)
self._select_file_button.setDisabled(False)
self.information()
_logger.info("done loading data")
_logger.info("done loading _data")
if self._thread.data is not None:
self.Outputs.data.send(self._thread.data)
else:
self.error("data not loaded, please verify the inputs (see logs for more detail)")
self.error("_data not loaded, please verify the inputs (see logs for more detail)")
"""Take the median on the z-axis (position 0)."""
import logging
import numpy as np
from numpy import ndarray
from Orange.widgets import gui, settings, widget
from Orange.widgets.utils.signals import Input, Output
from Orange.widgets.widget import OWWidget
from silx.gui.qt import QFileDialog, QThread
_logger = logging.getLogger("orangecontrib.pydct")
class ZwiseMedianThread(QThread):
    """Background thread that computes the median of a volume along axis 0.

    The result is published through the ``data_squeezed`` attribute, which
    stays ``None`` until ``run`` has completed successfully. Callers are
    expected to read it after the thread's ``finished`` signal fired.
    """

    def __init__(self, parent, data):
        """Store the input and prepare an empty result slot.

        Args:
            parent: Qt parent object, forwarded to ``QThread``.
            data: the volume to reduce; the median is taken over its axis 0.
        """
        super().__init__(parent=parent)
        self.data = data  # todo make dataclass
        self.data_squeezed = None

    def run(self):
        """Compute ``np.squeeze(np.median(self.data, axis=0))``.

        On success the result is stored in ``data_squeezed``. On failure the
        exception is logged and ``data_squeezed`` is left as ``None`` so the
        owner of the thread can detect and report the problem.
        """
        try:
            squeezed = np.squeeze(np.median(self.data, axis=0))
        except Exception as ex:
            # This is the thread's top-level boundary: an exception escaping a
            # Python override invoked by Qt is not reported to the widget (and
            # PyQt5 may treat it as fatal), so log it here instead, like
            # `LoadDataThread.run` does.
            _logger.exception(ex)
        else:
            self.data_squeezed = squeezed
class ZwiseMedian(OWWidget):
    """Orange widget reducing a volume to a single frame with a z-wise median.

    The computation is delegated to a ``ZwiseMedianThread`` so the GUI is not
    blocked. Only one computation runs at a time: if a new input arrives
    while one is running, the newest input is remembered and processed as
    soon as the running computation ends.
    """

    name = "Z-wise median"
    description = (
        "Squeeze a volume on the z-axis to get a single frame where each pixel is the median of its position on the input."
    )
    icon = "icons/one_round.svg"

    want_main_area = False
    resizing_enabled = False

    # marker meaning "no input is waiting" (`None` itself is a valid input)
    _NO_PENDING = object()

    class Inputs:
        # NOTE(review): the signal name "_data" looks like a side effect of a
        # global data -> _data rename; it is kept as-is because the name is
        # part of the widget's interface -- confirm before changing it.
        data = Input("_data", ndarray)

    class Outputs:
        data_squeezed = Output("data_squeezed", ndarray)

    def __init__(self):
        super().__init__()
        self._computing = False
        self._thread = None
        self._pending = self._NO_PENDING

    @Inputs.data
    def set_data(self, dataset):
        """Receive a new input volume and (re)compute its z-wise median.

        Args:
            dataset: the input volume, or ``None`` (presumably what Orange
                sends when the input link is removed -- see Orange docs).

        todo add validations here
        """
        # todo add a button to force update?
        if self._computing:
            # do not start a second thread: it would overwrite `self._thread`
            # and `_send_signal` would then disconnect/read the wrong thread
            _logger.warning("already updating")
            self._pending = dataset
            return

        self._start_computation(dataset)

    def _start_computation(self, dataset):
        """Clear stale errors and launch the worker thread for `dataset`."""
        self.error()

        if dataset is None:
            # nothing to compute: propagate the absence of data downstream
            self.Outputs.data_squeezed.send(None)
            return

        self._thread = ZwiseMedianThread(parent=self, data=dataset)
        self._thread.finished.connect(self._send_signal)
        self._computing = True
        self.information("computing...")
        self._thread.start()

    def _send_signal(self):
        """Slot for the worker's ``finished`` signal: publish or report failure."""
        thread = self._thread
        thread.finished.disconnect(self._send_signal)
        self._thread = None
        self._computing = False
        self.information()
        _logger.info("done computing zwise median")

        result = thread.data_squeezed
        # the finished thread still references the whole input volume;
        # let Qt dispose of it instead of keeping it as a child forever
        thread.deleteLater()

        if self._pending is not self._NO_PENDING:
            # a newer input arrived meanwhile: the result above is outdated,
            # so skip it and process the latest input instead
            dataset, self._pending = self._pending, self._NO_PENDING
            self._start_computation(dataset)
            return

        if result is not None:
            self.Outputs.data_squeezed.send(result)
        else:
            self.error("something went wrong and the thread did not return the squeezed data")
......@@ -27,9 +27,9 @@ update_mode: same values from silx.io.dictdump.dicttoh5 (files are saved in file
defaults to 'add'
Behavior:
- 'modify' will overwrite existing data, but not the whole file nor the entire group, only the affected stuff
- 'modify' will overwrite existing _data, but not the whole file nor the entire group, only the affected stuff
- 'add' will NOT overwrite, only create new stuff without modifying other links in the group;
- 'replace' will replace the whole data tree (group pointed by the link) in the h5, but not the entire file
- 'replace' will replace the whole _data tree (group pointed by the link) in the h5, but not the entire file
Reference: http://www.silx.org/doc/silx/latest/modules/io/dictdump.html#silx.io.dictdump.dicttoh5
"""
......@@ -119,7 +119,7 @@ def catch_silx_io_exceptions(get_data_func, logger_=None):
@functools.wraps(get_data_func)
def wrapper(data_url: DataUrl):
logger_.debug("loading data")
logger_.debug("loading _data")
try:
data = get_data_func(data_url)
......@@ -147,12 +147,12 @@ def catch_silx_io_exceptions(get_data_func, logger_=None):
logger_.exception(ex)
msg = ex.args[0]
# specify the cause when the data path is missing
# specify the cause when the _data path is missing
if msg == "Argument 'path' must not be None":
raise ValueError(f"{data_url.path()=} missing data path (h5 internal link after '::')") from ex
raise ValueError(f"{data_url.path()=} missing _data path (h5 internal link after '::')") from ex
if msg == "expected bytes, NoneType found":
raise ValueError(f"{data_url.path()=} missing data path (h5 internal link after '::')") from ex
raise ValueError(f"{data_url.path()=} missing _data path (h5 internal link after '::')") from ex
raise
......@@ -165,7 +165,7 @@ def catch_silx_io_exceptions(get_data_func, logger_=None):
if "Data path from URL" in msg and "not found" in msg:
raise BadDataUrlError(f"{data_url.data_path()=} not in {data_url.file_path()=}") from ex
logger_.debug(f"data successfully loaded with `{get_data_func.__name__}`")
logger_.debug(f"_data successfully loaded with `{get_data_func.__name__}`")
return data
......@@ -191,9 +191,9 @@ class namespace2kwargs:
def _data2shared(data: ndarray, multiprocessing_rawarray: Optional[RawArray]) -> Tuple[ndarray, RawArray]:
"""Check if the data is already in a shared memor space or return such if not.
"""Check if the _data is already in a shared memor space or return such if not.
This is to avoid recopying the data to a cross-process shared space, a very long operation.
This is to avoid recopying the _data to a cross-process shared space, a very long operation.
"""
if data.ndim != 3:
......
......@@ -13,9 +13,9 @@ from silx.io.url import DataUrl
@pytest.fixture(scope="session")
def realdata00_h5():
"""a volume from marana with 100 (2048, 2048)-frames"""
dir_ = Path("/data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/scan0002")
dir_ = Path("/_data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/scan0002")
path = dir_ / "marana_0000.h5"
datalink = "/entry_0000/ESRF-ID11/marana/data"
datalink = "/entry_0000/ESRF-ID11/marana/_data"
urlstr = f"silx:{str(path)}::{datalink}"
return path, datalink, urlstr
......@@ -26,7 +26,7 @@ def realdata00_url(realdata00_h5):
path, datalink, urlstr = realdata00_h5
url = DataUrl(urlstr)
# make sure the data exists
# make sure the _data exists
assert path.exists()
assert path.is_file()
assert url.is_valid()
......@@ -39,11 +39,11 @@ def realdata00_url(realdata00_h5):
@pytest.fixture
def output_h5(tmp_path):
"""generic url to use as data output"""
"""generic url to use as _data output"""
path = tmp_path / "output.h5"
grouplink = "/computed_suff"
urlstr = f"silx:{str(path.absolute())}::{grouplink}"
return path, grouplink, urlstr, f"{urlstr}/data"
return path, grouplink, urlstr, f"{urlstr}/_data"
@pytest.fixture
......@@ -66,7 +66,7 @@ def output_url(output_h5):
@pytest.fixture(scope="session")
def darkend00_h5():
"""a volume from marana with 21 (2048, 2048)-frames of a darkend acquisition"""
dir_ = Path("/data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2")
dir_ = Path("/_data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2")
path = dir_ / "bmg_l1_bmg_dct2.h5"
datalink = "/10.1/measurement/marana"
urlstr = f"silx:{str(path)}::{datalink}"
......@@ -75,7 +75,7 @@ def darkend00_h5():
@pytest.fixture(scope="session") # session because the `get_data` is slow
def darkend00_url(darkend00_h5):
"""make sure the data exists (to be used as parameter to the calls)"""
"""make sure the _data exists (to be used as parameter to the calls)"""
path, datalink, urlstr = darkend00_h5
url = DataUrl(urlstr)
......
......@@ -88,7 +88,7 @@ def main(
Do the entire preprocessing:
- todo list steps
!!IMPORTANT!! The data volume's axes order is assumed to be (z, x, y).
!!IMPORTANT!! The _data volume's axes order is assumed to be (z, x, y).
Relevant functions called:
......@@ -107,24 +107,24 @@ def main(
https://stackoverflow.com/questions/53751050/python-multiprocessing-understanding-logic-behind-chunksize
IO: reads data with `silx.io.get_data` and writes with `silx.io.dictdump.dicttoh5`
IO: reads _data with `silx.io.get_data` and writes with `silx.io.dictdump.dicttoh5`
Args:
input_url: url (scheme + path + link) to READ the INPUT data
file and its internal link with the data
ex: silx:/data/id11/my.h5::/2.1/measurement/marana
input_url: url (scheme + path + link) to READ the INPUT _data
file and its internal link with the _data
ex: silx:/_data/id11/my.h5::/2.1/measurement/marana
obs1: 'silx:' indicates the h5 scheme
obs2: silx (the library) also supports 'fabio', but that is not supported here
dark_url: like `url`, but for the dark image
output_url: where the modified data is dumped to (an h5 group)
output_url: where the modified _data is dumped to (an h5 group)
url (scheme + path + link) to WRITE the OUTPUT (see `url`)
the outputs are dump from a dict into an h5 group (see internal class Outputs)
normalization: method used to compensate beam oscillations
normalization_numerator: an arbitrary value used as numerator when normalizing the data
normalization_numerator: an arbitrary value used as numerator when normalizing the _data
margin_bounding_box: margin region used to compute the mean when using normalization method ~margin mean~
upper left and bottom right coordinates given as (x, y)
......@@ -189,7 +189,7 @@ def main(
raise ValueError(f"{median_window=} must be positive and at most {nz=}")
if (darknx, darkny) != (nx, ny):
raise ValueError(f"incompatible dark/data shapes on XY {dark_shape=} {data_shape=}")
raise ValueError(f"incompatible dark/_data shapes on XY {dark_shape=} {data_shape=}")
# ul = upper left, br = bottom right
((ulx, uly), (brx, bry)) = margin_bounding_box
......@@ -225,7 +225,7 @@ def main(
logger.debug("the args look good to go")
# ============================ data ============================
# ============================ _data ============================
logger.info("loading dark")
dark = common.catch_silx_io_exceptions(get_data, logger)(dark_url)
......@@ -238,17 +238,17 @@ def main(
logger.debug("the dark image looks fine")
logger.info("loading data")
logger.info("loading _data")
data = common.catch_silx_io_exceptions(get_data, logger)(input_url)
logger.debug("validating data")
logger.debug("validating _data")
data = common.validate_3d_data(data)
if data.dtype != np.float32:
logger.warning(f"converting {data.dtype=} to {np.float32.__name__}")
data = data.astype(np.float32)
logger.debug("the data looks fine")
logger.debug("the _data looks fine")
# ============================ processing ============================
......@@ -257,7 +257,7 @@ def main(
logger.debug("squeezing the dark volume on the z-axis")
dark = np.squeeze(np.mean(dark, axis=0)) # todo verify if this is median or mean
logger.debug("subtract dark from data")
logger.debug("subtract dark from _data")
data = data - dark
if normalization is None:
......@@ -365,7 +365,7 @@ preprocess_parser.add_argument(
"--in",
type=DataUrl,
metavar="silx:input.h5::/3d_data",
help="url (scheme + path + link) to the INPUT data",
help="url (scheme + path + link) to the INPUT _data",
required=True,
dest="url",
)
......@@ -374,7 +374,7 @@ preprocess_parser.add_argument(
"--dark",
type=DataUrl,
metavar="silx:darkend.h5::/3d_data",
help="url (scheme + path + link) to the DARK data (no beam)",
help="url (scheme + path + link) to the DARK _data (no beam)",
required=True,
dest="dark_url",
)
......@@ -496,10 +496,12 @@ class ExamplesAction(ExampleCallsActionAbstract):
@property
def examples(self) -> List[str]:
filename = Path(__file__).name
in_url_str = "silx:/data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/bmg_l1_bmg_dct2.h5::/10.1/measurement/marana "
out_url_str = "silx:/tmp/output.h5::/data"
in_url_str = (
"silx:/_data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/bmg_l1_bmg_dct2.h5::/10.1/measurement/marana "
)
out_url_str = "silx:/tmp/output.h5::/_data"
dark_url_str = (
"silx:/data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/bmg_l1_bmg_dct2.h5::/10.1/measurement/marana"
"silx:/_data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/bmg_l1_bmg_dct2.h5::/10.1/measurement/marana"
)
return [
f"{filename} --help",
......
......@@ -91,7 +91,7 @@ def test_parser_run_with_defaults(parser_file_args, output_h5):
assert path.exists()
assert path.is_file()
data_urlstr = f"{urlstr}/data" # this is specific to the `Outputs` obj in `main`
data_urlstr = f"{urlstr}/_data" # this is specific to the `Outputs` obj in `main`
data = get_data(data_urlstr)
assert data.shape == (100, 2048, 2048)
......@@ -195,7 +195,7 @@ def test_main(main_kwargs, output_h5):
assert path.exists()
assert path.is_file()
data_urlstr = f"{urlstr}/data" # this is specific to the `Outputs` obj in `main`
data_urlstr = f"{urlstr}/_data" # this is specific to the `Outputs` obj in `main`
data = get_data(data_urlstr)
assert data.shape == (100, 2048, 2048)
......@@ -211,7 +211,7 @@ def test_main(main_kwargs, output_h5):
params=[
lambda s: "," + s, # something wrong
lambda s: s.split(":", 1)[1], # missing scheme
lambda s: s.split("::", 1)[0], # missing data link
lambda s: s.split("::", 1)[0], # missing _data link
]
)
def main_kwargs_invalid_input_url(request, main_kwargs, realdata00_h5):
......@@ -251,7 +251,7 @@ def test_main_kwargs_invalid_moving_window_params(main_kwargs_bad_moving_window_
@pytest.fixture
def main_kwargs_input_doesnt_exist(main_kwargs):
"""should cause file not found"""
input_url = DataUrl("silx:asdf.h5::/data")
input_url = DataUrl("silx:asdf.h5::/_data")
return {**main_kwargs, **dict(input_url=input_url)}
......
......@@ -80,7 +80,7 @@ def _worker_init(
"""Process one median window in the z-axis for all the xy positions individually.
Args:
mp_data: data to process (read only)
mp_data: _data to process (read only)
mp_medians: where the medians are dumped
data_shape:
data_dtype: [description]
......@@ -129,7 +129,7 @@ def remove_moving_medians(
side of the index range.
Striding and border effects:
- if `median_validity` is not a factor of the z-axis size of `data`
- if `median_validity` is not a factor of the z-axis size of `_data`
the last window will be valid for fewer slices and computed with
fewer slices on the right side (positive direction of the axis)
- since the `median_window` will usually be bigger than `median_validity`
......@@ -140,18 +140,18 @@ def remove_moving_medians(
(positive direction of the axis)
Args:
data: 3d; the data volume's axes order is assumed to be (z, x, y)
data: 3d; the _data volume's axes order is assumed to be (z, x, y)
median_validity: how many slices is a median valid for?
median_window: how many slices are used to compute a median slice?
nprocs: nb. processes in parallel. Defaults to None (number of cores in the computer).
multiprocessing_rawarray:
if none: will allocate one, create an ndarray from buffer, and copy the content from `data` to it
if given: `data` is supposed to be an ndarray from the shared memeory array
if none: will allocate one, create an ndarray from buffer, and copy the content from `_data` to it
if given: `_data` is supposed to be an ndarray from the shared memeory array
see args in the argparse.parser
Returns:
data with moving medians removed, medians
_data with moving medians removed, medians
if `multiprocessing_rawarray` is None, also return the
multiprocessing rawarray object at the last position
"""
......@@ -200,7 +200,7 @@ def remove_moving_medians(
) as pool:
# notice that the median computation and removal cannot be done at once
# because we're modifying data (shared) directly, so removing the median
# because we're modifying _data (shared) directly, so removing the median
# from a slice could make another's computation go wrong
# btw, it's important to do the operation in-place because the memory allocation
# takes a considerable time
......
......@@ -73,11 +73,11 @@ def xyfilter_median(
todo test xyfilter_median
multiprocessing_rawarray:
if none: will allocate one, create an ndarray from buffer, and copy the content from `data` to it
if given: `data` is supposed to be an ndarray from the shared memeory array
if none: will allocate one, create an ndarray from buffer, and copy the content from `_data` to it
if given: `_data` is supposed to be an ndarray from the shared memeory array
Returns:
median-filtered data
median-filtered _data
if `multiprocessing_rawarray` is None, also return the
multiprocessing rawarray object at the last position
......@@ -117,7 +117,7 @@ def xyfilter_median(
) as pool:
# notice that the median computation and removal cannot be done at once
# because we're modifying data (shared) directly, so removing the median
# because we're modifying _data (shared) directly, so removing the median
# from a slice could make another's computation go wrong
# btw, it's important to do the operation in-place because the memory allocation
# takes a considerable time
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Compute the median of all pixels in a volume data getting a single slice.
This is to be used with the ref and darkend data.
Compute the median of all pixels in a volume _data getting a single slice.
This is to be used with the ref and darkend _data.
@author: joaopcbertoldo
"""
......@@ -55,17 +55,17 @@ def main(
update_mode: str,
) -> int:
"""
Compute a z-wise median (one per pixel spatial position) of 3d data block and save it.
Compute a z-wise median (one per pixel spatial position) of 3d _data block and save it.
The median computation is done for each xy-position individually considering all the z-axis values.
!!IMPORTANT!! The data volume's axes order is assumed to be (z, x, y).
!!IMPORTANT!! The _data volume's axes order is assumed to be (z, x, y).
IO: reads data with `silx.io.get_data` and writes with `silx.io.dictdump.dicttoh5`
IO: reads _data with `silx.io.get_data` and writes with `silx.io.dictdump.dicttoh5`
Args:
input_url: url (scheme + path + link) to READ the INPUT data
file and its internal link with the data
ex: silx:/data/id11/my.h5::/2.1/measurement/marana
input_url: url (scheme + path + link) to READ the INPUT _data
file and its internal link with the _data
ex: silx:/_data/id11/my.h5::/2.1/measurement/marana
obs1: 'silx:' indicates the h5 scheme
obs2: silx (the library) also supports 'fabio', but that is not supported here
......@@ -96,15 +96,15 @@ def main(
logger.debug("the args look good to go")
# ============================ data ============================
# ============================ _data ============================
data = common.catch_silx_io_exceptions(get_data, logger)(input_url)
logger.debug("validating data")
logger.debug("validating _data")
common.validate_3d_data(data)
logger.debug("the data looks fine")
logger.debug("the _data looks fine")
# ============================ go ============================
......@@ -139,8 +139,8 @@ def main(
parser = ArgumentParser(
prog="zwise_median",
description="Compute the median of all pixels in a volume data getting a single slice. This is to be used with "
"the ref and darkend data.",
description="Compute the median of all pixels in a volume _data getting a single slice. This is to be used with "
"the ref and darkend _data.",
fromfile_prefix_chars="@",
epilog=log.ABOUT_LOGS_COMMON_NOTE,
parents=[parse.verbosity_parser, parse.update_mode_parser],
......@@ -153,7 +153,7 @@ parser.add_argument(
"--in",
type=DataUrl,
metavar="silx:input.h5::/3d_data",
help="url (scheme + path + link) to the INPUT data",
help="url (scheme + path + link) to the INPUT _data",
required=True,
dest="url",
)
......@@ -181,8 +181,10 @@ class ExamplesAction(ExampleCallsActionAbstract):
@property
def examples(self) -> List[str]:
filename = Path(__file__).name
in_url_str = "silx:/data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/bmg_l1_bmg_dct2.h5::/10.1/measurement/marana "
out_url_str = "silx:/tmp/output.h5::/data"
in_url_str = (
"silx:/_data/id11/3dxrd/blc12852/id11/bmg_l1/bmg_l1_bmg_dct2/bmg_l1_bmg_dct2.h5::/10.1/measurement/marana "
)
out_url_str = "silx:/tmp/output.h5::/_data"
return [
f"{filename} --help",
f"{filename} -vv --in {in_url_str} --out {out_url_str}",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment