scheme.py 12.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/

__authors__ = ["H.Payno"]
__license__ = "MIT"
__date__ = "17/12/2018"


from xml.etree.ElementTree import TreeBuilder, Element, ElementTree
from collections import defaultdict
from itertools import count, chain
import json
import pprint
import base64
import pickle
import logging
from .node import Node
from ast import literal_eval
import importlib

_logger = logging.getLogger(__name__)


class Scheme(object):
    """
    class to define a workflow scheme from nodes and links

    :param list nodes:
    :param list links:
    """
    def __init__(self, nodes=None, links=None):
        self.title = ''
        self.description = ''
        self.links = {}
        """keys are link ID, values are Link"""
        if links is not None:
            for link in links:
                self.links[link.id] = link
        self.nodes = nodes or []
        """list of nodes"""
        self.nodes_dict = {}
        """dict with node id as key and node as value"""
        for node in self.nodes:
            self.nodes_dict[node.id] = node

        if links is not None:
            self._update_nodes_from_links()

71
    def final_nodes(self):
72
73
74
75
76
77
78
79
80
81
82
83
84
        """

        :return: list of final nodes (with no output) and which hasn't any
                 control node upstream
        """
        res = []
        for node in self.nodes:
            assert isinstance(node, Node)
            if node.isfinal():
                res.append(node)
        return res

    def start_nodes(self):
85
86
87
88
89
90
        """

        :return: list of nodes starting the workflow. Those does not require
        any input_data
        :rtype: list
        """
91
92
93
94
95
96
97
98
        res = []
        for node in self.nodes:
            assert isinstance(node, Node)
            if node.isstart():
                res.append(node)
        return res

    def endlessNodes(self):
99
100
101
102
103
        """

        :return: list of final nodes.
        :rtype: list
        """
104
105
106
107
108
109
110
111
112
113
        res = []
        for node in self.nodes:
            assert isinstance(node, Node)
            if node.endless is True:
                res.append(node)
        return res

    def save_to(self, output_file):
        """
        Save the scheme as an xml formated file to `stream`
114
115
116
117

        :param output_file: name of the output file. For now only manage xml
                            files
        :type: str
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
        """
        tree = self.scheme_to_etree(data_format="literal")
        indent(tree.getroot(), 0)

        tree.write(output_file)

    def scheme_to_etree(self, data_format="literal", pickle_fallback=False):
        """
        Return an `xml.etree.ElementTree` representation of the `scheme.
        """
        builder = TreeBuilder(element_factory=Element)
        builder.start("scheme", {"version": "2.0",
                                 "title": self.title or "",
                                 "description": self.description or ""})

        # Nodes
        node_ids = defaultdict(count().__next__)
        builder.start("nodes", {})
        for node in self.nodes:  # type: SchemeNode
            attrs = {"id": node.id,
                     "qualified_name": node._qualified_name,
                     }

            if type(node) is not Node:
                attrs["scheme_node_type"] = "%s.%s" % (type(node).__name__,
                                                       type(node).__module__)
            builder.start("node", attrs)
            builder.end("node")

        builder.end("nodes")

        # Links
        link_ids = defaultdict(count().__next__)
        builder.start("links", {})
        for link in self.links:  # type: SchemeLink
            source = link.source_node_id
            sink = link.sink_node_id
            source_id = node_ids[source]
            sink_id = node_ids[sink]
            attrs = {"id": str(link_ids[link]),
                     "source_node_id": str(source_id),
                     "sink_node_id": str(sink_id),
                     "source_channel": link.source_channel,
                     "sink_channel": link.sink_channel,
                     "enabled": "true" if link.enabled else "false",
                     }
            builder.start("link", attrs)
            builder.end("link")

        builder.end("links")

        # Annotations
        annotation_ids = defaultdict(count().__next__)
        builder.start("thumbnail", {})
        builder.end("thumbnail")

        # Node properties/settings
        builder.start("node_properties", {})
        for node in self.nodes:
            data = None
            if node.properties:
                try:
                    data, format = dumps(node.properties, format=data_format,
                                         pickle_fallback=pickle_fallback)
                except Exception:
                    _logger.error("Error serializing properties for node %r",
                              node.title, exc_info=True)
                if data is not None:
                    builder.start("properties",
                                  {"node_id": str(node_ids[node]),
                                   "format": format})
                    builder.data(data)
                    builder.end("properties")

        builder.end("node_properties")

        builder.end("scheme")
        root = builder.close()
        tree = ElementTree(root)
        return tree

    def _update_nodes_from_links(self):
        """
        Update upstream and downstream nodes from links definition
        """
        self._clear_nodes_connections()
        for link_id, link in self.links.items():
            source_node = self.nodes_dict[link.source_node_id]
            sink_node = self.nodes_dict[link.sink_node_id]
            source_node.downstream_nodes.add(self.nodes_dict[link.sink_node_id])
            sink_node.upstream_nodes.add(self.nodes_dict[link.source_node_id])

    def _clear_nodes_connections(self):
        """
        clear for all nodes downstream and upstream nodes
        """
        for node in self.nodes:
            assert isinstance(node, Node)
            node.downstream_nodes = set()
            node.upstream_nodes = set()

    def has_final_join(self):
220
221
222
223
224
        """
        :return: True if we need to send a 'end' signal before closing the
                 workflow. This is needed in the 'acquisition workflow' like
                 tomwer and the DataWatcher process for example.
        :rtype: bool
225
226
227
228
229
230
231
232
        """
        for node in self.nodes:
            if node.need_stop_join:
                return True
        return False

    @staticmethod
    def from_desc(desc):
233
234
235
236
237
238
        """

        :param desc:
        :return: instance of Scheme from it description.
        :rtype: :class:`Scheme`
        """
239
240
241
242
        nodes = []
        nodes_dict = {}

        for node_d in desc.nodes:
243
            node = Node(id=node_d.id, processing_pt=node_d.qualified_name)
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
            nodes.append(node)
            nodes_dict[node.id] = node
            data = node_d.data
            if data:
                properties = loads(data.data, data.format)
                node.properties = properties
            else:
                node.properties = {}
            node.qualified_name = node_d.qualified_name

        for link_d in desc.links:
            upstream_node = nodes_dict[link_d.source_node_id]
            assert isinstance(upstream_node, Node)
            downstream_node = nodes_dict[link_d.sink_node_id]
            upstream_node.downstream_nodes.add(downstream_node)
            downstream_node.upstream_nodes.add(upstream_node)

        scheme = Scheme(nodes=nodes, links=desc.links)
        scheme.title = desc.title
        scheme.description = desc.description

        return scheme

267
    def load_handlers(self):
268
269
270
        """
        load all nodes handlers.
        """
271
272
273
        for node in self.nodes:
            node.load_handlers()

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395

def contains_control_nodes(nodes_list):
    for _node in nodes_list:
        if _node.endless or contains_control_nodes(_node.upstream_nodes):
            return True
    return False


def loads(string, format):
    if format == "literal":
        return literal_eval(string)
    elif format == "json":
        return json.loads(string)
    elif format == "pickle":
        return pickle.loads(base64.decodebytes(string.encode('ascii')))
    else:
        raise ValueError("Unknown format")


def indent(element, level=0, indent="\t"):
    """
    Indent an instance of a :class:`Element`. Based on
    (http://effbot.org/zone/element-lib.htm#prettyprint).

    """
    def empty(text):
        return not text or not text.strip()

    def indent_(element, level, last):
        child_count = len(element)

        if child_count:
            if empty(element.text):
                element.text = "\n" + indent * (level + 1)

            if empty(element.tail):
                element.tail = "\n" + indent * (level + (-1 if last else 0))

            for i, child in enumerate(element):
                indent_(child, level + 1, i == child_count - 1)

        else:
            if empty(element.tail):
                element.tail = "\n" + indent * (level + (-1 if last else 0))

    return indent_(element, level, True)


def dumps(obj, format="literal", prettyprint=False, pickle_fallback=False):
    """
    Serialize `obj` using `format` ('json' or 'literal') and return its
    string representation and the used serialization format ('literal',
    'json' or 'pickle').

    If `pickle_fallback` is True and the serialization with `format`
    fails object's pickle representation will be returned

    """
    if format == "literal":
        try:
            return (literal_dumps(obj, prettyprint=prettyprint, indent=1),
                    "literal")
        except (ValueError, TypeError) as ex:
            if not pickle_fallback:
                raise

            _logger.debug("Could not serialize to a literal string")

    elif format == "json":
        try:
            return (json.dumps(obj, indent=1 if prettyprint else None),
                    "json")
        except (ValueError, TypeError):
            if not pickle_fallback:
                raise

            _logger.debug("Could not serialize to a json string")

    elif format == "pickle":
        return base64.encodebytes(pickle.dumps(obj)).decode('ascii'), "pickle"

    else:
        raise ValueError("Unsupported format %r" % format)

    if pickle_fallback:
        _logger.warning("Using pickle fallback")
        return base64.encodebytes(pickle.dumps(obj)).decode('ascii'), "pickle"
    else:
        raise Exception("Something strange happened.")


# This is a subset of PyON serialization.
def literal_dumps(obj, prettyprint=False, indent=4):
    """
    Write obj into a string as a python literal.
    """
    memo = {}
    NoneType = type(None)

    def check(obj):
        if type(obj) in [int, float, bool, NoneType, str, bytes]:
            return True

        if id(obj) in memo:
            raise ValueError("{0} is a recursive structure".format(obj))

        memo[id(obj)] = obj

        if type(obj) in [list, tuple]:
            return all(map(check, obj))
        elif type(obj) is dict:
            return all(map(check, chain(iter(obj.keys()), iter(obj.values()))))
        else:
            raise TypeError("{0} can not be serialized as a python "
                             "literal".format(type(obj)))

    check(obj)

    if prettyprint:
        return pprint.pformat(obj, indent=indent)
    else:
        return repr(obj)