Reimplement annotators using classes

styling
LEdoian 1 year ago
parent 610198ea6b
commit 2ae6c5a44b

@ -0,0 +1,138 @@
"""Topology annotation
Annotations are an extensible way of providing a :class:`TopologyV3` with
additional information. This information can be of any kind, e.g. which edges
lie on shortest path trees, which vertices are missing compared to the
reference &c.
The annotation data should be kept simple, so that the annotated topology can
be e.g. serialized to JSON.
The central object is an :class:`AnnotatedTopology`, which holds a set of
:class:`Annotation` objects and which vertices and edges they cover. The Annotations are
created by :class:`Annotator` objects. Each Annotator has an :class:`AnnotatorID`, which
serves as a way to reference it and also to provide a namespace in order for
Annotations not to clash.
Under the hood, an Annotation is just several dictionaries, whose values are
the "tags" and the AnnotatedTopology just keeps track of which vertices and
edges are tagged by which Annotation (using AnnotatorID). This allows quick
processing of the sets of Annotations across the topology, but adds several
indirections to iterating over all annotations of a particular vertex/edge. We
believe that the latter is a less common operation, so this seems like a good
approach."""
from .topo_v3 import TopologyV3, VertexID, Edge
from collections import defaultdict
from collections.abc import Hashable
from dataclasses import dataclass
from abc import ABC, abstractmethod
from typing import Any
class AnnotatedTopology:
    """A frozen topology together with the Annotations attached to it.

    Apart from the Annotations themselves (``annotations``, keyed by
    AnnotatorID), the object maintains reverse indexes that record which
    AnnotatorIDs have tagged each vertex (``vertex_annotators``), each edge
    (``edge_annotators``) and the topology as a whole
    (``global_annotators``).
    """

    def __init__(self, topology: 'TopologyV3'):
        """Wrap ``topology`` with empty annotation bookkeeping.

        Raises:
            ValueError: if ``topology.frozen`` is falsy.
        """
        if not topology.frozen:
            raise ValueError('Can only annotate frozen topologies.')
        self.topology = topology
        self.annotations: dict['AnnotatorID', 'Annotation'] = {}
        # Keeping track of dependencies: IDs whose annotator is currently
        # executing, used to detect recursive invocation.
        self.running_annotations: set['AnnotatorID'] = set()
        self.vertex_annotators: dict[VertexID, set['AnnotatorID']] = defaultdict(set)
        self.edge_annotators: dict[Edge, set['AnnotatorID']] = defaultdict(set)
        self.global_annotators: set['AnnotatorID'] = set()

    def _forget_annotation(self, ann_id: 'AnnotatorID') -> None:
        """Drop the stored Annotation of ``ann_id`` and all index entries for it."""
        old_annot = self.annotations[ann_id]
        for v in old_annot.for_vertex:
            self.vertex_annotators[v].remove(ann_id)
        for e in old_annot.for_edge:
            self.edge_annotators[e].remove(ann_id)
        # The old annotation may not have had a topology-wide tag at all.
        self.global_annotators.discard(ann_id)
        del self.annotations[ann_id]

    # The return annotation is a string because Annotator is defined further
    # down in this module and is not yet bound when this ``def`` is executed.
    def run_annotator(self, ann_id: 'AnnotatorID') -> 'Annotator | None':
        """This creates and runs an :class:`Annotator`

        Note that we do only support running an Annotator based on its ID,
        because we need a handle to it for bookkeeping. Annotators should not
        be run in other ways, since that could create annotation cycles and
        annotations would not be added to the topology.

        We do return the annotator, so that if it can provide any other useful
        function, it may be used. (We are not aware of a use case, though.)
        Also, when the annotator is not run, we return None instead.

        If an Annotation for ``ann_id`` is already stored, an idempotent
        annotator is skipped; otherwise the old Annotation is removed before
        the annotator is run again.

        Raises:
            ValueError: if the annotator identified by ``ann_id`` is already
                running (i.e. it was invoked recursively).
        """
        if ann_id in self.running_annotations:
            raise ValueError('This annotator is already running.')
        if ann_id in self.annotations:
            if ann_id.annotator.idempotent:
                return None  # Shortcut :-)
            # Scrap old data before re-running
            self._forget_annotation(ann_id)

        self.running_annotations.add(ann_id)
        try:
            annotator = ann_id.annotator(ann_id.param)
            annotation = annotator.annotate(self)
        finally:
            # Always clear the "running" mark, so that a failing annotator
            # does not leave its ID permanently blocked.
            self.running_annotations.discard(ann_id)

        for v in annotation.for_vertex:
            self.vertex_annotators[v].add(ann_id)
        for e in annotation.for_edge:
            self.edge_annotators[e].add(ann_id)
        if annotation.for_topology is not None:
            self.global_annotators.add(ann_id)
        self.annotations[ann_id] = annotation
        return annotator
@dataclass(frozen=True)
class AnnotatorID:
annotator: type['Annotator']
param: None | Hashable = None
@dataclass
class Annotation:
    """The tags one Annotator produced for one AnnotatedTopology.

    ``for_vertex`` and ``for_edge`` map the tagged vertices / edges to their
    tags; ``for_topology`` holds a tag for the topology as a whole, or None
    when there is no such tag.
    """

    annotator_id: AnnotatorID
    annotated_topology: AnnotatedTopology
    # Use of Any here means "something reasonable and stringifiable". We do not
    # know whether this can be specified reasonably.
    for_vertex: dict[VertexID, Any]
    # Keyed by Edge (not VertexID): the keys of this dict are used to index
    # AnnotatedTopology.edge_annotators.
    for_edge: dict[Edge, Any]
    for_topology: Any | None
class Annotator(ABC):
    """Annotator itself.

    We do not provide any specific implementation here. You may find annotators
    in submodules of :mod:`birdvisu.annotations`.

    The Annotator Protocol
    ======================

    In order to keep everything working, several rules must be followed. First,
    in no case may Annotators alter the :class:`AnnotatedTopology` or other
    :class:`Annotation` objects. They may, however, check for already present
    annotators and adjust their output according to present Annotations.

    Annotators *are* allowed to run :meth:`AnnotatedTopology.run_annotator`,
    but the implementation must make sure that this does not lead to recursive
    calling of that function. This allows Annotators to depend on other
    Annotators and is safe as long as the dependency graph is a DAG.

    The ``idempotent`` flag determines whether an Annotator should be re-run
    when it has already annotated the specific topology. While treating
    annotators by default as non-idempotent is correct, setting the flag may
    save time (especially for time-consuming Annotators). Note however that if
    an Annotator wishes to check results of other Annotators, it is not
    idempotent.

    Annotators do not have a say in how they are constructed, this is purely
    determined by the :class:`AnnotatorID`. The ``param`` in that object serves
    as a means of parametrising the construction and is passed to the
    Annotator's initialiser. In other words, the AnnotatorID serves as an
    immutable recipe for constructing the Annotator.

    The only way an Annotator is allowed to communicate Annotations is the
    return value of :meth:`annotate`. The Annotations output must be
    "reasonable", meaning that they should not be too complex. The intention is
    that an AnnotatedTopology could be serialized to a simple format like JSON
    for use by other programs. Using basic types like scalars, lists and
    dictionaries is probably safe, but using sets is not (please, think of the
    ~~kittens~~ JSON and use dicts with 1 as a value.). Once the Annotation is
    output, it must not be modified (and reference to it should not be kept).
    """

    # Class-level default; subclasses override it to opt in to being skipped
    # when their Annotation is already present.
    idempotent: bool = False

    @abstractmethod
    def __init__(self, param: None | Hashable): ...

    @abstractmethod
    def annotate(self, topology: AnnotatedTopology) -> Annotation: ...
Loading…
Cancel
Save