Reimplement annotators using classes
parent
610198ea6b
commit
2ae6c5a44b
@ -0,0 +1,138 @@
|
|||||||
|
"""Topology anotation
|
||||||
|
|
||||||
|
Annotations are an extensible way of providing a :class:`TopologyV3` with
|
||||||
|
additional information. This information can be of any kind, e.g. which edges
|
||||||
|
lie on shortest path trees, which vertices are missing compared to the
|
||||||
|
reference &c.
|
||||||
|
|
||||||
|
The annotation data should be kept simple, so that the annotated topology can
|
||||||
|
be e.g. serialized to JSON.
|
||||||
|
|
||||||
|
The central object is an :class:AnnotatedTopology, which holds a set of :class:
|
||||||
|
Annotation objects and which vertices and edges they cover. The Annotations are
|
||||||
|
created by :class:`Annotator`s. Each Annotator has a :class:AnnotatorID, which
|
||||||
|
serves as a way to reference it and also to provide a namespace in order for
|
||||||
|
Annotations not to clash.
|
||||||
|
|
||||||
|
Under the hood, an Annotation is just several dictionaries, whose values are
|
||||||
|
the "tags" and the AnnotatedTopology just keeps track of which vertices and
|
||||||
|
edges are tagged by which Annotation (using AnnotatorID). This allows quick
|
||||||
|
processing of the sets of Annotations across the topology, but adds several
|
||||||
|
indirections to iterating over all annotations of a particular vertex/edge. We
|
||||||
|
believe that the latter is a less common operation, so this seems like a good
|
||||||
|
approach."""
|
||||||
|
|
||||||
|
from .topo_v3 import TopologyV3, VertexID, Edge
|
||||||
|
from collections import defaultdict
|
||||||
|
from collections.abc import Hashable
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
class AnnotatedTopology:
    """A frozen topology together with the annotations produced for it.

    Besides the annotations themselves (keyed by annotator ID), the object
    keeps reverse indices recording which annotator IDs have tagged each
    vertex, each edge, and the topology as a whole.
    """

    def __init__(self, topology: 'TopologyV3'):
        """Wrap ``topology`` so that annotators can be run on it.

        Raises:
            ValueError: if ``topology.frozen`` is falsy.
        """
        if not topology.frozen:
            raise ValueError('Can only annotate frozen topologies.')
        self.topology = topology
        # Finished annotations, one per annotator ID that has been run.
        self.annotations: dict['AnnotatorID', 'Annotation'] = {}

        # Keeping track of dependencies: IDs of annotators that are executing
        # right now. Used to reject re-entrant (cyclic) runs.
        self.running_annotations = set()

        # Reverse indices: which annotator IDs tagged a given vertex / edge /
        # the topology itself.
        self.vertex_annotators: dict[VertexID, set['AnnotatorID']] = defaultdict(set)
        self.edge_annotators: dict[Edge, set['AnnotatorID']] = defaultdict(set)
        self.global_annotators: set['AnnotatorID'] = set()

    def _forget(self, ann_id: 'AnnotatorID') -> None:
        """Drop the stored annotation of ``ann_id`` and its index entries."""
        stale = self.annotations.pop(ann_id)
        for vertex in stale.for_vertex:
            self.vertex_annotators[vertex].discard(ann_id)
        for edge in stale.for_edge:
            self.edge_annotators[edge].discard(ann_id)
        self.global_annotators.discard(ann_id)

    def run_annotator(self, ann_id: 'AnnotatorID') -> 'Annotator | None':
        """Create and run the :class:`Annotator` described by ``ann_id``.

        Note that we do only support running an Annotator based on its ID,
        because we need a handle to it for bookkeeping. Annotators should not
        be run in other ways, since that could create annotation cycles and
        annotations would not be added to the topology.

        We do return the annotator, so that if it can provide any other useful
        function, it may be used. (We are not aware of a use case, though.)

        Also, when the annotator is not run (it is flagged ``idempotent`` and
        its annotation is already stored), we return None instead.

        If a non-idempotent annotator is re-run, its previous annotation is
        discarded *before* the new run starts.

        Raises:
            ValueError: if the annotator with this ID is currently running.
        """
        if ann_id in self.running_annotations:
            raise ValueError('This annotator is already running.')

        if ann_id in self.annotations:
            if ann_id.annotator.idempotent:
                return None  # Shortcut :-)
            # Scrap old data before re-running
            self._forget(ann_id)

        self.running_annotations.add(ann_id)
        try:
            annotator = ann_id.annotator(ann_id.param)
            annotation = annotator.annotate(self)
        finally:
            # Always clear the marker, otherwise a failing annotator would be
            # reported as "already running" forever.
            self.running_annotations.discard(ann_id)

        for vertex in annotation.for_vertex:
            self.vertex_annotators[vertex].add(ann_id)
        for edge in annotation.for_edge:
            self.edge_annotators[edge].add(ann_id)
        if annotation.for_topology is not None:
            self.global_annotators.add(ann_id)
        self.annotations[ann_id] = annotation

        return annotator
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class AnnotatorID:
|
||||||
|
annotator: type['Annotator']
|
||||||
|
param: None | Hashable = None
|
||||||
|
|
||||||
|
@dataclass
class Annotation:
    """The tags produced by a single annotator run.

    ``for_vertex`` and ``for_edge`` map the tagged vertices / edges to their
    tags; ``for_topology`` carries a tag for the topology as a whole, or None
    when there is none.
    """

    # ID of the annotator that produced this annotation.
    annotator_id: AnnotatorID
    # The annotated topology this annotation belongs to.
    annotated_topology: AnnotatedTopology
    # Use of Any here means "something reasonable and stringifiable". We do not
    # know whether this can be specified reasonably.
    for_vertex: dict[VertexID, Any]
    # Keyed by edges (not vertex IDs): the keys are used to index the
    # edge-to-annotators mapping of the AnnotatedTopology.
    for_edge: dict[Edge, Any]
    for_topology: Any | None
|
||||||
|
|
||||||
|
class Annotator(ABC):
    """Annotator itself.

    We do not provide any specific implementation here. You may find annotators
    in submodules of :mod:`birdvisu.annotations`.

    The Annotator Protocol
    ======================

    In order to keep everything working, several rules must be followed. First,
    in no case may Annotators alter the :class:`AnnotatedTopology` or other
    :class:`Annotation` objects. They may, however, check for already present
    annotators and adjust their output according to present Annotations.

    Annotators *are* allowed to run :meth:`AnnotatedTopology.run_annotator`,
    but the implementation must make sure that this does not lead to recursive
    calling of that function. This allows Annotators to depend on other
    Annotators and is safe as long as the dependency graph is a DAG.

    The ``idempotent`` flag determines whether an Annotator should be re-run
    when it has already annotated the specific topology. While treating
    annotators by default as non-idempotent is correct, setting the flag may
    save time (especially for time-consuming Annotators). Note however that if
    an Annotator wishes to check results of other Annotators, it is not
    idempotent.

    Annotators do not have a say in how they are constructed, this is purely
    determined by the :class:`AnnotatorID`. The ``param`` in that object serves
    as a means of parametrising the construction and is passed to the
    Annotator's initialiser. In other words, the AnnotatorID serves as an
    immutable recipe for constructing the Annotator.

    The only way an Annotator is allowed to communicate Annotations is the
    return value of :meth:`annotate`. The Annotations output must be
    "reasonable", meaning that they should not be too complex. The intention is
    that an AnnotatedTopology could be serialized to a simple format like JSON
    for use by other programs. Using basic types like scalars, lists and
    dictionaries is probably safe, but using sets is not (please, think of the
    ~~kittens~~ JSON and use dicts with 1 as a value.). Once the Annotation is
    output, it must not be modified (and reference to it should not be kept).
    """

    # When true, an annotator whose annotation is already stored is not re-run.
    idempotent: bool = False

    @abstractmethod
    def __init__(self, param: None | Hashable):
        """Build the annotator from the ``param`` of its AnnotatorID."""
        ...

    @abstractmethod
    def annotate(self, topology: AnnotatedTopology) -> Annotation:
        """Compute and return the Annotation for ``topology``."""
        ...
|
Loading…
Reference in New Issue