Reimplement annotators using classes
parent
610198ea6b
commit
2ae6c5a44b
@ -0,0 +1,138 @@
|
||||
"""Topology annotation
|
||||
|
||||
Annotations are an extensible way of providing a :class:TopologyV3 with
|
||||
additional information. This information can be of any kind, e.g. which edges
|
||||
lie on shortest path trees, which vertices are missing compared to the
|
||||
reference &c.
|
||||
|
||||
The annotation data should be kept simple, so that the annotated topology can
|
||||
be e.g. serialized to JSON.
|
||||
|
||||
The central object is an :class:AnnotatedTopology, which holds a set of :class:
|
||||
Annotation objects and which vertices and edges they cover. The Annotations are
|
||||
created by :class:`Annotator`s. Each Annotator has a :class:AnnotatorID, which
|
||||
serves as a way to reference it and also to provide a namespace in order for
|
||||
Annotations not to clash.
|
||||
|
||||
Under the hood, an Annotation is just several dictionaries, whose values are
|
||||
the "tags" and the AnnotatedTopology just keeps track of which vertices and
|
||||
edges are tagged by which Annotation (using AnnotatorID). This allows quick
|
||||
processing of the sets of Annotations across the topology, but adds several
|
||||
indirections to iterating over all annotations of a particular vertex/edge. We
|
||||
believe that the latter is the less common operation, so this seems like a good
|
||||
approach."""
|
||||
|
||||
from .topo_v3 import TopologyV3, VertexID, Edge
|
||||
from collections import defaultdict
|
||||
from collections.abc import Hashable
|
||||
from dataclasses import dataclass
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
class AnnotatedTopology:
    """A frozen topology together with the annotations produced for it.

    Attributes:
        topology: The wrapped topology; it must report ``frozen`` as true.
        annotations: Maps each AnnotatorID to the Annotation it produced.
        running_annotations: IDs of annotators that are currently inside
            :meth:`run_annotator`; used to detect dependency cycles.
        vertex_annotators: For each vertex, the IDs of annotators that
            tagged it.
        edge_annotators: For each edge, the IDs of annotators that tagged it.
        global_annotators: IDs of annotators that provided a topology-wide
            annotation.
    """

    def __init__(self, topology):
        """Wrap ``topology``.

        Raises:
            ValueError: if the topology is not frozen.
        """
        if not topology.frozen:
            raise ValueError('Can only annotate frozen topologies.')
        self.topology = topology
        self.annotations: dict['AnnotatorID', 'Annotation'] = {}
        # Keeping track of dependencies
        self.running_annotations: set['AnnotatorID'] = set()
        self.vertex_annotators: dict[VertexID, set['AnnotatorID']] = defaultdict(set)
        self.edge_annotators: dict[Edge, set['AnnotatorID']] = defaultdict(set)
        self.global_annotators: set['AnnotatorID'] = set()

    def _forget_annotation(self, ann_id) -> None:
        """Drop the stored annotation of ``ann_id`` and all bookkeeping for it."""
        stale = self.annotations.pop(ann_id)
        for v in stale.for_vertex:
            self.vertex_annotators[v].discard(ann_id)
        for e in stale.for_edge:
            self.edge_annotators[e].discard(ann_id)
        self.global_annotators.discard(ann_id)

    # The return annotation is a string because Annotator is defined later in
    # the module; an unquoted name would be evaluated at definition time.
    def run_annotator(self, ann_id) -> 'Annotator | None':
        """This creates and runs an :class:`Annotator`.

        Note that we do only support running an Annotator based on its ID,
        because we need a handle to it for bookkeeping. Annotators should not
        be run in other ways, since that could create annotation cycles and
        annotations would not be added to the topology.

        We do return the annotator, so that if it can provide any other useful
        function, it may be used. (We are not aware of a use case, though.)

        Also, when the annotator is not run (it is idempotent and has already
        annotated this topology), we return None instead.

        Raises:
            ValueError: if the annotator with this ID is already running,
                i.e. the annotators depend on each other in a cycle.
        """
        if ann_id in self.running_annotations:
            raise ValueError('This annotator is already running.')
        if ann_id in self.annotations:
            if ann_id.annotator.idempotent:
                return None  # Shortcut :-)
            # Scrap old data before re-running
            self._forget_annotation(ann_id)

        self.running_annotations.add(ann_id)
        try:
            annotator = ann_id.annotator(ann_id.param)
            annotation = annotator.annotate(self)
        finally:
            # Always clear the marker, so that a failing annotator does not
            # stay flagged as "running" forever.
            self.running_annotations.discard(ann_id)

        for v in annotation.for_vertex:
            self.vertex_annotators[v].add(ann_id)
        for e in annotation.for_edge:
            self.edge_annotators[e].add(ann_id)
        if annotation.for_topology is not None:
            self.global_annotators.add(ann_id)
        self.annotations[ann_id] = annotation
        return annotator
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AnnotatorID:
|
||||
annotator: type['Annotator']
|
||||
param: None | Hashable = None
|
||||
|
||||
@dataclass
class Annotation:
    """The output of a single Annotator run: plain dictionaries of tags."""

    # ID of the annotator this annotation belongs to.
    annotator_id: AnnotatorID
    # The annotated topology this annotation refers to.
    annotated_topology: AnnotatedTopology
    # Use of Any here means "something reasonable and stringifiable". We do not
    # know whether this can be specified reasonably.
    # Tags keyed by the vertex they apply to.
    for_vertex: dict[VertexID, Any]
    # Tags keyed by the edge they apply to. The keys are used as keys of
    # AnnotatedTopology.edge_annotators, hence Edge and not VertexID.
    for_edge: dict[Edge, Any]
    # Topology-wide tag; None means there is no global annotation.
    for_topology: Any | None
|
||||
|
||||
class Annotator(ABC):
    """Annotator itself.

    We do not provide any specific implementation here. You may find annotators
    in submodules of :module:`birdvisu.annotations`.

    The Annotator Protocol
    ======================

    In order to keep everything working, several rules must be followed. First,
    in no case may Annotators alter the :class:`AnnotatedTopology` or other
    :class:`Annotation`s. They may, however, check for already present
    annotators and adjust their output according to present Annotations.

    Annotators *are* allowed to run :method:`AnnotatedTopology.run_annotator`,
    but the implementation must make sure that this does not lead to recursive
    calling of that function. This allows Annotators to depend on other
    Annotators and is safe as long as the dependency graph is a DAG.

    The ``idempotent`` flag determines whether an Annotator should be re-run
    when it has already annotated the specific topology. While treating
    annotators by default as non-idempotent is correct, setting the flag may
    save time (especially for time-consuming Annotators). Note however that if
    an Annotator wishes to check results of other Annotators, it is not
    idempotent.

    Annotators do not have a say in how they are constructed, this is purely
    determined by the :class:`AnnotatorID`. The ``param`` in that object serves
    as a means of parametrising the construction and is passed to the
    Annotator's initialiser. In other words, the AnnotatorID serves as an
    immutable recipe for constructing the Annotator.

    The only way an Annotator is allowed to communicate Annotations is the
    return value of :method:`annotate`. The Annotations output must be
    "reasonable", meaning that they should not be too complex. The intention is
    that an AnnotatedTopology could be serialized to a simple format like JSON
    for use by other programs. Using basic types like scalars, lists and
    dictionaries is probably safe, but using sets is not (please, think of the
    ~~kittens~~ JSON and use dicts with 1 as a value.). Once the Annotation is
    output, it must not be modified (and reference to it should not be kept)."""

    # When true, an annotator that has already annotated a topology is not
    # run again on it.
    idempotent: bool = False

    @abstractmethod
    def __init__(self, param: None | Hashable):
        """Construct the Annotator from the ``param`` of its AnnotatorID."""

    @abstractmethod
    def annotate(self, topology: AnnotatedTopology) -> Annotation:
        """Compute and return the Annotation for ``topology``.

        The topology itself must not be modified; the returned Annotation is
        the only output.
        """
|
Loading…
Reference in New Issue