From 2ae6c5a44b555391d761e16664e1517efabcfb93 Mon Sep 17 00:00:00 2001
From: Pavel 'LEdoian' Turinsky
Date: Sun, 9 Jul 2023 14:32:34 +0200
Subject: [PATCH] Reimplement annotators using classes

---
 birdvisu/annotations/__init__.py | 164 +++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 birdvisu/annotations/__init__.py

diff --git a/birdvisu/annotations/__init__.py b/birdvisu/annotations/__init__.py
new file mode 100644
index 0000000..4f2211a
--- /dev/null
+++ b/birdvisu/annotations/__init__.py
@@ -0,0 +1,164 @@
+"""Topology annotation
+
+Annotations are an extensible way of providing a :class:`TopologyV3` with
+additional information. This information can be of any kind, e.g. which edges
+lie on shortest path trees, which vertices are missing compared to the
+reference &c.
+
+The annotation data should be kept simple, so that the annotated topology can
+be e.g. serialized to JSON.
+
+The central object is an :class:`AnnotatedTopology`, which holds a set of
+:class:`Annotation` objects and which vertices and edges they cover. The
+Annotations are created by :class:`Annotator` objects. Each Annotator has an
+:class:`AnnotatorID`, which serves as a way to reference it and also to provide
+a namespace in order for Annotations not to clash.
+
+Under the hood, an Annotation is just several dictionaries, whose values are
+the "tags" and the AnnotatedTopology just keeps track of which vertices and
+edges are tagged by which Annotation (using AnnotatorID). This allows quick
+processing of the sets of Annotations across the topology, but adds several
+indirections to iterating over all annotations of a particular vertex/edge. We
+believe that the latter is the less common operation, so this seems like a good
+approach."""
+
+# NOTE(review): this file is birdvisu/annotations/__init__.py, so ``.topo_v3``
+# resolves to birdvisu/annotations/topo_v3.py, which this patch does not
+# create. If topo_v3 lives in the parent package, this must be ``..topo_v3``.
+from .topo_v3 import TopologyV3, VertexID, Edge
+from collections import defaultdict
+from collections.abc import Hashable
+from dataclasses import dataclass
+from abc import ABC, abstractmethod
+from typing import Any
+
+class AnnotatedTopology:
+    """A frozen topology together with the Annotations attached to it.
+
+    Besides the Annotations themselves (keyed by :class:`AnnotatorID`), the
+    object indexes which annotators tagged each vertex, each edge and the
+    topology as a whole.
+    """
+
+    def __init__(self, topology: TopologyV3):
+        """Wrap ``topology``; raise :exc:`ValueError` unless it is frozen."""
+        if not topology.frozen:
+            raise ValueError('Can only annotate frozen topologies.')
+        self.topology = topology
+        self.annotations: dict['AnnotatorID', 'Annotation'] = {}
+        # Keeping track of dependencies
+        self.running_annotations: set['AnnotatorID'] = set()
+        self.vertex_annotators: dict[VertexID, set['AnnotatorID']] = defaultdict(set)
+        self.edge_annotators: dict[Edge, set['AnnotatorID']] = defaultdict(set)
+        self.global_annotators: set['AnnotatorID'] = set()
+
+    def run_annotator(self, ann_id: 'AnnotatorID') -> 'Annotator | None':
+        """Create and run the :class:`Annotator` described by ``ann_id``.
+
+        Note that we only support running an Annotator based on its ID,
+        because we need a handle to it for bookkeeping. Annotators should not
+        be run in other ways, since that could create annotation cycles and
+        annotations would not be added to the topology.
+
+        We do return the annotator, so that if it can provide any other useful
+        function, it may be used. (We are not aware of a use case, though.)
+
+        Also, when the annotator is not run (it is idempotent and has already
+        annotated this topology), we return None instead.
+
+        Raises :exc:`ValueError` when the annotator is already running, e.g.
+        when an annotator (indirectly) asks for itself to be run.
+        """
+        if ann_id in self.running_annotations:
+            raise ValueError('This annotator is already running.')
+        if ann_id in self.annotations:
+            if ann_id.annotator.idempotent:
+                return None  # Shortcut :-)
+            # Scrap old data before re-running
+            old_annot = self.annotations[ann_id]
+            for v in old_annot.for_vertex:
+                self.vertex_annotators[v].remove(ann_id)
+            for e in old_annot.for_edge:
+                self.edge_annotators[e].remove(ann_id)
+            self.global_annotators.discard(ann_id)
+            del self.annotations[ann_id]
+        self.running_annotations.add(ann_id)
+        try:
+            annotator = ann_id.annotator(ann_id.param)
+            annotation = annotator.annotate(self)
+        finally:
+            # Always clear the marker, otherwise a failed annotator could
+            # never be run again on this topology.
+            self.running_annotations.remove(ann_id)
+        for v in annotation.for_vertex:
+            self.vertex_annotators[v].add(ann_id)
+        for e in annotation.for_edge:
+            self.edge_annotators[e].add(ann_id)
+        if annotation.for_topology is not None:
+            self.global_annotators.add(ann_id)
+        self.annotations[ann_id] = annotation
+        return annotator
+
+@dataclass(frozen=True)
+class AnnotatorID:
+    """Immutable recipe for an Annotator: its class and constructor param."""
+    annotator: type['Annotator']
+    param: None | Hashable = None
+
+@dataclass
+class Annotation:
+    """Tags produced by one Annotator for vertices, edges and the topology."""
+    annotator_id: AnnotatorID
+    annotated_topology: AnnotatedTopology
+    # Use of Any here means "something reasonable and stringifiable". We do not
+    # know whether this can be specified reasonably.
+    for_vertex: dict[VertexID, Any]
+    # Keyed by Edge, matching AnnotatedTopology.edge_annotators.
+    for_edge: dict[Edge, Any]
+    for_topology: Any | None
+
+class Annotator(ABC):
+    """Annotator itself.
+
+    We do not provide any specific implementation here. You may find annotators
+    in submodules of :mod:`birdvisu.annotations`.
+
+    The Annotator Protocol
+    ======================
+
+    In order to keep everything working, several rules must be followed. First,
+    in no case may Annotators alter the :class:`AnnotatedTopology` or other
+    :class:`Annotation` objects. They may, however, check for already present
+    annotators and adjust their output according to present Annotations.
+
+    Annotators *are* allowed to run :meth:`AnnotatedTopology.run_annotator`,
+    but the implementation must make sure that this does not lead to recursive
+    calling of that function. This allows Annotators to depend on other
+    Annotators and is safe as long as the dependency graph is a DAG.
+
+    The ``idempotent`` flag determines whether an Annotator should be re-run
+    when it has already annotated the specific topology. While treating
+    annotators by default as non-idempotent is correct, setting the flag may
+    save time (especially for time-consuming Annotators). Note however that if
+    an Annotator wishes to check results of other Annotators, it is not
+    idempotent.
+
+    Annotators do not have a say in how they are constructed, this is purely
+    determined by the :class:`AnnotatorID`. The ``param`` in that object serves
+    as a means of parametrising the construction and is passed to the
+    Annotator's initialiser. In other words, the AnnotatorID serves as an
+    immutable recipe for constructing the Annotator.
+
+    The only way an Annotator is allowed to communicate Annotations is the
+    return value of :meth:`annotate`. The Annotations output must be
+    "reasonable", meaning that they should not be too complex. The intention is
+    that an AnnotatedTopology could be serialized to a simple format like JSON
+    for use by other programs. Using basic types like scalars, lists and
+    dictionaries is probably safe, but using sets is not (please, think of the
+    ~~kittens~~ JSON and use dicts with 1 as a value.). Once the Annotation is
+    output, it must not be modified (and reference to it should not be kept)."""
+    idempotent: bool = False
+    @abstractmethod
+    def __init__(self, param: None | Hashable): ...
+    @abstractmethod
+    def annotate(self, topology: AnnotatedTopology) -> Annotation: ...