Reimplement annotators using classes

1 year ago · 2ae6c5a44b
parent 610198ea6b
commit 2ae6c5a44b
1 changed files with 138 additions and 0 deletions
--- a/birdvisu/annotations/init.py
+++ b/birdvisu/annotations/init.py
@ -0,0 +1,138 @@
 """Topology annotation
 Annotations are an extensible way of providing a :class:TopologyV3 with
 additional information. This information can be of any kind, e.g. which edges
 lie on shortest path trees, which vertices are missing compared to the
 reference &c.
 The annotation data should be kept simple, so that the annotated topology can
 be e.g. serialized to JSON.
 The central object is an :class:AnnotatedTopology, which holds a set of :class:
 Annotation objects and which vertices and edges they cover. The Annotations are
 created by :class:`Annotator`s. Each Annotator has a :class:AnnotatorID, which
 serves as a way to reference it and also to provide a namespace in order for
 Annotations not to clash.
 Under the hood, an Annotation is just several dictionaries, whose values are
 the "tags" and the AnnotatedTopology just keeps track of which vertices and
 edges are tagged by which Annotation (using AnnotatorID). This allows quick
 processing of the sets of Annotations across the topology, but adds several
 indirections to iterating over all annotations of a particular vertex/edge. We
 believe that the latter is a less common operation, so this seems like a good
 approach."""
 from .topo_v3 import TopologyV3, VertexID, Edge
 from collections import defaultdict
 from collections.abc import Hashable
 from dataclasses import dataclass
 from abc import ABC, abstractmethod
 from typing import Any
 class AnnotatedTopology:
 	"""A frozen topology plus the Annotations that have been computed for it.

 	Besides the Annotations themselves (keyed by :class:`AnnotatorID`), the
 	object keeps reverse indices telling which annotators tagged a given
 	vertex, a given edge, or the topology as a whole.
 	"""

 	def __init__(self, topology):
 		"""Wrap ``topology`` with empty annotation bookkeeping.

 		:param topology: the topology to annotate; its ``frozen`` attribute
 			must be truthy.
 		:raises ValueError: if the topology is not frozen.
 		"""
 		if not topology.frozen:
 			raise ValueError('Can only annotate frozen topologies.')
 		self.topology = topology
 		self.annotations: dict['AnnotatorID', 'Annotation'] = {}
 		# Annotators that are executing right now; used to detect dependency
 		# cycles between annotators.
 		self.running_annotations: set['AnnotatorID'] = set()
 		self.vertex_annotators: dict[VertexID, set['AnnotatorID']] = defaultdict(set)
 		self.edge_annotators: dict[Edge, set['AnnotatorID']] = defaultdict(set)
 		self.global_annotators: set['AnnotatorID'] = set()

 	def _forget_annotation(self, ann_id: 'AnnotatorID') -> None:
 		"""Drop the stored Annotation of ``ann_id`` and all index entries for it."""
 		stale = self.annotations.pop(ann_id)
 		# ``discard`` keeps the clean-up tolerant of an index that has already
 		# lost the entry; the end state is the same either way.
 		for v in stale.for_vertex:
 			self.vertex_annotators[v].discard(ann_id)
 		for e in stale.for_edge:
 			self.edge_annotators[e].discard(ann_id)
 		self.global_annotators.discard(ann_id)

 	def run_annotator(self, ann_id: 'AnnotatorID') -> 'Annotator | None':
 		"""Create and run the :class:`Annotator` described by ``ann_id``.

 		Note that we do only support running an Annotator based on its ID,
 		because we need a handle to it for bookkeeping. Annotators should not
 		be run in other ways, since that could create annotation cycles and
 		annotations would not be added to the topology.

 		:param ann_id: recipe for the annotator; ``ann_id.annotator`` is called
 			with ``ann_id.param`` to construct it.
 		:returns: the annotator instance that was run, or ``None`` when the
 			annotator is flagged ``idempotent`` and its Annotation is already
 			stored (nothing is run in that case).
 		:raises ValueError: if ``ann_id`` is already running, i.e. annotators
 			depend on each other in a cycle.
 		"""
 		if ann_id in self.running_annotations:
 			raise ValueError('This annotator is already running.')
 		if ann_id in self.annotations:
 			if ann_id.annotator.idempotent:
 				return None  # Shortcut :-)
 			# Scrap old data before re-running.
 			self._forget_annotation(ann_id)

 		self.running_annotations.add(ann_id)
 		try:
 			annotator = ann_id.annotator(ann_id.param)
 			annotation = annotator.annotate(self)
 		finally:
 			# Always release the guard, otherwise one failing annotator would
 			# be reported as "already running" on every later attempt.
 			self.running_annotations.discard(ann_id)

 		for v in annotation.for_vertex:
 			self.vertex_annotators[v].add(ann_id)
 		for e in annotation.for_edge:
 			self.edge_annotators[e].add(ann_id)
 		if annotation.for_topology is not None:
 			self.global_annotators.add(ann_id)
 		self.annotations[ann_id] = annotation
 		return annotator
@dataclass(frozen=True)
 class AnnotatorID:
 	annotator: type['Annotator']
 	param: None | Hashable = None
@dataclass
class Annotation:
	"""Output of one annotator run: tags for vertices, edges and the topology.

	The keys of ``for_vertex`` and ``for_edge`` are what
	``AnnotatedTopology.run_annotator`` uses to index which annotators cover
	which vertices and edges.
	"""
	# ID of the annotator that produced this Annotation.
	annotator_id: AnnotatorID
	# The annotated topology this Annotation belongs to.
	annotated_topology: AnnotatedTopology
	# Use of Any here means "something reasonable and stringifiable". We do not
	# know whether this can be specified reasonably.
	for_vertex: dict[VertexID, Any]
	# Keyed by Edge (not VertexID), matching AnnotatedTopology.edge_annotators.
	for_edge: dict[Edge, Any]
	# None means there is no topology-wide tag; run_annotator then does not
	# list the annotator among the global annotators.
	for_topology: Any | None
 class Annotator(ABC):
 	"""Abstract base of everything that produces an :class:`Annotation`.

 	No concrete annotator lives here; look into the submodules of
 	:module:`birdvisu.annotations` for implementations.

 	The Annotator Protocol
 	======================

 	Implementations have to obey the following rules:

 	* Under no circumstances may an Annotator modify the
 	  :class:`AnnotatedTopology` or an :class:`Annotation` made by somebody
 	  else. Reading is fine: an Annotator may look at which annotators have
 	  already run and shape its own output according to their Annotations.

 	* Calling :method:`AnnotatedTopology.run_annotator` from inside an
 	  Annotator *is* permitted, which is how one Annotator depends on
 	  another. The implementation must guarantee that this never re-enters
 	  the same annotator recursively; as long as the dependency graph is a
 	  DAG, this is safe.

 	* ``idempotent`` tells whether re-running the Annotator on a topology it
 	  has already annotated can be skipped. Leaving it ``False`` (the default)
 	  is always correct; setting it to ``True`` only saves time, which matters
 	  for expensive Annotators. An Annotator that inspects the results of
 	  other Annotators is not idempotent.

 	* Construction is dictated solely by the :class:`AnnotatorID`: its
 	  ``param`` is handed to the initialiser and is the only means of
 	  parametrisation. Put differently, the AnnotatorID is an immutable
 	  recipe for building the Annotator.

 	* The return value of :method:`annotate` is the one and only channel for
 	  publishing Annotations. Keep the data "reasonable", i.e. simple enough
 	  that an AnnotatedTopology can be dumped to a plain format such as JSON
 	  for other programs. Scalars, lists and dictionaries are probably safe;
 	  sets are not (JSON has no sets -- use a dict with 1 as the value
 	  instead). After the Annotation has been returned it must not be
 	  modified, and no reference to it should be retained.
 	"""

 	# Class-level default; subclasses override it to opt into the shortcut.
 	idempotent: bool = False

 	@abstractmethod
 	def __init__(self, param: None | Hashable):
 		"""Build the annotator from the ``param`` stored in its AnnotatorID."""

 	@abstractmethod
 	def annotate(self, topology: AnnotatedTopology) -> Annotation:
 		"""Compute and return the Annotation for ``topology``."""