Newer
Older
"""
This file contains the database models
"""
Jörn Künsemöller
committed
from django.db.models import ForeignKey, CharField, DateTimeField, BooleanField, IntegerField, BigIntegerField, \
ManyToManyField, CASCADE, SET_NULL, PROTECT
from django.contrib.postgres.fields import ArrayField, JSONField
from .specification import AbstractChoice, BasicCommon, Template, NamedCommon # , <TaskBlueprint
from enum import Enum
Jörn Künsemöller
committed
from rest_framework.serializers import HyperlinkedRelatedField
#
# I/O
#
Jörn Künsemöller
committed
class SubtaskConnector(BasicCommon):
    """
    Describes how the output of one Subtask can be wired to the input of another.

    Some connectors implement Task Relations. An input is tied to an output of another
    Subtask, and a filter may be specified on that link.
    """

    # What flows over this connector: its role, its datatype, and the accepted dataformats.
    role = ForeignKey(
        'Role',
        null=False,
        on_delete=PROTECT,
    )
    datatype = ForeignKey(
        'Datatype',
        null=False,
        on_delete=PROTECT,
    )
    dataformats = ManyToManyField(
        'Dataformat',
        blank=True,
    )

    # The SubtaskTemplates on either end of the connector.
    # NOTE(review): output_of exposes the reverse accessor 'inputs' and input_of exposes
    # 'outputs' -- these look swapped; confirm against the users of SubtaskTemplate.inputs/.outputs.
    # NOTE(review): blank=True is given without null=True, so the column itself stays NOT NULL;
    # confirm whether an unset end is really meant to be allowed.
    output_of = ForeignKey(
        'SubtaskTemplate',
        related_name='inputs',
        blank=True,
        on_delete=PROTECT,
    )
    input_of = ForeignKey(
        'SubtaskTemplate',
        related_name='outputs',
        blank=True,
        on_delete=PROTECT,
    )
#
# Choices
#
class SubtaskState(AbstractChoice):
    """Defines the model and predefined list of possible SubtaskState's for Subtask.

    The items in the Choices class below are automagically populated into the database
    via a data migration.
    """

    class Choices(Enum):
        # Each "-ing" transitional state is listed directly before the state it leads to.
        DEFINING = 'defining'
        DEFINED = 'defined'

        SCHEDULING = 'scheduling'
        SCHEDULED = 'scheduled'

        QUEUEING = 'queueing'
        QUEUED = 'queued'

        STARTING = 'starting'
        STARTED = 'started'

        FINISHING = 'finishing'
        FINISHED = 'finished'

        CANCELLING = 'cancelling'
        CANCELLED = 'cancelled'

        ERROR = 'error'
class SubtaskType(AbstractChoice):
    """Defines the model and predefined list of possible SubtaskType's for Subtask.

    The items in the Choices class below are automagically populated into the database
    via a data migration.
    """

    class Choices(Enum):
        OBSERVATION = 'observation'
        PIPELINE = 'pipeline'
        INSPECTION = 'inspection'
        DELETION = 'deletion'
        MANUAL = 'manual'
        OTHER = 'other'
class StationType(AbstractChoice):
    """Defines the model and predefined list of possible StationType's for AntennaSet.

    The items in the Choices class below are automagically populated into the database
    via a data migration.
    """

    class Choices(Enum):
        CORE = 'core'
        REMOTE = 'remote'
        INTERNATIONAL = 'international'
Jörn Künsemöller
committed
class Algorithm(AbstractChoice):
    """Defines the model and predefined list of possible Algorithm's for DataproductHash.

    The items in the Choices class below are automagically populated into the database
    via a data migration.
    """

    class Choices(Enum):
        MD5 = "md5"
        # NOTE(review): AES256 names a cipher rather than a digest -- confirm this is the
        # intended hash algorithm before adding consumers that depend on it.
        AES256 = "aes256"
class ScheduleMethod(AbstractChoice):
    """Defines the model and predefined list of possible ScheduleMethod's for Subtask.

    Referenced by Subtask.schedule_method to select which method is used for scheduling.
    The items in the Choices class below are automagically populated into the database
    via a data migration.
    """

    class Choices(Enum):
        MANUAL = 'manual'
        BATCH = 'batch'
        DYNAMIC = 'dynamic'
#
# Templates
#
Jörn Künsemöller
committed
class SubtaskTemplate(Template):
    """
    Template for Subtasks. Subtask.specifications_template points here as the schema
    used for its specifications_doc.
    """

    # Kind of subtask this template describes (see SubtaskType).
    type = ForeignKey(
        'SubtaskType',
        null=False,
        on_delete=PROTECT,
    )

    # Both flags are off unless a template explicitly enables them.
    queue = BooleanField(default=False)
    realtime = BooleanField(default=False)
Jörn Künsemöller
committed
class DefaultSubtaskTemplate(BasicCommon):
    """Associates a unique name with a SubtaskTemplate."""

    name = CharField(
        max_length=128,
        unique=True,
    )
    template = ForeignKey(
        'SubtaskTemplate',
        on_delete=PROTECT,
    )
class DataproductSpecificationsTemplate(Template):
    """
    Template without fields of its own beyond those of Template.
    Dataproduct.specifications_template points here as the schema used for its specifications_doc.
    """
class DefaultDataproductSpecificationsTemplate(BasicCommon):
    """Associates a unique name with a DataproductSpecificationsTemplate."""

    name = CharField(
        max_length=128,
        unique=True,
    )
    template = ForeignKey(
        'DataproductSpecificationsTemplate',
        on_delete=PROTECT,
    )
Jörn Künsemöller
committed
class SubtaskInputSelectionTemplate(Template):
    """
    Template without fields of its own beyond those of Template.
    SubtaskInput.selection_template points here as the schema used for its selection_doc.
    """
# todo: do we need to specify a default?
Jörn Künsemöller
committed
class DataproductFeedbackTemplate(Template):
    """
    Template without fields of its own beyond those of Template.
    Dataproduct.feedback_template points here as the schema used for its feedback_doc.
    """
# todo: do we need to specify a default?
Jörn Künsemöller
committed
#
# Instance Objects
#
Jörn Künsemöller
committed
class Subtask(BasicCommon):
    """
    A low-level task: an atomic unit of execution, such as running an observation or running
    inspection plots on the observed data. Each Subtask has a specific configuration, will have
    resources allocated to it, and represents a single run.
    """

    # --- timing and state ---
    start_time = DateTimeField(
        null=True,
        help_text='Start this subtask at the specified time (NULLable).',
    )
    stop_time = DateTimeField(
        null=True,
        help_text='Stop this subtask at the specified time (NULLable).',
    )
    state = ForeignKey(
        'SubtaskState',
        null=False,
        on_delete=PROTECT,
        related_name='task_states',
        help_text='Subtask state (see Subtask State Machine).',
    )

    # --- specification ---
    specifications_doc = JSONField(
        help_text='Final specifications, as input for the controller.',
    )
    # Becomes NULL (instead of blocking the delete) when the blueprint is removed.
    task_blueprint = ForeignKey(
        'TaskBlueprint',
        related_name='subtasks',
        null=True,
        on_delete=SET_NULL,
        help_text='Task Blueprint to which this Subtask belongs.',
    )
    specifications_template = ForeignKey(
        'SubtaskTemplate',
        null=False,
        on_delete=PROTECT,
        help_text='Schema used for specifications_doc.',
    )

    # --- scheduling and control ---
    do_cancel = DateTimeField(
        null=True,
        help_text='Timestamp when the subtask has been ordered to cancel (NULLable).',
    )
    priority = IntegerField(
        help_text='Absolute priority of this subtask (higher value means more important).',
    )
    schedule_method = ForeignKey(
        'ScheduleMethod',
        null=False,
        on_delete=PROTECT,
        help_text='Which method to use for scheduling this Subtask. One of (MANUAL, BATCH, DYNAMIC).',
    )
    cluster = ForeignKey(
        'Cluster',
        null=True,
        on_delete=PROTECT,
        help_text='Where the Subtask is scheduled to run (NULLable).',
    )
    scheduler_input_doc = JSONField(
        help_text='Partial specifications, as input for the scheduler.',
    )

    # resource_claim = ForeignKey("ResourceClaim", null=False, on_delete=PROTECT) # todo <-- how is this external reference supposed to work?
Jörn Künsemöller
committed
class SubtaskInput(BasicCommon):
    """
    Input specification of a Subtask: names the producing SubtaskOutput, the selection (filter)
    to apply to its dataproducts, and the dataproducts stored as the result of that selection.
    """

    # Removed together with its Subtask.
    subtask = ForeignKey(
        'Subtask',
        null=False,
        on_delete=CASCADE,
        help_text='Subtask to which this input specification refers.',
    )

    # Optional links; both are reset to NULL when their target is deleted.
    task_relation_blueprint = ForeignKey(
        'TaskRelationBlueprint',
        null=True,
        on_delete=SET_NULL,
        help_text='Task Relation Blueprint which this Subtask Input implements (NULLable).',
    )
    connector = ForeignKey(
        'SubtaskConnector',
        null=True,
        on_delete=SET_NULL,
        help_text='Which connector this Task Input implements.',
    )

    # Source of the input data and the stored outcome of the selection.
    producer = ForeignKey(
        'SubtaskOutput',
        on_delete=PROTECT,
        help_text='The Subtask Output providing the input dataproducts.',
    )
    dataproducts = ManyToManyField(
        'Dataproduct',
        help_text='The Dataproducts resulting from application of the filter at time of scheduling Although the dataproducts are simply the result of applying the filter on immutable data, the filter application could change over time. We thus store the result of this filtering directly to retain which input was specified for the task..',
    )

    # The selection itself and the template that serves as its schema.
    selection_doc = JSONField(
        help_text='Filter to apply to the dataproducts of the producer, to derive input dataproducts when scheduling.',
    )
    selection_template = ForeignKey(
        'SubtaskInputSelectionTemplate',
        on_delete=PROTECT,
        help_text='Schema used for selection_doc.',
    )
Jörn Künsemöller
committed
class SubtaskOutput(BasicCommon):
    """Output specification of a Subtask; SubtaskInput.producer and Dataproduct.producer refer to it."""

    # Removed together with its Subtask.
    subtask = ForeignKey(
        'Subtask',
        null=False,
        on_delete=CASCADE,
        help_text='Subtask to which this output specification refers.',
    )
    # Optional; reset to NULL when the connector is deleted.
    connector = ForeignKey(
        'SubtaskConnector',
        null=True,
        on_delete=SET_NULL,
        help_text='Which connector this Subtask Output implements.',
    )
class Dataproduct(BasicCommon):
    """
    An atomic dataset, produced and consumed by subtasks.

    The consumed dataproducts are those resulting from interpreting the Subtask Connector filters
    of the inputs; these links are saved explicitly, should the interpretation of the filter change
    over time. The produced dataproducts enumerate everything produced by a Subtask.
    """

    # --- location and format ---
    filename = CharField(
        max_length=128,
        help_text='Name of the file (or top-level directory) of the dataproduct. Adheres to a naming convention, but is not meant for parsing.',
    )
    directory = CharField(
        max_length=1024,
        help_text='Directory where this dataproduct is (to be) stored.',
    )
    dataformat = ForeignKey(
        'Dataformat',
        null=False,
        on_delete=PROTECT,
    )

    # --- lifecycle timestamps ---
    deleted_since = DateTimeField(
        null=True,
        help_text='When this dataproduct was removed from disk, or NULL if not deleted (NULLable).',
    )
    pinned_since = DateTimeField(
        null=True,
        help_text='When this dataproduct was pinned to disk, that is, forbidden to be removed, or NULL if not pinned (NULLable).',
    )

    # --- specification ---
    specifications_doc = JSONField(
        help_text='Dataproduct properties (f.e. beam, subband), to distinguish them when produced by the same task, and to act as input for selections in the Task Input and Work Request Relation Blueprint objects.',
    )
    # NOTE(review): this is the only template reference in this module using CASCADE; the other
    # template foreign keys use PROTECT. Confirm that deleting a template is really meant to
    # delete the dataproducts that refer to it.
    specifications_template = ForeignKey(
        'DataproductSpecificationsTemplate',
        null=False,
        on_delete=CASCADE,
        help_text='Schema used for specifications_doc.',
    )

    # --- provenance and control ---
    producer = ForeignKey(
        'SubtaskOutput',
        on_delete=PROTECT,
        help_text='Subtask Output which generates this dataproduct.',
    )
    do_cancel = DateTimeField(
        null=True,
        help_text='When this dataproduct was cancelled (NULLable). Cancelling a dataproduct triggers cleanup if necessary.',
    )

    # --- sizes ---
    expected_size = BigIntegerField(
        null=True,
        help_text='Expected size of dataproduct size, in bytes. Used for scheduling purposes. NULL if size is unknown (NULLable).',
    )
    size = BigIntegerField(
        null=True,
        help_text='Dataproduct size, in bytes. Used for accounting purposes. NULL if size is (yet) unknown (NULLable).',
    )

    # --- feedback ---
    feedback_doc = JSONField(
        help_text='Dataproduct properties, as reported by the producing process.',
    )
    feedback_template = ForeignKey(
        'DataproductFeedbackTemplate',
        on_delete=PROTECT,
        help_text='Schema used for feedback_doc.',
    )
class AntennaSet(NamedCommon):
    """A named antenna set for one station type, holding RCU numbers and input names."""

    station_type = ForeignKey(
        'StationType',
        null=False,
        on_delete=PROTECT,
    )

    # Both arrays are limited to 128 entries; only 'inputs' may be left empty.
    rcus = ArrayField(
        IntegerField(),
        size=128,
        blank=False,
    )
    inputs = ArrayField(
        CharField(max_length=128),
        size=128,
        blank=True,
    )
Jörn Künsemöller
committed
class DataproductTransform(BasicCommon):
    """
    Links an output dataproduct of a Subtask to an input dataproduct that was used to produce it.
    These transforms encode the provenance information needed when tracking dependencies between
    dataproducts.
    """

    # Reverse accessors on Dataproduct: 'inputs' lists the transforms in which it is the input,
    # 'outputs' those in which it is the output.
    input = ForeignKey(
        'Dataproduct',
        related_name='inputs',
        on_delete=PROTECT,
        help_text='A dataproduct that was the input of a transformation.',
    )
    output = ForeignKey(
        'Dataproduct',
        related_name='outputs',
        on_delete=PROTECT,
        help_text='A dataproduct that was produced from the input dataproduct.',
    )
    identity = BooleanField(
        help_text='TRUE if this transform only copies, tars, or losslessly compresses its input, FALSE if the transform changes the data. Allows for efficient reasoning about data duplication.',
    )
Jörn Künsemöller
committed
class Filesystem(NamedCommon):
    """A named filesystem with a capacity, hosted by a Cluster."""

    capacity = BigIntegerField(
        help_text='Capacity in bytes',
    )
    cluster = ForeignKey(
        'Cluster',
        on_delete=PROTECT,
        help_text='Cluster hosting this filesystem.',
    )
Jörn Künsemöller
committed
class Cluster(NamedCommon):
    """A named cluster, described by a human-readable location."""

    location = CharField(
        max_length=128,
        help_text='Human-readable location of the cluster.',
    )
Jörn Künsemöller
committed
class DataproductArchiveInfo(BasicCommon):
    """Archive bookkeeping for a Dataproduct: storage ticket, publication and corruption timestamps."""

    dataproduct = ForeignKey(
        'Dataproduct',
        on_delete=PROTECT,
        help_text='A dataproduct residing in the archive.',
    )
    storage_ticket = CharField(
        max_length=128,
        help_text='Archive-system identifier.',
    )

    # Both timestamps are NULL while the condition they record does not apply.
    public_since = DateTimeField(
        null=True,
        help_text='Dataproduct is available for public download since this moment, or NULL if dataproduct is not (NULLable).',
    )
    corrupted_since = DateTimeField(
        null=True,
        help_text='Earliest timestamp from which this dataproduct is known to be partially or fully corrupt, or NULL if dataproduct is not known to be corrupt (NULLable).',
    )
Jörn Künsemöller
committed
class DataproductHash(BasicCommon):
    """A hash value of a Dataproduct, together with the Algorithm that was used to compute it."""

    dataproduct = ForeignKey(
        'Dataproduct',
        on_delete=PROTECT,
        help_text='The dataproduct to which this hash refers.',
    )
    algorithm = ForeignKey(
        'Algorithm',
        null=False,
        on_delete=PROTECT,
        help_text='Algorithm used (MD5, AES256).',
    )
    hash = CharField(
        max_length=128,
        help_text='Hash value.',
    )