#!/usr/bin/env python

# Copyright (C) 2015-2017
# ASTRON (Netherlands Institute for Radio Astronomy)
# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
#
# This file is part of the LOFAR software suite.
# The LOFAR software suite is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# The LOFAR software suite is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
#
# $Id: resource_assigner.py 1580 2015-09-30 14:18:57Z loose $

"""
ResourceAssigner inserts/updates tasks and assigns resources to them based on the incoming parset.
"""

import logging
from datetime import datetime, timedelta

from lofar.common.cache import cache
from lofar.common.datetimeutils import parseDatetime
from lofar.messaging.messages import EventMessage
from lofar.messaging.messagebus import ToBus
from lofar.messaging.RPC import RPC
from lofar.parameterset import parameterset

from lofar.sas.resourceassignment.resourceassigner.schedulechecker import movePipelineAfterItsPredecessors

from lofar.sas.resourceassignment.resourceassignmentservice.rpc import RARPC
from lofar.sas.resourceassignment.resourceassignmentservice.config import DEFAULT_BUSNAME as RADB_BUSNAME
from lofar.sas.resourceassignment.resourceassignmentservice.config import DEFAULT_SERVICENAME as RADB_SERVICENAME

from lofar.sas.resourceassignment.resourceassignmentestimator.config import DEFAULT_BUSNAME as RE_BUSNAME
from lofar.sas.resourceassignment.resourceassignmentestimator.config import DEFAULT_SERVICENAME as RE_SERVICENAME

from lofar.sas.otdb.otdbrpc import OTDBRPC
from lofar.sas.otdb.config import DEFAULT_OTDB_SERVICE_BUSNAME, DEFAULT_OTDB_SERVICENAME

from lofar.sas.resourceassignment.resourceassigner.config import DEFAULT_RA_NOTIFICATION_BUSNAME
from lofar.sas.resourceassignment.resourceassigner.config import DEFAULT_RA_NOTIFICATION_PREFIX

from lofar.sas.resourceassignment.resourceassigner.resource_availability_checker import ResourceAvailabilityChecker
from lofar.sas.resourceassignment.resourceassigner.schedulers import DwellScheduler

from lofar.mom.momqueryservice.momqueryrpc import MoMQueryRPC
from lofar.mom.momqueryservice.config import DEFAULT_MOMQUERY_BUSNAME, DEFAULT_MOMQUERY_SERVICENAME

from lofar.sas.datamanagement.storagequery.rpc import StorageQueryRPC
from lofar.sas.datamanagement.storagequery.config import DEFAULT_BUSNAME as DEFAULT_STORAGEQUERY_BUSNAME
from lofar.sas.datamanagement.storagequery.config import DEFAULT_SERVICENAME as DEFAULT_STORAGEQUERY_SERVICENAME

from lofar.sas.datamanagement.cleanup.rpc import CleanupRPC
from lofar.sas.datamanagement.cleanup.config import DEFAULT_BUSNAME as DEFAULT_CLEANUP_BUSNAME
from lofar.sas.datamanagement.cleanup.config import DEFAULT_SERVICENAME as DEFAULT_CLEANUP_SERVICENAME

logger = logging.getLogger(__name__)


class ResourceAssigner(object):
    """
    The ResourceAssigner inserts new tasks or updates existing tasks in the RADB and assigns resources to it based on
    a task's parset.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 storagequery_busname=DEFAULT_STORAGEQUERY_BUSNAME,
                 storagequery_servicename=DEFAULT_STORAGEQUERY_SERVICENAME,
                 cleanup_busname=DEFAULT_CLEANUP_BUSNAME,
                 cleanup_servicename=DEFAULT_CLEANUP_SERVICENAME,
                 ra_notification_busname=DEFAULT_RA_NOTIFICATION_BUSNAME,
                 ra_notification_prefix=DEFAULT_RA_NOTIFICATION_PREFIX,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 broker=None,
                 radb_dbcreds=None):
        """
        Creates a ResourceAssigner instance together with the clients for all services it talks to. The connections
        themselves are opened by open(), or by using the instance in a 'with' statement.

        :param radb_busname: name of the bus on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: name of the radb service (default: RADB_SERVICENAME)
        :param re_busname: name of the bus on which the resource estimator service listens (default: RE_BUSNAME)
        :param re_servicename: name of the resource estimator service (default: RE_SERVICENAME)
        :param otdb_busname: name of the bus on which OTDB listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: name of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
        :param storagequery_busname: name of the bus on which the StorageQueryService listens
                                     (default: DEFAULT_STORAGEQUERY_BUSNAME)
        :param storagequery_servicename: name of the StorageQueryService (default: DEFAULT_STORAGEQUERY_SERVICENAME)
        :param cleanup_busname: name of the bus on which the cleanup service listens (default: DEFAULT_CLEANUP_BUSNAME)
        :param cleanup_servicename: name of the CleanupService (default: DEFAULT_CLEANUP_SERVICENAME)
        :param ra_notification_busname: name of the bus on which the ResourceAssigner notifies registered parties
                                        (default: DEFAULT_RA_NOTIFICATION_BUSNAME)
        :param ra_notification_prefix: prefix used in notification message subject
                                       (default: DEFAULT_RA_NOTIFICATION_PREFIX)
        :param mom_busname: name of the bus on which MOM listens for queries (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: name of the MOMQueryService (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param broker: Valid Qpid broker host (default: None, which means localhost)
        :param radb_dbcreds: the credentials to be used for accessing the RADB (default: None, which means default)
        """

        # Remembered for the DwellScheduler instances that are created at run-time
        self.radb_creds = radb_dbcreds
        self.broker = broker

        # Options shared by the four RPC clients that are created with an explicit timeout of 180
        rpc_options = {'broker': broker, 'timeout': 180}

        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, **rpc_options)
        self.rerpc = RPC(re_servicename, busname=re_busname, ForwardExceptions=True, **rpc_options)
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, **rpc_options)
        self.momrpc = MoMQueryRPC(servicename=mom_servicename, busname=mom_busname, **rpc_options)

        # The data management clients are created without an explicit timeout
        self.sqrpc = StorageQueryRPC(busname=storagequery_busname, servicename=storagequery_servicename, broker=broker)
        self.curpc = CleanupRPC(busname=cleanup_busname, servicename=cleanup_servicename, broker=broker)

        # Bus and subject prefix used to notify registered parties about task status changes
        self.ra_notification_prefix = ra_notification_prefix
        self.ra_notification_bus = ToBus(address=ra_notification_busname, broker=broker)

        self.resource_availability_checker = ResourceAvailabilityChecker(self.radbrpc)

    def __enter__(self):
        """
        Internal use only. (handles scope 'with')

        Opens all connections by calling open().

        :return: this ResourceAssigner instance, so it can be bound with 'with ... as'
        """
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Internal use only. (handles scope 'with')

        Closes all connections by calling close(). The exception arguments are not inspected and nothing is returned,
        so an exception raised inside the 'with' block is not suppressed.

        :param exc_type: type of the exception raised in the 'with' block, if any (unused)
        :param exc_val: the exception raised in the 'with' block, if any (unused)
        :param exc_tb: traceback of the exception raised in the 'with' block, if any (unused)
        """
        self.close()

    def open(self):
        """Open rpc connections to radb service and resource estimator service"""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.momrpc.open()
        self.sqrpc.open()
        self.curpc.open()
        self.ra_notification_bus.open()

    def close(self):
        """Close rpc connections to radb service and resource estimator service"""
        self.radbrpc.close()
        self.rerpc.close()
        self.otdbrpc.close()
        self.momrpc.close()
        self.sqrpc.close()
        self.curpc.close()
        self.ra_notification_bus.close()

    @property
    @cache
    def resource_types(self):
        """
        Mapping of all resource types known to the RADB service, used to convert a resource type name into its id.

        :return: dict {resource type name: resource type id}
        """
        # NOTE(review): @cache presumably memoizes the result so the RADB is not queried on every access - confirm
        # against lofar.common.cache

        name_to_id = {}
        for resource_type in self.radbrpc.getResourceTypes():
            name_to_id[resource_type['name']] = resource_type['id']

        return name_to_id

    def do_assignment(self, otdb_id, specification_tree):
        """
        Makes the given task known to RADB and attempts to assign (schedule) its requested resources.

        The outcome is reflected in the task's status in RADB, and our subscribers are notified about it:
        - "error" if no list of requested resources could be determined for the task, or if scheduling failed for
          another reason than a conflict;
        - "conflict" if the requested resources could not be assigned because the task is in conflict with other
          tasks;
        - "scheduled" if all requested resources were successfully assigned.
        Tasks that are already approved are only inserted into RADB; no scheduling is performed for them.

        :param otdb_id: OTDB ID of the main task which resources need to be assigned
        :param specification_tree: the specification tree containing the main task and its resources

        :raises an Exception if something unforeseen happened while scheduling
        """

        logger.info(('do_assignment: otdb_id=%s specification_tree=%s' % (otdb_id, specification_tree)))

        # Make the task known to RADB
        task_id, task_type, task_status, task = self._insert_specification_into_radb(otdb_id, specification_tree)

        def _set_status_and_notify(_status):
            """Changes the task's status in RADB and then notifies our subscribers about it"""
            self._update_task_status(task, _status)
            self._send_task_status_notification(task, _status)

        # Approved tasks are not scheduled. This is checked only after insertion of the specification, task and
        # predecessor/successor relations, so approved tasks appear correctly in the web scheduler.
        if task_status == 'approved':
            logger.info('Task otdb_id=%s is already approved, no resource assignment needed' % otdb_id)
            return

        requested_resources = self._get_resource_estimates(specification_tree, otdb_id, task_type, task_id)
        if requested_resources is None:
            # Without resource requests there is nothing to schedule
            _set_status_and_notify('error')
            return

        if self._schedule_resources(task_id, specification_tree, requested_resources):
            # Cleanup the data of any previous run of the task before announcing the task as scheduled
            self._cleanup_earlier_generated_data(otdb_id, task)
            _set_status_and_notify('scheduled')
            return

        # Scheduling failed. If any of the claims has status conflict then (by the radb triggers) the task has status
        # conflict as well. If the task is not in conflict there was a specification/scheduling error, so the task
        # is forced to error (not conflict!).
        # Updating the task status when it is already in 'conflict' seems unnecessary, but in order to satisfy the
        # existing unit tests it is done anyway.
        # TODO: discuss if this can be removed
        in_conflict = self.radbrpc.getTask(task['id'])['status'] == 'conflict'
        _set_status_and_notify('conflict' if in_conflict else 'error')

    def _insert_specification_into_radb(self, otdb_id, specification_tree):
        """
        Inserts the main task's specification into RADB and links the task to its predecessors and successors.

        :param otdb_id: the main task's OTDB ID
        :param specification_tree: the main task's specification

        :return: A 4-tuple (task_id, task_type, task_status, task) describing the inserted task

        :raises Exception if the task is not assignable or can't be inserted into RADB
        """

        # Raises if the task's status does not allow assignment, so nothing is inserted in that case
        status = self._get_is_assignable(otdb_id, specification_tree)

        task_type, start, end, cluster = self._prepare_to_insert_main_task(otdb_id, specification_tree)
        radb_task_id, radb_task = self._insert_main_task(specification_tree, start, end, cluster)

        self._link_predecessors_to_task_in_radb(radb_task)
        self._link_successors_to_task_in_radb(radb_task)

        logger.info('Successfully inserted main task and its predecessors and successors into RADB: task=%s',
                    radb_task)

        return radb_task_id, task_type, status, radb_task

    def _get_is_assignable(self, otdb_id, specification_tree):
        """
        Verifies if a task can actually be assigned by looking at its status. Raises an exception if the task is not
        assignable.

        The status is read from the 'state' item of the specification tree and compared case-insensitively. A missing
        'state' is treated as an empty status, which is not assignable.

        :param otdb_id: OTDB ID of the task
        :param specification_tree: the specification tree of the task

        :returns the task's (lower-cased) status if it is assignable
        :raises Exception if it can't be assigned
        """

        assignable_task_states = ['approved', 'prescheduled']
        status = specification_tree.get('state', '').lower()

        if status not in assignable_task_states:
            # logger.warning instead of the deprecated logger.warn alias, like the rest of this class
            logger.warning("Task otdb_id=%s with status '%s' is not assignable. Allowed statuses are %s",
                           otdb_id, status, ', '.join(assignable_task_states))

            raise Exception("doAssignment: Unsupported status '%s' of task with OTDB ID: %s" % (status, otdb_id))

        logger.info("Task otdb_id=%s with status '%s' is assignable", otdb_id, status)

        return status

    def _prepare_to_insert_main_task(self, otdb_id, specification_tree):
        """
        Collects everything that is needed to insert the main task into RADB: its type, its start and end time and
        the name of its cluster, all derived from the task's specification.

        :param otdb_id: the main task's OTDB ID
        :param specification_tree: the main task's specification

        :return: 4-tuple (task_type, start_time, end_time, cluster_name) of the task prepared for RADB insertion
        """

        parset = self._get_main_parset(specification_tree)

        # Only the type is of interest here, not the subtype
        task_type = self._get_task_type(specification_tree)[0]

        # The cluster name is determined before the times, both may upload corrected values to OTDB
        cluster_name = self._get_clustername(otdb_id, parset, task_type)
        times = self._get_main_task_start_and_end_times(specification_tree)
        start_time, end_time = times

        logger.info('preparations for inserting main task into RADB successful')

        return task_type, start_time, end_time, cluster_name

    def _update_task_status(self, task, new_status):
        """
        Updates a task's status in RADB. No notification is sent from here; see _send_task_status_notification for
        that.

        :param task: the task at hand, its 'id' item identifies the task in RADB
        :param new_status: the new status to set the task to in RADB

        :raises Exception if updating RADB fails
        """

        radb_task_id = task['id']

        logger.info('Updating task status for task_id=%s, new status=%s' % (radb_task_id, new_status))
        self.radbrpc.updateTask(radb_task_id, task_status=new_status)

    def _send_task_status_notification(self, task, status):
        """
        Publishes an event message about the task's status on the RA notification bus.

        The message subject is the notification prefix followed by 'Task' and the status with its first character
        in upper case, e.g. 'TaskScheduled'. The message content holds the task's RADB, OTDB and MoM IDs.

        :param task:    the task concerned
        :param status:  the task's status (must not be empty)

        :raises Exception if sending the notification fails
        """

        content = dict(radb_id=task['id'], otdb_id=task['otdb_id'], mom_id=task['mom_id'])

        # Only the first character is upper-cased, the remainder of the status is left untouched
        subject = 'Task%s%s' % (status[0].upper(), status[1:])
        event_message = EventMessage(context=self.ra_notification_prefix + subject, content=content)

        single_line_content = str(content).replace('\n', ' ')
        logger.info('Sending notification %s: %s' % (subject, single_line_content))

        self.ra_notification_bus.send(event_message)

    def _get_main_parset(self, specification_tree):
        """
        Builds the main task's parset from the 'specification' item of a specification tree

        :param specification_tree: the task's specification tree

        :returns the main parset, as a parameterset

        :raises KeyError if the specification tree (a dict) has no 'specification' item
        """

        main_specification = specification_tree['specification']

        return parameterset(main_specification)

    def _get_task_type(self, specification_tree):
        """
        Extracts the task's type and subtype (if applicable) from a specification tree

        :param specification_tree: specification_tree: the task's specification tree

        :return: 2-tuple (task_type, task_subtype)
        """

        task_type = specification_tree['task_type']  # is required item
        if 'task_subtype' in specification_tree:  # is optional item
            task_subtype = specification_tree['task_subtype']
        else:
            task_subtype = ''

        return task_type, task_subtype

    def _get_clustername(self, otdb_id, parset, task_type):
        """
        Determines the name of the cluster to which to store the task's output - if it produces output at all that is.

        :param otdb_id: the ORDB ID of the task
        :param parset: the parset of the task
        :param task_type: the task's type

        :returns The name of the output cluster, or an empty string if none is applicable
        :raises Exception if the storage cluster required by the task is unknown to the system
        """

        cluster_name = ''
        if task_type not in ('reservation',):
            # Only assign resources for task output to known clusters
            cluster_name_set = self._get_cluster_names(parset)

            if str() in cluster_name_set or len(cluster_name_set) != 1:
                # Empty set or name is always an error.
                # TODO: To support >1 cluster per obs,
                # self.radbrpc.insertSpecificationAndTask() as called below and the radb would need to take >1
                # cluster name/ Also, there is only 1 processingClusterName in the parset, but we do not always want to
                # pipeline process all obs outputs, or not on 1 cluster
                logger.error(
                    'clusterNameSet must have a single non-empty name for all enabled DataProducts, but is: %s' %
                    cluster_name_set
                )
            else:
                cluster_name = cluster_name_set.pop()

                # Retrieve known cluster names (not all may be a valid storage target, but we cannot know...)
                known_cluster_set = {cluster['name'] for cluster in self.radbrpc.getResourceGroupNames('cluster')}
                logger.info('known clusters: %s', known_cluster_set)
                if cluster_name not in known_cluster_set:
                    raise Exception("skipping resource assignment for task with cluster name '" + cluster_name +
                                    "' not in known clusters " + str(known_cluster_set))
                else:
                    # fix for MoM bug introduced before NV's holiday
                    # MoM sets ProcessingCluster.clusterName to CEP2 even when inputxml says CEP4
                    # so, override it here if needed, and update to otdb
                    processing_cluster_name = parset.getString('Observation.Cluster.ProcessingCluster.clusterName',
                                                               '')
                    if processing_cluster_name != cluster_name:
                        logger.info('overwriting and uploading processingClusterName to otdb from \'%s\' to \'%s\' '
                                    'for otdb_id=%s', processing_cluster_name, cluster_name, otdb_id)
                        self.otdbrpc.taskSetSpecification(
                            otdb_id,
                            {'LOFAR.ObsSW.Observation.Cluster.ProcessingCluster.clusterName': cluster_name}
                        )

        return cluster_name

    def _get_cluster_names(self, parset):
        """
        Get the storage cluster names for all enabled output data product types in parset

        :param parset: the task's parset

        :raises Exception if an enabled output data product type has no storage cluster name specified.
        """
        cluster_names = set()

        keys = ['Output_Correlated',
                'Output_IncoherentStokes',
                'Output_CoherentStokes',
                'Output_InstrumentModel',
                'Output_SkyImage',
                'Output_Pulsar']
        for key in keys:
            if parset.getBool('Observation.DataProducts.%s.enabled' % key, False):
                # may raise; don't pass default arg
                name = parset.getString('Observation.DataProducts.%s.storageClusterName' % key)
                cluster_names.add(name)

        return cluster_names

    def _get_main_task_start_and_end_times(self, specification_tree):
        """
        Get the start time and end time of the main task modified such that (a) there's a period of 3 minutes between
        tasks and (b) the start time and end time are actually in the future.

        The times are modified when the start time or end time is not (correctly) specified, or when the start time
        lies in the past. In that case a start time that is missing or lies in the past is set to 3 minutes from the
        current time, and a start time that overlaps with the task's predecessors is moved to 3 minutes after the
        latest predecessor end time. The new end time is calculated using the specified duration or, if that is not
        specified, from the original difference between start and end time. When a duration can't be determined the
        end time will be set to 1 hour after the start time. Modified times are uploaded to OTDB.

        The times of reservation and maintenance tasks are never modified.

        :param specification_tree: specification tree for the main task

        :returns 2-tuple (start_time, end_time) both in datetime format. For reservation and maintenance tasks either
                 can be None if it was not (correctly) specified.
        """

        def _get_time_from_parset(_parset, _key):
            """
            Reads a single timestamp from a parset and parses it

            :param _parset: the parset
            :param _key: the parset key holding the timestamp
            :return: the parsed datetime, or None if reading or parsing raised a ValueError or KeyError
            """

            try:
                return parseDatetime(_parset.getString(_key))
            except (ValueError, KeyError):
                # The exception types have to be given as a tuple: 'except ValueError or KeyError' evaluates to
                # 'except ValueError' and lets a KeyError escape.
                # Too bad no valid time is specified!
                return None

        def _get_start_and_end_times_from_parset(_parset):
            """
            Extract the start and end times from a parset

            :param _parset: the parset
            :return: A 2-tuple (start_time, end_time). start_time and end_time are returned as None when they were not
                     specified, or were specified in a wrong format.
            """

            logger.debug('_get_start_and_end_time_from_parset - parset: %s', _parset)

            parset_start_time = _get_time_from_parset(_parset, 'Observation.startTime')
            parset_end_time = _get_time_from_parset(_parset, 'Observation.stopTime')

            return parset_start_time, parset_end_time

        def _get_duration_from_parset(_parset):
            """
            Preferably use the duration specified by the parset. If that's not available, calculate the duration from
            the difference between start/end times. If that's also impossible, fall back to a default duration

            :param _parset: the task's parset containing start/end times and durations (usually)

            :returns the obtained, calculated, or default duration as a timedelta
            """

            try:
                duration = timedelta(seconds=_parset.getInt('Observation.Scheduler.taskDuration'))
            except Exception:
                # No usable duration in the parset, so fall back on the specified start/end times
                _start_time, _end_time = _get_start_and_end_times_from_parset(_parset)

                if _start_time is not None and _end_time is not None and _start_time < _end_time:
                    duration = _end_time - _start_time
                else:
                    duration = timedelta(hours=1)

            return duration

        # TODO: add unit tests that verify the task_types logic
        def _get_need_to_push_back_start_and_end_times(_start_time, _end_time):
            """
            Determines whether or not a task's start/end times need to be pushed back in time

            :param _start_time: the task's start time
            :param _end_time:  the task's end time

            :return: True if start/end times need to be pushed back, False otherwise
            """

            task_type, _ = self._get_task_type(specification_tree)

            # The start time of reservations and maintenance tasks are allowed to lie in the past
            if task_type in ['reservation', 'maintenance']:
                do_push_back = False
            else:
                do_push_back = _start_time is None or \
                               _end_time is None or \
                               _start_time < datetime.utcnow()

            return do_push_back

        def _push_back_start_time_to_not_overlap_predecessors(_start_time, _specification_tree):
            """
            Determines a new start time for a task when the current start time of that task overlaps with its
            predecessors.

            :param _start_time: the task's start time
            :param _specification_tree: the specification tree holding both the task's information and information about
                                        its predecessors/successors etcetera.

            :return: The updated start time
            """

            pushed_back_start_time = _start_time

            # Make sure the start time lies past the end time of the task's predecessors
            max_predecessor_end_time = self._get_maximum_predecessor_end_time(_specification_tree)
            if max_predecessor_end_time and max_predecessor_end_time > _start_time:
                pushed_back_start_time = max_predecessor_end_time + timedelta(minutes=3)

            return pushed_back_start_time

        def _store_changed_start_and_end_times_to_otdb(_start_time, _end_time, _otdb_id):
            """
            Stores the modified start/end times to the OTDB

            :param _start_time: the task's start time
            :param _end_time:  the task's end time
            :param _otdb_id: the task's OTDB ID
            """

            logger.info('uploading auto-generated start/end time  (%s, %s) to otdb for otdb_id=%s',
                        _start_time, _end_time, _otdb_id)

            self.otdbrpc.taskSetSpecification(
                _otdb_id, {
                    'LOFAR.ObsSW.Observation.startTime': _start_time.strftime('%Y-%m-%d %H:%M:%S'),
                    'LOFAR.ObsSW.Observation.stopTime': _end_time.strftime('%Y-%m-%d %H:%M:%S')
                }
            )

        main_parset = self._get_main_parset(specification_tree)
        start_time, end_time = _get_start_and_end_times_from_parset(main_parset)

        # TODO: don't fix this crap here. Bad start/stop time has to go to error, like any other bad spec part.
        if _get_need_to_push_back_start_and_end_times(start_time, end_time):
            # Make sure the start time lies in the future and doesn't overlap with any predecessors
            if start_time is None or start_time < datetime.utcnow():
                start_time = datetime.utcnow() + timedelta(minutes=3)
            start_time = _push_back_start_time_to_not_overlap_predecessors(start_time, specification_tree)

            # The end time always follows from the (possibly unchanged) start time and the task's duration
            end_time = start_time + _get_duration_from_parset(main_parset)

            otdb_id = specification_tree['otdb_id']
            logger.warning('Applying sane defaults (%s, %s) for start/end time from specification for otdb_id=%s',
                           start_time, end_time, otdb_id)

            _store_changed_start_and_end_times_to_otdb(start_time, end_time, otdb_id)

        return start_time, end_time

    def _get_dwell_parameters(self, specification_tree):
        """
        Obtains the minimum start time, maximum end time and minimum/maximum duration. If they aree not specified in the
        original specification starttime, endtime, duration, duration will be returned respectively instead. (see
        momqueryservice)

        :param specification_tree: specification tree for the main task
        :return: 4-tuple (min_starttime, max_endtime, min_duration, max_duration)
        """

        main_parset = self._get_main_parset(specification_tree)
        mom_id = main_parset.getInt('Observation.momID', -1)
        time_restrictions = self.momrpc.get_time_restrictions(mom_id)

        format = "%Y-%m-%dT%H:%M:%S"
        min_startime = datetime.strptime(time_restrictions["minStartTime"], format) if time_restrictions["minStartTime"] else None
        max_endtime = datetime.strptime(time_restrictions["maxEndTime"], format) if time_restrictions["maxEndTime"] else None
        min_duration = int(time_restrictions["minDuration"]) if time_restrictions["minDuration"] else None
        max_duration = int(time_restrictions["maxDuration"]) if time_restrictions["maxDuration"] else None

        return min_startime, max_endtime, min_duration, max_duration

    def _insert_main_task(self, specification_tree, start_time, end_time, cluster_name):
        """
        Inserts the main task and its specification into the RADB, and reads the inserted task back from the RADB.
        Any existing specification and task with same otdb_id will be deleted automatically.

        :param specification_tree: the task's specification tree
        :param start_time: the task's start time
        :param end_time: the task's end time
        :param cluster_name: the task's cluster name

        :return: 2-tuple (task_id, task) of the inserted task
        :raises Exception if there's an unforeseen problem while inserting the task and its specifications into RADB
        """

        task_type = self._get_task_type(specification_tree)[0]
        main_parset = self._get_main_parset(specification_tree)
        mom_id = main_parset.getInt('Observation.momID', -1)  # -1 when the parset holds no MoM ID
        task_state = specification_tree.get('state', '').lower()
        otdb_id = specification_tree['otdb_id']

        log_fields = (mom_id, otdb_id, task_state, task_type, start_time, end_time, cluster_name)
        logger.info(
            'insertSpecification mom_id=%s, otdb_id=%s, status=%s, task_type=%s, start_time=%s, end_time=%s '
            'cluster=%s' % log_fields
        )

        # The parset is stored in the RADB in its string form
        inserted = self.radbrpc.insertSpecificationAndTask(mom_id, otdb_id, task_state, task_type, start_time,
                                                           end_time, str(main_parset), cluster_name)

        specification_id = inserted['specification_id']
        task_id = inserted['task_id']
        logger.info('inserted specification (id=%s) and task (id=%s)' % (specification_id, task_id))

        return task_id, self.radbrpc.getTask(task_id)

    def _get_resource_estimates(self, specification_tree, otdb_id, task_type, task_id):
        """
        Requests the resource estimates for the main task in the specification tree from the Resource Estimator and
        validates the reply.

        The reply is expected to hold, under the OTDB ID (as string) and then under the task type, a dict with an
        'estimates' list of which each item has a 'resource_types' dict with strictly positive values. Any deviation,
        any error reported by the estimator and any other exception is logged and leads to None being returned.

        :param specification_tree: the task's specification tree
        :param otdb_id: the task's OTDB ID
        :param task_type: the task's type
        :param task_id: the task's ID (used for logging only)

        :return A list of resource estimates for the given task or None in case none could be obtained or if the
                validation failed.
        """

        try:
            reply, _ = self.rerpc({"specification_tree": specification_tree}, timeout=10)
            logger.info('Resource Estimator reply = %s', reply)

            otdb_key = str(otdb_id)
            if otdb_key not in reply:
                raise ValueError("no otdb_id %s found in estimator results %s" % (otdb_id, reply))
            estimates_per_type = reply[otdb_key]

            if task_type not in estimates_per_type:
                raise ValueError("no task type %s found in estimator results %s" % (task_type, estimates_per_type))
            task_result = estimates_per_type[task_type]

            if 'errors' in task_result and task_result['errors']:
                for error in task_result['errors']:
                    logger.error("Error from Resource Estimator: %s", error)
                raise ValueError("Error(s) in estimator for otdb_id=%s radb_id=%s" % (otdb_id, task_id))

            if 'estimates' not in task_result or \
                    any('resource_types' not in estimate for estimate in task_result['estimates']):
                raise ValueError("missing 'resource_types' in 'estimates' in estimator results: %s" % task_result)
            estimates = task_result['estimates']

            # Avoid div by 0 and inf looping from estimate <= 0 later on.
            for estimate in estimates:
                for estimated_value in estimate['resource_types'].values():
                    if not estimated_value > 0:
                        raise ValueError("at least one of the estimates is not a positive number")
        except Exception as e:
            logger.error('An exception occurred while obtaining resource estimates. Exception=%s' % str(e))

            return None

        return estimates

    def _schedule_resources(self, task_id, specification_tree, requested_resources):
        """
        Try to allocate the requested resources for a task by means of a DwellScheduler

        :param task_id: the task's ID
        :param specification_tree:  the task's specification tree
        :param requested_resources: the resources requested by the task

        :returns: the (truthy) result of the scheduler's allocation if successful, or a falsy value otherwise. An
                  exception raised by the allocation itself is logged and reported as False.
        """

        # The specification provides the 'sane' fixed start/end times as well as the minimum start time and maximum
        # end time between which the task is allowed to run
        start_time, end_time = self._get_main_task_start_and_end_times(specification_tree)
        earliest_start, latest_end, _, _ = self._get_dwell_parameters(specification_tree)

        # The DwellScheduler wants a minimum/maximum start time plus a fixed duration instead, so translate
        scheduling_window = self._calculate_min_max_starttimes(start_time, end_time, earliest_start, latest_end)
        min_starttime, max_starttime, duration = scheduling_window

        scheduler = DwellScheduler(task_id=task_id,
                                   resource_availability_checker=self.resource_availability_checker,
                                   radbcreds=self.radb_creds,
                                   min_starttime=min_starttime,
                                   max_starttime=max_starttime,
                                   duration=duration)

        try:
            allocated = scheduler.allocate_resources(requested_resources)
        except Exception as e:
            # A failing scheduler is treated the same as "nothing could be allocated"
            logger.error('Error in calling scheduler.allocate_resources: %s', e)
            allocated = False

        outcome = 'Resources successfully allocated task_id=%s' if allocated else 'No resources allocated task_id=%s'
        logger.info(outcome % task_id)

        return allocated

    def _calculate_min_max_starttimes(self, start_time, end_time, min_starttime, max_endtime):
        """
        Use any specified min_starttime/max_endtime to calculate the min_starttime/max_starttime. All timestamps are in
        datetime format

        :param start_time:      Task fixed start time
        :param end_time:        Task fixed end time
        :param min_starttime:   Task minimum start time
        :param max_endtime:     Task maximum end time

        :return: 3-tuple (min_starttime, max_starttime, duration)
        """

        # If no dwelling is specified, this is the task's duration
        duration = end_time - start_time

        # Calculate the effective dwelling time
        dwell_time = 0
        if min_starttime is not None and max_endtime is not None:
            max_duration = max_endtime - min_starttime
            dwell_time = max_duration - duration

        # Calculate min/max starttime
        min_starttime = max(start_time, min_starttime) if min_starttime is not None else start_time
        max_starttime = min_starttime + dwell_time

        return min_starttime, max_starttime, duration

    def _cleanup_earlier_generated_data(self, otdb_id, task):
        """
        Remove any output and/or intermediate data from any previous run of the task

        :param otdb_id: the task's OTDB ID
        :param task: the task object
        """

        # Only needed for pipeline tasks
        if task['type'] == 'pipeline':
            try:
                du_result = self.sqrpc.getDiskUsageForOTDBId(task['otdb_id'],
                                                             include_scratch_paths=True,
                                                             force_update=True)

                if du_result['found'] and du_result.get('disk_usage', 0) > 0:
                    logger.info("removing data on disk from previous run for otdb_id %s", otdb_id)
                    result = self.curpc.removeTaskData(task['otdb_id'])
                    if not result['deleted']:
                        logger.warning("could not remove all data on disk from previous run for otdb_id %s: %s",
                                       otdb_id, result['message'])
            except Exception as e:
                # in line with failure as warning just above: allow going to scheduled state here too
                logger.error(str(e))

    def _link_predecessors_to_task_in_radb(self, task):
        """
        Make sure that every predecessor MoM knows for the task is linked to it in RADB as well

        Predecessors that are already linked are left alone; predecessors that do not exist in RADB are skipped
        with a warning.

        :param task: the task at hand
        """

        mom_id = task['mom_id']

        # The MoM reply is keyed on the stringified MoM ID
        mom_reply = self.momrpc.getPredecessorIds(mom_id)
        mom_key = str(mom_id)
        if mom_key not in mom_reply or not mom_reply[mom_key]:
            logger.info('no predecessors for otdb_id=%s mom_id=%s', task['otdb_id'], mom_id)
            return
        predecessor_mom_ids = mom_reply[mom_key]

        logger.info('processing predecessor mom_ids=%s for mom_id=%s otdb_id=%s', predecessor_mom_ids, mom_id,
                    task['otdb_id'])

        for predecessor_mom_id in predecessor_mom_ids:
            predecessor_task = self.radbrpc.getTask(mom_id=predecessor_mom_id)

            if not predecessor_task:
                # Happens when a pipeline is set to prescheduled while a predecessor has e.g. never been beyond
                # approved, which is valid in principle, or when a predecessor has been deleted. In the former case
                # the link is expected to be made later on from the predecessor's side, when its successors are
                # linked.
                logger.warning('could not find predecessor task with mom_id=%s in radb for task otdb_id=%s',
                               predecessor_mom_id, task['otdb_id'])
                continue

            if predecessor_task['id'] in task['predecessor_ids']:
                # Already linked, nothing to do
                continue

            logger.info('connecting predecessor task with mom_id=%s otdb_id=%s to its successor with mom_id=%s '
                        'otdb_id=%s', predecessor_task['mom_id'], predecessor_task['otdb_id'], mom_id,
                        task['otdb_id'])
            self.radbrpc.insertTaskPredecessor(task['id'], predecessor_task['id'])

    def _link_successors_to_task_in_radb(self, task):
        """
        Make sure that every successor MoM knows for the task is linked to it in RADB as well

        Each successor found in RADB is additionally passed to movePipelineAfterItsPredecessors, whether or not a
        new link had to be made. Successors that do not exist in RADB are skipped with a warning.

        :param task: the task at hand
        """

        mom_id = task['mom_id']

        # The MoM reply is keyed on the stringified MoM ID
        mom_reply = self.momrpc.getSuccessorIds(mom_id)
        mom_key = str(mom_id)
        if mom_key not in mom_reply or not mom_reply[mom_key]:
            logger.info('no successors for otdb_id=%s mom_id=%s', task['otdb_id'], mom_id)
            return
        successor_mom_ids = mom_reply[mom_key]

        logger.info('processing successor mom_ids=%s for mom_id=%s otdb_id=%s', successor_mom_ids, mom_id,
                    task['otdb_id'])

        for successor_mom_id in successor_mom_ids:
            successor_task = self.radbrpc.getTask(mom_id=successor_mom_id)

            if not successor_task:
                # Happens when an observation or task is set to prescheduled while a successor has e.g. not yet been
                # beyond approved, which is quite normal, or when a successor has been deleted. In the former case
                # the link is expected to be made later on from the successor's side, when its predecessors are
                # linked.
                logger.warning('could not find successor task with mom_id=%s in radb for task otdb_id=%s',
                               successor_mom_id, task['otdb_id'])
                continue

            # Only create the link if it is not there yet
            if successor_task['id'] not in task['successor_ids']:
                logger.info('connecting successor task with mom_id=%s otdb_id=%s to its predecessor with mom_id=%s'
                            ' otdb_id=%s', successor_task['mom_id'], successor_task['otdb_id'], mom_id,
                            task['otdb_id'])
                self.radbrpc.insertTaskPredecessor(successor_task['id'], task['id'])

            movePipelineAfterItsPredecessors(successor_task, self.radbrpc)

    def _get_maximum_predecessor_end_time(self, specification_tree):
        """
        Determine the highest end time of all predecessors of a task

        :param specification_tree: the task's specification tree

        :return: the maximum predecessor end time found, or None in case no predecessors are specified
        """

        predecessor_specs = [parameterset(tree['specification']) for tree in specification_tree['predecessors']]
        predecessor_end_times = [parseDatetime(spec.getString('Observation.stopTime')) for spec in predecessor_specs]
        if predecessor_end_times:
            return max(predecessor_end_times)
        return None