Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
migrate_momdb_to_tmss.py 35.19 KiB
#!/usr/bin/env python3

from lofar.common import dbcredentials

import logging
import datetime
import pymysql
from optparse import OptionParser
import os
import django
import sys
import re

logger = logging.getLogger(__file__)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)


# 'mom2id' -> 'tmss object' mapping
# (so we know what has been created already and refer to that)
mom2id_to_tmss_representation = {}
stats = {"projects_skipped": 0, "projects_updated": 0, "projects_created":0,
         "subtasks_skipped": 0, "subtasks_updated": 0, "subtasks_created": 0}

def _execute_query(query, data=None):
    try:
        db = pymysql.connect(dbcreds.host, dbcreds.user, dbcreds.password, dbcreds.database)
        cursor = db.cursor(pymysql.cursors.DictCursor)
        cursor.execute(query, data)
        return cursor.fetchall()

    except Exception as e:
        logger.error("Could not execute query! %s" % e)


def query_project_details_from_momdb():
    """
    Queries MoM database for project details and returns the list of results
    :return: list of details as dict
    """
    logger.info("...querying MoM database for projects")
    query = """SELECT project.priority,
                      project.allowtriggers,
                      mom2object.name,
                      mom2object.description,
                      mom2object.mom2id,
                      resourcetype.name AS resourcetypename,
                      resource.projectpath
                FROM project
                JOIN mom2object ON project.mom2objectid=mom2object.id
                LEFT JOIN resource ON projectid=project.id AND resource.resourcetypeid IN (2,3,4,5,6,7,8,9,10,12)
                LEFT JOIN resourcetype ON resource.resourcetypeid=resourcetype.id
                ORDER BY mom2id;
                """

    results = _execute_query(query)

    # MoM resourcetypes
    #
    # mysql> SELECT * FROM lofar_mom_test_lsmr.resourcetype;
    # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+
    # | id | name                             | hosturi                                                                        | type           |
    # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+
    # |  1 | Lofar Observing Time             | NULL                                                                           | OBSERVING_TIME |
    # |  2 | Lofar Storage (SARA)             | srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/         | LTA_STORAGE    |
    # |  3 | Lofar Test Storage (SARA)        | srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/test/projects/    | LTA_STORAGE    |
    # |  4 | Lofar Storage (TARGET) old       | gsiftp://lotar1.staging.lofar/target/gpfs2/lofar/home/lofarops/ops/projects/   | LTA_STORAGE    |
    # |  5 | Lofar Storage (Jülich)           | srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/ | LTA_STORAGE    |
    # |  6 | Lofar User Disk Storage (SARA)   | srm://srm.grid.sara.nl/pnfs/grid.sara.nl/data/lofar/user/disk/projects/        | LTA_STORAGE    |
    # |  7 | Lofar Tape Storage (Target)      | srm://srm.target.rug.nl:8444/lofar/ops/projects/                               | LTA_STORAGE    |
    # |  8 | Lofar Tape Test Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/test/projects/                          | LTA_STORAGE    |
    # |  9 | Lofar Disk Storage (Target)      | srm://srm.target.rug.nl:8444/lofar/ops/disk/projects/                          | LTA_STORAGE    |
    # | 10 | Lofar Disk Test Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/disk/test/projects/                     | LTA_STORAGE    |
    # | 11 | Lofar Processing Time            | NULL                                                                           | OBSERVING_TIME |
    # | 12 | Lofar Storage (Poznan)           | srm://lta-head.lofar.psnc.pl:8443/lofar/ops/projects/                          | LTA_STORAGE    |
    # | 13 | Lofar Triggers                   | NULL                                                                           | LOFAR_TRIGGERS |
    # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+

    return results


def query_subtask_details_for_project_from_momdb(project_mom2id):
    """
    Obtains details of observations and pipelines from the MoM database.
    Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks.
    :param project_mom2id:
    :return:
    """
    logger.info("...querying MoM database for subtasks of project %s" % project_mom2id)
    # todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing.
    # todo: clarify: Measurements correspond to subtask and Observations correspond to task level?
    #  We have lofar_observation and lofar_pipeline tables, but lofar_observation seems to refer to a 'task level mom2object'
    #  with measurement and pipeline children, where the latter are further described on the lofar_pipeline table.
    #  So I'm assuming that type '%%MEASUREMENT%%' here correspond to observation subtasks, which also means that some
    #  info is apparently squashed together with that of other subtasks of the same task in the lofar_observation entry
    #  of the parent/'task'.
    # todo: clarify: Is there info on template/start/stop on Measurement level somewhere? Using the parent task/observation now.
    query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code,
                      lofar_pipeline.template AS template, lofar_observation.default_template as obs_template, lofar_pipeline.starttime, lofar_pipeline.endtime,
                      lofar_observation_specification.starttime AS obs_starttime, lofar_observation_specification.endtime AS obs_endtime,
                      parent_mom2object.mom2id as parent_mom2id
                      FROM mom2object
                      INNER JOIN mom2object AS ownerproject_mom2object ON mom2object.ownerprojectid = ownerproject_mom2object.id
                      INNER JOIN mom2objectstatus ON mom2object.currentstatusid = mom2objectstatus.id
                      INNER JOIN status ON mom2objectstatus.statusid = status.id
                      LEFT JOIN mom2object AS parent_mom2object ON mom2object.parentid = parent_mom2object.id
                      LEFT JOIN lofar_pipeline ON mom2object.id = lofar_pipeline.mom2objectid
                      LEFT JOIN lofar_observation ON mom2object.parentid = lofar_observation.mom2objectid
                      LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id
                      WHERE ownerproject_mom2object.mom2id = %s
                      AND (mom2object.mom2objecttype LIKE '%%MEASUREMENT%%' OR mom2object.mom2objecttype LIKE '%%PIPELINE%%');  
                      '''

    parameters = (project_mom2id,)

    results = _execute_query(query, parameters)

    return results


def query_task_details_for_subtask_from_momdb(subtask_mom2id):
    """
    Obtains details of observations and pipelines from the MoM database.
    Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks.
    :param project_mom2id:
    :return:
    """
    logger.info("...querying MoM database for parent task of subtask mom2id=%s" % subtask_mom2id)
    query = '''SELECT parent_mom2object.mom2id, parent_mom2object.name, parent_mom2object.description, parent_mom2object.mom2objecttype, status.code,
                      lofar_observation.template, lofar_observation_specification.starttime AS starttime, lofar_observation_specification.endtime AS endtime
                      FROM mom2object
                      INNER JOIN mom2object AS parent_mom2object ON mom2object.parentid = parent_mom2object.id
                      INNER JOIN mom2objectstatus ON parent_mom2object.currentstatusid = mom2objectstatus.id
                      INNER JOIN status ON mom2objectstatus.statusid = status.id
                      LEFT JOIN lofar_observation ON parent_mom2object.id = lofar_observation.mom2objectid
                      LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id
                      WHERE mom2object.mom2id = %s                      
                      '''
    parameters = (subtask_mom2id,)

    results = _execute_query(query, parameters)

    return results

def query_related_tasks_from_momdb(task_mom2id):
    """
    Obtains details of observations and pipelines from the MoM database.
    Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks.
    :param project_mom2id:
    :return:
    """
    logger.info("...querying MoM database for tasks related to task mom2id=%s" % task_mom2id)
    query = '''SELECT TIMEDIFF(related_lofar_observation_specification.starttime, lofar_observation_specification.endtime) AS startdiff, 
               TIMEDIFF(lofar_observation_specification.starttime, related_lofar_observation_specification.endtime) AS enddiff, 
               related_mom2object.mom2id, related_mom2object.name, related_mom2object.description, related_mom2object.mom2objecttype, status.code,
               related_lofar_observation.template, related_lofar_observation_specification.starttime AS starttime, related_lofar_observation_specification.endtime AS endtime
               FROM mom2object
               INNER JOIN mom2object AS related_mom2object ON mom2object.parentid = related_mom2object.parentid
               INNER JOIN mom2objectstatus ON related_mom2object.currentstatusid = mom2objectstatus.id
               INNER JOIN status ON mom2objectstatus.statusid = status.id
               LEFT JOIN lofar_observation AS related_lofar_observation ON related_mom2object.id = related_lofar_observation.mom2objectid
               LEFT JOIN lofar_observation_specification AS related_lofar_observation_specification ON related_lofar_observation.user_specification_id = related_lofar_observation_specification.id 
               LEFT JOIN lofar_observation ON mom2object.id = lofar_observation.mom2objectid
               LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id
               WHERE mom2object.mom2id = %s
               '''

    parameters = (task_mom2id,)

    results = _execute_query(query, parameters)

    return results


def get_project_details_from_momdb():
    """
    Obtains project details from MoM database and translates it into details as understood by the tmss data model.
    :return: dict mom2id -> project details as dict
    """
    logger.info("Getting project details from MoM database")
    mom_results = query_project_details_from_momdb()
    results = {}

    for mom_details in mom_results:

        # derive values for TMSS:

        #   filesystem   todo: how to deal with Target locations?
        if mom_details['resourcetypename']:
            try:
                archive_location = models.Filesystem.objects.get(name=mom_details['resourcetypename'])
            except:
                logger.error("No Filesystem matching '%(resourcetypename)s' in tmss database! Skipping project name=%(name)s" % mom_details)
                continue
        else:
            logger.warning("Missing archive info in MoM details, using None! name=%(name)s" % mom_details)
            archive_location = None
            mom_details['projectpath'] = ""

        # create new tmss details
        details = {"name": mom_details['name'],
                   "description":  "" if not mom_details['description'] else mom_details['description'],
                   "tags": ["migrated_from_MoM", "migration_incomplete"],
                   "priority_rank": mom_details['priority'],
                   "trigger_priority": 1000,
                   "can_trigger": mom_details['allowtriggers'],
                   "private_data": True,  # todo: check project.releasedate and compare to now or how to determine???
                   "archive_subdirectory": mom_details['projectpath'],
                   # optional:
                   # "project_category":,
                   # "period_category":,
                   "archive_location": archive_location
                   }

        # add to return dict
        results[mom_details['mom2id']] = details

    return results


def get_or_create_scheduling_set_for_project(project):
    """
    Returns the common scheduling set for all scheduling units of the given project or creates a new one if not found in TMSS.
    """
    try:
        scheduling_set = models.SchedulingSet.objects.get(name=project.name)
    except:
        dummy_scheduling_set_details = {"name": project.name,
                            "description": "Common scheduling set for all scheduling units in this project (created during MoM migration)",
                            "tags": ["migrated_from_MoM", "migration_incomplete"],
                            "generator_doc": {},
                            "project": project,
                            "generator_template": None}
        scheduling_set = models.SchedulingSet.objects.create(**dummy_scheduling_set_details)

    return scheduling_set


def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, task_mom2id, max_time_distance=900):
    """
    Returns a scheduling unit for the given subtask mom2id. It is either newly created or an existing scheduling unit
    of related subtasks. Subtasks are considered related when they are within the same folder and one task does not start
    more than max_time_distance seconds  before of after the other task.

    # todo: we have groups/topologies as well, need some guidance here...
    # todo: where to get the specification time from?

    :returns tuple(scheduling_unit_draft, scheduling_unit_template)
    """

    related_task_details = query_related_tasks_from_momdb(task_mom2id)
    if related_task_details:
        for details in related_task_details:
            if details["mom2id"] in mom2id_to_tmss_representation.keys():
                related_task = mom2id_to_tmss_representation[details["mom2id"]]  # we kept a blueprint reference
                if details['startdiff'] and details['enddiff']:
                    time_distance = min(abs(details['startdiff'].total_seconds()), abs(details['enddiff'].total_seconds()))
                    if time_distance < max_time_distance:
                        blueprint = related_task.scheduling_unit_blueprint
                        draft = blueprint.draft
                        logger.info("...using scheduling unit draft_id=%s blueprint_id=%s from related task mom2id=%s for task mom2id=%s" % (draft.id, blueprint.id, details["mom2id"], task_mom2id))
                        return draft, blueprint
                    else:
                        logger.info("...related task mom2id=%s starts too far apart (seconds=%s threshold=%s)" % (details["mom2id"], time_distance, max_time_distance))
                        continue
                else:
                    logger.warning("Cannot compare times, assuming task mom2id=%s is not related to %s" % (task_mom2id, details))  # todo: Investigate... is this because sometimes user sometimes system specified?
                    continue

    scheduling_unit_draft_details = {"name": 'dummy',
                                     "description": "Scheduling unit draft (created during MoM migration for task mom2id=%s)" % task_mom2id,
                                     "tags": ["migrated_from_MoM", "migration_incomplete"],
                                     "requirements_doc": {},
                                     "scheduling_set": scheduling_set,
                                     "requirements_template": scheduling_unit_template
                                     # optional:
                                     # "copy_reason": models.CopyReason.objects.get(value='template'),
                                     # "copies": None,
                                     # "generator_instance_doc" : {},
                                     # "scheduling_constraints_doc": {},
                                     # "scheduling_constraints_template": None,
                                     # "observation_strategy_template": None,
                                     }

    draft = models.SchedulingUnitDraft.objects.create(**scheduling_unit_draft_details)

    scheduling_unit_blueprint_details = {"name": 'dummy',
                                         "description": "Scheduling unit blueprint (created during MoM migration for task mom2id=%s)" % task_mom2id,
                                         "tags": ["migrated_from_MoM", "migration_incomplete"],
                                         "requirements_doc": {},
                                         "do_cancel": False,
                                         "draft": draft,
                                         "requirements_template": scheduling_unit_template}

    blueprint = models.SchedulingUnitBlueprint.objects.create(**scheduling_unit_blueprint_details)
    logger.info("Created new scheduling unit draft_id=%s blueprint_id=%s for task mom2id=%s" % (draft.id, blueprint.id, task_mom2id))
    return draft, blueprint


def get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, subtask_mom2id):
    """
    Returns a TMSS task for the given subtask.
    It is either newly created or an existing task of related subtasks. Subtasks are considered related when they have
    the same parentid in MoM database.
    :returns tuple(task_draft, task_blueprint)
    """

    task_details = query_task_details_for_subtask_from_momdb(subtask_mom2id)
    if task_details:
        for details in task_details:   # there should be exactly one, actually
            if details["mom2id"] in mom2id_to_tmss_representation.keys():
                blueprint = mom2id_to_tmss_representation[details["mom2id"]]
                draft = blueprint.draft
                logger.info("...using existing task draft_id=%s blueprint_id=%s for subtask mom2id=%s" % (draft.id, blueprint.id, subtask_mom2id))
                return draft, blueprint
            else:
                try:
                    return models.TaskTemplate.objects.get(name=details['default_template'])
                except:
                    task_template = _dummy_task_template(details['template'])

                task_draft_details = {"name": details["name"],
                                            "description": "" if not details['description'] else details['description'],
                                            "tags": ["migrated_from_MoM", "migration_incomplete"],
                                            "specifications_doc": {},
                                            # "copy_reason": models.CopyReason.objects.get(value='template'),
                                            # "copies": None,
                                            "scheduling_unit_draft": scheduling_unit_draft,
                                            "specifications_template": task_template}

                task_draft = models.TaskDraft.objects.create(**task_draft_details)

                task_blueprint_details = {"name": details["name"],
                                                "description": "" if not details['description'] else details['description'],
                                                "tags": ["migrated_from_MoM", "migration_incomplete"],
                                                "specifications_doc": {},
                                                "do_cancel": False,
                                                "draft": task_draft,
                                                "specifications_template": task_template,
                                                "scheduling_unit_blueprint": scheduling_unit_blueprint}

                task_blueprint = models.TaskBlueprint.objects.create(**task_blueprint_details)

                mom2id_to_tmss_representation[details["mom2id"]] = task_blueprint
                logger.info("...created new task draft_id=%s blueprint_id=%s for subtask mom2id=%s" % (task_draft.id, task_blueprint.id, subtask_mom2id))
                return task_draft, task_blueprint


def _dummy_subtask_template(name):
    template_name = "%s_dummy" % name
    try:
        return models.SubtaskTemplate.objects.get(name=template_name)
    except:
        dummy_template_details = {"name": template_name,
                                  "description": "Dummy subtask template for MoM migration, when no matching template in TMSS",
                                  "version": '1',
                                  "schema": {"$id":"http://tmss.lofar.org/api/schemas/subtasktemplate/empty/1#",
                                             "$schema": "http://json-schema.org/draft-06/schema#"},
                                  "realtime": False,
                                  "queue": False,
                                  "tags": ["DUMMY"],
                                  "type": models.SubtaskType.objects.get(value='other')}

        return models.SubtaskTemplate.objects.create(**dummy_template_details)


def _dummy_scheduling_unit_template(name):
    template_name = "%s_dummy" % name
    try:
        return models.SchedulingUnitTemplate.objects.get(name=template_name)
    except:
        dummy_scheduling_unit_template_details = {"name": template_name,
                                  "description": "Dummy scheduling unit template for MoM migration, when no matching template in TMSS",
                                  "version": 'v0.314159265359',
                                  "schema": {"$id":"http://tmss.lofar.org/api/schemas/schedulingunittemplate/empty/1#",
                                             "$schema": "http://json-schema.org/draft-06/schema#"},
                                  "tags": ["DUMMY"]}

        return models.SchedulingUnitTemplate.objects.create(**dummy_scheduling_unit_template_details)


def _dummy_task_template(name):
    template_name = "%s_dummy" % name
    try:
        return models.TaskTemplate.objects.get(name=template_name)
    except:
        dummy_task_template_details = {"name": template_name,
                                       "description": 'Dummy task template for MoM migration, when no matching template in TMSS',
                                       "validation_code_js": "",
                                       "version": 'v0.314159265359',
                                       "schema": {"$id":"http://tmss.lofar.org/api/schemas/tasktemplate/empty/1#",
                                                  "$schema": "http://json-schema.org/draft-06/schema#"},
                                       "tags": ["DUMMY"],
                                       "type": models.TaskType.objects.get(value='other')}

        return models.TaskTemplate.objects.create(**dummy_task_template_details)


def create_subtask_trees_for_project_in_momdb(project_mom2id, project):
    """
    Migrates all observations and pipelines that belong to the given project as Subtasks to TMSS.
    This also creates associated Task and SchedulingUnit drafts and blueprints in order to link the subtask to its project.
    :param project_mom2id: The mom id of the project to migrate
    :param project: The TMSS project object to refer to
    """

    global stats
    global mom2id_to_tmss_representation

    logger.info("Getting subtask details from MoM database")
    mom_results = query_subtask_details_for_project_from_momdb(project_mom2id)
    logger.info("There are %s subtasks to migrate in project name=%s" % (len(mom_results), project.name))

    for mom_details in mom_results:

        logger.info("...now migrating subtask mom2id=%s mom2objecttype=%s" % (mom_details['mom2id'], mom_details['mom2objecttype']))

        # derive values for TMSS

        #   type and start/end times

        if 'MEASUREMENT' in mom_details['mom2objecttype']:
            template_name = mom_details['obs_template']
            start_time = mom_details['obs_starttime']
            stop_time = mom_details['obs_endtime']
        elif 'PIPELINE' in mom_details['mom2objecttype']:
            template_name = mom_details['template']
            start_time = mom_details['starttime']
            stop_time = mom_details['endtime']
        else:
            logger.error('Unknown type %(mom2objecttype)s - Skipping subtask mom2id=%(mom2id)s' % mom_details)
            stats['subtasks_skipped'] += 1
            continue

        #   timestamps

        if start_time is None:
            start_time = datetime.datetime.utcfromtimestamp(0).isoformat()  # not-null constraint

        if stop_time is None:
            stop_time = datetime.datetime.utcfromtimestamp(0).isoformat()  # not-null constraint

        #   state

        #   todo: check mapping is correct and complete.
        #    This now only includes what I ran into during testing and is mapped to what felt right by my intuition (i.e. probably wrong)
        #   Note: status codes with a verbatim counterpart in TMSS do not need to be mapped here.
        #   Valid TMSS values are: "defining", "defined", "scheduling", "scheduled", "queueing", "queued", "starting", "started", "finishing", "finished", "cancelling", "cancelled", "error"
        mom_state_to_subtask_state = {"opened": models.SubtaskState.objects.get(value="defining"),
                                      "described": models.SubtaskState.objects.get(value="defined"),
                                      "suspended": models.SubtaskState.objects.get(value="cancelled"),
                                      "prepared": models.SubtaskState.objects.get(value="scheduling"),
                                      "aborted": models.SubtaskState.objects.get(value="cancelled"),
                                      "hold": models.SubtaskState.objects.get(value="cancelled"),
                                      "approved": models.SubtaskState.objects.get(value="queued"),
                                      "failed": models.SubtaskState.objects.get(value="error"),
                                      "successful": models.SubtaskState.objects.get(value="finished"),}

        if mom_details['code'] in mom_state_to_subtask_state:
            state = mom_state_to_subtask_state[mom_details['code']]
        else:
            try:
                state = models.SubtaskState.objects.get(value=mom_details['code'])
            except:
                logger.error("No SubtaskState choice matching '%(code)s' in tmss database! - Skipping subtask mom2id=%(mom2id)s" % mom_details)
                stats['subtasks_skipped'] += 1
                continue

        #     template

        if template_name is not None:
            try:
                specifications_template = models.SubtaskTemplate.objects.get(name=template_name)
                logger.info('...found SubtaskTemplate id=%s for subtask mom2id=%s templatename=%s' % (specifications_template.id, mom_details["mom2id"], template_name))
            except:
                # todo: create a lot of templates to reflect what was used for the actual task?
                #  Then raise Exception once we have proper templates for everything?
                specifications_template = _dummy_subtask_template(template_name)
                logger.warning("No SubtaskTemplate matching '%s' in tmss database! Using dummy id=%s instead for subtask mom2id=%s" % (template_name, specifications_template.id, mom_details['mom2id']))

        else:
            logger.error('Missing template name in MoM details! - Skipping subtask mom2id=%(mom2id)s' % mom_details)
            stats['subtasks_skipped'] += 1
            continue

        # scheduling set
        scheduling_set = get_or_create_scheduling_set_for_project(project)

        # scheduling unit template
        try:
            scheduling_unit_template = models.SchedulingUnitTemplate.objects.get(name=template_name)
            logger.info('...found SchedulingUnitTemplate id=%s for subtask mom2id=%s templatename=%s' % (scheduling_unit_template.id, mom_details["mom2id"], template_name))
        except:
            scheduling_unit_template = _dummy_scheduling_unit_template(template_name)
            logger.warning('No SchedulingUnitTemplate was found for subtask mom2id=%s templatename=%s. Using dummy template id=%s' % (mom_details["mom2id"], template_name, scheduling_unit_template.id))

        # scheduling unit draft + blueprint
        scheduling_unit_draft, scheduling_unit_blueprint = get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, mom_details["parent_mom2id"])

        # task draft + blueprint
        task_draft, task_blueprint = get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, mom_details["mom2id"])

        details = {"id": mom_details['mom2id'],
                   "state": state,
                   "specifications_doc": {},   # todo: where from? We have user_specification_id (for task?) and system_specification_id (for subtask?) on lofar_observation (I guess referring to lofar_observation_specification). Shall we piece things together from that, or is there a text blob to use? Also: pipeline info lives in obs_spec too?
                   "task_blueprint": task_blueprint,
                   "specifications_template": specifications_template,
                   "tags": ["migrated_from_MoM", "migration_incomplete"],   # todo: set complete once it is verified that all info is present
                   "priority": project.priority_rank,  # todo: correct to derive from project?
                   # optional:
                   "start_time": start_time,
                   "stop_time": stop_time
                   # "created_or_updated_by_user" = None,
                   # "raw_feedback" = None,
                   # "do_cancel": None,
                   #"cluster": None  # I guess from lofar_observation.storage_cluster_id
                   }

        subtask_qs = models.Subtask.objects.filter(id=details["id"])
        if subtask_qs.count():
            # todo: this will update the subtask, but other TMSS objects do not share id with MoM and get recreated with every migration run. Can we clean this up somehow?
            subtask_qs.update(**details)
            subtask = subtask_qs.first()
            logger.info("...updated existing subtask tmss id=%s" % subtask.id)
            stats['subtasks_updated'] += 1
        else:
            subtask = models.Subtask.objects.create(**details)
            logger.info("...created new subtask tmss id=%s" % subtask.id)
            stats['subtasks_created'] += 1

        mom2id_to_tmss_representation[mom_details['mom2id']] = subtask

        logger.info("...handled %s TMSS objects so far | %s" % (len(mom2id_to_tmss_representation), stats))


def get_or_create_cycle_for_project(project):
    """
    Returns a cycle for a given project. Since cycles don't seem to be a thing in MoM, the cycle is derived from the
    project name. Returns None, if that fails.
    """
    name = project.name
    if name.lower().startswith('lc'):
        cycle_no = re.search(r'\d+', name.lower()).group()
        cycle_name = "Cycle %02d" % int(cycle_no)
        try:
            cycle = models.Cycle.objects.get(name=cycle_name)
            logger.info("...found existing cycle name=%s for project name=%s" % (cycle.name, project.name))
            return cycle
        except:
            details = {"name": cycle_name,
                       "description": "Cycle %s (created during MoM migration)" % cycle_no,
                       "tags": ["migrated_from_MoM", "migration_incomplete"],
                       "start": "1970-01-01T00:00:00",  # todo, where from?
                       "stop": "1970-01-01T00:00:00",   # todo, where from?
                       }
            cycle = models.Cycle.objects.create(**details)
            logger.info("...created new cycle name=% for project name=%s" % (cycle.name, project.name))
            return cycle

    logger.warning("Could not determine cycle for project name=%s. Using None." % (project.name))



def main():
    """
    Migrates data from a MoM database to a TMSS database.
    Existing objects in TMSS of same name or id are updated, otherwise new objects are created.
    """

    global mom2id_to_tmss_representation
    global stats

    # query details of all projects in MoM database
    project_details = get_project_details_from_momdb()
    logger.info("There are %s projects to migrate" % len(project_details))

    # iterate projects
    for p_id, p_details in project_details.items():

        try:
            logger.info("Now migrating project mom_name=%s mom2id=%s" % (p_details['name'], p_id))

            # create or update project
            project_qs = models.Project.objects.filter(name=p_details["name"])
            if project_qs.count():
                project_qs.update(**p_details)
                project = project_qs.first()
                logger.info("...updated existing project tmss_name=%s" % project.name)
                stats["projects_updated"] += 1
            else:
                project = models.Project.objects.create(**p_details)
                logger.info("...created new project tmss_name=%s" % project.name)
                stats["projects_created"] += 1

            # create all subtasks and related objects for the project
            create_subtask_trees_for_project_in_momdb(p_id, project)

            # add project to existing or new cycle
            cycle = get_or_create_cycle_for_project(project)
            if cycle:
                project.cycles.set([cycle])

            logger.info("...done migrating project mom_name=%s mom2id=%s tmss_name=%s." % (p_details['name'], p_id, project.name))

        except Exception as ex:
            logger.error(ex, exc_info=True)
            logger.error("Skipping migration of project mom_name=%s mom2id=%s details=%s." % (p_details['name'], p_id, p_details))
            stats["projects_skipped"] += 1

    logger.info("Done. Handled %s TMSS objects in total | %s" % (len(mom2id_to_tmss_representation), stats))


if __name__ == "__main__":
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG)

    # Check the invocation arguments
    parser = OptionParser("%prog [options]", description='run the mom to tmss db conversion script')
    parser.add_option_group(dbcredentials.options_group(parser))
    parser.set_defaults(dbcredentials="MoM")
    (options, args) = parser.parse_args()

    global dbcreds
    dbcreds = dbcredentials.parse_options(options)
    logger.info("Using MoM dbcreds: %s", dbcreds.stringWithHiddenPassword())

    # note: this requires a config file .lofar/dbcredentials/mom.ini, with contents as such (adapt as needed):
    #
    # [database:MoM]
    # type = mysql
    # host = localhost
    # port = 3306
    # user = momuser
    # password = mompass
    # database = lofar_mom_test_tmss

    from lofar.sas.tmss.tmss import setup_and_check_tmss_django_database_connection_and_exit_on_error
    setup_and_check_tmss_django_database_connection_and_exit_on_error(options.dbcredentials)

    main()