diff --git a/Docker/lofar-ci/Dockerfile_ci_sas b/Docker/lofar-ci/Dockerfile_ci_sas index 1aa8f6689b56f7529d3a0a17e0022128a9ab2bbc..ffcfb4133ecabcbb5004bef01b945d8251ae9127 100644 --- a/Docker/lofar-ci/Dockerfile_ci_sas +++ b/Docker/lofar-ci/Dockerfile_ci_sas @@ -16,7 +16,7 @@ RUN yum erase -y postgresql postgresql-server postgresql-devel && \ cd /bin && ln -s /usr/pgsql-9.6/bin/initdb && ln -s /usr/pgsql-9.6/bin/postgres ENV PATH /usr/pgsql-9.6/bin:$PATH -RUN pip3 install cython kombu lxml requests pygcn xmljson mysql-connector-python python-dateutil Django==3.0.9 djangorestframework==3.11.1 djangorestframework-xml ldap==1.0.2 flask fabric coverage python-qpid-proton PyGreSQL numpy h5py psycopg2 testing.postgresql Flask-Testing scipy Markdown django-filter python-ldap python-ldap-test ldap3 django-jsonforms django-json-widget django-jsoneditor drf-yasg flex swagger-spec-validator django-auth-ldap mozilla-django-oidc jsonschema comet pyxb==1.2.5 graphviz isodate astropy packaging django-debug-toolbar +RUN pip3 install cython kombu lxml requests pygcn xmljson mysql-connector-python python-dateutil Django==3.0.9 djangorestframework==3.11.1 djangorestframework-xml ldap==1.0.2 flask fabric coverage python-qpid-proton PyGreSQL numpy h5py psycopg2 testing.postgresql Flask-Testing scipy Markdown django-filter python-ldap python-ldap-test ldap3 django-jsonforms django-json-widget django-jsoneditor drf-yasg flex swagger-spec-validator django-auth-ldap mozilla-django-oidc jsonschema comet pyxb==1.2.5 graphviz isodate astropy packaging django-debug-toolbar pymysql #Viewflow package RUN pip3 install django-material django-viewflow diff --git a/SAS/TMSS/src/migrate_momdb_to_tmss.py b/SAS/TMSS/src/migrate_momdb_to_tmss.py index 07fa5d2ffccbcf7298d5e273e6f688acc5d3149a..13efa43bbc7759f453875c51cdbfb3f9b5734fb9 100755 --- a/SAS/TMSS/src/migrate_momdb_to_tmss.py +++ b/SAS/TMSS/src/migrate_momdb_to_tmss.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from tmss.wsgi import application # required to set up django, even though not explicitly used -from tmss.tmssapp import models from lofar.common import dbcredentials @@ -8,8 +6,24 @@ import logging import datetime import pymysql from optparse import OptionParser +import os +import django +import sys +import re logger = logging.getLogger(__file__) +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.DEBUG) +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +logger.addHandler(handler) + + +# 'mom2id' -> 'tmss object' mapping +# (so we know what has been created already and refer to that) +mom2id_to_tmss_representation = {} +stats = {"projects_skipped": 0, "projects_updated": 0, "projects_created":0, + "subtasks_skipped": 0, "subtasks_updated": 0, "subtasks_created": 0} def _execute_query(query, data=None): try: @@ -27,74 +41,77 @@ def query_project_details_from_momdb(): Queries MoM database for project details and returns the list of results :return: list of details as dict """ - logger.info("Querying MoM database for projects") + logger.info("...querying MoM database for projects") query = """SELECT project.priority, project.allowtriggers, mom2object.name, mom2object.description, - mom2object.mom2id + mom2object.mom2id, + resourcetype.name AS resourcetypename, + resource.projectpath FROM project JOIN mom2object ON project.mom2objectid=mom2object.id + LEFT JOIN resource ON projectid=project.id AND resource.resourcetypeid IN (2,3,4,5,6,7,8,9,10,12) + LEFT JOIN resourcetype ON resource.resourcetypeid=resourcetype.id ORDER BY mom2id; """ results = _execute_query(query) - # dummy data: - # results = [{"mom2id": 42, - # "name": "dummyproject", - # "description": "fake description", - # "priority": 1234, - # "allowtriggers": True}] + # MoM resourcetypes + # + # mysql> SELECT * FROM lofar_mom_test_lsmr.resourcetype; + # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+ + # | id | name | hosturi | type | + # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+ + # | 1 | Lofar Observing Time | NULL | OBSERVING_TIME | + # | 2 | Lofar Storage (SARA) | srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/ | LTA_STORAGE | + # | 3 | Lofar Test Storage (SARA) | srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/test/projects/ | LTA_STORAGE | + # | 4 | Lofar Storage (TARGET) old | gsiftp://lotar1.staging.lofar/target/gpfs2/lofar/home/lofarops/ops/projects/ | LTA_STORAGE | + # | 5 | Lofar Storage (Jülich) | srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/ | LTA_STORAGE | + # | 6 | Lofar User Disk Storage (SARA) | srm://srm.grid.sara.nl/pnfs/grid.sara.nl/data/lofar/user/disk/projects/ | LTA_STORAGE | + # | 7 | Lofar Tape Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/projects/ | LTA_STORAGE | + # | 8 | Lofar Tape Test Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/test/projects/ | LTA_STORAGE | + # | 9 | Lofar Disk Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/disk/projects/ | LTA_STORAGE | + # | 10 | Lofar Disk Test Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/disk/test/projects/ | LTA_STORAGE | + # | 11 | Lofar Processing Time | NULL | OBSERVING_TIME | + # | 12 | Lofar Storage (Poznan) | srm://lta-head.lofar.psnc.pl:8443/lofar/ops/projects/ | LTA_STORAGE | + # | 13 | Lofar Triggers | NULL | LOFAR_TRIGGERS | + # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+ return results -def get_project_details_from_momdb(): +def query_subtask_details_for_project_from_momdb(project_mom2id): """ - Obtains project details from MoM database and translates it into details as understood by the tmss data model. - :return: dict mom2id -> project details as dict + Obtains details of observations and pipelines from the MoM database. + Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks. + :param project_mom2id: + :return: """ - logger.info("Getting project details from MoM database") - mom_results = query_project_details_from_momdb() - results = {} - - for mom_details in mom_results: - - # create new tmss details based on MoM details - details = {"name": mom_details['name'], - "description": mom_details['description'], - "tags": ["migrated_from_MoM"], - "priority": mom_details['priority'], - "can_trigger": mom_details['allowtriggers'], - "private_data": True # todo: check project.releasedate and compare to now or how to determine??? - } - - # alterations to comply with constraints: - if details['description'] is None: - details['description'] = '' - - # add to return dict - results[mom_details['mom2id']] = details - - return results - - -def query_subtask_details_for_project_from_momdb(project_mom2id): - logger.info("Querying MoM database for tasks of project %s" % project_mom2id) + logger.info("...querying MoM database for subtasks of project %s" % project_mom2id) # todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing. + # todo: clarify: Measurements correspond to subtask and Observations correspond to task level? + # We have lofar_observation and lofar_pipeline tables, but lofar_observation seems to refer to a 'task level mom2object' + # with measurement and pipeline children, where the latter are further described on the lofar_pipeline table. + # So I'm assuming that type '%%MEASUREMENT%%' here correspond to observation subtasks, which also means that some + # info is apparently squashed together with that of other subtasks of the same task in the lofar_observation entry + # of the parent/'task'. + # todo: clarify: Is there info on template/start/stop on Measurement level somewhere? Using the parent task/observation now. query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code, - lofar_pipeline.template, lofar_observation.default_template, lofar_pipeline.starttime, lofar_pipeline.endtime, - lofar_observation_specification.starttime AS obs_starttime, lofar_observation_specification.endtime AS obs_endtime + lofar_pipeline.template AS template, lofar_observation.default_template as obs_template, lofar_pipeline.starttime, lofar_pipeline.endtime, + lofar_observation_specification.starttime AS obs_starttime, lofar_observation_specification.endtime AS obs_endtime, + parent_mom2object.mom2id as parent_mom2id FROM mom2object INNER JOIN mom2object AS ownerproject_mom2object ON mom2object.ownerprojectid = ownerproject_mom2object.id INNER JOIN mom2objectstatus ON mom2object.currentstatusid = mom2objectstatus.id INNER JOIN status ON mom2objectstatus.statusid = status.id + LEFT JOIN mom2object AS parent_mom2object ON mom2object.parentid = parent_mom2object.id LEFT JOIN lofar_pipeline ON mom2object.id = lofar_pipeline.mom2objectid - LEFT JOIN lofar_observation ON mom2object.id = lofar_observation.mom2objectid + LEFT JOIN lofar_observation ON mom2object.parentid = lofar_observation.mom2objectid LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id WHERE ownerproject_mom2object.mom2id = %s - AND (mom2object.mom2objecttype = 'LOFAR_OBSERVATION' OR mom2object.mom2objecttype LIKE '%%PIPELINE%%'); + AND (mom2object.mom2objecttype LIKE '%%MEASUREMENT%%' OR mom2object.mom2objecttype LIKE '%%PIPELINE%%'); ''' parameters = (project_mom2id,) @@ -104,242 +121,497 @@ def query_subtask_details_for_project_from_momdb(project_mom2id): return results -def _dummy_subtask_template(): - try: - return models.SubtaskTemplate.objects.get(name='dummy') - except: - dummy_template_details = {"name": "dummy", - "description": 'Dummy Template', - "version": '1', - "schema": {}, - "realtime": False, - "queue": False, - "tags": ["DUMMY"]} +def query_task_details_for_subtask_from_momdb(subtask_mom2id): + """ + Obtains details of observations and pipelines from the MoM database. + Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks. + :param project_mom2id: + :return: + """ + logger.info("...querying MoM database for parent task of subtask mom2id=%s" % subtask_mom2id) + query = '''SELECT parent_mom2object.mom2id, parent_mom2object.name, parent_mom2object.description, parent_mom2object.mom2objecttype, status.code, + lofar_observation.template, lofar_observation_specification.starttime AS starttime, lofar_observation_specification.endtime AS endtime + FROM mom2object + INNER JOIN mom2object AS parent_mom2object ON mom2object.parentid = parent_mom2object.id + INNER JOIN mom2objectstatus ON parent_mom2object.currentstatusid = mom2objectstatus.id + INNER JOIN status ON mom2objectstatus.statusid = status.id + LEFT JOIN lofar_observation ON parent_mom2object.id = lofar_observation.mom2objectid + LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id + WHERE mom2object.mom2id = %s + ''' + parameters = (subtask_mom2id,) - return models.SubtaskTemplate.objects.create(**dummy_template_details) + results = _execute_query(query, parameters) + return results -def _dummy_scheduling_set(project): - dummy_scheduling_set_details = {"name": 'dummy', - "description": "Dummy scheduling unit set", - "tags": ["DUMMY"], - "generator_doc": "{}", - "project": project, - "generator_template": None} +def query_related_tasks_from_momdb(task_mom2id): + """ + Obtains details of observations and pipelines from the MoM database. + Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks. + :param project_mom2id: + :return: + """ + logger.info("...querying MoM database for tasks related to task mom2id=%s" % task_mom2id) + query = '''SELECT TIMEDIFF(related_lofar_observation_specification.starttime, lofar_observation_specification.endtime) AS startdiff, + TIMEDIFF(lofar_observation_specification.starttime, related_lofar_observation_specification.endtime) AS enddiff, + related_mom2object.mom2id, related_mom2object.name, related_mom2object.description, related_mom2object.mom2objecttype, status.code, + related_lofar_observation.template, related_lofar_observation_specification.starttime AS starttime, related_lofar_observation_specification.endtime AS endtime + FROM mom2object + INNER JOIN mom2object AS related_mom2object ON mom2object.parentid = related_mom2object.parentid + INNER JOIN mom2objectstatus ON related_mom2object.currentstatusid = mom2objectstatus.id + INNER JOIN status ON mom2objectstatus.statusid = status.id + LEFT JOIN lofar_observation AS related_lofar_observation ON related_mom2object.id = related_lofar_observation.mom2objectid + LEFT JOIN lofar_observation_specification AS related_lofar_observation_specification ON related_lofar_observation.user_specification_id = related_lofar_observation_specification.id + LEFT JOIN lofar_observation ON mom2object.id = lofar_observation.mom2objectid + LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id + WHERE mom2object.mom2id = %s + ''' + + parameters = (task_mom2id,) + + results = _execute_query(query, parameters) + return results - return models.SchedulingSet.objects.create(**dummy_scheduling_set_details) +def get_project_details_from_momdb(): + """ + Obtains project details from MoM database and translates it into details as understood by the tmss data model. + :return: dict mom2id -> project details as dict + """ + logger.info("Getting project details from MoM database") + mom_results = query_project_details_from_momdb() + results = {} -def _dummy_scheduling_unit_template(): - dummy_scheduling_unit_template_details = {"name": "dummy", - "description": 'Dummy scheduling unit template', - "version": 'v0.314159265359', - "schema": {"mykey": "my value"}, - "tags": ["DUMMY"]} + for mom_details in mom_results: + + # derive values for TMSS: - return models.RunTemplate.objects.create(**dummy_scheduling_unit_template_details) + # filesystem todo: how to deal with Target locations? + if mom_details['resourcetypename']: + try: + archive_location = models.Filesystem.objects.get(name=mom_details['resourcetypename']) + except: + logger.error("No Filesystem matching '%(resourcetypename)s' in tmss database! Skipping project name=%(name)s" % mom_details) + continue + else: + logger.warning("Missing archive info in MoM details, using None! name=%(name)s" % mom_details) + archive_location = None + mom_details['projectpath'] = "" + # create new tmss details + details = {"name": mom_details['name'], + "description": "" if not mom_details['description'] else mom_details['description'], + "tags": ["migrated_from_MoM", "migration_incomplete"], + "priority_rank": mom_details['priority'], + "trigger_priority": 1000, + "can_trigger": mom_details['allowtriggers'], + "private_data": True, # todo: check project.releasedate and compare to now or how to determine??? + "archive_subdirectory": mom_details['projectpath'], + # optional: + # "project_category":, + # "period_category":, + "archive_location": archive_location + } -def _dummy_scheduling_unit_draft(scheduling_set, template): - dummy_scheduling_unit_draft_details = {"name": 'dummy', - "description": "Dummy scheduling_unit draft", - "tags": ["DUMMY"], - "requirements_doc": "{}", - "copy_reason": models.CopyReason.objects.get(value='template'), - "generator_instance_doc": "para", - "copies": None, - "scheduling_set": scheduling_set, - "generator_source": None, - "template": template} + # add to return dict + results[mom_details['mom2id']] = details - return models.RunDraft.objects.create(**dummy_scheduling_unit_draft_details) + return results -def _dummy_scheduling_unit_blueprint(draft, template): +def get_or_create_scheduling_set_for_project(project): + """ + Returns the common scheduling set for all scheduling units of the given project or creates a new one if not found in TMSS. + """ + try: + scheduling_set = models.SchedulingSet.objects.get(name=project.name) + except: + dummy_scheduling_set_details = {"name": project.name, + "description": "Common scheduling set for all scheduling units in this project (created during MoM migration)", + "tags": ["migrated_from_MoM", "migration_incomplete"], + "generator_doc": {}, + "project": project, + "generator_template": None} + scheduling_set = models.SchedulingSet.objects.create(**dummy_scheduling_set_details) - dummy_scheduling_unit_blueprint_details = {"name": 'dummy', - "description": "Dummy scheduling_unit blueprint", - "tags": ["DUMMY"], - "requirements_doc": "{}", - "do_cancel": False, - "draft": draft, - "template": template} + return scheduling_set - return models.RunBlueprint.objects.create(**dummy_scheduling_unit_blueprint_details) +def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, task_mom2id, max_time_distance=900): + """ + Returns a scheduling unit for the given subtask mom2id. It is either newly created or an existing scheduling unit + of related subtasks. Subtasks are considered related when they are within the same folder and one task does not start + more than max_time_distance seconds before of after the other task. + # todo: we have groups/topologies as well, need some guidance here... + # todo: where to get the specification time from? -def _dummy_task_template(): + :returns tuple(scheduling_unit_draft, scheduling_unit_template) + """ - dummy_task_template_details = {"name": "dummy", - "description": 'Dummy work request template', - "validation_code_js": "", - "version": 'v0.314159265359', - "schema": {"mykey": "my value"}, - "tags": ["DUMMY"]} + related_task_details = query_related_tasks_from_momdb(task_mom2id) + if related_task_details: + for details in related_task_details: + if details["mom2id"] in mom2id_to_tmss_representation.keys(): + related_task = mom2id_to_tmss_representation[details["mom2id"]] # we kept a blueprint reference + if details['startdiff'] and details['enddiff']: + time_distance = min(abs(details['startdiff'].total_seconds()), abs(details['enddiff'].total_seconds())) + if time_distance < max_time_distance: + blueprint = related_task.scheduling_unit_blueprint + draft = blueprint.draft + logger.info("...using scheduling unit draft_id=%s blueprint_id=%s from related task mom2id=%s for task mom2id=%s" % (draft.id, blueprint.id, details["mom2id"], task_mom2id)) + return draft, blueprint + else: + logger.info("...related task mom2id=%s starts too far apart (seconds=%s threshold=%s)" % (details["mom2id"], time_distance, max_time_distance)) + continue + else: + logger.warning("Cannot compare times, assuming task mom2id=%s is not related to %s" % (task_mom2id, details)) # todo: Investigate... is this because sometimes user sometimes system specified? + continue + + scheduling_unit_draft_details = {"name": 'dummy', + "description": "Scheduling unit draft (created during MoM migration for task mom2id=%s)" % task_mom2id, + "tags": ["migrated_from_MoM", "migration_incomplete"], + "requirements_doc": {}, + "scheduling_set": scheduling_set, + "requirements_template": scheduling_unit_template + # optional: + # "copy_reason": models.CopyReason.objects.get(value='template'), + # "copies": None, + # "generator_instance_doc" : {}, + # "scheduling_constraints_doc": {}, + # "scheduling_constraints_template": None, + # "observation_strategy_template": None, + } + + draft = models.SchedulingUnitDraft.objects.create(**scheduling_unit_draft_details) + + scheduling_unit_blueprint_details = {"name": 'dummy', + "description": "Scheduling unit blueprint (created during MoM migration for task mom2id=%s)" % task_mom2id, + "tags": ["migrated_from_MoM", "migration_incomplete"], + "requirements_doc": {}, + "do_cancel": False, + "draft": draft, + "requirements_template": scheduling_unit_template} + + blueprint = models.SchedulingUnitBlueprint.objects.create(**scheduling_unit_blueprint_details) + logger.info("Created new scheduling unit draft_id=%s blueprint_id=%s for task mom2id=%s" % (draft.id, blueprint.id, task_mom2id)) + return draft, blueprint + + +def get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, subtask_mom2id): + """ + Returns a TMSS task for the given subtask. + It is either newly created or an existing task of related subtasks. Subtasks are considered related when they have + the same parentid in MoM database. + :returns tuple(task_draft, task_blueprint) + """ - return models.TaskTemplate.objects.create(**dummy_task_template_details) + task_details = query_task_details_for_subtask_from_momdb(subtask_mom2id) + if task_details: + for details in task_details: # there should be exactly one, actually + if details["mom2id"] in mom2id_to_tmss_representation.keys(): + blueprint = mom2id_to_tmss_representation[details["mom2id"]] + draft = blueprint.draft + logger.info("...using existing task draft_id=%s blueprint_id=%s for subtask mom2id=%s" % (draft.id, blueprint.id, subtask_mom2id)) + return draft, blueprint + else: + try: + return models.TaskTemplate.objects.get(name=details['default_template']) + except: + task_template = _dummy_task_template(details['template']) + + task_draft_details = {"name": details["name"], + "description": "" if not details['description'] else details['description'], + "tags": ["migrated_from_MoM", "migration_incomplete"], + "specifications_doc": {}, + # "copy_reason": models.CopyReason.objects.get(value='template'), + # "copies": None, + "scheduling_unit_draft": scheduling_unit_draft, + "specifications_template": task_template} + + task_draft = models.TaskDraft.objects.create(**task_draft_details) + + task_blueprint_details = {"name": details["name"], + "description": "" if not details['description'] else details['description'], + "tags": ["migrated_from_MoM", "migration_incomplete"], + "specifications_doc": {}, + "do_cancel": False, + "draft": task_draft, + "specifications_template": task_template, + "scheduling_unit_blueprint": scheduling_unit_blueprint} + + task_blueprint = models.TaskBlueprint.objects.create(**task_blueprint_details) + + mom2id_to_tmss_representation[details["mom2id"]] = task_blueprint + logger.info("...created new task draft_id=%s blueprint_id=%s for subtask mom2id=%s" % (task_draft.id, task_blueprint.id, subtask_mom2id)) + return task_draft, task_blueprint + + +def _dummy_subtask_template(name): + template_name = "%s_dummy" % name + try: + return models.SubtaskTemplate.objects.get(name=template_name) + except: + dummy_template_details = {"name": template_name, + "description": "Dummy subtask template for MoM migration, when no matching template in TMSS", + "version": '1', + "schema": {"$id":"http://tmss.lofar.org/api/schemas/subtasktemplate/empty/1#", + "$schema": "http://json-schema.org/draft-06/schema#"}, + "realtime": False, + "queue": False, + "tags": ["DUMMY"], + "type": models.SubtaskType.objects.get(value='other')} + return models.SubtaskTemplate.objects.create(**dummy_template_details) -def _dummy_task_draft(scheduling_unit_draft, template): - dummy_task_draft_details = {"name": 'dummy', - "description": "Dummy work request draft", - "tags": ["DUMMY"], - "requirements_doc": "{}", - "copy_reason": models.CopyReason.objects.get(value='template'), - "copies": None, - "scheduling_unit_draft": scheduling_unit_draft, - "template": template} +def _dummy_scheduling_unit_template(name): + template_name = "%s_dummy" % name + try: + return models.SchedulingUnitTemplate.objects.get(name=template_name) + except: + dummy_scheduling_unit_template_details = {"name": template_name, + "description": "Dummy scheduling unit template for MoM migration, when no matching template in TMSS", + "version": 'v0.314159265359', + "schema": {"$id":"http://tmss.lofar.org/api/schemas/schedulingunittemplate/empty/1#", + "$schema": "http://json-schema.org/draft-06/schema#"}, + "tags": ["DUMMY"]} - return models.TaskDraft.objects.create(**dummy_task_draft_details) + return models.SchedulingUnitTemplate.objects.create(**dummy_scheduling_unit_template_details) -def _dummy_task_blueprint(draft, template, scheduling_unit_blueprint): +def _dummy_task_template(name): + template_name = "%s_dummy" % name + try: + return models.TaskTemplate.objects.get(name=template_name) + except: + dummy_task_template_details = {"name": template_name, + "description": 'Dummy task template for MoM migration, when no matching template in TMSS', + "validation_code_js": "", + "version": 'v0.314159265359', + "schema": {"$id":"http://tmss.lofar.org/api/schemas/tasktemplate/empty/1#", + "$schema": "http://json-schema.org/draft-06/schema#"}, + "tags": ["DUMMY"], + "type": models.TaskType.objects.get(value='other')} - dummy_task_blueprint_details = {"name": 'dummy', - "description": "Dummy work request blueprint", - "tags": ["DUMMY"], - "requirements_doc": "{}", - "do_cancel": False, - "draft": draft, - "template": template, - "scheduling_unit_blueprint": scheduling_unit_blueprint} + return models.TaskTemplate.objects.create(**dummy_task_template_details) - return models.TaskBlueprint.objects.create(**dummy_task_blueprint_details) +def create_subtask_trees_for_project_in_momdb(project_mom2id, project): + """ + Migrates all observations and pipelines that belong to the given project as Subtasks to TMSS. + This also creates associated Task and SchedulingUnit drafts and blueprints in order to link the subtask to its project. + :param project_mom2id: The mom id of the project to migrate + :param project: The TMSS project object to refer to + """ -def get_subtask_details_from_momdb(project_mom2id, project): + global stats + global mom2id_to_tmss_representation logger.info("Getting subtask details from MoM database") mom_results = query_subtask_details_for_project_from_momdb(project_mom2id) - results = {} + logger.info("There are %s subtasks to migrate in project name=%s" % (len(mom_results), project.name)) for mom_details in mom_results: - # different types have some info in different spots, so they end up in different columns. - # put same information in same spot to keep following code same for all tasks. - # (maybe we want to instead separate these into different queries instead or union them in SQL?) + logger.info("...now migrating subtask mom2id=%s mom2objecttype=%s" % (mom_details['mom2id'], mom_details['mom2objecttype'])) + + # derive values for TMSS + + # type and start/end times - if 'OBSERVATION' in mom_details['mom2objecttype']: - type = models.SubtaskType.objects.get(value='observation') - template_name = mom_details['default_template'] + if 'MEASUREMENT' in mom_details['mom2objecttype']: + template_name = mom_details['obs_template'] start_time = mom_details['obs_starttime'] - end_time = mom_details['obs_endtime'] + stop_time = mom_details['obs_endtime'] elif 'PIPELINE' in mom_details['mom2objecttype']: - type = models.SubtaskType.objects.get(value='pipeline') template_name = mom_details['template'] start_time = mom_details['starttime'] - end_time = mom_details['endtime'] + stop_time = mom_details['endtime'] else: - logger.warning('Unknown type %(mom2objecttype)s' % mom_details) - logger.warning('Skipping %s' % mom_details) + logger.error('Unknown type %(mom2objecttype)s - Skipping subtask mom2id=%(mom2id)s' % mom_details) + stats['subtasks_skipped'] += 1 continue - # create new tmss details (leave out stuff that might go wrong) + # timestamps - details = {"type": type, - "start_time": None, # mandatory - "stop_time": None, # mandatory - "state": None, # mandatory - "requested_state": None, # mandatory - "specification": "{}", - "task_blueprint": None, # optional, but required for project reference - "template": None, # mandatory - "tags": ["migrated_from_MoM"]} + if start_time is None: + start_time = datetime.datetime.utcfromtimestamp(0).isoformat() # not-null constraint - # timestamps + if stop_time is None: + stop_time = datetime.datetime.utcfromtimestamp(0).isoformat() # not-null constraint - if start_time is not None: - details['start_time'] = start_time - else: - details['start_time'] = datetime.datetime.utcfromtimestamp(0).isoformat() # not-null constraint - - if end_time is not None: - details['stop_time'] = end_time - else: - details['stop_time'] = datetime.datetime.utcfromtimestamp(0).isoformat() # not-null constraint + # state - # state + # todo: check mapping is correct and complete. + # This now only includes what I ran into during testing and is mapped to what felt right by my intuition (i.e. probably wrong) + # Note: status codes with a verbatim counterpart in TMSS do not need to be mapped here. + # Valid TMSS values are: "defining", "defined", "scheduling", "scheduled", "queueing", "queued", "starting", "started", "finishing", "finished", "cancelling", "cancelled", "error" + mom_state_to_subtask_state = {"opened": models.SubtaskState.objects.get(value="defining"), + "described": models.SubtaskState.objects.get(value="defined"), + "suspended": models.SubtaskState.objects.get(value="cancelled"), + "prepared": models.SubtaskState.objects.get(value="scheduling"), + "aborted": models.SubtaskState.objects.get(value="cancelled"), + "hold": models.SubtaskState.objects.get(value="cancelled"), + "approved": models.SubtaskState.objects.get(value="queued"), + "failed": models.SubtaskState.objects.get(value="error"), + "successful": models.SubtaskState.objects.get(value="finished"),} - try: - state = models.SubtaskState.objects.get(value=mom_details['code']) - details['state'] = state - details['requested_state'] = state - except Exception as e: - logger.error("No state choice matching '%s' in tmss database! %s" % (mom_details['code'], e)) - logger.warning('Skipping %s' % mom_details) - continue + if mom_details['code'] in mom_state_to_subtask_state: + state = mom_state_to_subtask_state[mom_details['code']] + else: + try: + state = models.SubtaskState.objects.get(value=mom_details['code']) + except: + logger.error("No SubtaskState choice matching '%(code)s' in tmss database! - Skipping subtask mom2id=%(mom2id)s" % mom_details) + stats['subtasks_skipped'] += 1 + continue - # template + # template if template_name is not None: try: - details['template'] = models.SubtaskTemplate.objects.get(name=template_name) - except Exception as e: - logger.warning("No task template matching '%s' in tmss database! Using dummy instead! %s" % (template_name, e)) - + specifications_template = models.SubtaskTemplate.objects.get(name=template_name) + logger.info('...found SubtaskTemplate id=%s for subtask mom2id=%s templatename=%s' % (specifications_template.id, mom_details["mom2id"], template_name)) + except: # todo: create a lot of templates to reflect what was used for the actual task? - # todo: raise Exception (or continue) once we have proper templates for everything. - details["template"] = _dummy_subtask_template() + # Then raise Exception once we have proper templates for everything? + specifications_template = _dummy_subtask_template(template_name) + logger.warning("No SubtaskTemplate matching '%s' in tmss database! Using dummy id=%s instead for subtask mom2id=%s" % (template_name, specifications_template.id, mom_details['mom2id'])) else: - logger.warning('Missing template name in MoM details!') - logger.warning('Skipping %s' % mom_details) + logger.error('Missing template name in MoM details! - Skipping subtask mom2id=%(mom2id)s' % mom_details) + stats['subtasks_skipped'] += 1 continue - # ---------------- - # todo: the following entries are needed to relate a task to it's project. - # todo: we should substitute the dummy items by items that reflect the actual task details - # scheduling set - scheduling_set = _dummy_scheduling_set(project) + scheduling_set = get_or_create_scheduling_set_for_project(project) # scheduling unit template - scheduling_unit_template = _dummy_scheduling_unit_template() + try: + scheduling_unit_template = models.SchedulingUnitTemplate.objects.get(name=template_name) + logger.info('...found SchedulingUnitTemplate id=%s for subtask mom2id=%s templatename=%s' % (scheduling_unit_template.id, mom_details["mom2id"], template_name)) + except: + scheduling_unit_template = _dummy_scheduling_unit_template(template_name) + logger.warning('No SchedulingUnitTemplate was found for subtask mom2id=%s templatename=%s. Using dummy template id=%s' % (mom_details["mom2id"], template_name, scheduling_unit_template.id)) + + # scheduling unit draft + blueprint + scheduling_unit_draft, scheduling_unit_blueprint = get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, mom_details["parent_mom2id"]) + + # task draft + blueprint + task_draft, task_blueprint = get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, mom_details["mom2id"]) + + details = {"id": mom_details['mom2id'], + "state": state, + "specifications_doc": {}, # todo: where from? We have user_specification_id (for task?) and system_specification_id (for subtask?) on lofar_observation (I guess referring to lofar_observation_specification). Shall we piece things together from that, or is there a text blob to use? Also: pipeline info lives in obs_spec too? + "task_blueprint": task_blueprint, + "specifications_template": specifications_template, + "tags": ["migrated_from_MoM", "migration_incomplete"], # todo: set complete once it is verified that all info is present + "priority": project.priority_rank, # todo: correct to derive from project? + # optional: + "start_time": start_time, + "stop_time": stop_time, + "schedule_method": models.ScheduleMethod.objects.get(value="manual"), # todo: correct? Or leave None? + # "created_or_updated_by_user" = None, + # "raw_feedback" = None, + # "do_cancel": None, + #"cluster": None # I guess from lofar_observation.storage_cluster_id + } - # scheduling unit draft - scheduling_unit_draft = _dummy_scheduling_unit_draft(scheduling_set, scheduling_unit_template) + subtask_qs = models.Subtask.objects.filter(id=details["id"]) + if subtask_qs.count(): + # todo: this will update the subtask, but other TMSS objects do not share id with MoM and get recreated with every migration run. Can we clean this up somehow? + subtask_qs.update(**details) + subtask = subtask_qs.first() + logger.info("...updated existing subtask tmss id=%s" % subtask.id) + stats['subtasks_updated'] += 1 + else: + subtask = models.Subtask.objects.create(**details) + logger.info("...created new subtask tmss id=%s" % subtask.id) + stats['subtasks_created'] += 1 - # scheduling unit blueprint - scheduling_unit_blueprint = _dummy_scheduling_unit_blueprint(scheduling_unit_draft, scheduling_unit_template) + mom2id_to_tmss_representation[mom_details['mom2id']] = subtask - # work request template - task_template = _dummy_task_template() + logger.info("...handled %s TMSS objects so far | %s" % (len(mom2id_to_tmss_representation), stats)) - # work request draft - task_draft = _dummy_task_draft(scheduling_unit_draft, task_template) - # work request blueprint - details['task_blueprint'] = _dummy_task_blueprint(task_draft, - task_template, - scheduling_unit_blueprint) - # ---------------- +def get_or_create_cycle_for_project(project): + """ + Returns a cycle for a given project. Since cycles don't seem to be a thing in MoM, the cycle is derived from the + project name. Returns None, if that fails. + """ + name = project.name + if name.lower().startswith('lc'): + cycle_no = re.search(r'\d+', name.lower()).group() + cycle_name = "Cycle %02d" % int(cycle_no) + try: + cycle = models.Cycle.objects.get(name=cycle_name) + logger.info("...found existing cycle name=%s for project name=%s" % (cycle.name, project.name)) + return cycle + except: + details = {"name": cycle_name, + "description": "Cycle %s (created during MoM migration)" % cycle_no, + "tags": ["migrated_from_MoM", "migration_incomplete"], + "start": "1970-01-01T00:00:00", # todo, where from? + "stop": "1970-01-01T00:00:00", # todo, where from? + } + cycle = models.Cycle.objects.create(**details) + logger.info("...created new cycle name=% for project name=%s" % (cycle.name, project.name)) + return cycle - # add task mom2id and its details to return dict - results[mom_details['mom2id']] = details + logger.warning("Could not determine cycle for project name=%s. Using None." % (project.name)) - return results def main(): + """ + Migrates data from a MoM database to a TMSS database. + Existing objects in TMSS of same name or id are updated, otherwise new objects are created. + """ + + global mom2id_to_tmss_representation + global stats + + # query details of all projects in MoM database project_details = get_project_details_from_momdb() + logger.info("There are %s projects to migrate" % len(project_details)) + # iterate projects for p_id, p_details in project_details.items(): - logger.info("---\nNow migrating project %s..." % p_details['name']) - project = models.Project.objects.create(**p_details) - logger.info("...created new project with tmss id %s" % project.id) + try: + logger.info("Now migrating project mom_name=%s mom2id=%s" % (p_details['name'], p_id)) + + # create or update project + project_qs = models.Project.objects.filter(name=p_details["name"]) + if project_qs.count(): + project_qs.update(**p_details) + project = project_qs.first() + logger.info("...updated existing project tmss_name=%s" % project.name) + stats["projects_updated"] += 1 + else: + project = models.Project.objects.create(**p_details) + logger.info("...created new project tmss_name=%s" % project.name) + stats["projects_created"] += 1 - task_details = get_subtask_details_from_momdb(p_id, project) - for t_id, t_details in task_details.items(): + # create all subtasks and related objects for the project + create_subtask_trees_for_project_in_momdb(p_id, project) - logger.info("...creating new task mom2id %s" % t_id) - task = models.Subtask.objects.create(**t_details) - logger.info("...created new task with tmss id %s" % task.id) + # add project to existing or new cycle + cycle = get_or_create_cycle_for_project(project) + if cycle: + project.cycles.set([cycle]) - logger.info("...done migrating project %s." % p_details['name']) + logger.info("...done migrating project mom_name=%s mom2id=%s tmss_name=%s." % (p_details['name'], p_id, project.name)) + + except Exception as ex: + logger.error(ex, exc_info=True) + logger.error("Skipping migration of project mom_name=%s mom2id=%s details=%s." % (p_details['name'], p_id, p_details)) + stats["projects_skipped"] += 1 + + logger.info("Done. Handled %s TMSS objects in total | %s" % (len(mom2id_to_tmss_representation), stats)) if __name__ == "__main__": @@ -354,7 +626,7 @@ if __name__ == "__main__": global dbcreds dbcreds = dbcredentials.parse_options(options) - logger.info("Using dbcreds: %s", dbcreds.stringWithHiddenPassword()) + logger.info("Using MoM dbcreds: %s", dbcreds.stringWithHiddenPassword()) # note: this requires a config file .lofar/dbcredentials/mom.ini, with contents as such (adapt as needed): # @@ -366,5 +638,14 @@ if __name__ == "__main__": # password = mompass # database = lofar_mom_test_tmss + # set up Django + creds_name = os.environ.get('TMSS_DBCREDENTIALS', 'tmss') + os.environ['TMSS_DBCREDENTIALS'] = creds_name + tmss_dbcreds = dbcredentials.DBCredentials().get(creds_name) + logger.info("Using TMSS dbcreds: %s", tmss_dbcreds.stringWithHiddenPassword()) + + os.environ.setdefault("DJANGO_SETTINGS_MODULE", 'lofar.sas.tmss.tmss.settings') + django.setup() + from lofar.sas.tmss.tmss.tmssapp import models # has to happen after Django setup main() diff --git a/SAS/TMSS/src/tmss/settings.py b/SAS/TMSS/src/tmss/settings.py index d5a948d2878938f25e4ba512f8c03930cbcea036..909caac96ed6807ff88769ebf39379506f2ec40c 100644 --- a/SAS/TMSS/src/tmss/settings.py +++ b/SAS/TMSS/src/tmss/settings.py @@ -137,6 +137,7 @@ DEBUG_TOOLBAR_CONFIG = { MIDDLEWARE = [ 'django.middleware.gzip.GZipMiddleware', + 'debug_toolbar.middleware.DebugToolbarMiddleware', 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', @@ -155,7 +156,7 @@ ROOT_URLCONF = 'lofar.sas.tmss.tmss.urls' TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [BASE_DIR, os.path.join(BASE_DIR, 'templates'), os.path.join(BASE_DIR, '../frontend','tmss_webapp')], + 'DIRS': [BASE_DIR, os.path.join(BASE_DIR, 'templates'), os.path.join(os.environ.get('LOFARROOT'), 'SAS/TMSS/frontend','tmss_webapp')], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ @@ -169,7 +170,7 @@ TEMPLATES = [ ] STATICFILES_DIRS = [ - os.path.join(BASE_DIR, '../frontend','tmss_webapp/build/static') + os.path.join(os.environ.get('LOFARROOT'), 'SAS/TMSS/frontend','tmss_webapp/build/static') ] WSGI_APPLICATION = 'lofar.sas.tmss.tmss.wsgi.application' diff --git a/SAS/TMSS/src/tmss/tmssapp/views.py b/SAS/TMSS/src/tmss/tmssapp/views.py index 58a389fd6e332c7fea88e113fda8fe8e0d734217..cf57dc6832f0f7340d7483d8789b2f0b2b2e12b8 100644 --- a/SAS/TMSS/src/tmss/tmssapp/views.py +++ b/SAS/TMSS/src/tmss/tmssapp/views.py @@ -33,7 +33,7 @@ def subtask_parset(request, subtask_pk:int): def index(request): - return render(request, os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), '../../frontend','tmss_webapp/build/index.html')) + return render(request, os.path.join(os.environ.get('LOFARROOT'), 'SAS/TMSS/frontend','tmss_webapp/build/index.html')) #return render(request, "../../../frontend/frontend_poc/build/index.html")