From ebce5e612da0ad406a994f23fc84d82d380d8dff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20K=C3=BCnsem=C3=B6ller?= <jkuensem@physik.uni-bielefeld.de> Date: Wed, 8 Aug 2018 16:32:07 +0000 Subject: [PATCH] Task LSMR-20: Added an initial script that migrates projects (as projects) and related observations and pipelines (as tasks) from mom to lsmr. Still incomplete. --- .gitattributes | 1 + SAS/LSMR/src/migrate_momdb_to_lsmr.py | 212 ++++++++++++++++++ .../test/t_schedulechecker.py | 2 +- 3 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 SAS/LSMR/src/migrate_momdb_to_lsmr.py diff --git a/.gitattributes b/.gitattributes index 90df7afcb8c..11b6d8ff8aa 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4266,6 +4266,7 @@ SAS/LSMR/src/lsmr/settings.py -text SAS/LSMR/src/lsmr/urls.py -text SAS/LSMR/src/lsmr/wsgi.py -text SAS/LSMR/src/manage.py -text +SAS/LSMR/src/migrate_momdb_to_lsmr.py -text SAS/LSMR/src/remakemigrations.py -text SAS/LSMR/test/CMakeLists.txt -text SAS/LSMR/test/__init__.py -text diff --git a/SAS/LSMR/src/migrate_momdb_to_lsmr.py b/SAS/LSMR/src/migrate_momdb_to_lsmr.py new file mode 100644 index 00000000000..c34f84e8279 --- /dev/null +++ b/SAS/LSMR/src/migrate_momdb_to_lsmr.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +from lsmr.wsgi import application # required to set up django, even though not explicitly used +from lsmr.lsmrapp import models +import logging +import datetime +import pymysql + +logger = logging.getLogger(__file__) + +mom_db_host = 'localhost' +mom_db_port = '3306' +mom_db_name = 'lofar_mom_test_lsmr' +mom_db_user = 'root' +mom_db_pass = 'the_great_password' + + +def _execute_query(query, data=None): + try: + db = pymysql.connect(mom_db_host, mom_db_user, mom_db_pass, mom_db_name) + cursor = db.cursor(pymysql.cursors.DictCursor) + cursor.execute(query, data) + return cursor.fetchall() + + except Exception as e: + logger.error("Could not execute query! %s" % e) + + +def query_project_details_from_momdb(): + """ + Queries MoM database for project details and returns the list of results + :return: list of details as dict + """ + logger.info("Querying MoM database for projects") + query = """SELECT project.priority, + project.allowtriggers, + mom2object.name, + mom2object.description, + mom2object.mom2id + FROM project + JOIN mom2object ON project.mom2objectid=mom2object.id + ORDER BY mom2id; + """ + + results = _execute_query(query) + + # dummy data: + # results = [{"mom2id": 42, + # "name": "dummyproject", + # "description": "fake description", + # "priority": 1234, + # "allowtriggers": True}] + + return results + + +def get_project_details_from_momdb(): + """ + Obtains project details from MoM database and translates it into details as understood by the lsmr data model. + :return: dict mom2id -> project details as dict + """ + logger.info("Getting project details from MoM database") + mom_results = query_project_details_from_momdb() + results = {} + + for mom_details in mom_results: + + # create new lsmr details based on MoM details + details = {"name": mom_details['name'], + "description": mom_details['description'], + "tags": [], + "priority": mom_details['priority'], + "can_trigger": mom_details['allowtriggers'], + "private_data": True # todo: check project.releasedate and compare to now or how to determine??? + } + + # alterations to comply with constraints: + if details['description'] is None: + details['description'] = '' + + # add to return dict + results[mom_details['mom2id']] = details + + return results + + +def query_task_details_for_project_from_momdb(project_mom2id): + logger.info("Querying MoM database for tasks of project %s" % project_mom2id) + # todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing. + query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code, lofar_pipeline.template, lofar_observation.default_template + FROM mom2object + INNER JOIN mom2object as ownerproject_mom2object on mom2object.ownerprojectid = ownerproject_mom2object.id + INNER JOIN mom2objectstatus on mom2object.currentstatusid = mom2objectstatus.id + INNER JOIN status on mom2objectstatus.statusid = status.id + LEFT JOIN lofar_pipeline on mom2object.id = lofar_pipeline.mom2objectid + LEFT JOIN lofar_observation on mom2object.id = lofar_observation.mom2objectid + WHERE ownerproject_mom2object.mom2id = %s + AND (mom2object.mom2objecttype = 'LOFAR_OBSERVATION' or mom2object.mom2objecttype like '%%PIPELINE%%'); + ''' + + parameters = (project_mom2id,) + + results = _execute_query(query, parameters) + + return results + + +def get_task_details_from_momdb(project_mom2id): + + logger.info("Getting task details from MoM database") + mom_results = query_task_details_for_project_from_momdb(project_mom2id) + results = {} + + for mom_details in mom_results: + + # create new lsmr details based on MoM details + details = {"type": None, + "start_time": datetime.datetime.utcnow().isoformat(), # todo: determine from mom db + "stop_time": datetime.datetime.utcnow().isoformat(), # todo: determine from mom db + "state": None, + "requested_state": None, + "specification": "{}", + "work_request_blueprint": None, + "template": None, + "tags": []} + + try: + state = models.TaskStateChoice.objects.get(value=mom_details['code']) + details['state'] = state + details['requested_state'] = state + except Exception as e: + logger.error("No state choice matching '%s' in lsmr database! %s" % (mom_details['code'], e)) + logger.warning('Skipping %s' % mom_details) + continue + + if 'OBSERVATION' in mom_details['mom2objecttype']: + details['type'] = models.TaskTypeChoice.objects.get(value='observation') + template_name = mom_details['default_template'] + elif 'PIPELINE' in mom_details['mom2objecttype']: + details['type'] = models.TaskTypeChoice.objects.get(value='pipeline') + template_name = mom_details['template'] + else: + logger.warning('Unknown type %(mom2objecttype)s' % mom_details) + logger.warning('Skipping %s' % mom_details) + continue + + # todo: create a lot of templates to reflect what was used for the actual task? + # todo: -> models.TaskTemplate.objects.create(...) + + if template_name is not None: + try: + details['template'] = models.TaskTemplate.objects.get(name=template_name) + except Exception as e: + logger.error("No task template matching '%s' in lsmr database! %s" % (template_name, e)) + + logger.error("Using dummy instead!") + dummy_template_details = {"name": "dummy", + "description": 'Dummy Template', + "version": '1', + "schema": {}, + "realtime": False, + "queue": False, + "tags": ["DUMMY"]} + details["template"] = models.TaskTemplate.objects.create(**dummy_template_details) + + else: + logger.warning('Missing template name in MoM details!') + logger.warning('Skipping %s' % mom_details) + continue + + # add to return dict + results[mom_details['mom2id']] = details + + return results + + +def main(): + + project_details = get_project_details_from_momdb() + + for p_id, p_details in project_details.items(): + + logger.info("---\nNow migrating project %s..." % p_details['name']) + project = models.Project.objects.create(**p_details) + logger.info("...created new project with lsmr id %s" % project.id) + + task_details = get_task_details_from_momdb(p_id) + for t_id, t_details in task_details.items(): + logger.info("...creating new task mom2id %s" % t_id) + task = models.Task.objects.create(**t_details) + logger.info("...created new task with lsmr id %s" % task.id) + + # todo: In order to reference project <-> task, we need the following intermediate entries: + # todo: - task_template (using dummy currently, see above) + # todo: - work_request_blueprint + # todo: - work_request_template + # todo: - work_request_draft + # todo: - run_blueprint + # todo: - run_template + # todo: - run_draft + # todo: - run_set + + logger.info("...done migrating project %s." % p_details['name']) + + +if __name__ == "__main__": + logger.setLevel(logging.DEBUG) + + handler = logging.StreamHandler() + handler.setLevel(logging.INFO) + logger.addHandler(handler) + + main() diff --git a/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py b/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py index 7892f76eeee..962ef64cef1 100755 --- a/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py +++ b/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py @@ -123,7 +123,7 @@ class ScheduleCheckerTest(unittest.TestCase): """ Test whether all scheduled/queued pipelines get a move request. """ self.rarpc_mock.getTasks.return_value = [ { 'id': 'id', 'status': 'scheduled', 'type': 'pipeline', 'starttime': datetime.datetime.utcnow() } ] - self.rarpc_mock.getTask.return_value = { 'id': 'id', 'status': 'scheduled', 'type': 'pipeline', 'starttime': datetime.datetime.utcnow() } + self.rarpc_mock.getTask.return_value = { 'id': 'id', 'status': 'scheduled', 'type': 'pipeline', 'starttime': datetime.datetime.utcnow() } with TestingScheduleChecker(self.rarpc_mock, self.momrpc_mock, self.curpc_mock, self.otdbrpc_mock) as schedulechecker: schedulechecker.checkScheduledAndQueuedPipelines() -- GitLab