From ebce5e612da0ad406a994f23fc84d82d380d8dff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rn=20K=C3=BCnsem=C3=B6ller?=
 <jkuensem@physik.uni-bielefeld.de>
Date: Wed, 8 Aug 2018 16:32:07 +0000
Subject: [PATCH] Task LSMR-20: Added an initial script that migrates projects
 (as projects) and related observations and pipelines (as tasks) from mom to
 lsmr. Still incomplete.

---
 .gitattributes                                |   1 +
 SAS/LSMR/src/migrate_momdb_to_lsmr.py         | 212 ++++++++++++++++++
 .../test/t_schedulechecker.py                 |   2 +-
 3 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 SAS/LSMR/src/migrate_momdb_to_lsmr.py

diff --git a/.gitattributes b/.gitattributes
index 90df7afcb8c..11b6d8ff8aa 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -4266,6 +4266,7 @@ SAS/LSMR/src/lsmr/settings.py -text
 SAS/LSMR/src/lsmr/urls.py -text
 SAS/LSMR/src/lsmr/wsgi.py -text
 SAS/LSMR/src/manage.py -text
+SAS/LSMR/src/migrate_momdb_to_lsmr.py -text
 SAS/LSMR/src/remakemigrations.py -text
 SAS/LSMR/test/CMakeLists.txt -text
 SAS/LSMR/test/__init__.py -text
diff --git a/SAS/LSMR/src/migrate_momdb_to_lsmr.py b/SAS/LSMR/src/migrate_momdb_to_lsmr.py
new file mode 100644
index 00000000000..c34f84e8279
--- /dev/null
+++ b/SAS/LSMR/src/migrate_momdb_to_lsmr.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+from lsmr.wsgi import application  # required to set up django, even though not explicitly used
+from lsmr.lsmrapp import models
+import logging
+import datetime
+import pymysql
+
+logger = logging.getLogger(__file__)
+
+mom_db_host = 'localhost'
+mom_db_port = '3306'
+mom_db_name = 'lofar_mom_test_lsmr'
+mom_db_user = 'root'
+mom_db_pass = 'the_great_password'
+
+
+def _execute_query(query, data=None):
+    try:
+        db = pymysql.connect(mom_db_host, mom_db_user, mom_db_pass, mom_db_name)
+        cursor = db.cursor(pymysql.cursors.DictCursor)
+        cursor.execute(query, data)
+        return cursor.fetchall()
+
+    except Exception as e:
+        logger.error("Could not execute query! %s" % e)
+
+
+def query_project_details_from_momdb():
+    """
+    Queries MoM database for project details and returns the list of results
+    :return: list of details as dict
+    """
+    logger.info("Querying MoM database for projects")
+    query = """SELECT project.priority,
+                      project.allowtriggers,
+                      mom2object.name,
+                      mom2object.description,
+                      mom2object.mom2id
+                FROM project
+                JOIN mom2object ON project.mom2objectid=mom2object.id
+                ORDER BY mom2id;
+                """
+
+    results = _execute_query(query)
+
+    # dummy data:
+    # results = [{"mom2id": 42,
+    #             "name": "dummyproject",
+    #             "description": "fake description",
+    #             "priority": 1234,
+    #             "allowtriggers": True}]
+
+    return results
+
+
+def get_project_details_from_momdb():
+    """
+    Obtains project details from MoM database and translates it into details as understood by the lsmr data model.
+    :return: dict mom2id -> project details as dict
+    """
+    logger.info("Getting project details from MoM database")
+    mom_results = query_project_details_from_momdb()
+    results = {}
+
+    for mom_details in mom_results:
+
+        # create new lsmr details based on MoM details
+        details = {"name": mom_details['name'],
+                   "description": mom_details['description'],
+                   "tags": [],
+                   "priority": mom_details['priority'],
+                   "can_trigger": mom_details['allowtriggers'],
+                   "private_data": True  # todo: check project.releasedate and compare to now or how to determine???
+                   }
+
+        # alterations to comply with constraints:
+        if details['description'] is None:
+            details['description'] = ''
+
+        # add to return dict
+        results[mom_details['mom2id']] = details
+
+    return results
+
+
+def query_task_details_for_project_from_momdb(project_mom2id):
+    logger.info("Querying MoM database for tasks of project %s" % project_mom2id)
+    # todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing.
+    query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code, lofar_pipeline.template, lofar_observation.default_template
+                      FROM mom2object
+                      INNER JOIN mom2object as ownerproject_mom2object on mom2object.ownerprojectid = ownerproject_mom2object.id
+                      INNER JOIN mom2objectstatus on mom2object.currentstatusid = mom2objectstatus.id
+                      INNER JOIN status on mom2objectstatus.statusid = status.id
+                      LEFT JOIN lofar_pipeline on mom2object.id = lofar_pipeline.mom2objectid
+                      LEFT JOIN lofar_observation on mom2object.id = lofar_observation.mom2objectid
+                      WHERE ownerproject_mom2object.mom2id = %s
+                      AND (mom2object.mom2objecttype = 'LOFAR_OBSERVATION' or mom2object.mom2objecttype like '%%PIPELINE%%');
+                      '''
+
+    parameters = (project_mom2id,)
+
+    results = _execute_query(query, parameters)
+
+    return results
+
+
+def get_task_details_from_momdb(project_mom2id):
+
+    logger.info("Getting task details from MoM database")
+    mom_results = query_task_details_for_project_from_momdb(project_mom2id)
+    results = {}
+
+    for mom_details in mom_results:
+
+        # create new lsmr details based on MoM details
+        details = {"type": None,
+                   "start_time": datetime.datetime.utcnow().isoformat(),  # todo: determine from mom db
+                   "stop_time": datetime.datetime.utcnow().isoformat(),  # todo: determine from mom db
+                   "state": None,
+                   "requested_state": None,
+                   "specification": "{}",
+                   "work_request_blueprint": None,
+                   "template": None,
+                   "tags": []}
+
+        try:
+            state = models.TaskStateChoice.objects.get(value=mom_details['code'])
+            details['state'] = state
+            details['requested_state'] = state
+        except Exception as e:
+            logger.error("No state choice matching '%s' in lsmr database! %s" % (mom_details['code'], e))
+            logger.warning('Skipping %s' % mom_details)
+            continue
+
+        if 'OBSERVATION' in mom_details['mom2objecttype']:
+            details['type'] = models.TaskTypeChoice.objects.get(value='observation')
+            template_name = mom_details['default_template']
+        elif 'PIPELINE' in mom_details['mom2objecttype']:
+            details['type'] = models.TaskTypeChoice.objects.get(value='pipeline')
+            template_name = mom_details['template']
+        else:
+            logger.warning('Unknown type %(mom2objecttype)s' % mom_details)
+            logger.warning('Skipping %s' % mom_details)
+            continue
+
+        # todo: create a lot of templates to reflect what was used for the actual task?
+        # todo: -> models.TaskTemplate.objects.create(...)
+
+        if template_name is not None:
+            try:
+                details['template'] = models.TaskTemplate.objects.get(name=template_name)
+            except Exception as e:
+                logger.error("No task template matching '%s' in lsmr database! %s" % (template_name, e))
+
+                logger.error("Using dummy instead!")
+                dummy_template_details = {"name": "dummy",
+                                          "description": 'Dummy Template',
+                                          "version": '1',
+                                          "schema": {},
+                                          "realtime": False,
+                                          "queue": False,
+                                          "tags": ["DUMMY"]}
+                details["template"] = models.TaskTemplate.objects.create(**dummy_template_details)
+
+        else:
+            logger.warning('Missing template name in MoM details!')
+            logger.warning('Skipping %s' % mom_details)
+            continue
+
+        # add to return dict
+        results[mom_details['mom2id']] = details
+
+    return results
+
+
+def main():
+
+    project_details = get_project_details_from_momdb()
+
+    for p_id, p_details in project_details.items():
+
+        logger.info("---\nNow migrating project %s..." % p_details['name'])
+        project = models.Project.objects.create(**p_details)
+        logger.info("...created new project with lsmr id %s" % project.id)
+
+        task_details = get_task_details_from_momdb(p_id)
+        for t_id, t_details in task_details.items():
+            logger.info("...creating new task mom2id %s" % t_id)
+            task = models.Task.objects.create(**t_details)
+            logger.info("...created new task with lsmr id %s" % task.id)
+
+            # todo: In order to reference project <-> task, we need the following intermediate entries:
+            # todo: - task_template (using dummy currently, see above)
+            # todo: - work_request_blueprint
+            # todo: - work_request_template
+            # todo: - work_request_draft
+            # todo: - run_blueprint
+            # todo: - run_template
+            # todo: - run_draft
+            # todo: - run_set
+
+        logger.info("...done migrating project %s." % p_details['name'])
+
+
+if __name__ == "__main__":
+    logger.setLevel(logging.DEBUG)
+
+    handler = logging.StreamHandler()
+    handler.setLevel(logging.INFO)
+    logger.addHandler(handler)
+
+    main()
diff --git a/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py b/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py
index 7892f76eeee..962ef64cef1 100755
--- a/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py
+++ b/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulechecker.py
@@ -123,7 +123,7 @@ class ScheduleCheckerTest(unittest.TestCase):
         """ Test whether all scheduled/queued pipelines get a move request. """
 
         self.rarpc_mock.getTasks.return_value = [ { 'id': 'id', 'status': 'scheduled', 'type': 'pipeline', 'starttime': datetime.datetime.utcnow() } ]
-        self.rarpc_mock.getTask.return_value =    { 'id': 'id', 'status': 'scheduled', 'type': 'pipeline', 'starttime': datetime.datetime.utcnow() }
+        self.rarpc_mock.getTask.return_value = { 'id': 'id', 'status': 'scheduled', 'type': 'pipeline', 'starttime': datetime.datetime.utcnow() }
 
         with TestingScheduleChecker(self.rarpc_mock, self.momrpc_mock, self.curpc_mock, self.otdbrpc_mock) as schedulechecker:
             schedulechecker.checkScheduledAndQueuedPipelines()
-- 
GitLab