#!/usr/bin/env python3 from lofar.common import dbcredentials import logging import datetime import pymysql from optparse import OptionParser import os import django import sys import re logger = logging.getLogger(__file__) handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) # 'mom2id' -> 'tmss object' mapping # (so we know what has been created already and refer to that) mom2id_to_tmss_representation = {} stats = {"projects_skipped": 0, "projects_updated": 0, "projects_created":0, "subtasks_skipped": 0, "subtasks_updated": 0, "subtasks_created": 0} def _execute_query(query, data=None): try: db = pymysql.connect(dbcreds.host, dbcreds.user, dbcreds.password, dbcreds.database) cursor = db.cursor(pymysql.cursors.DictCursor) cursor.execute(query, data) return cursor.fetchall() except Exception as e: logger.error("Could not execute query! %s" % e) def query_project_details_from_momdb(): """ Queries MoM database for project details and returns the list of results :return: list of details as dict """ logger.info("...querying MoM database for projects") query = """SELECT project.priority, project.allowtriggers, mom2object.name, mom2object.description, mom2object.mom2id, resourcetype.name AS resourcetypename, resource.projectpath FROM project JOIN mom2object ON project.mom2objectid=mom2object.id LEFT JOIN resource ON projectid=project.id AND resource.resourcetypeid IN (2,3,4,5,6,7,8,9,10,12) LEFT JOIN resourcetype ON resource.resourcetypeid=resourcetype.id ORDER BY mom2id; """ results = _execute_query(query) # MoM resourcetypes # # mysql> SELECT * FROM lofar_mom_test_lsmr.resourcetype; # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+ # | id | name | hosturi | type | # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+ # | 1 | Lofar Observing Time | NULL | OBSERVING_TIME | # | 2 | Lofar Storage (SARA) | srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/ | LTA_STORAGE | # | 3 | Lofar Test Storage (SARA) | srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/test/projects/ | LTA_STORAGE | # | 4 | Lofar Storage (TARGET) old | gsiftp://lotar1.staging.lofar/target/gpfs2/lofar/home/lofarops/ops/projects/ | LTA_STORAGE | # | 5 | Lofar Storage (Jülich) | srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/ | LTA_STORAGE | # | 6 | Lofar User Disk Storage (SARA) | srm://srm.grid.sara.nl/pnfs/grid.sara.nl/data/lofar/user/disk/projects/ | LTA_STORAGE | # | 7 | Lofar Tape Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/projects/ | LTA_STORAGE | # | 8 | Lofar Tape Test Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/test/projects/ | LTA_STORAGE | # | 9 | Lofar Disk Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/disk/projects/ | LTA_STORAGE | # | 10 | Lofar Disk Test Storage (Target) | srm://srm.target.rug.nl:8444/lofar/ops/disk/test/projects/ | LTA_STORAGE | # | 11 | Lofar Processing Time | NULL | OBSERVING_TIME | # | 12 | Lofar Storage (Poznan) | srm://lta-head.lofar.psnc.pl:8443/lofar/ops/projects/ | LTA_STORAGE | # | 13 | Lofar Triggers | NULL | LOFAR_TRIGGERS | # +----+----------------------------------+--------------------------------------------------------------------------------+----------------+ return results def query_subtask_details_for_project_from_momdb(project_mom2id): """ Obtains details of observations and pipelines from the MoM database. Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks. :param project_mom2id: :return: """ logger.info("...querying MoM database for subtasks of project %s" % project_mom2id) # todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing. # todo: clarify: Measurements correspond to subtask and Observations correspond to task level? # We have lofar_observation and lofar_pipeline tables, but lofar_observation seems to refer to a 'task level mom2object' # with measurement and pipeline children, where the latter are further described on the lofar_pipeline table. # So I'm assuming that type '%%MEASUREMENT%%' here correspond to observation subtasks, which also means that some # info is apparently squashed together with that of other subtasks of the same task in the lofar_observation entry # of the parent/'task'. # todo: clarify: Is there info on template/start/stop on Measurement level somewhere? Using the parent task/observation now. query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code, lofar_pipeline.template AS template, lofar_observation.default_template as obs_template, lofar_pipeline.starttime, lofar_pipeline.endtime, lofar_observation_specification.starttime AS obs_starttime, lofar_observation_specification.endtime AS obs_endtime, parent_mom2object.mom2id as parent_mom2id FROM mom2object INNER JOIN mom2object AS ownerproject_mom2object ON mom2object.ownerprojectid = ownerproject_mom2object.id INNER JOIN mom2objectstatus ON mom2object.currentstatusid = mom2objectstatus.id INNER JOIN status ON mom2objectstatus.statusid = status.id LEFT JOIN mom2object AS parent_mom2object ON mom2object.parentid = parent_mom2object.id LEFT JOIN lofar_pipeline ON mom2object.id = lofar_pipeline.mom2objectid LEFT JOIN lofar_observation ON mom2object.parentid = lofar_observation.mom2objectid LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id WHERE ownerproject_mom2object.mom2id = %s AND (mom2object.mom2objecttype LIKE '%%MEASUREMENT%%' OR mom2object.mom2objecttype LIKE '%%PIPELINE%%'); ''' parameters = (project_mom2id,) results = _execute_query(query, parameters) return results def query_task_details_for_subtask_from_momdb(subtask_mom2id): """ Obtains details of observations and pipelines from the MoM database. Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks. :param project_mom2id: :return: """ logger.info("...querying MoM database for parent task of subtask mom2id=%s" % subtask_mom2id) query = '''SELECT parent_mom2object.mom2id, parent_mom2object.name, parent_mom2object.description, parent_mom2object.mom2objecttype, status.code, lofar_observation.template, lofar_observation_specification.starttime AS starttime, lofar_observation_specification.endtime AS endtime FROM mom2object INNER JOIN mom2object AS parent_mom2object ON mom2object.parentid = parent_mom2object.id INNER JOIN mom2objectstatus ON parent_mom2object.currentstatusid = mom2objectstatus.id INNER JOIN status ON mom2objectstatus.statusid = status.id LEFT JOIN lofar_observation ON parent_mom2object.id = lofar_observation.mom2objectid LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id WHERE mom2object.mom2id = %s ''' parameters = (subtask_mom2id,) results = _execute_query(query, parameters) return results def query_related_tasks_from_momdb(task_mom2id): """ Obtains details of observations and pipelines from the MoM database. Note: Some of these details still need to be converted to sth. that is understood by the tmss data model for subtasks. :param project_mom2id: :return: """ logger.info("...querying MoM database for tasks related to task mom2id=%s" % task_mom2id) query = '''SELECT TIMEDIFF(related_lofar_observation_specification.starttime, lofar_observation_specification.endtime) AS startdiff, TIMEDIFF(lofar_observation_specification.starttime, related_lofar_observation_specification.endtime) AS enddiff, related_mom2object.mom2id, related_mom2object.name, related_mom2object.description, related_mom2object.mom2objecttype, status.code, related_lofar_observation.template, related_lofar_observation_specification.starttime AS starttime, related_lofar_observation_specification.endtime AS endtime FROM mom2object INNER JOIN mom2object AS related_mom2object ON mom2object.parentid = related_mom2object.parentid INNER JOIN mom2objectstatus ON related_mom2object.currentstatusid = mom2objectstatus.id INNER JOIN status ON mom2objectstatus.statusid = status.id LEFT JOIN lofar_observation AS related_lofar_observation ON related_mom2object.id = related_lofar_observation.mom2objectid LEFT JOIN lofar_observation_specification AS related_lofar_observation_specification ON related_lofar_observation.user_specification_id = related_lofar_observation_specification.id LEFT JOIN lofar_observation ON mom2object.id = lofar_observation.mom2objectid LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id WHERE mom2object.mom2id = %s ''' parameters = (task_mom2id,) results = _execute_query(query, parameters) return results def get_project_details_from_momdb(): """ Obtains project details from MoM database and translates it into details as understood by the tmss data model. :return: dict mom2id -> project details as dict """ logger.info("Getting project details from MoM database") mom_results = query_project_details_from_momdb() results = {} for mom_details in mom_results: # derive values for TMSS: # filesystem todo: how to deal with Target locations? if mom_details['resourcetypename']: try: archive_location = models.Filesystem.objects.get(name=mom_details['resourcetypename']) except: logger.error("No Filesystem matching '%(resourcetypename)s' in tmss database! Skipping project name=%(name)s" % mom_details) continue else: logger.warning("Missing archive info in MoM details, using None! name=%(name)s" % mom_details) archive_location = None mom_details['projectpath'] = "" # create new tmss details details = {"name": mom_details['name'], "description": "" if not mom_details['description'] else mom_details['description'], "tags": ["migrated_from_MoM", "migration_incomplete"], "priority_rank": mom_details['priority'], "trigger_priority": 1000, "can_trigger": mom_details['allowtriggers'], "private_data": True, # todo: check project.releasedate and compare to now or how to determine??? "archive_subdirectory": mom_details['projectpath'], # optional: # "project_category":, # "period_category":, "archive_location": archive_location } # add to return dict results[mom_details['mom2id']] = details return results def get_or_create_scheduling_set_for_project(project): """ Returns the common scheduling set for all scheduling units of the given project or creates a new one if not found in TMSS. """ try: scheduling_set = models.SchedulingSet.objects.get(name=project.name) except: dummy_scheduling_set_details = {"name": project.name, "description": "Common scheduling set for all scheduling units in this project (created during MoM migration)", "tags": ["migrated_from_MoM", "migration_incomplete"], "generator_doc": {}, "project": project, "generator_template": None} scheduling_set = models.SchedulingSet.objects.create(**dummy_scheduling_set_details) return scheduling_set def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, task_mom2id, max_time_distance=900): """ Returns a scheduling unit for the given subtask mom2id. It is either newly created or an existing scheduling unit of related subtasks. Subtasks are considered related when they are within the same folder and one task does not start more than max_time_distance seconds before of after the other task. # todo: we have groups/topologies as well, need some guidance here... # todo: where to get the specification time from? :returns tuple(scheduling_unit_draft, scheduling_unit_template) """ related_task_details = query_related_tasks_from_momdb(task_mom2id) if related_task_details: for details in related_task_details: if details["mom2id"] in mom2id_to_tmss_representation.keys(): related_task = mom2id_to_tmss_representation[details["mom2id"]] # we kept a blueprint reference if details['startdiff'] and details['enddiff']: time_distance = min(abs(details['startdiff'].total_seconds()), abs(details['enddiff'].total_seconds())) if time_distance < max_time_distance: blueprint = related_task.scheduling_unit_blueprint draft = blueprint.draft logger.info("...using scheduling unit draft_id=%s blueprint_id=%s from related task mom2id=%s for task mom2id=%s" % (draft.id, blueprint.id, details["mom2id"], task_mom2id)) return draft, blueprint else: logger.info("...related task mom2id=%s starts too far apart (seconds=%s threshold=%s)" % (details["mom2id"], time_distance, max_time_distance)) continue else: logger.warning("Cannot compare times, assuming task mom2id=%s is not related to %s" % (task_mom2id, details)) # todo: Investigate... is this because sometimes user sometimes system specified? continue scheduling_unit_draft_details = {"name": 'dummy', "description": "Scheduling unit draft (created during MoM migration for task mom2id=%s)" % task_mom2id, "tags": ["migrated_from_MoM", "migration_incomplete"], "requirements_doc": {}, "scheduling_set": scheduling_set, "requirements_template": scheduling_unit_template # optional: # "copy_reason": models.CopyReason.objects.get(value='template'), # "copies": None, # "generator_instance_doc" : {}, # "scheduling_constraints_doc": {}, # "scheduling_constraints_template": None, # "observation_strategy_template": None, } draft = models.SchedulingUnitDraft.objects.create(**scheduling_unit_draft_details) scheduling_unit_blueprint_details = {"name": 'dummy', "description": "Scheduling unit blueprint (created during MoM migration for task mom2id=%s)" % task_mom2id, "tags": ["migrated_from_MoM", "migration_incomplete"], "requirements_doc": {}, "do_cancel": False, "draft": draft, "requirements_template": scheduling_unit_template} blueprint = models.SchedulingUnitBlueprint.objects.create(**scheduling_unit_blueprint_details) logger.info("Created new scheduling unit draft_id=%s blueprint_id=%s for task mom2id=%s" % (draft.id, blueprint.id, task_mom2id)) return draft, blueprint def get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, subtask_mom2id): """ Returns a TMSS task for the given subtask. It is either newly created or an existing task of related subtasks. Subtasks are considered related when they have the same parentid in MoM database. :returns tuple(task_draft, task_blueprint) """ task_details = query_task_details_for_subtask_from_momdb(subtask_mom2id) if task_details: for details in task_details: # there should be exactly one, actually if details["mom2id"] in mom2id_to_tmss_representation.keys(): blueprint = mom2id_to_tmss_representation[details["mom2id"]] draft = blueprint.draft logger.info("...using existing task draft_id=%s blueprint_id=%s for subtask mom2id=%s" % (draft.id, blueprint.id, subtask_mom2id)) return draft, blueprint else: try: return models.TaskTemplate.objects.get(name=details['default_template']) except: task_template = _dummy_task_template(details['template']) task_draft_details = {"name": details["name"], "description": "" if not details['description'] else details['description'], "tags": ["migrated_from_MoM", "migration_incomplete"], "specifications_doc": {}, # "copy_reason": models.CopyReason.objects.get(value='template'), # "copies": None, "scheduling_unit_draft": scheduling_unit_draft, "specifications_template": task_template} task_draft = models.TaskDraft.objects.create(**task_draft_details) task_blueprint_details = {"name": details["name"], "description": "" if not details['description'] else details['description'], "tags": ["migrated_from_MoM", "migration_incomplete"], "specifications_doc": {}, "do_cancel": False, "draft": task_draft, "specifications_template": task_template, "scheduling_unit_blueprint": scheduling_unit_blueprint} task_blueprint = models.TaskBlueprint.objects.create(**task_blueprint_details) mom2id_to_tmss_representation[details["mom2id"]] = task_blueprint logger.info("...created new task draft_id=%s blueprint_id=%s for subtask mom2id=%s" % (task_draft.id, task_blueprint.id, subtask_mom2id)) return task_draft, task_blueprint def _dummy_subtask_template(name): template_name = "%s_dummy" % name try: return models.SubtaskTemplate.objects.get(name=template_name) except: dummy_template_details = {"name": template_name, "description": "Dummy subtask template for MoM migration, when no matching template in TMSS", "version": '1', "schema": {"$id":"http://tmss.lofar.org/api/schemas/subtasktemplate/empty/1#", "$schema": "http://json-schema.org/draft-06/schema#"}, "realtime": False, "queue": False, "tags": ["DUMMY"], "type": models.SubtaskType.objects.get(value='other')} return models.SubtaskTemplate.objects.create(**dummy_template_details) def _dummy_scheduling_unit_template(name): template_name = "%s_dummy" % name try: return models.SchedulingUnitTemplate.objects.get(name=template_name) except: dummy_scheduling_unit_template_details = {"name": template_name, "description": "Dummy scheduling unit template for MoM migration, when no matching template in TMSS", "version": 'v0.314159265359', "schema": {"$id":"http://tmss.lofar.org/api/schemas/schedulingunittemplate/empty/1#", "$schema": "http://json-schema.org/draft-06/schema#"}, "tags": ["DUMMY"]} return models.SchedulingUnitTemplate.objects.create(**dummy_scheduling_unit_template_details) def _dummy_task_template(name): template_name = "%s_dummy" % name try: return models.TaskTemplate.objects.get(name=template_name) except: dummy_task_template_details = {"name": template_name, "description": 'Dummy task template for MoM migration, when no matching template in TMSS', "validation_code_js": "", "version": 'v0.314159265359', "schema": {"$id":"http://tmss.lofar.org/api/schemas/tasktemplate/empty/1#", "$schema": "http://json-schema.org/draft-06/schema#"}, "tags": ["DUMMY"], "type": models.TaskType.objects.get(value='other')} return models.TaskTemplate.objects.create(**dummy_task_template_details) def create_subtask_trees_for_project_in_momdb(project_mom2id, project): """ Migrates all observations and pipelines that belong to the given project as Subtasks to TMSS. This also creates associated Task and SchedulingUnit drafts and blueprints in order to link the subtask to its project. :param project_mom2id: The mom id of the project to migrate :param project: The TMSS project object to refer to """ global stats global mom2id_to_tmss_representation logger.info("Getting subtask details from MoM database") mom_results = query_subtask_details_for_project_from_momdb(project_mom2id) logger.info("There are %s subtasks to migrate in project name=%s" % (len(mom_results), project.name)) for mom_details in mom_results: logger.info("...now migrating subtask mom2id=%s mom2objecttype=%s" % (mom_details['mom2id'], mom_details['mom2objecttype'])) # derive values for TMSS # type and start/end times if 'MEASUREMENT' in mom_details['mom2objecttype']: template_name = mom_details['obs_template'] start_time = mom_details['obs_starttime'] stop_time = mom_details['obs_endtime'] elif 'PIPELINE' in mom_details['mom2objecttype']: template_name = mom_details['template'] start_time = mom_details['starttime'] stop_time = mom_details['endtime'] else: logger.error('Unknown type %(mom2objecttype)s - Skipping subtask mom2id=%(mom2id)s' % mom_details) stats['subtasks_skipped'] += 1 continue # timestamps if start_time is None: start_time = datetime.datetime.utcfromtimestamp(0).isoformat() # not-null constraint if stop_time is None: stop_time = datetime.datetime.utcfromtimestamp(0).isoformat() # not-null constraint # state # todo: check mapping is correct and complete. # This now only includes what I ran into during testing and is mapped to what felt right by my intuition (i.e. probably wrong) # Note: status codes with a verbatim counterpart in TMSS do not need to be mapped here. # Valid TMSS values are: "defining", "defined", "scheduling", "scheduled", "queueing", "queued", "starting", "started", "finishing", "finished", "cancelling", "cancelled", "error" mom_state_to_subtask_state = {"opened": models.SubtaskState.objects.get(value="defining"), "described": models.SubtaskState.objects.get(value="defined"), "suspended": models.SubtaskState.objects.get(value="cancelled"), "prepared": models.SubtaskState.objects.get(value="scheduling"), "aborted": models.SubtaskState.objects.get(value="cancelled"), "hold": models.SubtaskState.objects.get(value="cancelled"), "approved": models.SubtaskState.objects.get(value="queued"), "failed": models.SubtaskState.objects.get(value="error"), "successful": models.SubtaskState.objects.get(value="finished"),} if mom_details['code'] in mom_state_to_subtask_state: state = mom_state_to_subtask_state[mom_details['code']] else: try: state = models.SubtaskState.objects.get(value=mom_details['code']) except: logger.error("No SubtaskState choice matching '%(code)s' in tmss database! - Skipping subtask mom2id=%(mom2id)s" % mom_details) stats['subtasks_skipped'] += 1 continue # template if template_name is not None: try: specifications_template = models.SubtaskTemplate.objects.get(name=template_name) logger.info('...found SubtaskTemplate id=%s for subtask mom2id=%s templatename=%s' % (specifications_template.id, mom_details["mom2id"], template_name)) except: # todo: create a lot of templates to reflect what was used for the actual task? # Then raise Exception once we have proper templates for everything? specifications_template = _dummy_subtask_template(template_name) logger.warning("No SubtaskTemplate matching '%s' in tmss database! Using dummy id=%s instead for subtask mom2id=%s" % (template_name, specifications_template.id, mom_details['mom2id'])) else: logger.error('Missing template name in MoM details! - Skipping subtask mom2id=%(mom2id)s' % mom_details) stats['subtasks_skipped'] += 1 continue # scheduling set scheduling_set = get_or_create_scheduling_set_for_project(project) # scheduling unit template try: scheduling_unit_template = models.SchedulingUnitTemplate.objects.get(name=template_name) logger.info('...found SchedulingUnitTemplate id=%s for subtask mom2id=%s templatename=%s' % (scheduling_unit_template.id, mom_details["mom2id"], template_name)) except: scheduling_unit_template = _dummy_scheduling_unit_template(template_name) logger.warning('No SchedulingUnitTemplate was found for subtask mom2id=%s templatename=%s. Using dummy template id=%s' % (mom_details["mom2id"], template_name, scheduling_unit_template.id)) # scheduling unit draft + blueprint scheduling_unit_draft, scheduling_unit_blueprint = get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, mom_details["parent_mom2id"]) # task draft + blueprint task_draft, task_blueprint = get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, mom_details["mom2id"]) details = {"id": mom_details['mom2id'], "state": state, "specifications_doc": {}, # todo: where from? We have user_specification_id (for task?) and system_specification_id (for subtask?) on lofar_observation (I guess referring to lofar_observation_specification). Shall we piece things together from that, or is there a text blob to use? Also: pipeline info lives in obs_spec too? "task_blueprint": task_blueprint, "specifications_template": specifications_template, "tags": ["migrated_from_MoM", "migration_incomplete"], # todo: set complete once it is verified that all info is present "priority": project.priority_rank, # todo: correct to derive from project? # optional: "start_time": start_time, "stop_time": stop_time # "created_or_updated_by_user" = None, # "raw_feedback" = None, # "do_cancel": None, #"cluster": None # I guess from lofar_observation.storage_cluster_id } subtask_qs = models.Subtask.objects.filter(id=details["id"]) if subtask_qs.count(): # todo: this will update the subtask, but other TMSS objects do not share id with MoM and get recreated with every migration run. Can we clean this up somehow? subtask_qs.update(**details) subtask = subtask_qs.first() logger.info("...updated existing subtask tmss id=%s" % subtask.id) stats['subtasks_updated'] += 1 else: subtask = models.Subtask.objects.create(**details) logger.info("...created new subtask tmss id=%s" % subtask.id) stats['subtasks_created'] += 1 mom2id_to_tmss_representation[mom_details['mom2id']] = subtask logger.info("...handled %s TMSS objects so far | %s" % (len(mom2id_to_tmss_representation), stats)) def get_or_create_cycle_for_project(project): """ Returns a cycle for a given project. Since cycles don't seem to be a thing in MoM, the cycle is derived from the project name. Returns None, if that fails. """ name = project.name if name.lower().startswith('lc'): cycle_no = re.search(r'\d+', name.lower()).group() cycle_name = "Cycle %02d" % int(cycle_no) try: cycle = models.Cycle.objects.get(name=cycle_name) logger.info("...found existing cycle name=%s for project name=%s" % (cycle.name, project.name)) return cycle except: details = {"name": cycle_name, "description": "Cycle %s (created during MoM migration)" % cycle_no, "tags": ["migrated_from_MoM", "migration_incomplete"], "start": "1970-01-01T00:00:00", # todo, where from? "stop": "1970-01-01T00:00:00", # todo, where from? } cycle = models.Cycle.objects.create(**details) logger.info("...created new cycle name=% for project name=%s" % (cycle.name, project.name)) return cycle logger.warning("Could not determine cycle for project name=%s. Using None." % (project.name)) def main(): """ Migrates data from a MoM database to a TMSS database. Existing objects in TMSS of same name or id are updated, otherwise new objects are created. """ global mom2id_to_tmss_representation global stats # query details of all projects in MoM database project_details = get_project_details_from_momdb() logger.info("There are %s projects to migrate" % len(project_details)) # iterate projects for p_id, p_details in project_details.items(): try: logger.info("Now migrating project mom_name=%s mom2id=%s" % (p_details['name'], p_id)) # create or update project project_qs = models.Project.objects.filter(name=p_details["name"]) if project_qs.count(): project_qs.update(**p_details) project = project_qs.first() logger.info("...updated existing project tmss_name=%s" % project.name) stats["projects_updated"] += 1 else: project = models.Project.objects.create(**p_details) logger.info("...created new project tmss_name=%s" % project.name) stats["projects_created"] += 1 # create all subtasks and related objects for the project create_subtask_trees_for_project_in_momdb(p_id, project) # add project to existing or new cycle cycle = get_or_create_cycle_for_project(project) if cycle: project.cycles.set([cycle]) logger.info("...done migrating project mom_name=%s mom2id=%s tmss_name=%s." % (p_details['name'], p_id, project.name)) except Exception as ex: logger.error(ex, exc_info=True) logger.error("Skipping migration of project mom_name=%s mom2id=%s details=%s." % (p_details['name'], p_id, p_details)) stats["projects_skipped"] += 1 logger.info("Done. Handled %s TMSS objects in total | %s" % (len(mom2id_to_tmss_representation), stats)) if __name__ == "__main__": logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.DEBUG) # Check the invocation arguments parser = OptionParser("%prog [options]", description='run the mom to tmss db conversion script') parser.add_option_group(dbcredentials.options_group(parser)) parser.set_defaults(dbcredentials="MoM") (options, args) = parser.parse_args() global dbcreds dbcreds = dbcredentials.parse_options(options) logger.info("Using MoM dbcreds: %s", dbcreds.stringWithHiddenPassword()) # note: this requires a config file .lofar/dbcredentials/mom.ini, with contents as such (adapt as needed): # # [database:MoM] # type = mysql # host = localhost # port = 3306 # user = momuser # password = mompass # database = lofar_mom_test_tmss from lofar.sas.tmss.tmss import setup_and_check_tmss_django_database_connection_and_exit_on_error setup_and_check_tmss_django_database_connection_and_exit_on_error(options.dbcredentials) main()