diff --git a/SAS/TMSS/src/migrate_momdb_to_tmss.py b/SAS/TMSS/src/migrate_momdb_to_tmss.py index 64391e074443224887bc14048c1cf5dadfa32a35..13efa43bbc7759f453875c51cdbfb3f9b5734fb9 100755 --- a/SAS/TMSS/src/migrate_momdb_to_tmss.py +++ b/SAS/TMSS/src/migrate_momdb_to_tmss.py @@ -92,7 +92,12 @@ def query_subtask_details_for_project_from_momdb(project_mom2id): logger.info("...querying MoM database for subtasks of project %s" % project_mom2id) # todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing. # todo: clarify: Measurements correspond to subtask and Observations correspond to task level? - # todo: clarify: Is there info on template/start/stop on Measurement level somewhere? Using the parent obs now. + # We have lofar_observation and lofar_pipeline tables, but lofar_observation seems to refer to a 'task level mom2object' + # with measurement and pipeline children, where the latter are further described on the lofar_pipeline table. + # So I'm assuming that type '%%MEASUREMENT%%' here correspond to observation subtasks, which also means that some + # info is apparently squashed together with that of other subtasks of the same task in the lofar_observation entry + # of the parent/'task'. + # todo: clarify: Is there info on template/start/stop on Measurement level somewhere? Using the parent task/observation now. query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code, lofar_pipeline.template AS template, lofar_observation.default_template as obs_template, lofar_pipeline.starttime, lofar_pipeline.endtime, lofar_observation_specification.starttime AS obs_starttime, lofar_observation_specification.endtime AS obs_endtime, @@ -148,17 +153,20 @@ def query_related_tasks_from_momdb(task_mom2id): :return: """ logger.info("...querying MoM database for tasks related to task mom2id=%s" % task_mom2id) - query = '''SELECT related_mom2object.mom2id, related_mom2object.name, related_mom2object.description, related_mom2object.mom2objecttype, status.code, - lofar_observation.template, lofar_observation_specification.starttime AS starttime, lofar_observation_specification.endtime AS endtime + query = '''SELECT TIMEDIFF(related_lofar_observation_specification.starttime, lofar_observation_specification.endtime) AS startdiff, + TIMEDIFF(lofar_observation_specification.starttime, related_lofar_observation_specification.endtime) AS enddiff, + related_mom2object.mom2id, related_mom2object.name, related_mom2object.description, related_mom2object.mom2objecttype, status.code, + related_lofar_observation.template, related_lofar_observation_specification.starttime AS starttime, related_lofar_observation_specification.endtime AS endtime FROM mom2object INNER JOIN mom2object AS related_mom2object ON mom2object.parentid = related_mom2object.parentid INNER JOIN mom2objectstatus ON related_mom2object.currentstatusid = mom2objectstatus.id INNER JOIN status ON mom2objectstatus.statusid = status.id - LEFT JOIN lofar_observation ON related_mom2object.id = lofar_observation.mom2objectid + LEFT JOIN lofar_observation AS related_lofar_observation ON related_mom2object.id = related_lofar_observation.mom2objectid + LEFT JOIN lofar_observation_specification AS related_lofar_observation_specification ON related_lofar_observation.user_specification_id = related_lofar_observation_specification.id + LEFT JOIN lofar_observation ON mom2object.id = lofar_observation.mom2objectid LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id - WHERE mom2object.mom2id = %s + WHERE mom2object.mom2id = %s ''' - #AND parent_mom2object.mom2objecttype LIKE '%%FOLDER%%' parameters = (task_mom2id,) @@ -231,11 +239,11 @@ def get_or_create_scheduling_set_for_project(project): return scheduling_set -def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, task_mom2id, max_time_distance=300): +def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, task_mom2id, max_time_distance=900): """ Returns a scheduling unit for the given subtask mom2id. It is either newly created or an existing scheduling unit - of related subtasks. Subtasks are considered related when they are within the same folder and were specified no - longer than max_time_distance seconds apart. + of related subtasks. Subtasks are considered related when they are within the same folder and one task does not start + more than max_time_distance seconds before of after the other task. # todo: we have groups/topologies as well, need some guidance here... # todo: where to get the specification time from? @@ -248,11 +256,19 @@ def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_te for details in related_task_details: if details["mom2id"] in mom2id_to_tmss_representation.keys(): related_task = mom2id_to_tmss_representation[details["mom2id"]] # we kept a blueprint reference - # todo: check time distance - blueprint = related_task.scheduling_unit_blueprint - draft = blueprint.draft - logger.info("Using scheduling unit draft_id=%s blueprint_id=%s from related task mom2id=%s for task mom2id=%s" % (draft.id, blueprint.id, details["mom2id"], task_mom2id)) - return draft, blueprint + if details['startdiff'] and details['enddiff']: + time_distance = min(abs(details['startdiff'].total_seconds()), abs(details['enddiff'].total_seconds())) + if time_distance < max_time_distance: + blueprint = related_task.scheduling_unit_blueprint + draft = blueprint.draft + logger.info("...using scheduling unit draft_id=%s blueprint_id=%s from related task mom2id=%s for task mom2id=%s" % (draft.id, blueprint.id, details["mom2id"], task_mom2id)) + return draft, blueprint + else: + logger.info("...related task mom2id=%s starts too far apart (seconds=%s threshold=%s)" % (details["mom2id"], time_distance, max_time_distance)) + continue + else: + logger.warning("Cannot compare times, assuming task mom2id=%s is not related to %s" % (task_mom2id, details)) # todo: Investigate... is this because sometimes user sometimes system specified? + continue scheduling_unit_draft_details = {"name": 'dummy', "description": "Scheduling unit draft (created during MoM migration for task mom2id=%s)" % task_mom2id, @@ -339,7 +355,7 @@ def _dummy_subtask_template(name): return models.SubtaskTemplate.objects.get(name=template_name) except: dummy_template_details = {"name": template_name, - "description": "Dummy template for MoM migration, when no matching template in TMSS", + "description": "Dummy subtask template for MoM migration, when no matching template in TMSS", "version": '1', "schema": {"$id":"http://tmss.lofar.org/api/schemas/subtasktemplate/empty/1#", "$schema": "http://json-schema.org/draft-06/schema#"}, @@ -357,7 +373,7 @@ def _dummy_scheduling_unit_template(name): return models.SchedulingUnitTemplate.objects.get(name=template_name) except: dummy_scheduling_unit_template_details = {"name": template_name, - "description": "Dummy template for MoM migration, when no matching template in TMSS", + "description": "Dummy scheduling unit template for MoM migration, when no matching template in TMSS", "version": 'v0.314159265359', "schema": {"$id":"http://tmss.lofar.org/api/schemas/schedulingunittemplate/empty/1#", "$schema": "http://json-schema.org/draft-06/schema#"}, @@ -458,31 +474,28 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project): if template_name is not None: try: specifications_template = models.SubtaskTemplate.objects.get(name=template_name) + logger.info('...found SubtaskTemplate id=%s for subtask mom2id=%s templatename=%s' % (specifications_template.id, mom_details["mom2id"], template_name)) except: - logger.warning("No SubtaskTemplate matching '%s' in tmss database! Using dummy instead! mom2id=%s" % (template_name, mom_details['mom2id'])) - # todo: create a lot of templates to reflect what was used for the actual task? - # todo: raise Exception (or continue) once we have proper templates for everything. + # Then raise Exception once we have proper templates for everything? specifications_template = _dummy_subtask_template(template_name) + logger.warning("No SubtaskTemplate matching '%s' in tmss database! Using dummy id=%s instead for subtask mom2id=%s" % (template_name, specifications_template.id, mom_details['mom2id'])) else: logger.error('Missing template name in MoM details! - Skipping subtask mom2id=%(mom2id)s' % mom_details) stats['subtasks_skipped'] += 1 continue - # ---------------- - # the following entries are needed to relate a task to it's project. - # scheduling set scheduling_set = get_or_create_scheduling_set_for_project(project) # scheduling unit template try: scheduling_unit_template = models.SchedulingUnitTemplate.objects.get(name=template_name) - logger.info('...found scheduling unit template id=%s for subtask mom2id=%s templatename=%s' % (scheduling_unit_template.id, mom_details["mom2id"], template_name)) + logger.info('...found SchedulingUnitTemplate id=%s for subtask mom2id=%s templatename=%s' % (scheduling_unit_template.id, mom_details["mom2id"], template_name)) except: scheduling_unit_template = _dummy_scheduling_unit_template(template_name) - logger.warning('No scheduling unit template was found for subtask mom2id=%s templatename=%s. Using dummy template id=%s' % (mom_details["mom2id"], template_name, scheduling_unit_template.id)) + logger.warning('No SchedulingUnitTemplate was found for subtask mom2id=%s templatename=%s. Using dummy template id=%s' % (mom_details["mom2id"], template_name, scheduling_unit_template.id)) # scheduling unit draft + blueprint scheduling_unit_draft, scheduling_unit_blueprint = get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, mom_details["parent_mom2id"]) @@ -490,12 +503,9 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project): # task draft + blueprint task_draft, task_blueprint = get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, mom_details["mom2id"]) - # ---------------- - - details = {"id": mom_details['mom2id'], "state": state, - "specifications_doc": {}, + "specifications_doc": {}, # todo: where from? We have user_specification_id (for task?) and system_specification_id (for subtask?) on lofar_observation (I guess referring to lofar_observation_specification). Shall we piece things together from that, or is there a text blob to use? Also: pipeline info lives in obs_spec too? "task_blueprint": task_blueprint, "specifications_template": specifications_template, "tags": ["migrated_from_MoM", "migration_incomplete"], # todo: set complete once it is verified that all info is present @@ -503,11 +513,11 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project): # optional: "start_time": start_time, "stop_time": stop_time, - "schedule_method": models.ScheduleMethod.objects.get(value="manual"), # todo: correct? + "schedule_method": models.ScheduleMethod.objects.get(value="manual"), # todo: correct? Or leave None? # "created_or_updated_by_user" = None, # "raw_feedback" = None, # "do_cancel": None, - "cluster": None # set below + #"cluster": None # I guess from lofar_observation.storage_cluster_id } subtask_qs = models.Subtask.objects.filter(id=details["id"])