Skip to content
Snippets Groups Projects
Commit 327d96f3 authored by Jörn Künsemöller's avatar Jörn Künsemöller
Browse files

TMSS-403: add threshold for time difference for tasks placed in same scheduling unit

parent bf1034e5
No related branches found
No related tags found
1 merge request!257Resolve TMSS-403
...@@ -92,7 +92,12 @@ def query_subtask_details_for_project_from_momdb(project_mom2id): ...@@ -92,7 +92,12 @@ def query_subtask_details_for_project_from_momdb(project_mom2id):
logger.info("...querying MoM database for subtasks of project %s" % project_mom2id) logger.info("...querying MoM database for subtasks of project %s" % project_mom2id)
# todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing. # todo: double-check the correct use of ids. What refers to a mom2id and what refers to a database entry pk does not seem systematic and is very comfusing.
# todo: clarify: Measurements correspond to subtask and Observations correspond to task level? # todo: clarify: Measurements correspond to subtask and Observations correspond to task level?
# todo: clarify: Is there info on template/start/stop on Measurement level somewhere? Using the parent obs now. # We have lofar_observation and lofar_pipeline tables, but lofar_observation seems to refer to a 'task level mom2object'
# with measurement and pipeline children, where the latter are further described on the lofar_pipeline table.
# So I'm assuming that type '%%MEASUREMENT%%' here correspond to observation subtasks, which also means that some
# info is apparently squashed together with that of other subtasks of the same task in the lofar_observation entry
# of the parent/'task'.
# todo: clarify: Is there info on template/start/stop on Measurement level somewhere? Using the parent task/observation now.
query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code, query = '''SELECT mom2object.mom2id, mom2object.name, mom2object.description, mom2object.mom2objecttype, status.code,
lofar_pipeline.template AS template, lofar_observation.default_template as obs_template, lofar_pipeline.starttime, lofar_pipeline.endtime, lofar_pipeline.template AS template, lofar_observation.default_template as obs_template, lofar_pipeline.starttime, lofar_pipeline.endtime,
lofar_observation_specification.starttime AS obs_starttime, lofar_observation_specification.endtime AS obs_endtime, lofar_observation_specification.starttime AS obs_starttime, lofar_observation_specification.endtime AS obs_endtime,
...@@ -148,17 +153,20 @@ def query_related_tasks_from_momdb(task_mom2id): ...@@ -148,17 +153,20 @@ def query_related_tasks_from_momdb(task_mom2id):
:return: :return:
""" """
logger.info("...querying MoM database for tasks related to task mom2id=%s" % task_mom2id) logger.info("...querying MoM database for tasks related to task mom2id=%s" % task_mom2id)
query = '''SELECT related_mom2object.mom2id, related_mom2object.name, related_mom2object.description, related_mom2object.mom2objecttype, status.code, query = '''SELECT TIMEDIFF(related_lofar_observation_specification.starttime, lofar_observation_specification.endtime) AS startdiff,
lofar_observation.template, lofar_observation_specification.starttime AS starttime, lofar_observation_specification.endtime AS endtime TIMEDIFF(lofar_observation_specification.starttime, related_lofar_observation_specification.endtime) AS enddiff,
related_mom2object.mom2id, related_mom2object.name, related_mom2object.description, related_mom2object.mom2objecttype, status.code,
related_lofar_observation.template, related_lofar_observation_specification.starttime AS starttime, related_lofar_observation_specification.endtime AS endtime
FROM mom2object FROM mom2object
INNER JOIN mom2object AS related_mom2object ON mom2object.parentid = related_mom2object.parentid INNER JOIN mom2object AS related_mom2object ON mom2object.parentid = related_mom2object.parentid
INNER JOIN mom2objectstatus ON related_mom2object.currentstatusid = mom2objectstatus.id INNER JOIN mom2objectstatus ON related_mom2object.currentstatusid = mom2objectstatus.id
INNER JOIN status ON mom2objectstatus.statusid = status.id INNER JOIN status ON mom2objectstatus.statusid = status.id
LEFT JOIN lofar_observation ON related_mom2object.id = lofar_observation.mom2objectid LEFT JOIN lofar_observation AS related_lofar_observation ON related_mom2object.id = related_lofar_observation.mom2objectid
LEFT JOIN lofar_observation_specification AS related_lofar_observation_specification ON related_lofar_observation.user_specification_id = related_lofar_observation_specification.id
LEFT JOIN lofar_observation ON mom2object.id = lofar_observation.mom2objectid
LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id LEFT JOIN lofar_observation_specification ON lofar_observation.user_specification_id = lofar_observation_specification.id
WHERE mom2object.mom2id = %s WHERE mom2object.mom2id = %s
''' '''
#AND parent_mom2object.mom2objecttype LIKE '%%FOLDER%%'
parameters = (task_mom2id,) parameters = (task_mom2id,)
...@@ -231,11 +239,11 @@ def get_or_create_scheduling_set_for_project(project): ...@@ -231,11 +239,11 @@ def get_or_create_scheduling_set_for_project(project):
return scheduling_set return scheduling_set
def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, task_mom2id, max_time_distance=300): def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, task_mom2id, max_time_distance=900):
""" """
Returns a scheduling unit for the given subtask mom2id. It is either newly created or an existing scheduling unit Returns a scheduling unit for the given subtask mom2id. It is either newly created or an existing scheduling unit
of related subtasks. Subtasks are considered related when they are within the same folder and were specified no of related subtasks. Subtasks are considered related when they are within the same folder and one task does not start
longer than max_time_distance seconds apart. more than max_time_distance seconds before of after the other task.
# todo: we have groups/topologies as well, need some guidance here... # todo: we have groups/topologies as well, need some guidance here...
# todo: where to get the specification time from? # todo: where to get the specification time from?
...@@ -248,11 +256,19 @@ def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_te ...@@ -248,11 +256,19 @@ def get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_te
for details in related_task_details: for details in related_task_details:
if details["mom2id"] in mom2id_to_tmss_representation.keys(): if details["mom2id"] in mom2id_to_tmss_representation.keys():
related_task = mom2id_to_tmss_representation[details["mom2id"]] # we kept a blueprint reference related_task = mom2id_to_tmss_representation[details["mom2id"]] # we kept a blueprint reference
# todo: check time distance if details['startdiff'] and details['enddiff']:
blueprint = related_task.scheduling_unit_blueprint time_distance = min(abs(details['startdiff'].total_seconds()), abs(details['enddiff'].total_seconds()))
draft = blueprint.draft if time_distance < max_time_distance:
logger.info("Using scheduling unit draft_id=%s blueprint_id=%s from related task mom2id=%s for task mom2id=%s" % (draft.id, blueprint.id, details["mom2id"], task_mom2id)) blueprint = related_task.scheduling_unit_blueprint
return draft, blueprint draft = blueprint.draft
logger.info("...using scheduling unit draft_id=%s blueprint_id=%s from related task mom2id=%s for task mom2id=%s" % (draft.id, blueprint.id, details["mom2id"], task_mom2id))
return draft, blueprint
else:
logger.info("...related task mom2id=%s starts too far apart (seconds=%s threshold=%s)" % (details["mom2id"], time_distance, max_time_distance))
continue
else:
logger.warning("Cannot compare times, assuming task mom2id=%s is not related to %s" % (task_mom2id, details)) # todo: Investigate... is this because sometimes user sometimes system specified?
continue
scheduling_unit_draft_details = {"name": 'dummy', scheduling_unit_draft_details = {"name": 'dummy',
"description": "Scheduling unit draft (created during MoM migration for task mom2id=%s)" % task_mom2id, "description": "Scheduling unit draft (created during MoM migration for task mom2id=%s)" % task_mom2id,
...@@ -339,7 +355,7 @@ def _dummy_subtask_template(name): ...@@ -339,7 +355,7 @@ def _dummy_subtask_template(name):
return models.SubtaskTemplate.objects.get(name=template_name) return models.SubtaskTemplate.objects.get(name=template_name)
except: except:
dummy_template_details = {"name": template_name, dummy_template_details = {"name": template_name,
"description": "Dummy template for MoM migration, when no matching template in TMSS", "description": "Dummy subtask template for MoM migration, when no matching template in TMSS",
"version": '1', "version": '1',
"schema": {"$id":"http://tmss.lofar.org/api/schemas/subtasktemplate/empty/1#", "schema": {"$id":"http://tmss.lofar.org/api/schemas/subtasktemplate/empty/1#",
"$schema": "http://json-schema.org/draft-06/schema#"}, "$schema": "http://json-schema.org/draft-06/schema#"},
...@@ -357,7 +373,7 @@ def _dummy_scheduling_unit_template(name): ...@@ -357,7 +373,7 @@ def _dummy_scheduling_unit_template(name):
return models.SchedulingUnitTemplate.objects.get(name=template_name) return models.SchedulingUnitTemplate.objects.get(name=template_name)
except: except:
dummy_scheduling_unit_template_details = {"name": template_name, dummy_scheduling_unit_template_details = {"name": template_name,
"description": "Dummy template for MoM migration, when no matching template in TMSS", "description": "Dummy scheduling unit template for MoM migration, when no matching template in TMSS",
"version": 'v0.314159265359', "version": 'v0.314159265359',
"schema": {"$id":"http://tmss.lofar.org/api/schemas/schedulingunittemplate/empty/1#", "schema": {"$id":"http://tmss.lofar.org/api/schemas/schedulingunittemplate/empty/1#",
"$schema": "http://json-schema.org/draft-06/schema#"}, "$schema": "http://json-schema.org/draft-06/schema#"},
...@@ -458,31 +474,28 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project): ...@@ -458,31 +474,28 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project):
if template_name is not None: if template_name is not None:
try: try:
specifications_template = models.SubtaskTemplate.objects.get(name=template_name) specifications_template = models.SubtaskTemplate.objects.get(name=template_name)
logger.info('...found SubtaskTemplate id=%s for subtask mom2id=%s templatename=%s' % (specifications_template.id, mom_details["mom2id"], template_name))
except: except:
logger.warning("No SubtaskTemplate matching '%s' in tmss database! Using dummy instead! mom2id=%s" % (template_name, mom_details['mom2id']))
# todo: create a lot of templates to reflect what was used for the actual task? # todo: create a lot of templates to reflect what was used for the actual task?
# todo: raise Exception (or continue) once we have proper templates for everything. # Then raise Exception once we have proper templates for everything?
specifications_template = _dummy_subtask_template(template_name) specifications_template = _dummy_subtask_template(template_name)
logger.warning("No SubtaskTemplate matching '%s' in tmss database! Using dummy id=%s instead for subtask mom2id=%s" % (template_name, specifications_template.id, mom_details['mom2id']))
else: else:
logger.error('Missing template name in MoM details! - Skipping subtask mom2id=%(mom2id)s' % mom_details) logger.error('Missing template name in MoM details! - Skipping subtask mom2id=%(mom2id)s' % mom_details)
stats['subtasks_skipped'] += 1 stats['subtasks_skipped'] += 1
continue continue
# ----------------
# the following entries are needed to relate a task to it's project.
# scheduling set # scheduling set
scheduling_set = get_or_create_scheduling_set_for_project(project) scheduling_set = get_or_create_scheduling_set_for_project(project)
# scheduling unit template # scheduling unit template
try: try:
scheduling_unit_template = models.SchedulingUnitTemplate.objects.get(name=template_name) scheduling_unit_template = models.SchedulingUnitTemplate.objects.get(name=template_name)
logger.info('...found scheduling unit template id=%s for subtask mom2id=%s templatename=%s' % (scheduling_unit_template.id, mom_details["mom2id"], template_name)) logger.info('...found SchedulingUnitTemplate id=%s for subtask mom2id=%s templatename=%s' % (scheduling_unit_template.id, mom_details["mom2id"], template_name))
except: except:
scheduling_unit_template = _dummy_scheduling_unit_template(template_name) scheduling_unit_template = _dummy_scheduling_unit_template(template_name)
logger.warning('No scheduling unit template was found for subtask mom2id=%s templatename=%s. Using dummy template id=%s' % (mom_details["mom2id"], template_name, scheduling_unit_template.id)) logger.warning('No SchedulingUnitTemplate was found for subtask mom2id=%s templatename=%s. Using dummy template id=%s' % (mom_details["mom2id"], template_name, scheduling_unit_template.id))
# scheduling unit draft + blueprint # scheduling unit draft + blueprint
scheduling_unit_draft, scheduling_unit_blueprint = get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, mom_details["parent_mom2id"]) scheduling_unit_draft, scheduling_unit_blueprint = get_or_create_scheduling_unit_for_subtask(scheduling_set, scheduling_unit_template, mom_details["parent_mom2id"])
...@@ -490,12 +503,9 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project): ...@@ -490,12 +503,9 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project):
# task draft + blueprint # task draft + blueprint
task_draft, task_blueprint = get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, mom_details["mom2id"]) task_draft, task_blueprint = get_or_create_task_for_subtask(scheduling_unit_draft, scheduling_unit_blueprint, mom_details["mom2id"])
# ----------------
details = {"id": mom_details['mom2id'], details = {"id": mom_details['mom2id'],
"state": state, "state": state,
"specifications_doc": {}, "specifications_doc": {}, # todo: where from? We have user_specification_id (for task?) and system_specification_id (for subtask?) on lofar_observation (I guess referring to lofar_observation_specification). Shall we piece things together from that, or is there a text blob to use? Also: pipeline info lives in obs_spec too?
"task_blueprint": task_blueprint, "task_blueprint": task_blueprint,
"specifications_template": specifications_template, "specifications_template": specifications_template,
"tags": ["migrated_from_MoM", "migration_incomplete"], # todo: set complete once it is verified that all info is present "tags": ["migrated_from_MoM", "migration_incomplete"], # todo: set complete once it is verified that all info is present
...@@ -503,11 +513,11 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project): ...@@ -503,11 +513,11 @@ def create_subtask_trees_for_project_in_momdb(project_mom2id, project):
# optional: # optional:
"start_time": start_time, "start_time": start_time,
"stop_time": stop_time, "stop_time": stop_time,
"schedule_method": models.ScheduleMethod.objects.get(value="manual"), # todo: correct? "schedule_method": models.ScheduleMethod.objects.get(value="manual"), # todo: correct? Or leave None?
# "created_or_updated_by_user" = None, # "created_or_updated_by_user" = None,
# "raw_feedback" = None, # "raw_feedback" = None,
# "do_cancel": None, # "do_cancel": None,
"cluster": None # set below #"cluster": None # I guess from lofar_observation.storage_cluster_id
} }
subtask_qs = models.Subtask.objects.filter(id=details["id"]) subtask_qs = models.Subtask.objects.filter(id=details["id"])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment