Skip to content
Snippets Groups Projects
Commit c0c49f15 authored by Jörn Künsemöller
Browse files

TMSS-662: also provide data based on affected SU on sky duration (next to event duration)

parent e3747ca6
No related branches found
No related tags found
1 merge request: !1247 TMSS-662: also provide data based on affected SU on sky duration (next to event duration)
...@@ -679,17 +679,45 @@ class FailureReport(): ...@@ -679,17 +679,45 @@ class FailureReport():
for field_name in ['issue_type', 'issue_subtype']: for field_name in ['issue_type', 'issue_subtype']:
group_aggregates = list(self.system_events.values(f'{field_name}__value') group_aggregates = list(self.system_events.values(f'{field_name}__value')
.annotate(count=Count(f'{field_name}__value'), .annotate(count=Count(f'{field_name}__value'),
total_duration=Sum(Coalesce(F('stop'), datetime.utcnow()) - F('start')))) duration_lost_event=Sum(Coalesce(F('stop'), datetime.utcnow()) - F('start'))))
# convert durations to seconds and add derived values
for d in group_aggregates: for d in group_aggregates:
d.update({'total_duration': d['total_duration'].total_seconds(), # determine total duration of affected scheduling units
'percent_of_wall_time': 100 * d['total_duration'].total_seconds() / wall_time}) system_events = self.system_events.filter(**{f'{field_name}__value': d[f'{field_name}__value']})
affected_tasks = models.TaskBlueprint.objects.filter(system_events__in=system_events)
affected_units = (models.SchedulingUnitBlueprint.objects.filter(task_blueprints__in=affected_tasks))
total_on_sky_duration_lost = affected_units.aggregate(Sum(F('on_sky_duration')))['on_sky_duration__sum']
# convert durations to seconds and add derived values
d.update({'duration_lost_event': d['duration_lost_event'].total_seconds(),
'duration_lost_on_sky': total_on_sky_duration_lost.total_seconds() if total_on_sky_duration_lost else 0,
'percent_of_wall_time_lost_event': 100 * d['duration_lost_event'].total_seconds() / wall_time,
'percent_of_wall_time_lost_on_sky': total_on_sky_duration_lost.total_seconds() / wall_time})
aggregates[f'by_{field_name}'] = group_aggregates aggregates[f'by_{field_name}'] = group_aggregates
return aggregates return aggregates
def _get_lost_observing_time_histogram(self) -> dict:
    """
    Help method to get lost observing time over entire lifetime (not restricted to report period).

    Note: This uses the on-sky times of scheduling units that are affected by system events
    of severity 'failure'. Each affected unit contributes its full on_sky_duration to the
    monthly bin that its on_sky_start_time falls into.

    :return: dict mapping the ISO date of the first day of a month (e.g. '2023-01-01') to the
             summed on-sky duration in seconds of the affected units that started in that month.
    """
    histogram = {}

    # do not use self.system_events here because we want to ignore the reporting period here
    system_events = models.SystemEvent.objects.filter(severity__value=models.SystemEventSeverity.Choices.FAILURE.value)
    affected_tasks = models.TaskBlueprint.objects.filter(system_events__in=system_events)
    # Filtering across the multi-valued task_blueprints relation yields one row per matching task,
    # so without distinct() a unit with several affected tasks would be counted several times.
    affected_units = models.SchedulingUnitBlueprint.objects.filter(task_blueprints__in=affected_tasks).distinct()

    for unit in affected_units:
        # NOTE(review): assumes on_sky_start_time / on_sky_duration may be None for units that
        # never went on sky -- confirm against the model. Such units cannot be binned, so skip them.
        if unit.on_sky_start_time is None or unit.on_sky_duration is None:
            continue
        # the unit's on-sky start time determines the (monthly) bin it falls into
        start_bin = datetime(year=unit.on_sky_start_time.year, month=unit.on_sky_start_time.month, day=1)
        histogram[start_bin] = histogram.get(start_bin, 0) + unit.on_sky_duration.total_seconds()

    # turn datetimes to serializable format and return
    return {k.date().isoformat(): v for k, v in histogram.items()}
def _get_system_event_time_histogram(self) -> {}:
""" """
Help method to get lost observing time over entire lifetime (not restricted to report period) Help method to get lost observing time over entire lifetime (not restricted to report period)
Note: This is based on reported system event start and stop times, not based on the affected observations. Note: This is based on reported system event start and stop times, not based on the affected observations.
...@@ -721,17 +749,6 @@ class FailureReport(): ...@@ -721,17 +749,6 @@ class FailureReport():
# ...account for partial month at stop # ...account for partial month at stop
histogram[stop_bin] = histogram.get(stop_bin, 0) + (event_stop - stop_bin).total_seconds() histogram[stop_bin] = histogram.get(stop_bin, 0) + (event_stop - stop_bin).total_seconds()
# todo: Or should this be based on the time that was actually lost in SUs?
# This would do the trick (SU start time determines bin these fall into):
# system_events = models.SystemEvent.objects.filter(severity__value=models.SystemEventSeverity.Choices.FAILURE.value)
# affected_tasks = models.TaskBlueprint.objects.filter(system_events__in=system_events)
# affected_units = (models.SchedulingUnitBlueprint.objects.filter(task_blueprints__in=affected_tasks))
#
# for unit in affected_units.all():
# start_bin = datetime(year=unit.on_sky_start_time.year, month=unit.on_sky_start_time.month, day=1)
# histogram[start_bin] = histogram.get(start_bin, 0) + (unit.on_sky_duration.total_seconds())
# ----
# turn datetimes to serializable format and return # turn datetimes to serializable format and return
histogram = {k.date().isoformat():v for k,v in histogram.items()} histogram = {k.date().isoformat():v for k,v in histogram.items()}
return histogram return histogram
...@@ -764,10 +781,13 @@ class FailureReport(): ...@@ -764,10 +781,13 @@ class FailureReport():
Create a failure report as a JSON object. Create a failure report as a JSON object.
""" """
result = {'system_event_summary': self._get_system_event_summary(), result = {'system_event_summary': self._get_system_event_summary(),
'lost_observing_time_histogram': self._get_lost_observing_time_histogram(), 'lost_time_histogram_on_sky': self._get_lost_observing_time_histogram(),
'lost_time_histogram_event': self._get_system_event_time_histogram(),
'failed_scheduling_units': self._get_failed_scheduling_units()} 'failed_scheduling_units': self._get_failed_scheduling_units()}
result['total_duration_lost'] = sum([d['total_duration'] for d in result['system_event_summary']['by_issue_type']]) result['total_duration_lost_event'] = sum([d['duration_lost_event'] for d in result['system_event_summary']['by_issue_type']])
result['total_percent_of_wall_time_lost'] = sum([d['percent_of_wall_time'] for d in result['system_event_summary']['by_issue_type']]) result['total_duration_lost_on_sky'] = sum([d['duration_lost_on_sky'] for d in result['system_event_summary']['by_issue_type']])
result['total_percent_of_wall_time_lost_event'] = sum([d['percent_of_wall_time_lost_event'] for d in result['system_event_summary']['by_issue_type']])
result['total_percent_of_wall_time_lost_on_sky'] = sum([d['percent_of_wall_time_lost_on_sky'] for d in result['system_event_summary']['by_issue_type']])
result['reporting_period_wall_time'] = (self.stop - self.start).total_seconds() result['reporting_period_wall_time'] = (self.stop - self.start).total_seconds()
result['report_start'] = self.start.isoformat() result['report_start'] = self.start.isoformat()
result['report_stop'] = self.stop.isoformat() result['report_stop'] = self.stop.isoformat()
......
...@@ -645,9 +645,11 @@ class FailureReportTest(unittest.TestCase): ...@@ -645,9 +645,11 @@ class FailureReportTest(unittest.TestCase):
# assertions, SUs 1 and 2 should be in here, SU 3 should be ignored due to lack of severity # assertions, SUs 1 and 2 should be in here, SU 3 should be ignored due to lack of severity
# check grand total # check grand total
self.assertIn('total_duration_lost', report) self.assertIn('total_duration_lost_event', report)
self.assertEqual(report['total_duration_lost'], 21600) # 2 failure events of 3 hours each self.assertEqual(report['total_duration_lost_event'], 21600) # 2 failure events of 3 hours each
self.assertAlmostEqual(report['total_percent_of_wall_time_lost'], 0.0684931506849315) self.assertAlmostEqual(report['total_percent_of_wall_time_lost_event'], 0.0684931506849315)
self.assertEqual(report['total_duration_lost_on_sky'], 9999) # 2 units: 6666 + 3333
self.assertAlmostEqual(report['total_percent_of_wall_time_lost_on_sky'], 0.0003170662100456621)
# check aggregates # check aggregates
self.assertIn('human', str(report['system_event_summary']['by_issue_type'])) # event 1 self.assertIn('human', str(report['system_event_summary']['by_issue_type'])) # event 1
...@@ -656,12 +658,16 @@ class FailureReportTest(unittest.TestCase): ...@@ -656,12 +658,16 @@ class FailureReportTest(unittest.TestCase):
self.assertNotIn('noisy', str(report['system_event_summary']['by_issue_subtype'])) # event 3 missing self.assertNotIn('noisy', str(report['system_event_summary']['by_issue_subtype'])) # event 3 missing
# check histogram (events get binned correctly?) # check histogram (events get binned correctly?)
self.assertEqual(report['lost_observing_time_histogram']['2023-01-01'], 3600) # event 1 self.assertEqual(report['lost_time_histogram_event']['2023-01-01'], 3600) # event 1
self.assertEqual(report['lost_observing_time_histogram']['2023-02-01'], 7200) # event 1 self.assertEqual(report['lost_time_histogram_event']['2023-02-01'], 7200) # event 1
self.assertEqual(report['lost_observing_time_histogram']['2023-03-01'], 3600) # event 2 self.assertEqual(report['lost_time_histogram_event']['2023-03-01'], 3600) # event 2
self.assertEqual(report['lost_observing_time_histogram']['2023-04-01'], 7200) # event 2 self.assertEqual(report['lost_time_histogram_event']['2023-04-01'], 7200) # event 2
self.assertNotIn('2023-05-01', report['lost_observing_time_histogram']) # event 3 missing self.assertEqual(report['lost_time_histogram_on_sky']['2023-01-01'], 6666) # event 1
self.assertNotIn('2023-06-01', report['lost_observing_time_histogram']) # event 3 missing self.assertEqual(report['lost_time_histogram_on_sky']['2023-03-01'], 3333) # event 2
self.assertNotIn('2023-05-01', report['lost_time_histogram_event']) # event 3 missing
self.assertNotIn('2023-06-01', report['lost_time_histogram_event']) # event 3 missing
self.assertNotIn('2023-05-01', report['lost_time_histogram_on_sky']) # event 3 missing
# check SUs # check SUs
self.assertEqual(len(report['failed_scheduling_units']), 2) self.assertEqual(len(report['failed_scheduling_units']), 2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment