From c0c49f158217c73544373cbe5cad182ef1afb84e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20K=C3=BCnsem=C3=B6ller?= <jkuensem@physik.uni-bielefeld.de> Date: Fri, 8 Dec 2023 17:02:31 +0100 Subject: [PATCH] TMSS-662: also provide data based on affected SU on sky duration (next to event duration) --- .../src/tmss/tmssapp/adapters/reports.py | 56 +++++++++++++------ SAS/TMSS/backend/test/t_reports.py | 24 +++++--- 2 files changed, 53 insertions(+), 27 deletions(-) diff --git a/SAS/TMSS/backend/src/tmss/tmssapp/adapters/reports.py b/SAS/TMSS/backend/src/tmss/tmssapp/adapters/reports.py index 8df42757e9a..79bbd92d647 100644 --- a/SAS/TMSS/backend/src/tmss/tmssapp/adapters/reports.py +++ b/SAS/TMSS/backend/src/tmss/tmssapp/adapters/reports.py @@ -679,17 +679,45 @@ class FailureReport(): for field_name in ['issue_type', 'issue_subtype']: group_aggregates = list(self.system_events.values(f'{field_name}__value') .annotate(count=Count(f'{field_name}__value'), - total_duration=Sum(Coalesce(F('stop'), datetime.utcnow()) - F('start')))) + duration_lost_event=Sum(Coalesce(F('stop'), datetime.utcnow()) - F('start')))) + - # convert durations to seconds and add derived values for d in group_aggregates: - d.update({'total_duration': d['total_duration'].total_seconds(), - 'percent_of_wall_time': 100 * d['total_duration'].total_seconds() / wall_time}) + # determine total on sky duration of the scheduling units affected by the events in this group + system_events = self.system_events.filter(**{f'{field_name}__value': d[f'{field_name}__value']}) + affected_tasks = models.TaskBlueprint.objects.filter(system_events__in=system_events) + affected_units = models.SchedulingUnitBlueprint.objects.filter(task_blueprints__in=affected_tasks).distinct()  # distinct: the join yields one row per affected task, count each unit once + total_on_sky_duration_lost = affected_units.aggregate(Sum(F('on_sky_duration')))['on_sky_duration__sum']  # None when no unit is affected + + # convert durations to seconds and add derived values (on sky values fall back to 0 when the aggregate is None) + d.update({'duration_lost_event': d['duration_lost_event'].total_seconds(), + 'duration_lost_on_sky': 
total_on_sky_duration_lost.total_seconds() if total_on_sky_duration_lost else 0, + 'percent_of_wall_time_lost_event': 100 * d['duration_lost_event'].total_seconds() / wall_time, + 'percent_of_wall_time_lost_on_sky': (100 * total_on_sky_duration_lost.total_seconds() / wall_time) if total_on_sky_duration_lost else 0}) aggregates[f'by_{field_name}'] = group_aggregates return aggregates def _get_lost_observing_time_histogram(self) -> {}: + """ + Helper method to get lost observing time over entire lifetime (not restricted to report period) + Note: This uses the on sky times of scheduling units that are affected by system events of severity 'failure'. + """ + histogram = {} + # do not use self.system_events here because we want to ignore the reporting period here + system_events = models.SystemEvent.objects.filter(severity__value=models.SystemEventSeverity.Choices.FAILURE.value) + affected_tasks = models.TaskBlueprint.objects.filter(system_events__in=system_events) + affected_units = models.SchedulingUnitBlueprint.objects.filter(task_blueprints__in=affected_tasks).distinct()  # distinct: count each unit once, even with several affected tasks + + for unit in affected_units.all(): + start_bin = datetime(year=unit.on_sky_start_time.year, month=unit.on_sky_start_time.month, day=1) + histogram[start_bin] = histogram.get(start_bin, 0) + (unit.on_sky_duration.total_seconds()) + + # turn datetimes to serializable format and return + histogram = {k.date().isoformat():v for k,v in histogram.items()} + return histogram + + def _get_system_event_time_histogram(self) -> {}: """ Help method to get lost observing time over entire lifetime (not restricted to report period) Note: This is based on reported system event start and stop times, not based on the affected observations. @@ -721,17 +749,6 @@ class FailureReport(): # ...account for partial month at stop histogram[stop_bin] = histogram.get(stop_bin, 0) + (event_stop - stop_bin).total_seconds() - # todo: Or should this be based on the time that was actually lost in SUs? 
- # This would do the trick (SU start time determines bin these fall into): - # system_events = models.SystemEvent.objects.filter(severity__value=models.SystemEventSeverity.Choices.FAILURE.value) - # affected_tasks = models.TaskBlueprint.objects.filter(system_events__in=system_events) - # affected_units = (models.SchedulingUnitBlueprint.objects.filter(task_blueprints__in=affected_tasks)) - # - # for unit in affected_units.all(): - # start_bin = datetime(year=unit.on_sky_start_time.year, month=unit.on_sky_start_time.month, day=1) - # histogram[start_bin] = histogram.get(start_bin, 0) + (unit.on_sky_duration.total_seconds()) - # ---- - # turn datetimes to serializable format and return histogram = {k.date().isoformat():v for k,v in histogram.items()} return histogram @@ -764,10 +781,13 @@ class FailureReport(): Create a failure report as a JSON object. """ result = {'system_event_summary': self._get_system_event_summary(), - 'lost_observing_time_histogram': self._get_lost_observing_time_histogram(), + 'lost_time_histogram_on_sky': self._get_lost_observing_time_histogram(), + 'lost_time_histogram_event': self._get_system_event_time_histogram(), 'failed_scheduling_units': self._get_failed_scheduling_units()} - result['total_duration_lost'] = sum([d['total_duration'] for d in result['system_event_summary']['by_issue_type']]) - result['total_percent_of_wall_time_lost'] = sum([d['percent_of_wall_time'] for d in result['system_event_summary']['by_issue_type']]) + result['total_duration_lost_event'] = sum([d['duration_lost_event'] for d in result['system_event_summary']['by_issue_type']]) + result['total_duration_lost_on_sky'] = sum([d['duration_lost_on_sky'] for d in result['system_event_summary']['by_issue_type']]) + result['total_percent_of_wall_time_lost_event'] = sum([d['percent_of_wall_time_lost_event'] for d in result['system_event_summary']['by_issue_type']]) + result['total_percent_of_wall_time_lost_on_sky'] = sum([d['percent_of_wall_time_lost_on_sky'] for d in 
result['system_event_summary']['by_issue_type']]) result['reporting_period_wall_time'] = (self.stop - self.start).total_seconds() result['report_start'] = self.start.isoformat() result['report_stop'] = self.stop.isoformat() diff --git a/SAS/TMSS/backend/test/t_reports.py b/SAS/TMSS/backend/test/t_reports.py index 2c2c5919074..b39a324eea1 100755 --- a/SAS/TMSS/backend/test/t_reports.py +++ b/SAS/TMSS/backend/test/t_reports.py @@ -645,9 +645,11 @@ class FailureReportTest(unittest.TestCase): # assertions, SUs 1 and 2 should be in here, SU 3 should be ignored due to lack of severity # check grand total - self.assertIn('total_duration_lost', report) - self.assertEqual(report['total_duration_lost'], 21600) # 2 failure events of 3 hours each - self.assertAlmostEqual(report['total_percent_of_wall_time_lost'], 0.0684931506849315) + self.assertIn('total_duration_lost_event', report) + self.assertEqual(report['total_duration_lost_event'], 21600) # 2 failure events of 3 hours each + self.assertAlmostEqual(report['total_percent_of_wall_time_lost_event'], 0.0684931506849315) + self.assertEqual(report['total_duration_lost_on_sky'], 9999) # 2 units: 6666 + 3333 + self.assertAlmostEqual(report['total_percent_of_wall_time_lost_on_sky'], 0.03170662100456621) # 100 * 9999 s / wall time # check aggregates self.assertIn('human', str(report['system_event_summary']['by_issue_type'])) # event 1 @@ -656,12 +658,16 @@ class FailureReportTest(unittest.TestCase): self.assertNotIn('noisy', str(report['system_event_summary']['by_issue_subtype'])) # event 3 missing # check histogram (events get binned correctly?) 
- self.assertEqual(report['lost_observing_time_histogram']['2023-01-01'], 3600) # event 1 - self.assertEqual(report['lost_observing_time_histogram']['2023-02-01'], 7200) # event 1 - self.assertEqual(report['lost_observing_time_histogram']['2023-03-01'], 3600) # event 2 - self.assertEqual(report['lost_observing_time_histogram']['2023-04-01'], 7200) # event 2 - self.assertNotIn('2023-05-01', report['lost_observing_time_histogram']) # event 3 missing - self.assertNotIn('2023-06-01', report['lost_observing_time_histogram']) # event 3 missing + self.assertEqual(report['lost_time_histogram_event']['2023-01-01'], 3600) # event 1 + self.assertEqual(report['lost_time_histogram_event']['2023-02-01'], 7200) # event 1 + self.assertEqual(report['lost_time_histogram_event']['2023-03-01'], 3600) # event 2 + self.assertEqual(report['lost_time_histogram_event']['2023-04-01'], 7200) # event 2 + self.assertEqual(report['lost_time_histogram_on_sky']['2023-01-01'], 6666) # event 1 + self.assertEqual(report['lost_time_histogram_on_sky']['2023-03-01'], 3333) # event 2 + + self.assertNotIn('2023-05-01', report['lost_time_histogram_event']) # event 3 missing + self.assertNotIn('2023-06-01', report['lost_time_histogram_event']) # event 3 missing + self.assertNotIn('2023-05-01', report['lost_time_histogram_on_sky']) # event 3 missing # check SUs self.assertEqual(len(report['failed_scheduling_units']), 2) -- GitLab