# test_calculated_qualities.py
from django.test import TestCase
import json
import taskdatabase.tests.test_calculated_qualities_outputs as outputs

from taskdatabase.services import calculated_qualities as qualities
from taskdatabase.models import Configuration, Task, Workflow
from taskdatabase.services.common import get_summary_flavour, SummaryFlavour

class TestCalculatedQualities(TestCase):
    """Tests for the per-task and per-sas_id quality calculations and for
    summary-flavour detection across the known pipeline output formats."""

    @classmethod
    def setUpTestData(cls):
        """Run once per class: store the default quality thresholds in the Configuration table."""
        print("setUpTestData: Run once to set up non-modified data for all class methods.")
        # Set up non-modified objects used by all test methods
        quality_thresholds = {
            "moderate": 20,
            "poor": 50,
            "overall_poor": 50,
            "overall_good": 90,
        }
        Configuration.objects.create(key="quality_thresholds", value=json.dumps(quality_thresholds))

    def setUp(self):
        """Run before every test: create workflows and tasks covering all summary flavours."""
        print("setUp: Run once for every test method to setup clean data.")

        # tasks without summary information (not allowed according to the interface definition)
        workflow_no_summary = Workflow(workflow_uri="unknown_workflow_no_summary")
        workflow_no_summary.save()
        Task.objects.get_or_create(sas_id=33333, status='processed', outputs=outputs.no_summary, workflow=workflow_no_summary)

        # create a list of Tasks with various values of rfi_percent to test the quality algorithms
        workflow_requantisation = Workflow(workflow_uri="psrfits_requantisation")
        workflow_requantisation.save()

        # rfi_percent=0
        Task.objects.get_or_create(sas_id=54321, status='processed', outputs=outputs.default_summary_flavour_with_rfi_percent_zero_1, workflow=workflow_requantisation)

        # rfi_percent 11, 22, 31, 52
        for rfi_outputs in (
            outputs.default_summary_flavour_with_rfi_1,
            outputs.default_summary_flavour_with_rfi_2,
            outputs.default_summary_flavour_with_rfi_3,
            outputs.default_summary_flavour_with_rfi_4,
        ):
            Task.objects.get_or_create(sas_id=54321, status='processed', outputs=rfi_outputs, workflow=workflow_requantisation)

        # tasks without rfi_percent (simulating a yet unknown pipeline with summary information, but no rfi percentage)
        workflow_no_rfi = Workflow(workflow_uri="unknown_workflow")
        workflow_no_rfi.save()
        Task.objects.get_or_create(sas_id=12345, status='processed', outputs=outputs.default_summary_flavour_without_rfi_1, workflow=workflow_no_rfi)

        # workflow_imaging_compression, rfi_percentage=1.7186448587105623
        workflow_imaging_compression = Workflow(workflow_uri="imaging_compress_pipeline_v011")
        workflow_imaging_compression.save()
        Task.objects.get_or_create(sas_id=55555, status='processed', outputs=outputs.imaging_compression_summary_flavor_with_rfi_1, workflow=workflow_imaging_compression)

        # LINC pipelines (no rfi_percent onboard yet)
        workflow_link_calibrator = Workflow(workflow_uri="linc_calibrator_v4_2")
        workflow_link_calibrator.save()
        Task.objects.get_or_create(sas_id=666666, status='processed', outputs=outputs.link_calibrator_summary_without_rfi, workflow=workflow_link_calibrator)

        workflow_link_target = Workflow(workflow_uri="linc_target_v4_2")
        workflow_link_target.save()
        Task.objects.get_or_create(sas_id=666667, status='processed', outputs=outputs.link_target_summary_without_rfi, workflow=workflow_link_target)

    def _gather_qualities(self, tasks, quality_thresholds):
        """Run the quality algorithm for every task and tally the per-task verdicts.

        Returns a tuple ``(quality_values, quality_per_sasid)`` where
        ``quality_values`` counts the 'poor'/'moderate'/'good' per-task results
        and ``quality_per_sasid`` is the last aggregated per-sas_id verdict seen
        (``None`` when no task produced one).
        """
        quality_values = {'poor': 0, 'moderate': 0, 'good': 0}
        quality_per_sasid = None

        for task in tasks:
            qualities.calculate_qualities(task, tasks, quality_thresholds)
            try:
                key = task.calculated_qualities['per_task']
                quality_values[key] = quality_values[key] + 1
                quality_per_sasid = task.calculated_qualities['per_sasid']
            except (KeyError, TypeError):
                # ignore the tasks that have no calculated quality:
                # calculated_qualities may be None (TypeError on subscript)
                # or lack the 'per_task'/'per_sasid' keys (KeyError)
                pass

        return quality_values, quality_per_sasid

    def test_run_calculations_when_task_becomes_stored(self):
        """Storing a task triggers the overridden Task.save, which runs the quality calculations."""
        for task in Task.objects.all():
            task.new_status = 'stored'
            # this triggers the overridden save function in models.task
            task.save()

        # only the tasks whose summary carries an rfi percentage get a per-sasid
        # quality: the 5 'psrfits_requantisation' tasks (sas_id 54321) and the
        # imaging-compression task (sas_id 55555) — 6 of the 10 tasks in total
        count = 0
        for task in Task.objects.all():
            if task.calculated_qualities['per_sasid']:
                count += 1

        self.assertEqual(count, 6)

    def test_calculated_qualities(self):
        """
        calculate the quality per task and per sas_id based on rfi_percent values
        The threshold values are read from a configuration jsonfield

        Using this algorithm from SDCO:
                rfi_i <= 20 % is good
                20% <= rfi_i <= 50 is moderate
                rfi_i > 50 is poor.
                except when rfi_percent	= 0

        Using this algorithm from SDCO:
             if more then 90 % of all files have a good quality then the dataset has good condition.
             If more then 50 % of all files have a poor quality then the dataset is poor
             otherwise is moderate.
        """

        # read the quality thresholds from the test database
        quality_thresholds = json.loads(Configuration.objects.get(key="quality_thresholds").value)

        # get the tasks for sas_id 54321
        tasks_for_this_sasid = Task.objects.filter(sas_id=54321)

        # run the algorithms and gather the values
        quality_values, quality_per_sasid = self._gather_qualities(tasks_for_this_sasid, quality_thresholds)

        self.assertEqual(quality_values, {'poor': 1, 'moderate': 2, 'good': 2})

        # not 90% = good, and not >50% = poor so 'moderate'
        self.assertEqual(quality_per_sasid, 'moderate')

    def test_calculated_qualities_with_optimistic_thresholds(self):
        """
        calculate the quality per task and per sas_id based on rfi_percent values
        The threshold values are extremely optimistic, simulating changes made by the user

        Using this algorithm from SDCO:
                rfi_i <= 50 % is good
                50% <= rfi_i <= 90 is moderate
                rfi_i > 90 is poor.
                except when rfi_percent	= 0

        Using this algorithm from SDCO:
             if more then 50 % of all files have a good quality then the dataset has good condition.
             If more then 10 % of all files have a poor quality then the dataset is poor
             otherwise is moderate.
        """

        # optimistic thresholds, poor data doesn't exist
        quality_thresholds = {
            "moderate": 50,
            "poor": 90,
            "overall_poor": 10,
            "overall_good": 50,
        }

        # get the tasks for sas_id 54321
        tasks_for_this_sasid = Task.objects.filter(sas_id=54321)

        # run the algorithms and gather the values
        quality_values, quality_per_sasid = self._gather_qualities(tasks_for_this_sasid, quality_thresholds)

        # rfi_percentages are 11,22,31,52 for the tasks of this sasid
        # with the optimistic parameters that means that the first 3 are 'good', and last one is moderate. No poor
        self.assertEqual(quality_values, {'poor': 0, 'moderate': 1, 'good': 4})

        # 3 out of 4 are 'good', 75% is above the 50% threshold, so 'good'
        self.assertEqual(quality_per_sasid, 'good')

    def test_faulty_thresholds(self):
        """
        what happens if the user makes a typo in the threshold?
        No quality should be calculated at all.
        """

        # faulty thresholds
        quality_thresholds = {
            "moderate": "a",
            "poor": 50,
            "overall_poor": 50,
            "overall_good": 90,
        }

        # get the tasks for sas_id 54321
        tasks_for_this_sasid = Task.objects.filter(sas_id=54321)

        # run the algorithms and gather the values
        quality_values, quality_per_sasid = self._gather_qualities(tasks_for_this_sasid, quality_thresholds)

        self.assertEqual(quality_values, {'poor': 0, 'moderate': 0, 'good': 0})
        self.assertEqual(quality_per_sasid, None)

    def test_no_summary(self):
        """
        test when there is no summary section found
        """

        # get the tasks for sas_id 33333 (created without summary information)
        tasks_for_this_sasid = Task.objects.filter(sas_id=33333)

        # 1 is enough, they all have the same summary_flavour
        task = tasks_for_this_sasid[0]
        summary_flavour = get_summary_flavour(task)

        self.assertEqual(summary_flavour, None)

    def test_default_summary_flavour(self):
        """
        test if the DEFAULT summary_flavour is detected
        """

        # get the tasks for sas_id 54321
        tasks_for_this_sasid = Task.objects.filter(sas_id=54321)

        # 1 is enough, they all have the same summary_flavour
        task = tasks_for_this_sasid[0]
        summary_flavour = get_summary_flavour(task)

        self.assertEqual(summary_flavour, SummaryFlavour.DEFAULT.value)

    def test_imaging_compression_summary_flavour(self):
        """
        test if the IMAGING_COMPRESSION summary_flavour is detected
        """

        tasks_for_this_sasid = Task.objects.filter(sas_id=55555)

        # 1 is enough, they all have the same summary_flavour
        task = tasks_for_this_sasid[0]
        summary_flavour = get_summary_flavour(task)

        self.assertEqual(summary_flavour, SummaryFlavour.IMAGING_COMPRESSION.value)

    def test_link_calibrator_summary_flavour(self):
        """
        test if the LINC_CALIBRATOR summary_flavour is detected
        """

        tasks_for_this_sasid = Task.objects.filter(sas_id=666666)

        # 1 is enough, they all have the same summary_flavour
        task = tasks_for_this_sasid[0]
        summary_flavour = get_summary_flavour(task)

        self.assertEqual(summary_flavour, SummaryFlavour.LINC_CALIBRATOR.value)

    def test_link_target_summary_flavour(self):
        """
        test if the LINC_TARGET summary_flavour is detected
        """

        tasks_for_this_sasid = Task.objects.filter(sas_id=666667)

        # 1 is enough, they all have the same summary_flavour
        task = tasks_for_this_sasid[0]
        summary_flavour = get_summary_flavour(task)

        self.assertEqual(summary_flavour, SummaryFlavour.LINC_TARGET.value)