# calculated_qualities.py

import logging
from .common import get_summary_flavour, SummaryFlavour
logger = logging.getLogger(__name__)


def calculate_qualities(task, tasks_for_this_sasid, quality_thresholds):
    """
    Calculate the quality of this task and the combined quality of all tasks of its sas_id.

    Both results are stored in ``task.calculated_qualities`` (under the keys
    ``'per_task'`` and ``'per_sasid'``); the task itself is not saved here.

    Args:
        task: the (not yet saved) task. Its ``quality_json``, ``id`` and
            ``calculated_qualities`` attributes are read, and
            ``calculated_qualities`` is (re)assigned.
        tasks_for_this_sasid: iterable of tasks sharing the sas_id. Each needs
            an ``id`` and a ``calculated_qualities`` attribute.
        quality_thresholds: mapping with the keys ``'moderate'``, ``'poor'``,
            ``'overall_poor'`` and ``'overall_good'``.

    Returns:
        The dict that is now ``task.calculated_qualities``. Each of
        ``'per_task'`` and ``'per_sasid'`` is ``"good"``, ``"moderate"``,
        ``"poor"`` or ``None`` when the quality could not be determined.
    """

    def calculate_quality_task(task):
        """
        Calculate the quality of a single task from its rfi percentage.

        The threshold values come from ``quality_thresholds``.
        Algorithm (from SDCO), as implemented:
            rfi_percent <= 0                         -> no quality (None)
            0 < rfi_percent < moderate threshold     -> "good"
            moderate threshold <= rfi_percent <= poor threshold -> "moderate"
            rfi_percent > poor threshold             -> "poor"

        Returns None when the rfi percentage is missing or cannot be parsed.
        """
        try:
            summary = task.quality_json["summary"]
            summary_flavour = get_summary_flavour(task)

            # stays None for an unknown flavour or an empty summary
            rfi_percent = None

            if summary_flavour == SummaryFlavour.IMAGING_COMPRESSION.value:
                rfi_percent = float(summary['details']['rfi_percentage'])
            elif summary_flavour == SummaryFlavour.DEFAULT.value:
                # the summary is keyed by a filename that is not known in advance,
                # so walk the records; when there are several, the last one wins
                for record in summary.values():
                    rfi_percent = float(record['rfi_percent'])

            # 'not ... > 0' (rather than '<= 0') also rejects NaN
            if rfi_percent is None or not rfi_percent > 0:
                return None

            quality = "good"
            if rfi_percent >= quality_thresholds['moderate']:
                quality = "moderate"
            if rfi_percent > quality_thresholds['poor']:
                quality = "poor"
            return quality

        except (AttributeError, LookupError, TypeError, ValueError):
            # the quality information (or a threshold) is missing or malformed,
            # so the quality cannot be calculated. Deliberately continue without it.
            return None
        except Exception:
            # this runs while a task is being saved, so never let it break the
            # caller; but do not hide unexpected problems either.
            logger.warning("unexpected error while calculating the quality of a task", exc_info=True)
            return None

    def calculate_quality_sasid(unsaved_task, tasks_for_this_sasid):
        """
        Calculate the overall quality per sas_id, based on all tasks with the same sas_id.

        The threshold values come from ``quality_thresholds``.
        Algorithm (from SDCO), as implemented; only tasks that have a
        calculated quality are counted:
            at least 'overall_good' % of the tasks are good  -> "good"
            at least 'overall_poor' % of the tasks are poor  -> "poor"
            otherwise                                        -> "moderate"
        When both conditions hold, "good" wins. Returns None when no task has
        a calculated quality, or when the calculation fails.
        """
        try:
            # gather the results of all the calculated_quality values for this sas_id
            counts = {'poor': 0, 'moderate': 0, 'good': 0}

            for task in tasks_for_this_sasid:
                # because this all happens in the overridden 'Task.save', the actual saving
                # has not yet occurred. So use the calculated quality from the unsaved task instead.
                t = unsaved_task if task.id == unsaved_task.id else task

                try:
                    counts[t.calculated_qualities['per_task']] += 1
                except (AttributeError, LookupError, TypeError):
                    # ignore the tasks that have no (known) calculated quality.
                    continue

            total = sum(counts.values())
            if total == 0:
                return None

            percentage_poor = (counts['poor'] / total) * 100
            percentage_good = (counts['good'] / total) * 100

            quality_sasid = "moderate"
            if percentage_poor >= quality_thresholds['overall_poor']:
                quality_sasid = 'poor'
            if percentage_good >= quality_thresholds['overall_good']:
                quality_sasid = 'good'
            return quality_sasid

        except Exception as error:
            # best effort: a failure here must not prevent the task from being saved
            logger.warning("could not calculate the quality for the sas_id: %s", error, exc_info=True)
            return None

    # --- main function body ---
    # calculate the quality for this task
    calculated_quality_task = calculate_quality_task(task)

    # store the result in task.calculated_qualities (not yet saved in the database)
    qualities = task.calculated_qualities
    if not qualities:
        qualities = {}
    qualities['per_task'] = calculated_quality_task
    task.calculated_qualities = qualities

    # update the overall quality of all tasks for this sas_id
    calculated_quality_sasid = calculate_quality_sasid(task, tasks_for_this_sasid)

    # store the result in task.calculated_qualities (not yet saved in the database)
    qualities['per_sasid'] = calculated_quality_sasid

    return qualities