# calculated_qualities.py
import logging

from .common import get_summary_flavour, SummaryFlavour

logger = logging.getLogger(__name__)
def calculate_qualities(task, tasks_for_this_sasid, quality_thresholds):
    """
    Calculate the quality for this task, and also the combined quality of all tasks of this sas_id.
    """
    def calculate_quality_task(task):
        """
        Calculate the quality of this task based on its rfi_percent value.
        The threshold values are read from a configuration json blob.

        Using this algorithm from SDCO:
            rfi_i <= 20%        is good
            20% <= rfi_i <= 50% is moderate
            rfi_i > 50%         is poor
        except when rfi_percent = 0 (then no quality is assigned).
        """
        try:
            summary = task.quality_json["summary"]
            quality = None

            summary_flavour = get_summary_flavour(task)

            if summary_flavour == SummaryFlavour.IMAGING_COMPRESSION.value:
                rfi_percent = float(summary['details']['rfi_percentage'])

            if summary_flavour == SummaryFlavour.DEFAULT.value:
                # there is 1 key, but it is a filename which is not known up front
                for key in summary:
                    record = summary[key]
                    rfi_percent = float(record['rfi_percent'])

            if rfi_percent > 0:
                quality = "good"
                if rfi_percent >= quality_thresholds['moderate']:
                    quality = "moderate"
                if rfi_percent > quality_thresholds['poor']:
                    quality = "poor"

            return quality

        except Exception:
            # when rfi_percentage is missing, the quality cannot be calculated;
            # just continue without it
            pass
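    # Sketch (assumption) of the two 'summary' structures handled above, derived
    # from the fields that calculate_quality_task reads; real payloads may carry
    # more fields:
    #
    #   IMAGING_COMPRESSION flavour:
    #       {"details": {"rfi_percentage": 11.2, ...}, ...}
    #
    #   DEFAULT flavour (a single key per file, filename not known up front):
    #       {"L123456_SB000.MS": {"rfi_percent": 11.2, ...}, ...}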
    def calculate_quality_sasid(unsaved_task, tasks_for_this_sasid):
        """
        Calculate the overall quality per sas_id, based on the other tasks with the same sas_id.
        The threshold values are read from a configuration json blob.

        Using this algorithm from SDCO:
            if more than 90% of all files have a good quality, then the dataset is good.
            If more than 50% of all files have a poor quality, then the dataset is poor.
            Otherwise it is moderate.
        """
        try:
            # gather the results of all the calculated_quality values for this sas_id
            qualities = {'poor': 0, 'moderate': 0, 'good': 0}

            for task in tasks_for_this_sasid:
                # because this all happens in the overridden 'Task.save', the actual saving has not yet occurred.
                # So use the calculated quality from the unsaved task instead.
                if task.id == unsaved_task.id:
                    t = unsaved_task
                else:
                    t = task

                try:
                    key = t.calculated_qualities['per_task']
                    qualities[key] = qualities[key] + 1
                except Exception:
                    # ignore the tasks that have no calculated quality
                    pass

            total = qualities['poor'] + qualities['moderate'] + qualities['good']
            quality_sasid = None

            if total > 0:
                percentage_poor = (qualities['poor'] / total) * 100
                percentage_good = (qualities['good'] / total) * 100
                quality_sasid = "moderate"

                if percentage_poor >= quality_thresholds['overall_poor']:
                    quality_sasid = 'poor'

                if percentage_good >= quality_thresholds['overall_good']:
                    quality_sasid = 'good'

            return quality_sasid

        except Exception as error:
            logger.info(error)
    # --- main function body ---

    # calculate the quality for this task
    calculated_quality_task = calculate_quality_task(task)

    # store the result in task.calculated_qualities (not yet saved in the database)
    qualities = task.calculated_qualities
    if not qualities:
        qualities = {}
    qualities['per_task'] = calculated_quality_task
    task.calculated_qualities = qualities

    # update the overall quality of all tasks for this sas_id
    calculated_quality_sasid = calculate_quality_sasid(task, tasks_for_this_sasid)

    # store the result in task.calculated_qualities (not yet saved in the database)
    qualities['per_sasid'] = calculated_quality_sasid

    return qualities
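
# ---------------------------------------------------------------------------
# Usage sketch (assumption, not part of the module): 'task' is expected to be a
# not-yet-saved Task model instance with 'quality_json' and 'calculated_qualities'
# fields. A caller inside the overridden Task.save() could look roughly like this:
#
#   tasks_for_this_sasid = Task.objects.filter(sas_id=self.sas_id)
#   quality_thresholds = {"moderate": 20, "poor": 50,
#                         "overall_good": 90, "overall_poor": 50}
#   self.calculated_qualities = calculate_qualities(
#       self, tasks_for_this_sasid, quality_thresholds)
#
# The returned dict then holds something like
#   {"per_task": "good", "per_sasid": "moderate"}.
# ---------------------------------------------------------------------------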