Skip to content
Snippets Groups Projects
Commit 0f595da7 authored by Nico Vermaas's avatar Nico Vermaas
Browse files

changes after review

parent 928e4938
No related branches found
No related tags found
2 merge requests!304update branch with master,!302automatic quality validation
Pipeline #52168 passed
...@@ -5,6 +5,8 @@ from django.utils.timezone import datetime, timedelta ...@@ -5,6 +5,8 @@ from django.utils.timezone import datetime, timedelta
from django.conf import settings from django.conf import settings
import json import json
import logging import logging
from .services import calculated_qualities as qualities
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# constants # constants
...@@ -69,120 +71,6 @@ def convert_summary_to_list_for_template(task): ...@@ -69,120 +71,6 @@ def convert_summary_to_list_for_template(task):
def calculate_qualities(task):
    """
    Calculate the quality of this task and the overall quality of all
    tasks that share its sas_id.

    :param task: a (possibly unsaved) Task instance whose quality_json
        holds a "summary" dict of records with an 'rfi_percent' value.
    :returns: dict with keys 'per_task' and 'per_sasid'; the same dict is
        also stored on task.calculated_qualities (the task is NOT saved here).
    """
    # Read the quality_thresholds from the Configuration table, falling back
    # to the SDCO defaults when the setting is absent or not valid JSON.
    try:
        quality_thresholds = json.loads(Configuration.objects.get(key='quality_thresholds').value)
    except Exception:
        quality_thresholds = {
            "moderate": 20,
            "poor": 50,
            "overall_poor": 50,
            "overall_good": 90,
        }

    def calculate_quality_task(task):
        """
        Classify a single task based on its rfi_percent values.
        SDCO algorithm:
            rfi_i <= 20%        : good
            20% <= rfi_i <= 50% : moderate
            rfi_i > 50%         : poor
        except when rfi_percent = 0, which assigns no quality.
        Returns "good"/"moderate"/"poor", or None when no record qualifies
        or the summary is missing/malformed (the error is only logged).
        """
        try:
            summary = task.quality_json["summary"]
            quality = None
            # NOTE(review): each record overwrites 'quality', so the last
            # record with rfi_percent > 0 determines the result.
            for key in summary:
                record = summary[key]
                rfi_percent = int(record['rfi_percent'])
                if rfi_percent > 0:
                    quality = "good"
                    if rfi_percent >= quality_thresholds['moderate']:
                        quality = "moderate"
                    if rfi_percent > quality_thresholds['poor']:
                        quality = "poor"
            return quality
        except Exception as error:
            logger.info(error)

    def calculate_quality_sasid(unsaved_task):
        """
        Calculate the overall quality per sas_id, from all tasks with the
        same sas_id.
        SDCO algorithm:
            more than 90% of the files good : good
            more than 50% of the files poor : poor
            otherwise                       : moderate
        Returns the overall quality, or None when no task has a calculated
        quality or on unexpected errors (which are only logged).
        """
        try:
            # tally the per-task qualities of every task with this sas_id
            counts = {'poor': 0, 'moderate': 0, 'good': 0}
            for task in Task.objects.filter(sas_id=unsaved_task.sas_id):
                # this runs inside the overridden 'Task.save', so the task
                # being saved is not in the database yet; use the in-memory
                # (unsaved) instance instead of the stale stored row.
                t = unsaved_task if task.id == unsaved_task.id else task
                try:
                    counts[t.calculated_qualities['per_task']] += 1
                except Exception:
                    # ignore tasks without a calculated quality
                    # (they are probably not 'stored' yet)
                    pass

            total = counts['poor'] + counts['moderate'] + counts['good']
            if total > 0:
                percentage_poor = (counts['poor'] / total) * 100
                percentage_good = (counts['good'] / total) * 100
                quality_sasid = "moderate"
                if percentage_poor >= quality_thresholds['overall_poor']:
                    quality_sasid = 'poor'
                if percentage_good >= quality_thresholds['overall_good']:
                    quality_sasid = 'good'
                return quality_sasid
        except Exception as error:
            logger.info(error)

    # --- main function body ---
    # calculate the quality for this task
    calculated_quality_task = calculate_quality_task(task)

    # store the result on the (unsaved) task so the sas_id aggregation
    # below can see it; nothing is persisted to the database here
    d = task.calculated_qualities or {}
    d['per_task'] = calculated_quality_task
    task.calculated_qualities = d

    # update the overall quality of all tasks for this sas_id
    d['per_sasid'] = calculate_quality_sasid(task)
    return d
class Task(models.Model): class Task(models.Model):
# Task control properties # Task control properties
...@@ -230,7 +118,26 @@ class Task(models.Model): ...@@ -230,7 +118,26 @@ class Task(models.Model):
# nv:19jun2023, calculate the qualities for this task # nv:19jun2023, calculate the qualities for this task
if (self.status != 'stored') & (self.new_status == 'stored'): if (self.status != 'stored') & (self.new_status == 'stored'):
self.calculated_qualities = calculate_qualities(self)
# read the quality_thresholds from the Configuration table
try:
quality_thresholds = json.loads(Configuration.objects.get(key='quality_thresholds').value)
except:
quality_thresholds = {
"moderate": 20,
"poor": 50,
"overall_poor": 50,
"overall_good": 90,
}
tasks_for_this_sasid = Task.objects.filter(sas_id=self.sas_id)
self.calculated_qualities = qualities.calculate_qualities(self, tasks_for_this_sasid, quality_thresholds)
try:
self.remarks['calculated_qualities'] = self.calculated_qualities
except:
self.remarks = {}
self.remarks['calculated_qualities'] = self.calculated_qualities
super(Task, self).save(*args, **kwargs) super(Task, self).save(*args, **kwargs)
...@@ -344,7 +251,6 @@ class Task(models.Model): ...@@ -344,7 +251,6 @@ class Task(models.Model):
except: except:
return None return None
@property @property
def sasid_is_verified(self): def sasid_is_verified(self):
for task in Task.objects.filter(sas_id=self.sas_id): for task in Task.objects.filter(sas_id=self.sas_id):
......
...@@ -842,14 +842,13 @@ def construct_summary(task): ...@@ -842,14 +842,13 @@ def construct_summary(task):
line += '<td colspan="2">' + str(round(record['size_ratio'],3)) + '</td>' line += '<td colspan="2">' + str(round(record['size_ratio'],3)) + '</td>'
line += '</tr>' line += '</tr>'
try: if 'rfi_percent' in record.keys():
# add RFI percentage (if present) # add RFI percentage (if present)
rfi = record['rfi_percent'] rfi = record['rfi_percent']
line += '<tr><td><b>RFI percentage</b></td>' line += '<tr><td><b>RFI percentage</b></td>'
line += '<td colspan="2">' + str(rfi) + '</td>' line += '<td colspan="2">' + str(rfi) + '</td>'
line += '</tr>' line += '</tr>'
except:
pass
try: try:
# add calculated quality (if present) # add calculated quality (if present)
......
import logging
logger = logging.getLogger(__name__)
import json
def calculate_qualities(task, tasks_for_this_sasid, quality_thresholds):
    """
    Calculate the quality of this task and the overall quality of all
    tasks that share its sas_id.

    :param task: a (possibly unsaved) task whose quality_json holds a
        "summary" dict of records with an 'rfi_percent' value.
    :param tasks_for_this_sasid: iterable of all tasks with this sas_id
        (including 'task' itself, identified by matching .id).
    :param quality_thresholds: dict with keys 'moderate', 'poor',
        'overall_poor' and 'overall_good' (percentages).
    :returns: dict with keys 'per_task' and 'per_sasid'; the same dict is
        also stored on task.calculated_qualities (the task is NOT saved here).
    """

    def calculate_quality_task(task):
        """
        Classify a single task based on its rfi_percent values.
        SDCO algorithm:
            rfi_i <= 20%        : good
            20% <= rfi_i <= 50% : moderate
            rfi_i > 50%         : poor
        except when rfi_percent = 0, which assigns no quality.
        Returns "good"/"moderate"/"poor", or None when no record qualifies
        or the summary is missing/malformed (the error is only logged).
        """
        try:
            summary = task.quality_json["summary"]
            quality = None
            # NOTE(review): each record overwrites 'quality', so the last
            # record with rfi_percent > 0 determines the result.
            for key in summary:
                record = summary[key]
                rfi_percent = int(record['rfi_percent'])
                if rfi_percent > 0:
                    quality = "good"
                    if rfi_percent >= quality_thresholds['moderate']:
                        quality = "moderate"
                    if rfi_percent > quality_thresholds['poor']:
                        quality = "poor"
            return quality
        except Exception as error:
            logger.info(error)

    def calculate_quality_sasid(unsaved_task, tasks_for_this_sasid):
        """
        Calculate the overall quality per sas_id, from all tasks with the
        same sas_id.
        SDCO algorithm:
            more than 90% of the files good : good
            more than 50% of the files poor : poor
            otherwise                       : moderate
        Returns the overall quality, or None when no task has a calculated
        quality or on unexpected errors (which are only logged).
        """
        try:
            # tally the per-task qualities of every task with this sas_id
            counts = {'poor': 0, 'moderate': 0, 'good': 0}
            for task in tasks_for_this_sasid:
                # this runs inside the overridden 'Task.save', so the task
                # being saved is not in the database yet; use the in-memory
                # (unsaved) instance instead of the stale stored row.
                t = unsaved_task if task.id == unsaved_task.id else task
                try:
                    counts[t.calculated_qualities['per_task']] += 1
                except Exception:
                    # ignore tasks without a calculated quality
                    # (they are probably not 'stored' yet)
                    pass

            total = counts['poor'] + counts['moderate'] + counts['good']
            if total > 0:
                percentage_poor = (counts['poor'] / total) * 100
                percentage_good = (counts['good'] / total) * 100
                quality_sasid = "moderate"
                if percentage_poor >= quality_thresholds['overall_poor']:
                    quality_sasid = 'poor'
                if percentage_good >= quality_thresholds['overall_good']:
                    quality_sasid = 'good'
                return quality_sasid
        except Exception as error:
            logger.info(error)

    # --- main function body ---
    # calculate the quality for this task
    calculated_quality_task = calculate_quality_task(task)

    # store the result on the (unsaved) task so the sas_id aggregation
    # below can see it; nothing is persisted to the database here
    result = task.calculated_qualities or {}
    result['per_task'] = calculated_quality_task

    task.calculated_qualities = result

    # update the overall quality of all tasks for this sas_id
    result['per_sasid'] = calculate_quality_sasid(task, tasks_for_this_sasid)
    return result
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment