Skip to content
Snippets Groups Projects
Commit 8d96bb67 authored by Nico Vermaas's avatar Nico Vermaas
Browse files

database change (add calculated_qualities)

add info to summary screen
expand and refactor quality calculation
parent 62e2a428
Branches
No related tags found
2 merge requests: !304 update branch with master, !302 automatic quality validation
Pipeline #51783 passed
Showing
with 189 additions and 43 deletions
# Generated by Django 3.1.4 on 2023-06-21 07:08
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``calculated_qualities`` JSON field to the ``task`` model."""

    # must be applied after the migration that introduced 'calculated_quality'
    dependencies = [('taskdatabase', '0026_task_calculated_quality')]

    operations = [
        migrations.AddField(
            field=models.JSONField(null=True, blank=True),
            name='calculated_qualities',
            model_name='task',
        ),
    ]
# Generated by Django 3.1.4 on 2023-06-21 07:46
from django.db import migrations
class Migration(migrations.Migration):
    """Drop the ``calculated_quality`` field from the ``task`` model."""

    # must be applied after the migration that added 'calculated_qualities'
    dependencies = [('taskdatabase', '0027_task_calculated_qualities')]

    operations = [
        migrations.RemoveField(name='calculated_quality', model_name='task'),
    ]
...@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__) ...@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
datetime_format_string = '%Y-%m-%dT%H:%M:%SZ' datetime_format_string = '%Y-%m-%dT%H:%M:%SZ'
verified_statusses = ['stored','validated','scrubbed','archived','finished','suspended','discarded'] verified_statusses = ['stored','validated','scrubbed','archived','finished','suspended','discarded']
class Workflow(models.Model): class Workflow(models.Model):
description = models.CharField(max_length=500, blank=True, null=True) description = models.CharField(max_length=500, blank=True, null=True)
tag = models.CharField(max_length=30, blank=True, null=True) tag = models.CharField(max_length=30, blank=True, null=True)
...@@ -67,10 +68,23 @@ def convert_summary_to_list_for_template(task): ...@@ -67,10 +68,23 @@ def convert_summary_to_list_for_template(task):
return list return list
def calculate_quality(task):
def calculate_qualities(task):
""""
calculate the quality for this task, but also the quality for all the combined tasks of this sas_id
"""
def calculate_quality_task(task):
"""" """"
calculate the quality of this task based on rfi_percent values calculate the quality of this task based on rfi_percent values
The threshold values are written from a configuration json blob The threshold values are written from a configuration json blob
Using this algorithm from SDCO:
rfi_i <= 20 % is good
20% <= rfi_i <= 50 is moderate
rfi_i > 50 is poor.
except when rfi_percent = 0
""" """
try: try:
# read the quality_thresholds from the Configuration table # read the quality_thresholds from the Configuration table
...@@ -80,12 +94,12 @@ def calculate_quality(task): ...@@ -80,12 +94,12 @@ def calculate_quality(task):
quality_thresholds = { quality_thresholds = {
"moderate": 20, "moderate": 20,
"poor": 50, "poor": 50,
"overall_moderate" : 50, "overall_poor": 50,
"overall_good": 90, "overall_good": 90,
} }
summary = task.quality_json["summary"] summary = task.quality_json["summary"]
quality = task.quality quality = task.calculated_quality
for key in summary: for key in summary:
record = summary[key] record = summary[key]
...@@ -101,6 +115,67 @@ def calculate_quality(task): ...@@ -101,6 +115,67 @@ def calculate_quality(task):
except Exception as error: except Exception as error:
logger.info(error) logger.info(error)
def calculate_quality_sasid(task):
    """
    Calculate the overall quality per sas_id, based on all tasks with the same sas_id.

    The threshold values are read from the 'quality_thresholds' json blob in the
    Configuration table; the built-in defaults are used for every key that is missing
    there (or when the record cannot be read or parsed at all).

    Algorithm (thresholds are inclusive):
        - at least 'overall_good' percent of the counted tasks are 'good'  => 'good'
        - at least 'overall_poor' percent of the counted tasks are 'poor'  => 'poor'
        - otherwise                                                        => 'moderate'
    When both conditions hold, 'good' wins because it is checked last.

    Only tasks whose calculated_qualities['per_task'] is 'poor', 'moderate' or 'good'
    are counted; tasks without a calculated quality are ignored.

    :param task: the task whose sas_id selects the tasks to combine
    :return: 'good', 'moderate' or 'poor'; None when an unexpected error occurred
             (the error is logged)
    """
    default_thresholds = {
        "moderate": 20,
        "poor": 50,
        "overall_poor": 50,
        "overall_good": 90,
    }

    try:
        # read the quality_thresholds from the Configuration table
        try:
            configured = json.loads(Configuration.objects.get(key='quality_thresholds').value)
            # merge with the defaults, so that a stored blob without the
            # 'overall_poor'/'overall_good' keys does not break the calculation
            quality_thresholds = {**default_thresholds, **configured}
        except Exception:
            quality_thresholds = default_thresholds

        # gather the per-task calculated quality of all the tasks for this sas_id
        counts = {'poor': 0, 'moderate': 0, 'good': 0}
        for sibling in Task.objects.filter(sas_id=task.sas_id):
            # the quality per task lives in the 'calculated_qualities' json field
            qualities = sibling.calculated_qualities
            per_task = qualities.get('per_task') if isinstance(qualities, dict) else None

            # skip tasks that have no (or an unknown) calculated quality
            if per_task in counts:
                counts[per_task] += 1

        quality_sasid = "moderate"
        total = counts['poor'] + counts['moderate'] + counts['good']
        if total > 0:
            percentage_poor = (counts['poor'] / total) * 100
            percentage_good = (counts['good'] / total) * 100

            if percentage_poor >= quality_thresholds['overall_poor']:
                quality_sasid = 'poor'

            if percentage_good >= quality_thresholds['overall_good']:
                quality_sasid = 'good'

        return quality_sasid

    except Exception as error:
        # best effort: a failing quality calculation must not block saving the task
        logger.info(error)
# --- main function body ---
# calculate the quality for this task
calculated_quality_task = calculate_quality_task(task)
# update the overall quality of all tasks for this sas_id
calculated_quality_sasid = calculate_quality_sasid(task)
# combine both calculated qualities in the json field (dict)
d = task.calculated_qualities
if not d:
d = {}
d['per_task'] = calculated_quality_task
d['per_sasid'] = calculated_quality_sasid
return d
class Task(models.Model): class Task(models.Model):
...@@ -111,8 +186,7 @@ class Task(models.Model): ...@@ -111,8 +186,7 @@ class Task(models.Model):
new_status = models.CharField(max_length=50, default="defining", null=True) new_status = models.CharField(max_length=50, default="defining", null=True)
status = models.CharField(db_index=True, default="unknown", max_length=50,blank=True, null=True) status = models.CharField(db_index=True, default="unknown", max_length=50,blank=True, null=True)
quality = models.CharField(max_length=10,blank=True, null=True) quality = models.CharField(max_length=10,blank=True, null=True)
calculated_quality = models.CharField(max_length=10, blank=True, null=True) calculated_qualities = models.JSONField(null=True, blank=True)
resume = models.BooleanField(verbose_name="Resume", default=True) resume = models.BooleanField(verbose_name="Resume", default=True)
creationTime = models.DateTimeField(verbose_name="CreationTime",default=datetime.utcnow, blank=True) creationTime = models.DateTimeField(verbose_name="CreationTime",default=datetime.utcnow, blank=True)
...@@ -148,9 +222,9 @@ class Task(models.Model): ...@@ -148,9 +222,9 @@ class Task(models.Model):
if (self.status != 'scrubbed') & (self.new_status == 'scrubbed'): if (self.status != 'scrubbed') & (self.new_status == 'scrubbed'):
self.resume = False self.resume = False
# nv:19jun2023, calculate the quality of this task # nv:19jun2023, calculate the qualities for this task
if (self.status != 'stored') & (self.new_status == 'stored'): if (self.status != 'stored') & (self.new_status == 'stored'):
self.quality = calculate_quality(self) self.calculated_qualities = calculate_qualities(self)
super(Task, self).save(*args, **kwargs) super(Task, self).save(*args, **kwargs)
......
...@@ -41,7 +41,7 @@ class TaskWriteSerializer(serializers.ModelSerializer): ...@@ -41,7 +41,7 @@ class TaskWriteSerializer(serializers.ModelSerializer):
'project','sas_id','priority','purge_policy','cleanup_policy','resume', 'project','sas_id','priority','purge_policy','cleanup_policy','resume',
'new_workflow_id','new_workflow_uri','workflow', 'new_workflow_id','new_workflow_uri','workflow',
'stage_request_id', 'stage_request_id',
'status','new_status','quality','calculated_quality', 'status','new_status','quality','calculated_qualities',
'inputs','outputs','metrics','status_history','remarks', 'inputs','outputs','metrics','status_history','remarks',
'size_to_process','size_processed','total_processing_time', 'size_to_process','size_processed','total_processing_time',
'log_entries','meta_scheduling','environment','archive' 'log_entries','meta_scheduling','environment','archive'
...@@ -95,7 +95,7 @@ class TaskReadSerializer(serializers.ModelSerializer): ...@@ -95,7 +95,7 @@ class TaskReadSerializer(serializers.ModelSerializer):
'project','sas_id','priority','purge_policy','cleanup_policy','resume', 'project','sas_id','priority','purge_policy','cleanup_policy','resume',
'workflow', 'workflow',
'stage_request_id', 'stage_request_id',
'status','new_status','quality','calculated_quality', 'status','new_status','quality','calculated_qualities',
'inputs','outputs','metrics','remarks','status_history', 'inputs','outputs','metrics','remarks','status_history',
'size_to_process', 'size_processed', 'total_processing_time', 'size_to_process', 'size_processed', 'total_processing_time',
'log_entries','meta_scheduling','environment','archive' 'log_entries','meta_scheduling','environment','archive'
...@@ -126,7 +126,7 @@ class TaskReadSerializerFast(serializers.ModelSerializer): ...@@ -126,7 +126,7 @@ class TaskReadSerializerFast(serializers.ModelSerializer):
'project','sas_id','priority','purge_policy','cleanup_policy','resume', 'project','sas_id','priority','purge_policy','cleanup_policy','resume',
'workflow', 'workflow',
'stage_request_id', 'stage_request_id',
'status','new_status','quality','calculated_quality', 'status','new_status','quality','calculated_qualities',
'inputs','outputs','metrics','archive', 'inputs','outputs','metrics','archive',
'size_to_process', 'size_processed', 'total_processing_time', 'size_to_process', 'size_processed', 'total_processing_time',
] ]
......
...@@ -840,6 +840,27 @@ def construct_summary(task): ...@@ -840,6 +840,27 @@ def construct_summary(task):
line += '<td colspan="2">' + str(round(record['size_ratio'],3)) + '</td>' line += '<td colspan="2">' + str(round(record['size_ratio'],3)) + '</td>'
line += '</tr>' line += '</tr>'
try:
# add RFI percentage (if present)
rfi = record['rfi_percent']
line += '<tr><td><b>RFI percentage</b></td>'
line += '<td colspan="2">' + str(rfi) + '</td>'
line += '</tr>'
except:
pass
try:
# add calculated quality (if present)
calculated_qualities = task.calculated_qualities
if calculated_qualities:
line += '<tr><td><b>Calculated Quality</b></td>'
task_quality = calculated_qualities['per_task']
line += '<td class="' + task_quality + '">' + str(task_quality) + '</td>'
line += '</tr>'
except:
pass
try: try:
added = record['added'] added = record['added']
if added: if added:
...@@ -870,6 +891,17 @@ def construct_summary(task): ...@@ -870,6 +891,17 @@ def construct_summary(task):
totals += '<tr><td colspan="2"><b>Input size</b></td><td>' + str(total_size_input) + '</td></tr>' totals += '<tr><td colspan="2"><b>Input size</b></td><td>' + str(total_size_input) + '</td></tr>'
totals += '<tr><td colspan="2"><b>Output size</b><td>' + str(total_size_output) + '</td></tr>' totals += '<tr><td colspan="2"><b>Output size</b><td>' + str(total_size_output) + '</td></tr>'
totals += '<tr><td colspan="2"><b>Ratio</b></td><td>' + str(round(total_size_output / total_size_input, 3)) + '</td></tr>' totals += '<tr><td colspan="2"><b>Ratio</b></td><td>' + str(round(total_size_output / total_size_input, 3)) + '</td></tr>'
try:
# add calculated quality per sasid (if present)
if calculated_qualities:
sasid_quality = calculated_qualities['per_sasid']
totals += '<tr><td colspan="2"><b>Calculated Quality</b></td>'
totals += '<td class="' + sasid_quality + '">' + str(sasid_quality) + '</td></tr>'
except:
pass
except: except:
pass pass
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
{% include 'taskdatabase/pagination.html' %} {% include 'taskdatabase/pagination.html' %}
</div> </div>
</div> </div>
<p class="footer"> Version 20 June 2023 <p class="footer"> Version 21 June 2023
</div> </div>
{% include 'taskdatabase/refresh.html' %} {% include 'taskdatabase/refresh.html' %}
......
...@@ -30,5 +30,7 @@ ...@@ -30,5 +30,7 @@
<th>Conditions</th> <th>Conditions</th>
<th>Plots</th> <th>Plots</th>
<th>Annotate</th> <th>Annotate</th>
<th>Calc Q</th>
<th>Quality</th> <th>Quality</th>
<th>Validate</th>
</tr> </tr>
\ No newline at end of file
...@@ -68,8 +68,13 @@ ...@@ -68,8 +68,13 @@
{% endif %} {% endif %}
{% endif %} {% endif %}
</td> </td>
<td class="{{ task.calculated_qualities.per_task }}">{{ task.calculated_qualities.per_task|default_if_none:"-" }}</td>
<td class="{{ task.quality }}">{{ task.quality|default_if_none:"-" }}</td> <td class="{{ task.quality }}">{{ task.quality|default_if_none:"-" }}</td>
<td>
{% if task.status == "stored" %}
<a href="{% url 'task-setstatus-view' task.pk 'validated' my_tasks.number %}" class="btn btn-success btn-sm" role="button"><i class="fas fa-check"></i> validate</a>
{% endif %}
</td>
</tr> </tr>
</div> </div>
{% endif %} {% endif %}
......
...@@ -68,9 +68,6 @@ ...@@ -68,9 +68,6 @@
<a href="{% url 'task-hold-resume' task.pk 'resume' my_tasks.number %}" class="btn btn-success btn-sm" role="button"><i class="fas fa-play"></i> start</a> <a href="{% url 'task-hold-resume' task.pk 'resume' my_tasks.number %}" class="btn btn-success btn-sm" role="button"><i class="fas fa-play"></i> start</a>
{% endif %} {% endif %}
{% if task.status == "stored" %}
<a href="{% url 'task-setstatus-view' task.pk 'validated' my_tasks.number %}" class="btn btn-success btn-sm" role="button"><i class="fas fa-check"></i> validate</a>
{% endif %}
{% endif %} {% endif %}
{% include "taskdatabase/failures/retry_buttons.html" %} {% include "taskdatabase/failures/retry_buttons.html" %}
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
<th>Plots</th> <th>Plots</th>
<th>Summary</th> <th>Summary</th>
<th>Annotate</th> <th>Annotate</th>
<th>Calc Q</th>
<th>Quality</th> <th>Quality</th>
<th>Validate (choose a Q)</th> <th>Validate (choose a Q)</th>
<th>Discard</th> <th>Discard</th>
......
...@@ -73,6 +73,7 @@ ...@@ -73,6 +73,7 @@
{% endif %} {% endif %}
{% endif %} {% endif %}
</td> </td>
<td class="{{ task.calculated_qualities.per_sasid }}">{{ task.calculated_qualities.per_sasid|default_if_none:"-" }}</td>
<td class="{{ task.quality }}">{{ task.quality|default_if_none:"-" }}</td> <td class="{{ task.quality }}">{{ task.quality|default_if_none:"-" }}</td>
<td>{% include "taskdatabase/validation/validation_buttons.html" %}</td> <td>{% include "taskdatabase/validation/validation_buttons.html" %}</td>
<td><a href="{% url 'task-discard-view-sasid' task.pk 'discarded' my_tasks.number %}" class="btn btn-danger btn-sm" role="button"><i class="fas fa-trash-alt"></i></a></td> <td><a href="{% url 'task-discard-view-sasid' task.pk 'discarded' my_tasks.number %}" class="btn btn-danger btn-sm" role="button"><i class="fas fa-trash-alt"></i></a></td>
......
...@@ -61,7 +61,6 @@ class TaskFilter(filters.FilterSet): ...@@ -61,7 +61,6 @@ class TaskFilter(filters.FilterSet):
'sas_id': ['exact', 'icontains', 'in'], 'sas_id': ['exact', 'icontains', 'in'],
'status': ['exact', 'icontains', 'in', 'startswith'], 'status': ['exact', 'icontains', 'in', 'startswith'],
'quality': ['exact', 'icontains', 'in', 'startswith'], 'quality': ['exact', 'icontains', 'in', 'startswith'],
'calculated_quality': ['exact', 'icontains', 'in'],
'purge_policy': ['exact'], 'purge_policy': ['exact'],
'cleanup_policy': ['exact','icontains','in'], 'cleanup_policy': ['exact','icontains','in'],
'priority': ['exact', 'lte', 'gte'], 'priority': ['exact', 'lte', 'gte'],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment