Review notes (text in front of the first "diff --git" line is ignored by git apply / patch).

NOTE(review): this patch was reconstructed from a copy in which all line breaks and
indentation were collapsed. Indentation and the position of blank context lines are a
best guess, and the blob ids on the "index" lines belong to the pre-review content.
Regenerate the patch from the working tree before applying it.

Changes made to the added code during review:
- models.py: calculate_quality_task / calculate_quality_sasid read 'task.calculated_quality',
  a field that this same patch removes; the AttributeError was swallowed and both values
  were always None. They now read calculated_qualities['per_task'].
- models.py: the 'poor' threshold was tested in an elif below the 'moderate' test and could
  never be reached; the highest threshold is now tested first.
- models.py: the sas_id count now uses the new value of the task that is being saved (it is
  not in the database yet) and ignores tasks without a calculated quality.
- models.py: the thresholds are read once, configured values are merged over the defaults.
- algorithms.py: a quality row is only written when there is a value (a None value left an
  unclosed <tr> behind), the totals no longer depend on a variable bound inside the loop.
- templates: 'default' instead of 'default_if_none', so "-" is also shown when
  calculated_qualities itself is empty.

diff --git a/atdb/taskdatabase/migrations/0027_task_calculated_qualities.py b/atdb/taskdatabase/migrations/0027_task_calculated_qualities.py
new file mode 100644
index 0000000000000000000000000000000000000000..70a41edfce437a98eaaf1a641e3cd9bcf12027a0
--- /dev/null
+++ b/atdb/taskdatabase/migrations/0027_task_calculated_qualities.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.4 on 2023-06-21 07:08
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('taskdatabase', '0026_task_calculated_quality'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='task',
+            name='calculated_qualities',
+            field=models.JSONField(blank=True, null=True),
+        ),
+    ]
diff --git a/atdb/taskdatabase/migrations/0028_remove_task_calculated_quality.py b/atdb/taskdatabase/migrations/0028_remove_task_calculated_quality.py
new file mode 100644
index 0000000000000000000000000000000000000000..5873eba6e49a031d0502bab2a384dc5092748c79
--- /dev/null
+++ b/atdb/taskdatabase/migrations/0028_remove_task_calculated_quality.py
@@ -0,0 +1,17 @@
+# Generated by Django 3.1.4 on 2023-06-21 07:46
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('taskdatabase', '0027_task_calculated_qualities'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='task',
+            name='calculated_quality',
+        ),
+    ]
diff --git a/atdb/taskdatabase/models.py b/atdb/taskdatabase/models.py
index bee87c796d2f6116762481fa224b87ddb2ed5337..6679e14a3ccdb0b13b986f808935fe71d468d819 100644
--- a/atdb/taskdatabase/models.py
+++ b/atdb/taskdatabase/models.py
@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
 datetime_format_string = '%Y-%m-%dT%H:%M:%SZ'
 verified_statusses = ['stored','validated','scrubbed','archived','finished','suspended','discarded']
 
+
 class Workflow(models.Model):
     description = models.CharField(max_length=500, blank=True, null=True)
     tag = models.CharField(max_length=30, blank=True, null=True)
@@ -67,40 +68,117 @@ def convert_summary_to_list_for_template(task):
 
     return list
 
-def calculate_quality(task):
-    """"
-    calculate the quality of this task based on rfi_percent values
-    The threshold values are written from a configuration json blob
-    """
-    try:
-        # read the quality_thresholds from the Configuration table
-        try:
-            quality_thresholds = json.loads(Configuration.objects.get(key='quality_thresholds').value)
-        except:
-            quality_thresholds = {
-                "moderate" : 20,
-                "poor" : 50,
-                "overall_moderate" : 50,
-                "overall_good": 90,
-            }
-
-        summary = task.quality_json["summary"]
-        quality = task.quality
-
-        for key in summary:
-            record = summary[key]
-            rfi_percent = int(record['rfi_percent'])
-            if rfi_percent > 0:
-                quality = "good"
-                if rfi_percent > quality_thresholds['moderate']:
-                    quality = "moderate"
-                elif rfi_percent > quality_thresholds['poor']:
-                    quality = "poor"
-        return quality
-
-    except Exception as error:
-        logger.info(error)
+
+def calculate_qualities(task):
+    """
+    calculate the quality for this task, but also the quality for all the combined tasks of this sas_id
+
+    Returns the calculated_qualities dict of this task with the keys 'per_task' and 'per_sasid' updated,
+    a value is None when it could not be calculated.
+    """
+
+    def read_quality_thresholds():
+        """read the thresholds from the Configuration table, defaults are used for missing values"""
+        thresholds = {
+            "moderate": 20,
+            "poor": 50,
+            "overall_poor": 50,
+            "overall_good": 90,
+        }
+        try:
+            thresholds.update(json.loads(Configuration.objects.get(key='quality_thresholds').value))
+        except Exception:
+            # no (valid) 'quality_thresholds' configuration, keep the defaults
+            pass
+        return thresholds
+
+    def calculate_quality_task(task, thresholds):
+        """
+        calculate the quality of this task based on rfi_percent values
+
+        Using this algorithm from SDCO (shown with the default thresholds):
+             rfi_percent <= 20 % is good
+             20 % < rfi_percent <= 50 % is moderate
+             rfi_percent > 50 % is poor
+             except when rfi_percent = 0, then the previous value is kept
+        """
+        try:
+            # the 'calculated_quality' field no longer exists, start from the value stored in the json field
+            quality = (task.calculated_qualities or {}).get('per_task')
+
+            for record in task.quality_json["summary"].values():
+                rfi_percent = int(record['rfi_percent'])
+                # test the highest threshold first, otherwise 'poor' can never be reached
+                if rfi_percent > thresholds['poor']:
+                    quality = "poor"
+                elif rfi_percent > thresholds['moderate']:
+                    quality = "moderate"
+                elif rfi_percent > 0:
+                    quality = "good"
+            return quality
+
+        except Exception as error:
+            logger.info(error)
+            return None
+
+    def calculate_quality_sasid(task, quality_task, thresholds):
+        """
+        calculate the overall quality per sas_id, based on the quality of all tasks with the same sas_id
+
+        Using this algorithm from SDCO (shown with the default thresholds):
+             if at least 90 % of the tasks have a good quality then the dataset is good
+             if at least 50 % of the tasks have a poor quality then the dataset is poor
+             otherwise it is moderate
+        """
+        try:
+            # this task is not saved yet, so use its new quality instead of the stored value
+            qualities = [quality_task]
+            for other_task in Task.objects.filter(sas_id=task.sas_id).exclude(pk=task.pk):
+                qualities.append((other_task.calculated_qualities or {}).get('per_task'))
+
+            # count the known qualities, tasks without a calculated quality are ignored
+            d = {'poor': 0, 'moderate': 0, 'good': 0}
+            for quality in qualities:
+                if quality in d:
+                    d[quality] += 1
+
+            quality_sasid = "moderate"
+            total = d['poor'] + d['moderate'] + d['good']
+            if total > 0:
+                percentage_poor = (d['poor'] / total) * 100
+                percentage_good = (d['good'] / total) * 100
+
+                if percentage_poor >= thresholds['overall_poor']:
+                    quality_sasid = 'poor'
+
+                if percentage_good >= thresholds['overall_good']:
+                    quality_sasid = 'good'
+
+            return quality_sasid
+
+        except Exception as error:
+            logger.info(error)
+            return None
+
+    # --- main function body ---
+    thresholds = read_quality_thresholds()
+
+    # calculate the quality for this task
+    calculated_quality_task = calculate_quality_task(task, thresholds)
+
+    # calculate the overall quality of all tasks for this sas_id
+    calculated_quality_sasid = calculate_quality_sasid(task, calculated_quality_task, thresholds)
+
+    # combine both calculated qualities in the json field (dict)
+    d = task.calculated_qualities
+    if not d:
+        d = {}
+
+    d['per_task'] = calculated_quality_task
+    d['per_sasid'] = calculated_quality_sasid
+
+    return d
 
 
 class Task(models.Model):
 
@@ -111,8 +189,7 @@ class Task(models.Model):
     new_status = models.CharField(max_length=50, default="defining", null=True)
     status = models.CharField(db_index=True, default="unknown", max_length=50,blank=True, null=True)
     quality = models.CharField(max_length=10,blank=True, null=True)
-    calculated_quality = models.CharField(max_length=10, blank=True, null=True)
-
+    calculated_qualities = models.JSONField(null=True, blank=True)
     resume = models.BooleanField(verbose_name="Resume", default=True)
     creationTime = models.DateTimeField(verbose_name="CreationTime",default=datetime.utcnow, blank=True)
 
@@ -148,9 +225,9 @@ class Task(models.Model):
         if (self.status != 'scrubbed') & (self.new_status == 'scrubbed'):
             self.resume = False
 
-        # nv:19jun2023, calculate the quality of this task
+        # nv:19jun2023, calculate the qualities for this task
         if (self.status != 'stored') & (self.new_status == 'stored'):
-            self.quality = calculate_quality(self)
+            self.calculated_qualities = calculate_qualities(self)
 
         super(Task, self).save(*args, **kwargs)
 
diff --git a/atdb/taskdatabase/serializers.py b/atdb/taskdatabase/serializers.py
index 82b0240a7dbdef4aee997519c66c2f009997f885..3b19c5d51807e7574368a5cceb5600cfee331282 100644
--- a/atdb/taskdatabase/serializers.py
+++ b/atdb/taskdatabase/serializers.py
@@ -41,7 +41,7 @@ class TaskWriteSerializer(serializers.ModelSerializer):
                   'project','sas_id','priority','purge_policy','cleanup_policy','resume',
                   'new_workflow_id','new_workflow_uri','workflow',
                   'stage_request_id',
-                  'status','new_status','quality','calculated_quality',
+                  'status','new_status','quality','calculated_qualities',
                   'inputs','outputs','metrics','status_history','remarks',
                   'size_to_process','size_processed','total_processing_time',
                   'log_entries','meta_scheduling','environment','archive'
@@ -95,7 +95,7 @@ class TaskReadSerializer(serializers.ModelSerializer):
                   'project','sas_id','priority','purge_policy','cleanup_policy','resume',
                   'workflow',
                   'stage_request_id',
-                  'status','new_status','quality','calculated_quality',
+                  'status','new_status','quality','calculated_qualities',
                   'inputs','outputs','metrics','remarks','status_history',
                   'size_to_process', 'size_processed', 'total_processing_time',
                   'log_entries','meta_scheduling','environment','archive'
@@ -126,7 +126,7 @@ class TaskReadSerializerFast(serializers.ModelSerializer):
                   'project','sas_id','priority','purge_policy','cleanup_policy','resume',
                   'workflow',
                   'stage_request_id',
-                  'status','new_status','quality','calculated_quality',
+                  'status','new_status','quality','calculated_qualities',
                   'inputs','outputs','metrics','archive',
                   'size_to_process', 'size_processed', 'total_processing_time',
                   ]
diff --git a/atdb/taskdatabase/services/algorithms.py b/atdb/taskdatabase/services/algorithms.py
index 73cb0a5fb6630ec27d595d0b74871d5d33cc6e32..e336fc31fb7d0cfc499675f2b87371a49e64d2d7 100644
--- a/atdb/taskdatabase/services/algorithms.py
+++ b/atdb/taskdatabase/services/algorithms.py
@@ -840,6 +840,29 @@ def construct_summary(task):
                 line += '<td colspan="2">' + str(round(record['size_ratio'],3)) + '</td>'
                 line += '</tr>'
 
+                try:
+                    # add RFI percentage (if present)
+                    rfi = record['rfi_percent']
+                    line += '<tr><td><b>RFI percentage</b></td>'
+                    line += '<td colspan="2">' + str(rfi) + '</td>'
+                    line += '</tr>'
+                except:
+                    pass
+
+                try:
+                    # add calculated quality (if present)
+                    calculated_qualities = task.calculated_qualities
+                    if calculated_qualities:
+                        task_quality = calculated_qualities['per_task']
+                        # only start the row when there is a value, a failure halfway leaves an unclosed <tr>
+                        if task_quality:
+                            line += '<tr><td><b>Calculated Quality</b></td>'
+                            line += '<td class="' + task_quality + '">' + str(task_quality) + '</td>'
+                            line += '</tr>'
+
+                except:
+                    pass
+
                 try:
                     added = record['added']
                     if added:
@@ -870,6 +893,20 @@ def construct_summary(task):
             totals += '<tr><td colspan="2"><b>Input size</b></td><td>' + str(total_size_input) + '</td></tr>'
             totals += '<tr><td colspan="2"><b>Output size</b><td>' + str(total_size_output) + '</td></tr>'
             totals += '<tr><td colspan="2"><b>Ratio</b></td><td>' + str(round(total_size_output / total_size_input, 3)) + '</td></tr>'
+
+            try:
+                # add calculated quality per sasid (if present)
+                # read it from the task, the variable of the same name further up is only bound inside the loop
+                calculated_qualities = task.calculated_qualities
+                if calculated_qualities:
+                    sasid_quality = calculated_qualities['per_sasid']
+                    if sasid_quality:
+                        totals += '<tr><td colspan="2"><b>Calculated Quality</b></td>'
+                        totals += '<td class="' + sasid_quality + '">' + str(sasid_quality) + '</td></tr>'
+
+            except:
+                pass
+
         except:
             pass
 
diff --git a/atdb/taskdatabase/templates/taskdatabase/index.html b/atdb/taskdatabase/templates/taskdatabase/index.html
index 7e8f64480d752c6d1f7240ed74756106d3b08a32..b496c34575ecb7ee9db76270aaeaac99747afc55 100644
--- a/atdb/taskdatabase/templates/taskdatabase/index.html
+++ b/atdb/taskdatabase/templates/taskdatabase/index.html
@@ -31,7 +31,7 @@
             {% include 'taskdatabase/pagination.html' %}
         </div>
     </div>
-    <p class="footer"> Version 20 June 2023
+    <p class="footer"> Version 21 June 2023
 </div>
 
 {% include 'taskdatabase/refresh.html' %}
diff --git a/atdb/taskdatabase/templates/taskdatabase/quality/headers.html b/atdb/taskdatabase/templates/taskdatabase/quality/headers.html
index dfbaafffa00569caa3939019e38222b688f15b91..fc98c2dcb3dd8d407585ab59e0a6a2defb54b7ea 100644
--- a/atdb/taskdatabase/templates/taskdatabase/quality/headers.html
+++ b/atdb/taskdatabase/templates/taskdatabase/quality/headers.html
@@ -30,5 +30,7 @@
     <th>Conditions</th>
     <th>Plots</th>
     <th>Annotate</th>
+    <th>Calc Q</th>
     <th>Quality</th>
+    <th>Validate</th>
 </tr>
\ No newline at end of file
diff --git a/atdb/taskdatabase/templates/taskdatabase/quality/tasks.html b/atdb/taskdatabase/templates/taskdatabase/quality/tasks.html
index 8db9779e87ed269a17237d6a2bd259a17f1d16d3..b4bc7eeb01e5cbec86374c23354179c671fb97a2 100644
--- a/atdb/taskdatabase/templates/taskdatabase/quality/tasks.html
+++ b/atdb/taskdatabase/templates/taskdatabase/quality/tasks.html
@@ -68,8 +68,13 @@
                     {% endif %}
                 {% endif %}
             </td>
+            <td class="{{ task.calculated_qualities.per_task }}">{{ task.calculated_qualities.per_task|default:"-" }}</td>
             <td class="{{ task.quality }}">{{ task.quality|default_if_none:"-" }}</td>
-
+            <td>
+                {% if task.status == "stored" %}
+                    <a href="{% url 'task-setstatus-view' task.pk 'validated' my_tasks.number %}" class="btn btn-success btn-sm" role="button"><i class="fas fa-check"></i> validate</a>
+                {% endif %}
+            </td>
         </tr>
         </div>
     {% endif %}
diff --git a/atdb/taskdatabase/templates/taskdatabase/tasks.html b/atdb/taskdatabase/templates/taskdatabase/tasks.html
index f3147466eee459c4be083a81c9eb7c08b574993b..8b5ebad6a5def466c78dd6aeee0f1c9cf63ca275 100644
--- a/atdb/taskdatabase/templates/taskdatabase/tasks.html
+++ b/atdb/taskdatabase/templates/taskdatabase/tasks.html
@@ -68,9 +68,6 @@
                         <a href="{% url 'task-hold-resume' task.pk 'resume' my_tasks.number %}" class="btn btn-success btn-sm" role="button"><i class="fas fa-play"></i> start</a>
                     {% endif %}
 
-                    {% if task.status == "stored" %}
-                        <a href="{% url 'task-setstatus-view' task.pk 'validated' my_tasks.number %}" class="btn btn-success btn-sm" role="button"><i class="fas fa-check"></i> validate</a>
-                    {% endif %}
                 {% endif %}
 
                 {% include "taskdatabase/failures/retry_buttons.html" %}
diff --git a/atdb/taskdatabase/templates/taskdatabase/validation/headers.html b/atdb/taskdatabase/templates/taskdatabase/validation/headers.html
index 8d9206ca98b26fd37f7be57399c24576e87a9abf..70a3e1d8fbbc23bcf030f723c1bacfff344fd748 100644
--- a/atdb/taskdatabase/templates/taskdatabase/validation/headers.html
+++ b/atdb/taskdatabase/templates/taskdatabase/validation/headers.html
@@ -25,6 +25,7 @@
     <th>Plots</th>
     <th>Summary</th>
     <th>Annotate</th>
+    <th>Calc Q</th>
     <th>Quality</th>
     <th>Validate (choose a Q)</th>
     <th>Discard</th>
diff --git a/atdb/taskdatabase/templates/taskdatabase/validation/tasks.html b/atdb/taskdatabase/templates/taskdatabase/validation/tasks.html
index 981e77c077752618f907253c62ca40945a6a2810..251d7af57210b6289a1e7c3f2e045f370cc583e2 100644
--- a/atdb/taskdatabase/templates/taskdatabase/validation/tasks.html
+++ b/atdb/taskdatabase/templates/taskdatabase/validation/tasks.html
@@ -73,6 +73,7 @@
                     {% endif %}
                 {% endif %}
             </td>
+            <td class="{{ task.calculated_qualities.per_sasid }}">{{ task.calculated_qualities.per_sasid|default:"-" }}</td>
             <td class="{{ task.quality }}">{{ task.quality|default_if_none:"-" }}</td>
             <td>{% include "taskdatabase/validation/validation_buttons.html" %}</td>
             <td><a href="{% url 'task-discard-view-sasid' task.pk 'discarded' my_tasks.number %}" class="btn btn-danger btn-sm" role="button"><i class="fas fa-trash-alt"></i></a></td>
diff --git a/atdb/taskdatabase/views.py b/atdb/taskdatabase/views.py
index 9ea5e92def3b69884d6ec8250cd10fa4ed2fbe76..f732752c3c9e233ec80401fb66263f7ea9124041 100644
--- a/atdb/taskdatabase/views.py
+++ b/atdb/taskdatabase/views.py
@@ -61,7 +61,6 @@ class TaskFilter(filters.FilterSet):
             'sas_id': ['exact', 'icontains', 'in'],
             'status': ['exact', 'icontains', 'in', 'startswith'],
             'quality': ['exact', 'icontains', 'in', 'startswith'],
-            'calculated_quality': ['exact', 'icontains', 'in'],
             'purge_policy': ['exact'],
             'cleanup_policy': ['exact','icontains','in'],
             'priority': ['exact', 'lte', 'gte'],