Commit f05498e5 authored by Nico Vermaas
Browse files

Merge branch 'master' into 'acceptance'

Query Page:

See merge request !143
parents df61969d 07c2aec4
Pipeline #22414 passed with stages
in 5 minutes and 53 seconds
......@@ -30,6 +30,11 @@ These diagrams show the current implementation and are kept up-to-date.
![](atdb/docs/ATDB-LDV%20Workflow%20Diagram.png)
### GUI implementation
* https://app.diagrams.net/#G16R8L06OFiKHFHBUA6FhrNVZVAaQBC2tU
![](atdb/docs/ATDB-LDV%20GUI.png)
## Deployed Instances
### main GUI:
......
......@@ -188,8 +188,8 @@ LOGOUT_REDIRECT_URL = '/atdb'
STATIC_URL = '/atdb/static/'
STATIC_ROOT = os.path.join(BASE_DIR, 'static')
ALL_STATUSSES = ['defining','defined','staging','staged','processing','processed','validated', 'scrubbing','scrubbed','archiving','archived','finished']
ACTIVE_STATUSSES = ['staging','staged','processing','processed','validated', 'scrubbing','scrubbed','archiving','archived']
STATUSSES_WITH_DATA = ['staged','processing','processed','validated', 'scrubbing','scrubbed','archiving','archived']
ALL_STATUSSES = ['defining','defined','staging','staged','fetching','fetched','processing','processed','storing','stored','validated', 'scrubbing','scrubbed','archiving','archived','finished']
ACTIVE_STATUSSES = ['staging','staged','fetching','fetched','processing','processed','validated','storing','stored','scrubbing','scrubbed','archiving','archived']
STATUSSES_WITH_DATA = ['staged','fetching','fetched','processing','processed','validated','storing','stored','scrubbing','scrubbed','archiving','archived']
QUERY_LIMIT_MULTI_CHANGE = 1000
\ No newline at end of file
......@@ -3,12 +3,12 @@ TD {
font-size: 12pt;
}
.defining,.staging,.processing,.scrub,.scrubbing,.archiving {
.defining,.staging,.fetching,.processing,.storing,.scrub,.scrubbing,.archiving {
font-style: italic;
color: green;
}
.defined,.staged,.processed,.validated,.scrubbed,.archived,.finished {
.defined,.staged,.fetched,.processed,.stored,.validated,.scrubbed,.archived,.finished {
background-color: lemonchiffon;
color: blue;
}
......@@ -17,7 +17,7 @@ TD {
background-color: lemonchiffon;
}
.error,.failed,.staging_failed,.processed_failed,.scrubbed_failed {
.error,.failed,.staging_failed,.processed_failed,.scrubbed_failed,.stored_failed {
color: red;
font-weight: bold;
}
......@@ -62,10 +62,9 @@ p.title {
.info {
background-color: #E0F8F8;
}
.max {
font-weight: bold;
color: darkgray;
color: green;
background-color: lightgreen;
}
......
atdb/docs/ATDB-LDV GUI.png

147 KB | W: | H:

atdb/docs/ATDB-LDV GUI.png

165 KB | W: | H:

atdb/docs/ATDB-LDV GUI.png
atdb/docs/ATDB-LDV GUI.png
atdb/docs/ATDB-LDV GUI.png
atdb/docs/ATDB-LDV GUI.png
  • 2-up
  • Swipe
  • Onion skin
# Generated by Django 3.1.4 on 2021-12-14 08:41
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Schema migration for the 'taskdatabase' app.

    Adds a nullable JSON field 'meta_scheduling' to both the 'task' and the
    'workflow' model, and alters the 'resume' and 'sas_id' fields of 'task'
    so that they carry an explicit verbose_name.
    """

    # this migration is applied on top of migration 0006 of the same app
    dependencies = [('taskdatabase', '0006_configuration_filter')]

    operations = [
        # --- new fields ---
        migrations.AddField(
            model_name='task',
            name='meta_scheduling',
            field=models.JSONField(null=True, blank=True),
        ),
        migrations.AddField(
            model_name='workflow',
            name='meta_scheduling',
            field=models.JSONField(null=True, blank=True),
        ),
        # --- altered fields (verbose_name added) ---
        migrations.AlterField(
            model_name='task',
            name='resume',
            field=models.BooleanField(verbose_name='Resume', default=True),
        ),
        migrations.AlterField(
            model_name='task',
            name='sas_id',
            field=models.CharField(verbose_name='SAS_ID', max_length=15, null=True, blank=True),
        ),
    ]
......@@ -12,6 +12,7 @@ class Workflow(models.Model):
commit_id = models.CharField(max_length=15, blank=True, null=True)
path = models.CharField(max_length=100, blank=True, null=True)
oi_size_fraction = models.FloatField(blank=True, null=True)
meta_scheduling = models.JSONField(null=True, blank=True)
def __str__(self):
return str(self.id)
......@@ -25,7 +26,7 @@ class Task(models.Model):
new_status = models.CharField(max_length=50, default="defining", null=True)
status = models.CharField(db_index=True, default="unknown", max_length=50,blank=True, null=True)
resume = models.BooleanField(default=True)
resume = models.BooleanField(verbose_name="Resume", default=True)
creationTime = models.DateTimeField(verbose_name="CreationTime",default=datetime.utcnow, blank=True)
priority = models.IntegerField(default=100, null=True)
......@@ -34,11 +35,13 @@ class Task(models.Model):
# LOFAR properties
project = models.CharField(max_length=100, blank=True, null=True, default="unknown")
sas_id = models.CharField(max_length=15, blank=True, null=True)
sas_id = models.CharField(verbose_name="SAS_ID",max_length=15, blank=True, null=True)
inputs = models.JSONField(null=True, blank=True)
outputs = models.JSONField(null=True, blank=True)
metrics = models.JSONField(null=True, blank=True)
meta_scheduling = models.JSONField(null=True, blank=True)
size_to_process = models.PositiveBigIntegerField(default=0, null=True, blank=True)
size_processed = models.PositiveBigIntegerField(default=0, null=True, blank=True)
total_processing_time = models.IntegerField(default=0, null=True, blank=True)
......@@ -57,6 +60,12 @@ class Task(models.Model):
def get_absolute_url(self):
return reverse('task-detail-view-api', kwargs={'pk': self.pk})
@property
def predecessor_status(self):
    """
    Status of the predecessor of this task.

    :return: the value of ``self.predecessor.status``, or the string
             "no_predecessor" when that value cannot be read
             (for example because ``self.predecessor`` is None).
    """
    try:
        return self.predecessor.status
    except Exception:
        # best effort: any failure to reach the predecessor is reported as 'no predecessor'.
        # 'Exception' (instead of a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return "no_predecessor"
class LogEntry(models.Model):
cpu_cycles = models.IntegerField(null=True,blank=True)
......
......@@ -73,6 +73,7 @@ class TaskWriteSerializer(serializers.ModelSerializer):
task.save()
return task
class TaskReadSerializer(serializers.ModelSerializer):
status_history = serializers.StringRelatedField(
......@@ -92,14 +93,15 @@ class TaskReadSerializer(serializers.ModelSerializer):
class Meta:
model = Task
fields = ['id','task_type','creationTime','filter','predecessor','successors',
fields = ['id','task_type','creationTime','filter',
'predecessor','predecessor_status','successors',
'project','sas_id','priority','purge_policy','resume',
'workflow',
'stage_request_id',
'status','new_status',
'inputs','outputs','metrics','status_history',
'size_to_process', 'size_processed', 'total_processing_time',
'log_entries'
'log_entries','meta_scheduling'
]
read_only_fields = fields
......@@ -123,7 +125,7 @@ class TaskReadSerializerFast(serializers.ModelSerializer):
"""
class Meta:
model = Task
fields = ['id','task_type','creationTime','filter','predecessor',
fields = ['id','task_type','creationTime','filter','predecessor','predecessor_status',
'project','sas_id','priority','purge_policy','resume',
'workflow',
'stage_request_id',
......
......@@ -4,7 +4,7 @@
Description: Business logic for ATDB. These functions are called from the views (views.py).
"""
from django.db.models import Q,Sum
from django.db.models import Q, Sum
import logging
from .common import timeit
from ..models import Task, LogEntry, Workflow
......@@ -16,6 +16,7 @@ DJANGO_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
logger = logging.getLogger(__name__)
@timeit
def get_size(status_list, type):
"""
......@@ -24,7 +25,7 @@ def get_size(status_list, type):
:return: summed sizes
"""
logger.info("get_size("+str(status_list)+")")
logger.info("get_size(" + str(status_list) + ")")
if type == 'processed':
field = 'size_processed'
......@@ -49,7 +50,7 @@ def convert_logentries_to_html(log_entries):
for log in log_entries:
line = "<tr><td><b>" + str(log.service) + '</b></td>'
line += "<td><b>" + str(log.step_name) + '</b></td>'
line +='<td class="' + log.status + '" >' + log.status + "</td>"
line += '<td class="' + log.status + '" >' + log.status + "</td>"
try:
line += "<td>" + str(log.timestamp.strftime("%m-%d-%Y, %H:%M:%S")) + "</td>"
except:
......@@ -57,7 +58,7 @@ def convert_logentries_to_html(log_entries):
line += "<td>" + str(log.cpu_cycles) + "</td>"
line += "<td>" + str(log.wall_clock_time) + "</td>"
if log.url_to_log_file!=None:
if log.url_to_log_file != None:
link = "<a href=" + '"' + str(log.url_to_log_file) + '" target="_blank">' + "logfile" + "</a>"
else:
link = "-"
......@@ -66,8 +67,8 @@ def convert_logentries_to_html(log_entries):
results += "</tbody>"
except Exception as err:
results = "<tr><td>"+str(err)+"</td></tr>"
#results = "<tr><td>no data</td></tr>"
results = "<tr><td>" + str(err) + "</td></tr>"
# results = "<tr><td>no data</td></tr>"
return results
......@@ -85,10 +86,10 @@ def convert_list_of_dicts_to_html(my_blob):
try:
for my_dict in my_list:
# iterate through the dict of key/values
for key,value in my_dict.items():
for key, value in my_dict.items():
try:
if "://" in value:
link = "<a href=" + '"' + value + '">' + key +"</a>"
link = "<a href=" + '"' + value + '">' + key + "</a>"
value = link
except:
pass
......@@ -100,6 +101,48 @@ def convert_list_of_dicts_to_html(my_blob):
return results
import xml.etree.ElementTree as ElementTree
from typing import Union, List, Dict
def _generate_html_from_json_tree(json_blob: Union[List, Dict], element: ElementTree.Element):
    """
    Recursively append an HTML table representation of json_blob to element.

    Lists/tuples become one row per item, dicts become one row per key/value
    pair (key in bold), and every other value is written as text using str().

    :param json_blob: the (nested) structure of lists, tuples, dicts and plain values to render
    :param element: the parent element that receives the generated sub elements (modified in place)
    :return: None
    """
    if isinstance(json_blob, (list, tuple)):
        # a <tbody> can hold the rows directly, any other parent gets its own nested <table>
        sub_table = element if element.tag == 'tbody' else ElementTree.SubElement(element, 'table')

        for item in json_blob:
            row = ElementTree.SubElement(sub_table, 'tr')
            cell = ElementTree.SubElement(row, 'td')
            _generate_html_from_json_tree(item, cell)

    elif isinstance(json_blob, dict):
        sub_table = element if element.tag == 'tbody' else ElementTree.SubElement(element, 'table')

        for key, value in json_blob.items():
            row = ElementTree.SubElement(sub_table, 'tr')

            key_element = ElementTree.SubElement(row, 'td')
            bold_key = ElementTree.SubElement(key_element, 'b')
            # ElementTree can only serialize text that is a string,
            # so a non-string key (e.g. an int) must be converted first.
            bold_key.text = str(key)

            value_element = ElementTree.SubElement(row, 'td')
            _generate_html_from_json_tree(value, value_element)

    else:
        # plain value: written as text in a width-limited cell.
        # NOTE(review): when called from the branches above, this <td> ends up nested inside
        # the <td> that was created for it. Kept as is to leave the generated markup unchanged;
        # confirm whether the templates rely on it.
        value = ElementTree.SubElement(element, 'td', attrib={"style": "max-width:25rem"})
        value.text = str(json_blob)


def convert_json_to_nested_table(json_blob):
    """
    Convert a (nested) JSON-like structure to the markup of a nested html table.

    :param json_blob: structure of lists, tuples, dicts and plain values
    :return: string containing the serialized <tbody> element with the generated rows
    """
    root_element = ElementTree.Element('tbody')
    _generate_html_from_json_tree(json_blob, root_element)

    return ElementTree.tostring(root_element, method='xml').decode()
def convert_config_to_html(querylist):
results = ""
try:
......@@ -111,12 +154,13 @@ def convert_config_to_html(querylist):
try:
if "://" in value:
link = "<a href=" + '"' + value + '">' + key +"</a>"
link = "<a href=" + '"' + value + '">' + key + "</a>"
value = link
except:
pass
line = "<tr><td><b>" + str(filter) + "</b></td> <td><b>" + str(key) + "</b></td><td>" + str(value) + "</td></tr>"
line = "<tr><td><b>" + str(filter) + "</b></td> <td><b>" + str(key) + "</b></td><td>" + str(
value) + "</td></tr>"
results = results + line
except:
results = "<tr><td>no data</td></tr>"
......@@ -126,7 +170,6 @@ def convert_config_to_html(querylist):
# aggregate information from the tasks table per workflow per status
def aggregate_resources_tasks(selection):
workflow_results = []
my_workflows = []
......@@ -140,7 +183,7 @@ def aggregate_resources_tasks(selection):
# construct the list of workflows (cheap)
for w in active_workflows:
try:
workflow = Workflow.objects.get(id = w['workflow'])
workflow = Workflow.objects.get(id=w['workflow'])
my_workflows.append(workflow)
except:
pass
......@@ -209,7 +252,7 @@ def aggregate_resources_logs(selection):
# construct the list of workflows (cheap)
for w in active_workflows:
try:
workflow = Workflow.objects.get(id = w['workflow'])
workflow = Workflow.objects.get(id=w['workflow'])
my_workflows.append(workflow)
except:
pass
......@@ -226,8 +269,8 @@ def aggregate_resources_logs(selection):
record = {}
# aggregate logentries per step for all active statusses (expensive)
logs = LogEntry.objects.filter(status=status)\
.filter(task__status__in=settings.ACTIVE_STATUSSES)\
logs = LogEntry.objects.filter(status=status) \
.filter(task__status__in=settings.ACTIVE_STATUSSES) \
.filter(task__workflow=workflow)
sum_cpu_cycles = logs.aggregate(Sum('cpu_cycles'))
......@@ -242,13 +285,12 @@ def aggregate_resources_logs(selection):
workflow_result['records_per_status'] = record_per_status
workflow_results.append(workflow_result)
return workflow_results
# aggregate information from the logentries table per workflow per status
def aggregate_resources_logs_version1():
records = []
# get all active tasks
......@@ -263,18 +305,18 @@ def aggregate_resources_logs_version1():
workflow_result = {}
# extract the workflow object (cheap)
workflow = Workflow.objects.get(id = w['workflow'])
workflow = Workflow.objects.get(id=w['workflow'])
# aggregate logentries per step for all active statusses
for status in settings.ACTIVE_STATUSSES:
record = {}
record['name'] = str(workflow.id) +' - '+ workflow.workflow_uri
record['name'] = str(workflow.id) + ' - ' + workflow.workflow_uri
# record['name'] = str(workflow.id)
record['status'] = status
# aggregate logentries per step for all active statusses (expensive)
logs = LogEntry.objects.filter(status=status)\
.filter(task__status__in=settings.ACTIVE_STATUSSES)\
logs = LogEntry.objects.filter(status=status) \
.filter(task__status__in=settings.ACTIVE_STATUSSES) \
.filter(task__workflow=workflow)
sum_cpu_cycles = logs.aggregate(Sum('cpu_cycles'))
......@@ -344,7 +386,6 @@ def human_readable(size_in_bytes):
def highlight_value(values, value_to_highlight):
# find 'class' left of the value
pos_value = values.find(str(value_to_highlight))
......@@ -362,8 +403,8 @@ def highlight_value(values, value_to_highlight):
return values
def construct_tasks_per_workflow_html(request, workflow_results):
def construct_tasks_per_workflow_html(request, workflow_results):
# --- Progress of tasks per active workflow ---
results_tasks = "<p>Progress of tasks per workflow</p>"
......@@ -382,7 +423,7 @@ def construct_tasks_per_workflow_html(request, workflow_results):
link = construct_link_to_workflow_api(request, workflow_result)
# values = "<tr><td colspan='5'><b>" + link + "</b></td></tr><tr>"
values = "<tr class='info'><td colspan='4'><b>" + link + "</b></td>"
values = "<tr class='info'><td colspan='6'><b>" + link + "</b></td>"
# add sizes
values += "<td><b>size to process:</b> " + str(human_readable(workflow_result['size_to_process'])) + "</td>"
......@@ -390,7 +431,8 @@ def construct_tasks_per_workflow_html(request, workflow_results):
percentage = round(int(workflow_result['size_processed']) / int(workflow_result['size_to_process']) * 100)
except:
percentage = 0
values += "<td><b>size processed:</b> " + str(human_readable(workflow_result['size_processed'])) + " (<b>"+ str(percentage) + "%</b>) </td>"
values += "<td><b>size processed:</b> " + str(
human_readable(workflow_result['size_processed'])) + " (<b>" + str(percentage) + "%</b>) </td>"
values += "<td><b>processing time:</b> " + str(workflow_result['total_processing_time']) + "</td>"
values += "<td colspan='8'></td></tr><tr>"
......@@ -409,7 +451,7 @@ def construct_tasks_per_workflow_html(request, workflow_results):
# distinguish active statusses
style = "inactive"
if key in settings.ACTIVE_STATUSSES or key=='active':
if key in settings.ACTIVE_STATUSSES or key == 'active':
style = "active"
# bonus: add a query link
......@@ -417,15 +459,15 @@ def construct_tasks_per_workflow_html(request, workflow_results):
values += "<td class=" + style + ">" + str(percentage) + "% (" + link + ")</td>"
# add sizes
# values += "<td>" + str(human_readable(workflow_result['size_to_process'])) + "</td>"
# try:
# percentage = round(int(workflow_result['size_processed']) / int(workflow_result['size_to_process']) * 100)
# except:
# percentage = 0
# values += "<td>" + str(human_readable(workflow_result['size_processed'])) + " ("+ str(percentage) + "%) </td>"
# values += "<td>" + str(workflow_result['total_processing_time']) + "</td>"
if max>0:
# values += "<td>" + str(human_readable(workflow_result['size_to_process'])) + "</td>"
# try:
# percentage = round(int(workflow_result['size_processed']) / int(workflow_result['size_to_process']) * 100)
# except:
# percentage = 0
# values += "<td>" + str(human_readable(workflow_result['size_processed'])) + " ("+ str(percentage) + "%) </td>"
# values += "<td>" + str(workflow_result['total_processing_time']) + "</td>"
if max > 0:
values = highlight_value(values, max)
results_tasks += "</tr><tr>" + values + "</tr>"
......@@ -448,7 +490,7 @@ def construct_logs_per_workflow_html_version1(log_records):
style = "active"
line = "<tr><td><b>" + record['name'] + "</b></td>" \
'<td class="' + style + '" >' + record['status'] + \
'<td class="' + style + '" >' + record['status'] + \
"</td><td>" + str(record['cpu_cycles']) + \
"</td><td>" + str(record['wall_clock_time']) + "</td><tr>"
......@@ -475,25 +517,25 @@ def construct_logs_per_workflow_html(request, workflow_results):
for status in records_per_status:
record = records_per_status[status]
# distinguish active statusses
style = ""
if status in settings.ACTIVE_STATUSSES or status=='active':
record = records_per_status[status]
# distinguish active statusses
style = ""
if status in settings.ACTIVE_STATUSSES or status == 'active':
style = "active"
# show the values (done with a weird ternary operator)
if record['cpu_cycles']:
cpu_cycles = str(record['cpu_cycles'])
else:
cpu_cycles = '0'
# show the values (done with a weird ternary operator)
if record['cpu_cycles']:
cpu_cycles = str(record['cpu_cycles'])
else:
cpu_cycles = '0'
if record['wall_clock_time']:
wall_clock_time = str(record['wall_clock_time'])
else:
wall_clock_time = '0'
if record['wall_clock_time']:
wall_clock_time = str(record['wall_clock_time'])
else:
wall_clock_time = '0'
value = cpu_cycles + '/' + wall_clock_time
values += "<td class=" + style + ">" + value + "</td>"
value = cpu_cycles + '/' + wall_clock_time
values += "<td class=" + style + ">" + value + "</td>"
results += "<tr>" + values + "</tr>"
......@@ -502,7 +544,6 @@ def construct_logs_per_workflow_html(request, workflow_results):
def construct_dashboard_html(request, selection):
# gather and construct the dashboard based on the requested selection
# --- Progress of tasks per active workflow ---
......@@ -515,5 +556,4 @@ def construct_dashboard_html(request, selection):
log_records = aggregate_resources_logs(selection)
results_logs = construct_logs_per_workflow_html(request, log_records)
return results_tasks,results_logs
return results_tasks, results_logs
......@@ -3,12 +3,12 @@ TD {
font-size: 12pt;
}
.defining,.staging,.processing,.scrub,.scrubbing,.archiving {
.defining,.staging,.fetching,.processing,.storing,.scrub,.scrubbing,.archiving {
font-style: italic;
color: green;
}
.defined,.staged,.processed,.validated,.scrubbed,.archived,.finished {
.defined,.staged,.fetched,.processed,.stored,.validated,.scrubbed,.archived,.finished {
background-color: lemonchiffon;
color: blue;
}
......@@ -17,7 +17,7 @@ TD {
background-color: lemonchiffon;
}
.error,.failed,.staging_failed,.processed_failed,.scrubbed_failed {
.error,.failed,.staging_failed,.processed_failed,.scrubbed_failed,.stored_failed,.archived_failed {
color: red;
font-weight: bold;
}
......
......@@ -2,24 +2,6 @@
import django_tables2 as tables
from .models import Task
# render the StatusColumn based on the style.css
class IDColumn(tables.Column):
attrs = {
"td": {
"class": lambda record: record.status
},
"tr": {
"class": lambda record: record.status
},
"a": {
"href" : lambda record: record.get_absolute_url,
"target": "_blank"
}
}
def render(self, record):
link = '<a href="{{ record.get_absolute_url }}" target="_blank">{{ record.id }} </a>'
return "{}".format(link)
# render the StatusColumn based on the style.css
class StatusColumn(tables.Column):
attrs = {
......@@ -33,19 +15,21 @@ class StatusColumn(tables.Column):
def render(self, record):
return "{}".format(record.status)
# render the ResumeColumn with the value of the 'resume' attribute of the record
class ResumeColumn(tables.BooleanColumn):
    """Boolean column whose rendered value is the ``resume`` attribute of the record."""

    def render(self, record):
        # return the attribute of the record as it is
        return record.resume
class TaskTable(tables.Table):
class Meta:
model = Task
template_name = "django_tables2/bootstrap4.html"
fields = ("id", "workflow","filter","priority","status","project","sas_id","actions","buttons")
fields = ("id", "workflow","filter","priority","status","project","sas_id","resume","actions","buttons")
# columns that need specific rendering
# id = IDColumn() (too complicated, finish later.. if time
status = StatusColumn()
#creationtime = tables.Column(verbose_name='CreationTime')
#resume = ResumeColumn()