"""
    File name: algorithms.py
    Author: Nico Vermaas - Astron
    Description:  Business logic for ATDB. These functions are called from the views (views.py).
"""

import logging
import xml.etree.ElementTree as ElementTree
from typing import Union, List, Dict

from django.conf import settings
from django.db.models import Q, Sum

from .common import timeit
from ..models import Task, LogEntry, Workflow

DATE_FORMAT = "%Y-%m-%d"
TIME_FORMAT = "%Y-%m-%d %H:%M:%SZ"
DJANGO_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

logger = logging.getLogger(__name__)


@timeit
def get_size(status_list, size_type):
    """
    Aggregate the sizes of all tasks with a status in the given list.
    :param status_list: list of statuses to consider for the aggregation
    :param size_type: 'processed' sums 'size_processed', anything else sums 'size_to_process'
    :return: summed size (0.0 when no tasks match)
    """

    logger.info("get_size(%s)", status_list)

    if size_type == 'processed':
        field = 'size_processed'
    else:
        field = 'size_to_process'

    query = field + '__sum'
    tasks = Task.objects.filter(status__in=status_list).filter(task_type='regular')
    sum_value = tasks.aggregate(Sum(field))[query]

    if sum_value is None:
        sum_value = 0.0
    return sum_value
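
# Usage sketch (status values are illustrative; real ones come from settings):
#   processed_size = get_size(['processed', 'archived'], 'processed')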


@timeit
def get_min_start_and_max_end_time(sas_id):
    """
    Retrieve the minimum start time en maximum end time of a set of taskids (sas_id) which has the
    status 'archived' or 'finished'
    The start time is the moment when the task start 'processing'
    The end time is the moment when the task was 'processed'
    """
    min_start_time = None
    max_end_time = None
    logger.info("get_min_start_and_max_end_time(%s)", sas_id)
    tasks = Task.objects.filter(sas_id=sas_id).filter(Q(status='archived') | Q(status='finished'))
    for task in tasks:
        # if multiple 'processing' entries exist for this task, take the latest
        start_time = LogEntry.objects.filter(task=task.pk).filter(step_name='running').filter(status='processing').latest('timestamp').timestamp
        # if multiple 'processed' entries exist for this task, take the latest
        end_time = LogEntry.objects.filter(task=task.pk).filter(step_name='running').filter(status='processed').latest('timestamp').timestamp
        if min_start_time is None or start_time < min_start_time:
            min_start_time = start_time
        if max_end_time is None or end_time > max_end_time:
            max_end_time = end_time
    return min_start_time, max_end_time
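
# Usage sketch (the sas_id is illustrative):
#   start, end = get_min_start_and_max_end_time(650065)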


def convert_logentries_to_html(log_entries):
    results = ""

    try:
        results += "<th>service</th><th>step</th><th>status</th><th>timestamp</th><th>cpu_cycles</th><th>wall_clock_time</th><th>logfile</th>"
        results += "<tbody>"
        for log in log_entries:
            line = "<tr><td><b>" + str(log.service) + '</b></td>'
            line += "<td><b>" + str(log.step_name) + '</b></td>'
            line += '<td class="' + log.status + '" >' + log.status + "</td>"
            try:
                line += "<td>" + str(log.timestamp.strftime("%m-%d-%Y, %H:%M:%S")) + "</td>"
            except AttributeError:
                line += "<td>no timestamp</td>"

            line += "<td>" + str(log.cpu_cycles) + "</td>"
            line += "<td>" + str(log.wall_clock_time) + "</td>"
            if log.url_to_log_file is not None:
                link = '<a href="' + str(log.url_to_log_file) + '" target="_blank">logfile</a>'
            else:
                link = "-"
            line += "<td>" + link + "</td></tr>"
            results += line

        results += "</tbody>"
    except Exception as err:
        results = "<tr><td>" + str(err) + "</td></tr>"

    return results
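
# Usage sketch, assuming 'task' is a Task instance:
#   rows = convert_logentries_to_html(LogEntry.objects.filter(task=task.pk))
#   html = "<table>" + rows + "</table>"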


def convert_list_of_dicts_to_html(my_blob):
    results = ""
    my_list = []

    # if the parameter is not a list, then make it a list first
    if not isinstance(my_blob, list):
        my_list.append(my_blob)
    else:
        my_list = my_blob

    try:
        for my_dict in my_list:
            # iterate through the dict of key/values
            for key, value in my_dict.items():
                # turn URL values into clickable links
                if isinstance(value, str) and "://" in value:
                    value = '<a href="' + value + '">' + key + "</a>"
                line = "<tr><td><b>" + str(key) + "</b></td><td>" + str(value) + "</td></tr>"
                results = results + line
    except Exception:
        results = "<tr><td>no data</td></tr>"

    return results
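
# Usage sketch (hypothetical key/value blob; the URL value becomes a link):
#   rows = convert_list_of_dicts_to_html({'name': 'obs42', 'url': 'https://example.org/log'})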



def _generate_html_from_json_tree(json_blob: Union[List, Dict], element: ElementTree.Element):
    if isinstance(json_blob, (list, tuple)):
        if element.tag != 'tbody':
            sub_table = ElementTree.SubElement(element, 'table')
        else:
            sub_table = element
        for item in json_blob:
            row = ElementTree.SubElement(sub_table, 'tr')
            cell = ElementTree.SubElement(row, 'td')
            _generate_html_from_json_tree(item, cell)

    elif isinstance(json_blob, dict):
        if element.tag != 'tbody':
            sub_table = ElementTree.SubElement(element, 'table')
        else:
            sub_table = element
        for key, value in json_blob.items():
            row = ElementTree.SubElement(sub_table, 'tr')
            key_element = ElementTree.SubElement(row, 'td')
            bold_key = ElementTree.SubElement(key_element, 'b')
            bold_key.text = key
            value_element = ElementTree.SubElement(row, 'td')

            _generate_html_from_json_tree(value, value_element)

    else:
        value = ElementTree.SubElement(element, 'td', attrib={"style": "max-width:25rem"})
        value.text = str(json_blob)


def convert_json_to_nested_table(json_blob):
    root_element = ElementTree.Element('tbody')
    _generate_html_from_json_tree(json_blob, root_element)

    return ElementTree.tostring(root_element, method='xml').decode()
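
# Usage sketch (hypothetical blob):
#   html = convert_json_to_nested_table({'task': {'id': 42, 'status': 'processed'}})
#   # returns a '<tbody>...</tbody>' fragment for embedding in a <table>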


def convert_config_to_html(querylist):
    results = ""
    try:
        for record in querylist:
            # iterate through the key, value and filter fields of each record
            key = record.key
            value = record.value
            filter_value = record.filter

            # turn URL values into clickable links
            if isinstance(value, str) and "://" in value:
                value = '<a href="' + value + '">' + key + "</a>"

            line = "<tr><td><b>" + str(filter_value) + "</b></td> <td><b>" + str(key) + "</b></td><td>" + str(
                value) + "</td></tr>"
            results = results + line
    except Exception:
        results = "<tr><td>no data</td></tr>"

    return results


# aggregate information from the tasks table per workflow per status
def aggregate_resources_tasks(selection):
    workflow_results = []
    my_workflows = []

    # get all active tasks
    if 'active' in selection:
        active_tasks = Task.objects.filter(status__in=settings.ACTIVE_STATUSSES).filter(task_type='regular')

        # retrieve all unique workflows from the active tasks
        active_workflows = active_tasks.values('workflow').distinct()

        # construct the list of workflows (cheap)
        for w in active_workflows:
            try:
                workflow = Workflow.objects.get(id=w['workflow'])
                my_workflows.append(workflow)
            except Workflow.DoesNotExist:
                pass

    else:
        my_workflows = Workflow.objects.all()

    # iterate through the workflows
    for workflow in my_workflows:
        workflow_result = {}

        # get the numbers for this workflow

        # all tasks for this workflow for the 'grand total'
        tasks_per_workflow = Task.objects.filter(workflow=workflow).filter(task_type='regular')
        nr_of_tasks_per_workflow = tasks_per_workflow.count()

        sum_size_to_process = tasks_per_workflow.aggregate(Sum('size_to_process'))
        workflow_result['size_to_process'] = sum_size_to_process['size_to_process__sum']

        sum_size_processed = tasks_per_workflow.aggregate(Sum('size_processed'))
        workflow_result['size_processed'] = sum_size_processed['size_processed__sum']

        sum_total_processing_time = tasks_per_workflow.aggregate(Sum('total_processing_time'))
        workflow_result['total_processing_time'] = sum_total_processing_time['total_processing_time__sum']

        # all the active tasks
        active_tasks_per_workflow = tasks_per_workflow.filter(status__in=settings.ACTIVE_STATUSSES)
        nr_of_active_tasks_per_workflow = active_tasks_per_workflow.count()

        # split per status, to see the progress
        nr_per_status = {}

        for status in settings.ALL_STATUSSES:
            nr_for_this_status = tasks_per_workflow.filter(status=status).count()
            nr_per_status[status] = nr_for_this_status

        nr_per_status['failed'] = tasks_per_workflow.filter(status__icontains='failed').count()
        nr_per_status['active'] = nr_of_active_tasks_per_workflow
        nr_per_status['total'] = nr_of_tasks_per_workflow

        # store the results in a dict
        workflow_result['id'] = workflow.id
        workflow_result['name'] = workflow.workflow_uri
        workflow_result['nr_of_tasks'] = nr_of_tasks_per_workflow
        workflow_result['nr_of_active_tasks'] = nr_of_active_tasks_per_workflow
        workflow_result['nr_of_tasks_per_status'] = nr_per_status

        workflow_results.append(workflow_result)

    return workflow_results
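
# Result sketch (one dict per workflow; field values are illustrative):
#   [{'id': 1, 'name': 'some_workflow_uri', 'nr_of_tasks': 100, 'nr_of_active_tasks': 5,
#     'nr_of_tasks_per_status': {'defined': 10, ..., 'failed': 2, 'active': 5, 'total': 100},
#     'size_to_process': 1e12, 'size_processed': 5e11, 'total_processing_time': 3600}]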


# aggregate information from the logentries table per workflow per status
def aggregate_resources_logs(selection):
    workflow_results = []
    my_workflows = []

    # get all active tasks
    if 'active' in selection:
        active_tasks = Task.objects.filter(status__in=settings.ACTIVE_STATUSSES).filter(task_type='regular')

        # retrieve all unique workflows from the active tasks
        active_workflows = active_tasks.values('workflow').distinct()

        # construct the list of workflows (cheap)
        for w in active_workflows:
            try:
                workflow = Workflow.objects.get(id=w['workflow'])
                my_workflows.append(workflow)
            except Workflow.DoesNotExist:
                pass

    else:
        my_workflows = Workflow.objects.all()

    for workflow in my_workflows:
        workflow_result = {}

        # aggregate log entries per status for tasks that are currently active
        record_per_status = {}
        for status in settings.ALL_STATUSSES:
            record = {}

            # collect the log entries for this status and workflow (expensive)
            logs = LogEntry.objects.filter(status=status) \
                .filter(task__status__in=settings.ACTIVE_STATUSSES) \
                .filter(task__workflow=workflow)

            sum_cpu_cycles = logs.aggregate(Sum('cpu_cycles'))
            record['cpu_cycles'] = sum_cpu_cycles['cpu_cycles__sum']

            wall_clock_time = logs.aggregate(Sum('wall_clock_time'))
            record['wall_clock_time'] = wall_clock_time['wall_clock_time__sum']
            record_per_status[status] = record

        workflow_result['id'] = workflow.id
        workflow_result['name'] = workflow.workflow_uri
        workflow_result['records_per_status'] = record_per_status

        workflow_results.append(workflow_result)

    return workflow_results
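
# Result sketch (one dict per workflow):
#   [{'id': 1, 'name': 'some_workflow_uri',
#     'records_per_status': {'processing': {'cpu_cycles': ..., 'wall_clock_time': ...}, ...}}]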


# aggregate information from the logentries table per workflow per status
def aggregate_resources_logs_version1():
    records = []

    # get all active tasks
    active_tasks = Task.objects.filter(status__in=settings.ACTIVE_STATUSSES).filter(task_type='regular')

    # retrieve all unique workflows
    active_workflows = active_tasks.values('workflow').distinct()

    # iterate through the workflows and accumulate log entries
    for w in active_workflows:
        workflow_result = {}

        # extract the workflow object (cheap)
        workflow = Workflow.objects.get(id=w['workflow'])

        # aggregate log entries per active status
        for status in settings.ACTIVE_STATUSSES:
            record = {}
            record['name'] = str(workflow.id) + ' - ' + workflow.workflow_uri
            record['status'] = status

            # collect the log entries for this status and workflow (expensive)
            logs = LogEntry.objects.filter(status=status) \
                .filter(task__status__in=settings.ACTIVE_STATUSSES) \
                .filter(task__workflow=workflow)

            sum_cpu_cycles = logs.aggregate(Sum('cpu_cycles'))
            record['cpu_cycles'] = sum_cpu_cycles['cpu_cycles__sum']

            wall_clock_time = logs.aggregate(Sum('wall_clock_time'))
            record['wall_clock_time'] = wall_clock_time['wall_clock_time__sum']

            records.append(record)

    return records


def construct_link_to_tasks_api(request, status, workflow_id, count):
    link = str(count)
    try:
        if status in settings.ALL_STATUSSES:
            query = "?status=" + status + "&workflow__id=" + str(workflow_id)
        else:
            if 'failed' in status:
                query = "?status__icontains=failed&workflow__id=" + str(workflow_id)
            else:
                query = "?workflow__id=" + str(workflow_id)

        if settings.DEV:
            url = request.build_absolute_uri('/atdb/tasks') + query
        else:
            # Unclear why 'build_absolute_uri' doesn't return 'https' in production.
            # Probably because the https is handled fully outside the container by Traefik
            # and ATDB is not aware of that.

            url = "https://" + request.get_host() + '/atdb/tasks' + query
        link = '<a href="' + url + '" target="_blank">' + str(count) + "</a>"
    except Exception:
        pass
    return link


def construct_link_to_workflow_api(request, workflow_result):
    title = str(workflow_result['id']) + ' - ' + str(workflow_result['name'])
    link = str(title)
    try:
        if settings.DEV:
            url = request.build_absolute_uri('/atdb/workflows/') + str(workflow_result['id'])
        else:
            # Unclear why 'build_absolute_uri' doesn't return 'https' in production.
            # Probably because the https is handled fully outside the container by Traefik
            # and ATDB is not aware of that.

            url = "https://" + request.get_host() + '/atdb/workflows/' + str(workflow_result['id'])

        link = '<a href="' + url + '" target="_blank">' + title + "</a>"
    except Exception:
        pass
    return link


def human_readable(size_in_bytes):
    try:
        for count in ['Bytes', 'KB', 'MB', 'GB', 'TB']:
            if -1024.0 < size_in_bytes < 1024.0:
                return "%3.1f %s" % (size_in_bytes, count)
            size_in_bytes /= 1024.0
        return "%3.1f %s" % (size_in_bytes, 'PB')
    except TypeError:
        return "0"


def highlight_value(values, value_to_highlight):
    # the css class sits in the 15 characters left of the value
    pos_value = values.find(str(value_to_highlight))
    if pos_value < 15:
        # value not found (or no room for a css class): return unchanged
        return values

    # split up the values, left and right of the search area
    part1 = values[:pos_value - 15]
    part2 = values[pos_value:]

    # replace the css class of the highest value with 'max'
    substring = values[pos_value - 15:pos_value]
    if 'inactive' in substring:
        new_substring = substring.replace('inactive', 'max')
    else:
        new_substring = substring.replace('active', 'max')

    values = part1 + new_substring + part2

    return values
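
# Usage sketch: the css class in the 15 characters left of the value is
# replaced by 'max' (markup is illustrative):
#   highlight_value('<td class="inactive">42% (...)</td>', 42)
#   # -> '<td class="max">42% (...)</td>'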


def construct_tasks_per_workflow_html(request, workflow_results):
    # --- Progress of tasks per active workflow ---
    results_tasks = "<p>Progress of tasks per workflow</p>"

    # construct the header
    header = ""
    for status in settings.ALL_STATUSSES:
        header += "<th>" + status + "</th>"

    header += '<th class="failed">failed</th><th class="active">active</th><th>total</th>'
    results_tasks += header

    for workflow_result in workflow_results:

        link = construct_link_to_workflow_api(request, workflow_result)

        values = "<tr class='info'><td colspan='6'><b>" + link + "</b></td>"

        # add sizes
        values += "<td><b>size to process:</b> " + str(human_readable(workflow_result['size_to_process'])) + "</td>"
        try:
            percentage = round(int(workflow_result['size_processed']) / int(workflow_result['size_to_process']) * 100)
        except (ValueError, TypeError, ZeroDivisionError):
            percentage = 0
        values += "<td><b>size processed:</b> " + str(
            human_readable(workflow_result['size_processed'])) + " (<b>" + str(percentage) + "%</b>) </td>"
        values += "<td><b>processing time:</b> " + str(workflow_result['total_processing_time']) + "</td>"

        values += "<td colspan='8'></td></tr><tr>"

        d = workflow_result['nr_of_tasks_per_status']
        max_percentage = 0

        for key in d:

            try:
                percentage = round(int(d[key]) / int(workflow_result['nr_of_tasks']) * 100)
                if (percentage > max_percentage) and (key in settings.ALL_STATUSSES):
                    max_percentage = percentage
            except (ValueError, TypeError, ZeroDivisionError):
                percentage = 0

            # distinguish active statuses
            style = "inactive"
            if key in settings.ACTIVE_STATUSSES or key == 'active':
                style = "active"

            # bonus: add a query link
            link = construct_link_to_tasks_api(request, key, workflow_result['id'], d[key])
            values += "<td class=" + style + ">" + str(percentage) + "% (" + link + ")</td>"

        if max_percentage > 0:
            values = highlight_value(values, max_percentage)

        results_tasks += "</tr><tr>" + values + "</tr>"

    results_tasks = "<tbody>" + results_tasks + "</tbody>"
    return results_tasks


def construct_logs_per_workflow_html_version1(log_records):
    results_logs = "<p>Resources used per step per active workflow</p>"

    # construct the header
    header = "<th>Workflow</th><th>Status</th><th>CPU cycles</th><th>wall clock time</th>"
    results_logs += header

    for record in log_records:
        # distinguish active statuses
        style = ""
        if record['status'] in settings.ACTIVE_STATUSSES:
            style = "active"

        line = "<tr><td><b>" + record['name'] + "</b></td>" \
                                                '<td class="' + style + '" >' + record['status'] + \
               "</td><td>" + str(record['cpu_cycles']) + \
               "</td><td>" + str(record['wall_clock_time']) + "</td><tr>"

        results_logs += line

    results_logs = "<tbody>" + results_logs + "</tbody>"
    return results_logs


def construct_logs_per_workflow_html(request, workflow_results):
    results = "<p>Resources used per step per workflow: <b>cpu_cycles/wall_clock_time</b></p>"

    # construct the header
    header = "<th>Workflow</th>"
    for status in settings.ALL_STATUSSES:
        header += "<th>" + status + "</th>"
    results += header

    for workflow_result in workflow_results:

        records_per_status = workflow_result['records_per_status']
        link = construct_link_to_workflow_api(request, workflow_result)
        values = "<td><b>" + link + "</b></td>"

        for status in records_per_status:

            record = records_per_status[status]
            # distinguish active statuses
            style = ""
            if status in settings.ACTIVE_STATUSSES or status == 'active':
                style = "active"

            # show the values; fall back to '0' when a sum is None
            cpu_cycles = str(record['cpu_cycles'] or 0)
            wall_clock_time = str(record['wall_clock_time'] or 0)

            value = cpu_cycles + '/' + wall_clock_time
            values += "<td class=" + style + ">" + value + "</td>"

        results += "<tr>" + values + "</tr>"

    results = "<tbody>" + results + "</tbody>"
    return results


def construct_dashboard_html(request, selection):
    # gather and construct the dashboard based on the requested selection

    # --- Progress of tasks per active workflow ---
    workflow_results = aggregate_resources_tasks(selection)
    results_tasks = construct_tasks_per_workflow_html(request, workflow_results)

    # --- logentries ---
    results_logs = "<p>Resources not shown. Click the 'Resources Invisible' toggle to show resources.</p>"
    if 'resources' in selection:
        log_records = aggregate_resources_logs(selection)
        results_logs = construct_logs_per_workflow_html(request, log_records)

    return results_tasks, results_logs
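
# Usage sketch from a Django view (hypothetical handler):
#   def dashboard(request):
#       tasks_html, logs_html = construct_dashboard_html(request, selection='active,resources')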