#!/usr/bin/python3
# -*- python -*-
import logging
import os, sys, glob, socket, shutil, subprocess
from optparse import OptionParser
import time
import datetime
import statistics
from numpy import array, floor
# For testing of the inspection plots on the production system without interfering with the existing script one can
# insert TESTLINE in the relevant filenames, e.g. index[TESTLINE].html. Ultimately, TESTLINE could be auto-filled
# depending on the TAG environment variable (if it exists and doesn't say 'latest'). For normal use, leave TESTLINE an
# empty string (""). Note that TESTLINE doesn't necessarily propagate to programs/scripts called from within this one.
#TESTLINE = "-test"
TESTLINE = ""
# Data sets whose size deviates more than this fraction from the maximum are flagged as odd sized
DATA_INCOMPLETE_THRESHOLD = 0.005 # i.e. 0.5%
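# Example: if the largest data set is 1000 MB, any data set smaller than
# 995 MB deviates by more than 0.5% and is reported as odd sized.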
#-----------------------------------------------------------------------------------------------------------------------
# Automatically generated definitions:
INDEX_HTML_FILENAME = "index"+TESTLINE+".html"
FULLINDEX_HTML_FILENAME = "fullindex"+TESTLINE+".html"
INDEX_TXT_FILENAME = "index"+TESTLINE+".txt"
CREATE_HTML_LOG_FILENAME = "create_html"+TESTLINE+".log"
COBALT_HEAD = 'cbm2head.control.lofar' # COBALT2
PARSET_PATH = '/opt/lofar/nfs/parset/' # COBALT2
#PARSET_PATH = '/opt/lofar/var/run/' # COBALT1
#PARSET_PATH = '/localhome/lofarsystem/parsets/' # COBALT symlink
def initialize_logging(log_level, log_dir = '/globaldata/inspect/'):
r'''
Initialize the Python logging system. The log file ``create_html.log``
is written into ``log_dir`` (by default ``/globaldata/inspect/``).
**Parameters**
log_level : string
Minimum log level of lines to write into the
log file. Possible values are 'DEBUG', 'INFO', 'WARNING', and
'ERROR'.
log_dir : string
Directory where the log file must be written.
**Returns**
A string containing the log file name.
**Examples**
>>> initialize_logging(log_level = 'INFO', log_dir='test-output')
'test-output/create_html.log'
'''
log_levels = {'DEBUG' : logging.DEBUG,
'INFO' : logging.INFO,
'WARNING': logging.WARNING,
'ERROR' : logging.ERROR}
level = log_levels[log_level]
log_format = ('create_html@' + socket.gethostname() +
' %(asctime)s %(levelname)8s - %(message)s')
log_file_name = os.path.join(log_dir, CREATE_HTML_LOG_FILENAME)
logger = logging.root
for handler in logger.handlers:
logger.removeHandler(handler)
logger.setLevel(level)
formatter = logging.Formatter(log_format)
file_handler = logging.FileHandler(log_file_name)
logger.addHandler(file_handler)
for handler in logger.handlers:
handler.setFormatter(formatter)
handler.setLevel(level)
logging.debug('Initialized logging')
return log_file_name
if __name__ == '__main__':
initialize_logging('DEBUG')
try:
import matplotlib
matplotlib.use('agg') # Prevent initialisation of GUI system
from pyautoplot import __version__
except ImportError:
if __name__ == '__main__':
logging.warning(str(sys.exc_info()[1]))
logging.warning('Command line: %s', ' '.join(sys.argv))
__version__ = 'unknown'
if __name__ == '__main__':
logging.debug('Using pyautoplot version %s', __version__)
logging.debug('Environment:\n%s',
'\n'.join(['%15s: %s' %
(key.ljust(15), os.environ[key])
for key in os.environ.keys()]))
def create_html_main(argv):
r'''
The program's main function.
**Parameters**
argv : list of strings
The contents of ``sys.argv``
**Returns**
An integer, where success is indicated by the value 0.
**Examples**
>> create_html_main(['create_html', '60807', '60806'])
'''
logging.debug('create_html_main(argv = %r)', argv)
options, sas_ids = parse_command_line(argv)
parset_names = [find_parset(sas_id) for sas_id in sas_ids]
parsets = [parset_summary(sas_id, parset_name)
for sas_id, parset_name in zip(sas_ids, parset_names)]
# Dictionaries are not orderable, so sort explicitly on 'sas_id' to process the newest observations first.
for num, parset in enumerate(sorted(parsets, key=lambda p: p['sas_id'], reverse=True)):
logging.debug('Processing SAS ID: %s, parset=%r', parset['sas_id'], parset)
try:
obs_plot_root = os.path.join(options.plot_root, parset['sas_id'])
if parset['file_name'] and os.path.exists(parset['file_name']):
logging.debug('SAS ID: %s. Parset exists at: %s', parset['sas_id'], parset['file_name'])
intended_parset_path = os.path.join(obs_plot_root, os.path.basename(parset['file_name']))
# Make sure parset is available in the intended location
if not (os.path.exists(intended_parset_path)
        and os.path.samefile(intended_parset_path, parset['file_name'])):
    try:
        shutil.copy2(parset['file_name'], obs_plot_root)
        parset['file_name'] = intended_parset_path
    except OSError:
        logging.error('SAS ID: %s. Copying parset from %s to %s failed', parset['sas_id'], parset['file_name'], obs_plot_root)
else:
logging.debug('SAS ID: %s. Parset does not exist at: %s', parset['sas_id'], parset['file_name'])
file_size_data = {}
file_sizes_txt = os.path.join(obs_plot_root, 'file-sizes.txt')
if os.path.exists(file_sizes_txt):
file_sizes_dict = parse_file_sizes(file_sizes_txt)
file_size_data = file_size_analysis(parset, file_sizes_dict)
logging.debug('SAS ID: %s. In create_html_main() file_size_data content: %r', parset['sas_id'], file_size_data)
f0seqnr_completeness_dict = {}
f0seqnr_sizes_txt = os.path.join(obs_plot_root, 'f0seqnr-sizes.txt')
if os.path.exists(f0seqnr_sizes_txt):
f0seqnr_sizes_dict = parse_file_sizes(f0seqnr_sizes_txt, os.path.dirname)
f0seqnr_completeness_dict = f0seqnr_size_analysis(parset, f0seqnr_sizes_dict)
logging.debug('SAS ID: %s. In create_html_main() f0seqnr_completeness_dict content: %r', parset['sas_id'], f0seqnr_completeness_dict)
observation_html(parset,
file_size_data,
f0seqnr_completeness_dict,
obs_plot_root = obs_plot_root,
html_root = options.html_root)
except IOError:
logging.error('SAS ID: %s. IOError: %s', parset['sas_id'], str(sys.exc_info()[1]))
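# Refresh the main index every 20 processed observations, so new results show up while a long batch is still running.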
if num % 20 == 0:
main_index_html(plot_root = options.plot_root,
html_root = options.html_root)
main_index_html(plot_root = options.plot_root,
html_root = options.html_root)
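# Purge inspection output older than three weeks: remove every plot directory (and its HTML counterpart) whose SAS ID starts with 2..9 and whose ctime is more than 21 days old.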
for start_num in [2,3,4,5,6,7,8,9]:
os.system("cd /globaldata/inspect/; for sas in `find . -maxdepth 1 -mindepth 1 -iname \"%d*\" -ctime +21|sed -e 's/\.\///g'|sort`; do echo $sas ;rm -rf $sas HTML/$sas; done; " % start_num)
return 0
def parse_command_line(argv):
r'''
Parse the command line arguments.
**Parameters**
argv : list of strings
The contents of ``sys.argv``
**Example**
>>> parse_command_line(['create_html'])
Traceback (most recent call last):
...
ValueError: Specify at least one SAS ID
>>> opts, sas = parse_command_line(['create_html', '1232456'])
>>> str(opts)
"{'plot_root': '/globaldata/inspect', 'html_root': '/globaldata/inspect/HTML'}"
>>> sas
[1232456]
>>> opts, sas = parse_command_line(['create_html',
... '--plot-root', '/home/user/plots',
... '--html-root', '/home/user/public_html',
... '1232456', '6542311'])
>>> str(opts)
"{'plot_root': '/home/user/plots', 'html_root': '/home/user/public_html'}"
>>> sas
[1232456, 6542311]
'''
logging.debug('parse_command_line(argv = %r)', argv)
parser = OptionParser(usage = '%prog <sas ID>',
version = '%prog (pyautoplot ' + __version__ + ')')
parser.add_option(
'--plot-root', type = 'string',
dest = 'plot_root',
help = 'Plots are found under DIR/<sasid> (default: %default)',
metavar = 'DIR',
default = os.path.join('/', 'globaldata', 'inspect'))
parser.add_option(
'--html-root', type = 'string',
dest = 'html_root',
help = 'Use DIR for HTML output (default: %default)',
metavar = 'DIR',
default = os.path.join('/', 'globaldata', 'inspect', 'HTML'))
(options, args) = parser.parse_args(argv[1:])
if len(args) == 0:
raise ValueError('Specify at least one SAS ID')
sas_ids = [int(arg) for arg in args]
return options, sas_ids
def subband_from_file_name(plot_file_name):
r'''
Extract sub band name from ``plot_file_name``.
**Parameters**
plot_file_name : string
File name of an inspection plot.
**Returns**
A string containing the sub band identifier with format 'SBnnn',
where ``nnn`` are three digits.
**Examples**
>>> subband_from_file_name('/globaldata/inspect/60873/L60873_SAP000_SB049_uv-flags.png')
'SB049'
'''
return [item for item in plot_file_name.split('_')
if item[0:2] == 'SB' and len(item) == 5][0]
def sap_from_file_name(plot_file_name):
r'''
Extract sub array pointing name from ``plot_file_name``.
**Parameters**
plot_file_name : string
File name of an inspection plot.
**Returns**
A string containing the SAP identifier with format 'SAPnnn',
where ``nnn`` are three digits.
**Examples**
>>> sap_from_file_name('/globaldata/inspect/60873/L60873_SAP003_SB049_uv-flags.png')
'SAP003'
'''
return [item for item in plot_file_name.split('_')
if item[0:3] == 'SAP' and len(item) == 6][0]
def force_mkdir(path_name):
r'''
Create a directory if it does not already exist. Does not raise an
exception if the path already exists, but does raise one if anything
else goes wrong.
**Parameters**
path_name : string
Path to create.
**Examples**
>>> os.path.exists('test-output/new-dir/')
False
>>> force_mkdir('test-output/new-dir')
>>> os.path.exists('test-output/new-dir/')
True
>>> force_mkdir('test-output/new-dir/')
>>> os.path.exists('test-output/new-dir/')
True
>>> os.path.exists('/my-own-nice-little-corner/')
False
>>> force_mkdir('/my-own-nice-little-corner/')
Traceback (most recent call last):
...
PermissionError: [Errno 13] Permission denied: '/my-own-nice-little-corner/'
'''
logging.debug('force_mkdir(path_name = %r)', path_name)
if not os.path.exists(path_name):
os.mkdir(path_name)
def contains(sub_string, strings):
r'''
Returns the sorted list of strings from ``strings`` that contain
``sub_string``.
**Parameters**
sub_string : string
The string to test for.
strings : list of strings
The strings in which to search.
**Examples**
>>> contains('abba', ['abda', '1287d1abba12kjb', 'ababababbba', 'abbababba'])
['1287d1abba12kjb', 'abbababba']
'''
return sorted([string for string in strings if sub_string in string])
def plot_html(plot_path, image_width = 700):
r'''
Return an HTML snippet that links to the image at ``plot_path`` and
displays it inline at a width of ``image_width`` pixels.
'''
return '''<a href="%(plot_path)s">
<img src="%(plot_path)s" width="%(image_width)dpx"></img>
</a>''' % {'plot_path' : plot_path,
'image_width': image_width}
def subband_html(parset, sb_name, rel_plot_files):
r'''
Build the HTML page for a single SAP/subband combination, combining its
time series, station gain, flagging, and zeroes plots taken from
``rel_plot_files``.
'''
timeseries_html = [plot_html(image)
for image in contains('timeseries', rel_plot_files)]
sensitivities_html = [plot_html(image)
for image in contains('station-gain', rel_plot_files)]
overview_rows = ['<tr><td>'+line_graph+'</td><td>'+bar_graph+'</td></tr>'
for (line_graph, bar_graph)
in zip(timeseries_html, sensitivities_html)]
flagged_std = plot_html(contains('flagged-standard-deviation',
rel_plot_files)[0])
flagged_mean = plot_html(contains('flagged-mean', rel_plot_files)[0])
flags = plot_html(contains('flags' , rel_plot_files)[0])
zeros = plot_html(contains('zeroes' , rel_plot_files)[0])
html_template = '''
<html>
<head> <title>%(title)s</title> </head>
<body>
<center>
<h1>%(title)s</h1>
<p>
<ul>
<li><a href="../%(index_filename)s">Subbands</a> <a href="../../%(index_filename)s">Projects</a></li>
<li><a href="https://astron.nl/radio-observatory/observing-capabilities/depth-technical-information/data-quality-inspection/data-qu">What do I see here?</a></li>
</ul>
</p>
<table>
%(overview_rows)s
<tr>
<td>%(flagged_std)s</td>
<td>%(flagged_mean)s</td>
</tr>
<tr>
<td>%(flags)s</td>
<td>%(zeros)s</td>
</tr>
</table>
</center>
</body>
</html>'''
return html_template % {
'index_filename' : INDEX_HTML_FILENAME,
'title' : parset['sas_id']+' '+sb_name,
'overview_rows' : '\n'.join(overview_rows),
'flagged_std' : flagged_std,
'flagged_mean' : flagged_mean,
'flags' : flags,
'zeros' : zeros}
def relative_plot_paths(obs_plot_root, current_directory):
r'''
Return the paths of all PNG and JPG plots found under ``obs_plot_root``,
relative to ``current_directory``.
'''
patterns = [os.path.join(obs_plot_root, extension)
for extension in ['*.png', '*.jpg']]
rel_plot_files = []
for pattern in patterns:
rel_plot_files += [os.path.relpath(full_path, current_directory)
for full_path in glob.glob(pattern)]
return rel_plot_files
def plot_paths_per_sap_subband(plot_file_names):
r'''
Group plot file names by their SAP/subband combination.
**Examples**
>>> plot_paths_per_sap_subband(['L60873_SAP000_SB000_uv-flags.png'])
{'SAP000_SB000': ['L60873_SAP000_SB000_uv-flags.png']}
'''
sb_plots = {}
for plot_file in plot_file_names:
keyword = '_'.join([sap_from_file_name(plot_file),
subband_from_file_name(plot_file)])
sb_plots.setdefault(keyword, []).append(plot_file)
return sb_plots
def observation_html(parset, file_size_data, f0seqnr_completeness_dict, obs_plot_root, html_root):
r'''
Write the per-observation HTML pages: the station beamlet page, one page
per SAP/subband, and the observation index with completeness and
input-loss summaries.
'''
logging.info('Creating HTML for observation %s', parset['sas_id'])
logging.debug(
'observation_html(parset = %r, obs_plot_root = %r, html_root = %r)', parset, obs_plot_root, html_root)
obs_html_root = os.path.join(html_root , parset['sas_id'])
sb_html_root = os.path.join(obs_html_root, 'SBpages')
stn_html_root = os.path.join(obs_html_root, 'Stations')
if not os.path.exists(obs_plot_root):
raise IOError('Directory "%s" does not exist' % obs_plot_root)
force_mkdir(obs_html_root)
force_mkdir(sb_html_root)
force_mkdir(stn_html_root)
cobalt_error_file = os.path.join(obs_plot_root, 'rtcp-%d.errors' % int(parset['sas_id']))
cobalt_error_log_html = ''
if os.path.exists(cobalt_error_file):
if len(open(cobalt_error_file).read().strip()) > 0:
error_rel_path = os.path.relpath(
cobalt_error_file, obs_html_root)
cobalt_error_log_html = '<h3><a href="%s">Cobalt ERROR log</a></h3>' % error_rel_path
input_loss_html = observation_input_loss_html(os.path.join(obs_plot_root, 'rtcp-%d.loss' % int(parset['sas_id'])))
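# observation_input_loss_html() returns '' when the loss file is absent or empty, and a table without <tr> rows when no station reported loss.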
if input_loss_html == '':
input_loss_html = '<h3>No input loss report</h3>'
elif '<tr>' not in input_loss_html:
input_loss_html = '<h3>Input loss report</h3><p>All station data arrived!</p>'
else:
input_loss_html = '<h3>Input loss report</h3>\n'+input_loss_html
station_beamlet_html = '<h3>No beamlet plots found</h3>'
plot_files = relative_plot_paths(obs_plot_root, stn_html_root)
plot_files = [plot_file for plot_file in plot_files
if "beamlets" in plot_file ]
plot_files = sorted(plot_files)
if plot_files:
file_name = os.path.join(stn_html_root, 'station_beamlets.html')
open(file_name, 'w').write(
station_beamlet_statistics_plots_html(parset, plot_files))
station_beamlet_html = '<h3><a href="Stations/station_beamlets.html">Station Dynamic Spectra</a></h3>'
file_size_html = '<h3>No file size information found</h3>'
try:
if file_size_data is None:
raise RuntimeError('No file size data provided')
if len(file_size_data['missing_data_sets']) == 0:
missing_html = '<h3>All data sets are there</h3>'
else:
missing_html = ('<h3>Missing data:</h3><ul>' +
'<li>' +
'</li><li>'.join(
sorted(file_size_data['missing_data_sets'])) +
'</li></ul>')
# Local helper function to improve readability
def positive_float_to_html(in_float):
if in_float < 0: return "N.A."
else: return "%0.1f%%" % (in_float)
if len(file_size_data['odd_sized_data_sets']) == 0:
odd_sized_html = ''
else:
odd_sized_html = (
'<h3>Odd size data:</h3><table>'+
'<tr>' +
'</tr><tr>'.join(
['<td>%s</td><td>:</td><td>%d MB</td><td></td><td title="Completeness based on f0seqnr-size">(%s)</td><td></td>' %
(name, size, positive_float_to_html(f0seqnr_completeness_dict.get(name, -1)))
for (name, size) in sorted(
file_size_data['odd_sized_data_sets'])]) +
'</tr></table>')
file_size_html = '''
<h3>Max file sizes (MB):</h3>
<table>
<tr><td>Correlated data</td> <td>:</td> <td>%r</td></tr>
<tr><td>Beamformed data</td> <td>:</td> <td>%r</td></tr>
</table>
%s
%s
''' % (file_size_data['max_ms_size_mb'],
file_size_data['max_raw_size_mb'],
missing_html,
odd_sized_html)
except Exception:
    logging.error('%s: %s', sys.exc_info()[0].__name__, str(sys.exc_info()[1]))
plot_files = relative_plot_paths(obs_plot_root, sb_html_root)
plot_files = [ file for file in plot_files if "beamlets" not in file ]
sb_plots = plot_paths_per_sap_subband(plot_files)
sb_list = sorted(sb_plots.keys())
for sb_name in sb_list:
file_name = os.path.join(sb_html_root, sb_name+'.html')
open(file_name, 'w').write(
subband_html(parset, sb_name, sb_plots[sb_name]))
observation_index_name = os.path.join(obs_html_root, INDEX_HTML_FILENAME)
subband_list = parset['subband_list']
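# The plot file names carry OLAP subband numbers ('SBnnn'); map these back to the station subbands and frequencies listed in the parset.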
olap_subbands = [int(sap_sb.split('_')[1][-3:]) for sap_sb in sb_list]
station_subbands = []
try:
station_subbands = [subband_list[olap_sb] for olap_sb in olap_subbands]
except IndexError:
logging.warning('SAS %r: ListIndex out of range in subband_list[olap_sb]' % parset['sas_id'])
station_subbands = []
subband_freqs_hz = []
try:
subband_freqs_hz = [parset['subband_freqs_hz'][sb] for sb in olap_subbands]
except IndexError:
logging.warning('SAS %r: ListIndex out of range in parset[\'subband_freqs_hz\'][sb]' % parset['sas_id'])
subband_freqs_hz = []
sb_page_list = []
try:
sb_page_list = ['<tr><td><a href="SBpages/%s.html">%s</a> </td> <td>%d </td> <td>%7.3f </td></tr>' %
(name, name, subband, freq_hz/1e6)
for name, subband, freq_hz in zip(sb_list, station_subbands, subband_freqs_hz)]
except Exception as e:
logging.warning('SAS %r: Could not create sb_page_list, exception message: %s' % (parset['sas_id'], str(e)))
sb_page_list = []
open(observation_index_name, 'w').write('''
<html>
<head><title>%(sas_prefix)s%(sas_id)s</title></head>
<body>
<h1>%(sas_prefix)s%(sas_id)s</h1>
<p><ul>
<li><a href="../%(index_filename)s">Projects</a></li>
</ul></p>
%(cobalt_error_log)s
%(file_size_info)s
%(input_loss)s
%(station_beamlets)s
<table>
<tr><th>Name</th> <th>Subband</th> <th>Freq.</th></tr>
<tr><th></th> <th>(ID)</th> <th>(MHz)</th></tr>
%(sub_band_pages)s
</table>
</body>
</html>
''' % {'index_filename' : INDEX_HTML_FILENAME,
'sas_prefix' : parset['sas_prefix'],
'sas_id' : parset['sas_id'],
'file_size_info': file_size_html,
'input_loss' : input_loss_html,
'station_beamlets' : station_beamlet_html,
'cobalt_error_log': cobalt_error_log_html,
'sub_band_pages': ' '.join(sb_page_list)})
def parse_subband_list(parset_subband_list):
r'''
Parse a subband list from a parset.
**Parameters**
parset_subband_list : string
Value of Observation.Beam[0].subbandList
**Returns**
A list of integers containing the subband numbers.
**Examples**
>>> parse_subband_list('[154..163,185..194,215..224,245..254,275..284,305..314,335..344,10*374]')
[154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 374, 374, 374, 374, 374, 374, 374, 374, 374, 374]
>>> parse_subband_list('[77..87,116..127,155..166,194..205,233..243,272..282,311..321]')
[77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321]
>>> parse_subband_list('[]')
[]
'''
stripped_subband_list = parset_subband_list.strip('[] \n\t')
if stripped_subband_list == '':
return []
sub_lists = [word.strip().split('..')
for word in stripped_subband_list.split(',')]
subbands = []
for sub_list in sub_lists:
if len(sub_list) == 1:
multiplication = sub_list[0].split('*')
if len(multiplication) == 2:
subbands += [int(multiplication[1])]*int(multiplication[0])
else:
subbands.append(int(sub_list[0]))
elif len(sub_list) == 2:
subbands += range(int(sub_list[0]), int(sub_list[1])+1)
else:
logging.error('%r is not a valid sub_range in a subband list', sub_list)
return []
return subbands
def subband_frequencies(subband_width, band_filter, subband_numbers):
r'''
Convert station subband numbers to sky frequencies in Hz, given the
subband width in Hz and the observation's band filter (which determines
the base frequency of the Nyquist zone).
**Examples**
>>> subband_frequencies(195312.5, 'HBA_110_190', [0, 1])
[100000000.0, 100195312.5]
'''
freq_offsets_hz = [sb_id*subband_width for sb_id in subband_numbers]
base_freq = 0.0
if 'HBA_110_190' == band_filter:
base_freq = 100e6
elif 'HBA_210_250' == band_filter:
base_freq = 200e6
elif 'HBA_170_230' == band_filter:
base_freq = 160e6
else:
base_freq = 0.0
return [freq_hz + base_freq for freq_hz in freq_offsets_hz]
def find_parset(in_sas_id):
r'''
Look for a parset. If the parset is found, return its path. Otherwise,
issue a warning and return None.
If no local path is found, but the file exists on another machine,
it returns "<machine>:<path>", for example
"cbm001:/localhome/lofarsystem/parsets/rtcp-123456.parset"
'''
# Note: parsets with different naming formats (L, T, rtcp-) are all accepted
search_path = [os.path.join('/globaldata', 'inspect', str(in_sas_id), prefix+str(in_sas_id)+'.parset')
for prefix in ['L', 'T', 'rtcp-']]
for path in search_path:
if os.path.exists(path):
return path
search_path = [os.path.join('/globalhome', user, 'log', prefix+str(in_sas_id)+'.parset')
for prefix in ['L', 'T', 'rtcp-']
for user in ['lofarsystem', 'lofartest']]
for path in search_path:
if os.path.exists(path):
return path
# Nothing found yet, check if on Cobalt
try:
expected_fname = os.path.join(PARSET_PATH,
'rtcp-%d.parset' % in_sas_id)
fname = subprocess.check_output(['ssh', COBALT_HEAD, 'ls',
'%s' % expected_fname])
if fname.strip() == expected_fname.encode().strip():
return COBALT_HEAD+':'+expected_fname
except Exception:
pass
logging.warning('Cannot find parset for SAS ID %d', in_sas_id)
return None
def parset_summary(sas_id, parset_name):
r'''
Parse a parset into a dictionary.
**Parameters**
sas_id : int
SAS ID of the observation.
parset_name : string
Path to the file containing the parset.
**Examples**
>>> fe_parset = parset_summary(181718, 'testdata/flyseye-parset.txt')
>>> xc_parset = parset_summary(178900, 'testdata/crosscor-parset.txt')

(See also the parset of test case 182238.)
'''
def value(line):
return line.split('=')[-1].strip('\' \n')
parset = {'sas_id' : str(sas_id),
'sas_prefix' : '',
'campaign_name': '',
'target' : '',
'start_time' : '',
'stop_time' : '',
'antenna_set' : '',
'band_filter' : '',
'subband_list' : [],
'subband_freqs_hz': [],
'subband_width': 0.0,
'200_subband_width': 0.0,
'160_subband_width': 0.0,
'file_name' : None,
'clock_mhz' : 0,
'correlator_products': [],
'correlator_locations': [],
'beamformer_products': [],
'beamformer_locations': [],
'coherent_stokes_products': [],
'coherent_stokes_locations': [],
'incoherent_stokes_products': [],
'incoherent_stokes_locations': [],
'block_size' : 0,
'nr_integrations_per_block' : 0,
'nr_blocks_per_integration' : 0,
'nr_integration_periods' : 0
}
if parset_name and os.path.exists(parset_name):
parset['file_name'] = parset_name
sas_prefix = os.path.basename(parset_name).split('.parset')[0][0]
# If the parset filename has prefix 'rtcp-', treat it as if it were one with prefix 'L'
parset['sas_prefix'] = 'L' if sas_prefix == 'r' else sas_prefix
parset_lines = open(parset_name).readlines()
for line in parset_lines:
if 'Campaign.name' in line:
parset['campaign_name'] = value(line).strip().strip('"\'')
elif 'Beam[0].target' in line:
parset['target'] = value(line)
elif 'Observation.startTime' in line:
parset['start_time'] = value(line)
elif 'Observation.stopTime' in line:
parset['stop_time'] = value(line)
elif 'Observation.antennaSet' in line:
parset['antenna_set'] = value(line)
elif 'Observation.bandFilter' in line:
parset['band_filter'] = value(line)
elif 'Observation.Beam[' in line and '].subbandList' in line:
subband_list = parse_subband_list(value(line))
if len(subband_list) == 0:
logging.warning('Empty subband list in %s', parset_name)
else:
parset['subband_list'] += subband_list
elif 'Observation.sampleClock' in line:
parset['clock_mhz'] = int(value(line))
elif 'Observation.subbandWidth' in line:
parset['subband_width'] = float(value(line))
elif 'Clock200.subbandWidth' in line:
parset['200_subband_width'] = float(value(line))
elif 'Clock160.subbandWidth' in line:
parset['160_subband_width'] = float(value(line))
elif 'Observation.DataProducts.Output_Correlated.filenames' in line:
parset['correlator_products'] = [
name.strip()
for name in value(line).strip('[] ').split(',')
if name.strip() != '']
elif 'Observation.DataProducts.Output_Correlated.locations' in line:
parset['correlator_locations'] = [
name.strip().split(':')[0]
for name in value(line).strip('[] ').split(',')
if name.strip() != '' and name.strip().split(':')[0] != '']
elif 'Observation.DataProducts.Output_Beamformed.filenames' in line:
parset['beamformer_products'] += [
name.strip()
for name in value(line).strip('[] ').split(',')
if name.strip() != '']
elif 'Observation.DataProducts.Output_Beamformed.locations' in line:
parset['beamformer_locations'] += [
name.strip().split(':')[0]
for name in value(line).strip('[] ').split(',')
if name.strip() != '' and name.strip().split(':')[0] != '']
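# Coherent and incoherent Stokes outputs are lumped in with the beamformer products below, so the same missing/odd-size checks apply to them.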
elif 'Observation.DataProducts.Output_IncoherentStokes.filenames' in line:
parset['beamformer_products'] += [
name.strip()
for name in value(line).strip('[] ').split(',')
if name.strip() != '']
elif 'Observation.DataProducts.Output_IncoherentStokes.locations' in line:
parset['beamformer_locations'] += [
name.strip().split(':')[0]
for name in value(line).strip('[] ').split(',')
if name.strip() != '' and name.strip().split(':')[0] != '']
elif 'Observation.DataProducts.Output_CoherentStokes.filenames' in line:
parset['beamformer_products'] += [
name.strip()
for name in value(line).strip('[] ').split(',')
if name.strip() != '']
elif 'Observation.DataProducts.Output_CoherentStokes.locations' in line:
parset['beamformer_locations'] += [
name.strip().split(':')[0]
for name in value(line).strip('[] ').split(',')
if name.strip() != '' and name.strip().split(':')[0] != '']
elif 'Cobalt.blockSize' in line:
parset['block_size'] = int(value(line))
elif 'Cobalt.Correlator.nrIntegrationsPerBlock' in line:
parset['nr_integrations_per_block'] = int(value(line))
elif 'Cobalt.Correlator.nrBlocksPerIntegration' in line:
parset['nr_blocks_per_integration'] = int(value(line))
else:
pass
# Determine number of integration periods (for later use in output loss figure calculation)
start_time = stop_time = datetime.datetime.now()
try:
start_time = datetime.datetime.strptime(parset['start_time'], ('%Y-%m-%d %H:%M:%S.%f' if '.' in parset['start_time'] else '%Y-%m-%d %H:%M:%S'))
except ValueError:
logging.warning('SAS %r: Datetime format unsupported: parset[\'start_time\'] = %s' % (parset['sas_id'], parset['start_time']))
try:
stop_time = datetime.datetime.strptime(parset['stop_time'], ('%Y-%m-%d %H:%M:%S.%f' if '.' in parset['stop_time'] else '%Y-%m-%d %H:%M:%S'))
except ValueError:
logging.warning('SAS %r: Datetime format unsupported: parset[\'stop_time\'] = %s' % (parset['sas_id'], parset['stop_time']))
duration_seconds = (stop_time - start_time).total_seconds()
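# A station subband is 1/1024th of the sample clock, e.g. 200 MHz / 1024 = 195312.5 Hz.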
subband_bandwidth_hz = (1e6 * parset['clock_mhz'] / 1024)
block_size = parset['block_size']
nr_integrations_per_block = parset['nr_integrations_per_block']
nr_blocks_per_integration = parset['nr_blocks_per_integration']
if nr_blocks_per_integration == 0:
logging.warning('SAS %r: nr_blocks_per_integration == 0' % parset['sas_id'])
elif block_size == 0:
logging.warning('SAS %r: block_size == 0' % parset['sas_id'])
else:
parset['nr_integration_periods'] = floor(floor(duration_seconds * subband_bandwidth_hz / block_size) * \
nr_integrations_per_block / nr_blocks_per_integration)
# A value of 0 for parset['nr_integration_periods'] is not anticipated, so report it if it occurs
if parset['nr_integration_periods'] == 0:
logging.warning("SAS %r: parset['nr_integration_periods'] == 0\
\n\tparset['start_time']=%s\
\n\tparset['stop_time']=%s\
\n\tparset['clock_mhz']=%s\
\n\tparset['block_size']=%s\
\n\tduration_seconds=%r\
\n\tsubband_bandwidth_hz=%r\
\n\tnr_integrations_per_block=%r\
\n\tnr_blocks_per_integration=%r" % ( parset['sas_id'],
parset['start_time'],
parset['stop_time'],
parset['clock_mhz'],
parset['block_size'],
duration_seconds,
subband_bandwidth_hz,
nr_integrations_per_block,
nr_blocks_per_integration))
if parset['clock_mhz'] == 160:
parset['subband_width'] = parset['160_subband_width']
if parset['clock_mhz'] == 200:
parset['subband_width'] = parset['200_subband_width']
if parset['subband_width'] == 0.0:
parset['subband_width'] = parset['clock_mhz']*1e6/1024.0/1000.0
parset['subband_freqs_hz'] = subband_frequencies(
parset['subband_width']*1000.0,
parset['band_filter'],
parset['subband_list'])
return parset
def parse_file_sizes(file_name, path_mod_func=lambda x: x):
r'''
Parse the file-sizes.txt or f0seqnr-sizes.txt file normally residing at
/globaldata/inspect/<sas_id>/file-sizes.txt
/globaldata/inspect/<sas_id>/f0seqnr-sizes.txt
**Parameters**
file_name : string
Full path to the file.
path_mod_func : callable
Optional function applied to each listed path before the file name is
extracted, e.g. ``os.path.dirname`` for the f0seqnr listings.
**Returns**
A dictionary with the file name of the data set (without full
path) as a key, and a tuple (locus_node, full_path_to_data,
file_size_in_mb)
**Examples**
>>> cc_sizes = parse_file_sizes('testdata/crosscor-file-sizes.txt')
>>> cc_sizes['L178900_SAP000_SB000_uv.MS']
('locus001', '/data/L178900/L178900_SAP000_SB000_uv.MS', 468)
>>> cc_sizes['L178900_SAP000_SB243_uv.MS']
('locus092', '/data/L178900/L178900_SAP000_SB243_uv.MS', 468)
>>> fe_sizes = parse_file_sizes('testdata/flyseye-file-sizes.txt')
'''
lines = [line.strip().split() for line in open(file_name).readlines()
if ('No such file or directory' not in line and
'************' not in line and
'ssh_exchange_identification' not in line and
'Timeout, server not' not in line and
'connect to host' not in line and
'request failed' not in line and
'not resolve' not in line and
'remaining bytes' not in line and
'Write failed: Broken pipe' not in line and
'open display' not in line and
'Connection timed out' not in line)]
results = {}
locus_node = None
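# The listing alternates node header lines (assumed to be three words with the locus node in the middle) with '<size> <path>' entries belonging to that node.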
for line in lines:
if len(line) == 3 and 'locus' in line[1]:
locus_node = line[1]
elif len(line) == 2:
data_size = int(line[0])
full_path = path_mod_func(line[1])
file_name = os.path.basename(full_path)
results[file_name] = (locus_node, full_path, data_size)
else:
logging.warning('parse_file_sizes(): incomprehensible line %r', line)
return results
def file_size_analysis(parset, file_sizes_dict):
r'''
Analyze the file sizes against the expected files as listed in the parset.
**Returns**
A dictionary with the keys 'max_ms_size_mb', 'max_raw_size_mb',
'missing_data_sets', 'odd_sized_data_sets', and 'percentage_complete'.
'''
ms_sizes = array([value[2]
for keyword, value in file_sizes_dict.items()
if '.MS' in keyword])
raw_sizes = array([value[2]
for keyword, value in file_sizes_dict.items()
if '.raw' in keyword])
max_ms_size_mb = 0
if len(ms_sizes) > 0:
max_ms_size_mb = max(ms_sizes.max(), 1.5)
max_raw_size_mb = 0
if len(raw_sizes) > 0:
max_raw_size_mb = max(raw_sizes.max(), 1.5)
missing_data_sets = []
odd_sized_data_sets = []
total_data_products = (len(parset['correlator_products']) +
len(parset['beamformer_products']))
problematic_data_products = 0
for msname, locus_node in zip(parset['correlator_products'], parset['correlator_locations']):
node_and_name = '%s:%s' % (locus_node, msname)
try:
locus_actual, full_path, data_size_mb = file_sizes_dict[msname]
if max_ms_size_mb > 0:
if abs(float(data_size_mb)/float(max_ms_size_mb) -1.0) > DATA_INCOMPLETE_THRESHOLD:
odd_sized_data_sets.append((node_and_name, data_size_mb))
problematic_data_products += 1
except KeyError:
missing_data_sets.append(node_and_name)
problematic_data_products += 1
for h5_name, locus_node in zip(parset['beamformer_products'], parset['beamformer_locations']):
raw_name = h5_name.replace('.h5', '.raw')
node_and_name = '%s:%s' % (locus_node, raw_name)
try:
locus_actual, full_path, data_size_mb = file_sizes_dict[raw_name]
if max_raw_size_mb > 0:
if abs((float(data_size_mb)/float(max_raw_size_mb)) -1.0) > DATA_INCOMPLETE_THRESHOLD:
odd_sized_data_sets.append((node_and_name, data_size_mb))
problematic_data_products += 1
except KeyError:
missing_data_sets.append(node_and_name)
problematic_data_products += 1
percentage_complete = 0.0
if total_data_products > 0:
percentage_complete = floor(100.0*float(total_data_products - problematic_data_products)/total_data_products)
return {'max_ms_size_mb': max_ms_size_mb,
'max_raw_size_mb': max_raw_size_mb,
'missing_data_sets': missing_data_sets,
'odd_sized_data_sets': odd_sized_data_sets,
'percentage_complete': percentage_complete}
def f0seqnr_size_analysis(parset, f0seqnr_sizes_dict):
r'''
Analyze the f0seqnr integration periods (actual size) against the number
of integration periods (expected size) as listed in the parset. Returns a
dictionary mapping '<locus_node>:<data_product>' to a completeness
percentage, or None when the expected number of integration periods is 0.
'''
if parset['nr_integration_periods'] == 0:
logging.warning('SAS %r: parset[\'nr_integration_periods\'] == 0' % parset['sas_id'])
return None
# Construct a correlator-location dict for node lookup later
correlator_location_dict = dict(zip(parset['correlator_products'], parset['correlator_locations']))
completeness_dict = {}
for data_product_folder, (locus_node, full_path, nr_integration_periods_in_file) in f0seqnr_sizes_dict.items():
# Calculate individual completeness
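# The f0seqnr file presumably holds one 4-byte sequence number per written integration period, hence the division by 4.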
completeness = (100*nr_integration_periods_in_file/4) / parset['nr_integration_periods']
if completeness > 100:
logging.warning('SAS %r: f0seqnr completeness > 100: %0.1f (%f/%d)' % (parset['sas_id'], completeness, nr_integration_periods_in_file, parset['nr_integration_periods']))
node_and_name = '%s:%s' % (correlator_location_dict[data_product_folder], data_product_folder)
completeness_dict[node_and_name] = completeness
return completeness_dict
def parse_loss_log_line(line):
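# Assumed rtcp loss-log layout: the 8th word is the station name followed by ':', the 12th word is the loss percentage followed by '%'.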
words = line.split()
station = words[7][:-1]
try:
loss_fraction = float(words[11][:-1])/100.0
except ValueError:
station = 'CSERR'
loss_fraction = -1.0
return (station, loss_fraction)
def sort_by_station(station_loss_tuples):
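# Sort key (assuming station names like 'CS001' or 'CS001HBA0'): the three-character station number plus the name's last character.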
return sorted(station_loss_tuples,
key=lambda x: (x[0][2:5])+(x[0][-1]))
def read_loss(log_file):
with open(log_file, mode='r') as input_file:
loss_lines = [parse_loss_log_line(line)
for line in input_file if 'loss' in line]
return sort_by_station(loss_lines)
def observation_input_loss_html(input_loss_filename):
if not os.path.exists(input_loss_filename):
return ''
if os.stat(input_loss_filename).st_size == 0:
return ''
table_rows = '\n'.join([
'<tr><th>%s:</th><td>%8.4f%%</td></tr>' % (station, loss*100.0)
for (station, loss) in read_loss(input_loss_filename)
if loss > 0.0])
return '<table>\n' + table_rows + '\n</table>\n'
def get_station_list():
station_dict = dict()
station_dict['core'] = [
'CS001',
'CS002',
'CS003',
'CS004',
'CS005',
'CS006',
'CS007',
'CS011',
'CS013',
'CS017',
'CS021',
'CS024',
'CS026',
'CS028',
'CS030',
'CS031',
'CS032',
'CS101',
'CS103',
'CS201',
'CS301',
'CS302',
'CS401',
'CS501'
]
station_dict['remote'] = [
'RS106',
'RS205',
'RS208',
'RS210',
'RS305',
'RS306',
'RS307',
'RS310',
'RS406',
'RS407',
'RS409',
'RS503',
'RS508',
'RS509',
]
station_dict['international'] = [
'DE601',
'DE602',
'DE603',
'DE604',
'DE605',
'DE609',
'FR606',
'IE613',
'LV614',
'PL610',
'PL611',
'PL612',
'SE607',
'UK608',
]
station_dict['all'] = station_dict['core'] + station_dict['remote'] + station_dict['international']
return station_dict
def create_plot_per_station(list_of_plots, station_name, image_width=800, image_height=600):
    r'''
    Return HTML table rows with the four BST plots for ``station_name``:
    the two beamlet-statistics plots ('_00'/'_01') and their normalised
    versions, or NO DATA placeholders when no plots are available.
    '''
    plots = list(filter(lambda filename: station_name in filename, list_of_plots))
html_template = '''
<tr><td>%(station_name)s</td></tr>
<tr>
<td>%(station_bst_zero)s</td>
<td>%(station_bst_one)s</td>
<td>%(station_zero_norm)s</td>
<td>%(station_one_norm)s</td>
</tr>
'''
NODATA = '''<div style="width:%(image_width)dpx">NO DATA</div>''' % dict(image_height=image_height, image_width=image_width)
if plots:
station_bst_zero = plot_html(list(filter(lambda filename: '_00.png' in filename, plots))[0], image_width=image_width)
station_bst_one = plot_html(list(filter(lambda filename: '_01.png' in filename, plots))[0], image_width=image_width)
station_zero_norm = plot_html(list(filter(lambda filename: '_00_norm.png' in filename, plots))[0], image_width=image_width)
station_one_norm = plot_html(list(filter(lambda filename: '_01_norm.png' in filename, plots))[0], image_width=image_width)
else:
station_bst_zero = NODATA
station_bst_one = NODATA
station_zero_norm = NODATA
station_one_norm = NODATA
return html_template % dict(station_bst_one=station_bst_one,
station_bst_zero=station_bst_zero,
station_zero_norm=station_zero_norm,
station_one_norm=station_one_norm,
station_name=station_name)
def station_beamlet_statistics_plots_html(parset, rel_plot_files):
overview_rows_all = [create_plot_per_station(rel_plot_files, station_name) for station_name in get_station_list()['all']]
overview_rows_core = [create_plot_per_station(rel_plot_files, station_name) for station_name in get_station_list()['core']]
overview_rows_remote = [create_plot_per_station(rel_plot_files, station_name) for station_name in get_station_list()['remote']]
overview_rows_international = [create_plot_per_station(rel_plot_files, station_name) for station_name in get_station_list()['international']]
html_template = '''
<html>
<head>
<style type="text/css"> /* Style the tab */
.tab {
overflow: hidden;
border: 1px solid #ccc;
background-color: #f1f1f1;
}
/* Style the buttons that are used to open the tab content */
.tab button {
background-color: inherit;
float: left;
border: none;
outline: none;
cursor: pointer;
padding: 14px 16px;
transition: 0.3s;
}
/* Change background color of buttons on hover */
.tab button:hover {
background-color: #ddd;
}
/* Create an active/current tablink class */
.tab button.active {
background-color: #ccc;
}
/* Style the tab content */
.tabcontent {
display: none;
padding: 6px 12px;
border: 1px solid #ccc;
border-top: none;
}
span {
display: inline-block;
}
img {
width: 400px;
}</style>
<title>%(title)s</title> </head>
<body>
<div class="tab">
<button class="tablinks active" onclick="openCity(event, 'All')">All stations</button>
<button class="tablinks" onclick="openCity(event, 'Core')">Core</button>
<button class="tablinks" onclick="openCity(event, 'Remote')">Remote</button>
<button class="tablinks" onclick="openCity(event, 'International')">International</button>
</div>
<center>
<div>
<img src="http://astron.nl/lofartools/img/LOFARlogo.png"></img>
<h1>BST plots for %(title)s</h1>
</div>
</center>
<div id="All" class="tabcontent" style="display: block;">
<center>
<h2>All Stations</h2>
<p>
<ul>
<li><a href="../%(index_filename)s">Observation</a> <a href="../../%(index_filename)s">Projects</a></li>
<li><a href="https://www.astron.nl/radio-observatory/observing-capabilities/depth-technical-information/data-quality-inspection/data-qu">What do I see here?</a></li>
</ul>
</p>
<table>
%(overview_rows_all)s
</table>
</center>
</div>
<div id="Core" class="tabcontent" style="display: none;">
<center>
<h2>Core Stations</h2>
<p>
<ul>
<li><a href="../%(index_filename)s">Observation</a> <a href="../../%(index_filename)s">Projects</a></li>
<li><a href="https://www.astron.nl/radio-observatory/observing-capabilities/depth-technical-information/data-quality-inspection/data-qu">What do I see here?</a></li>
</ul>
</p>
<table>
%(overview_rows_core)s
</table>
</center>
</div>
<div id="Remote" class="tabcontent" style="display: none;">
<center>
<h2>Remote Stations</h2>
<p>
<ul>
<li><a href="../%(index_filename)s">Observation</a> <a href="../../%(index_filename)s">Projects</a></li>
<li><a href="https://www.astron.nl/radio-observatory/observing-capabilities/depth-technical-information/data-quality-inspection/data-qu">What do I see here?</a></li>
</ul>
</p>
<table>
%(overview_rows_remote)s
</table>
</center>
</div>
<div id="International" class="tabcontent" style="display: none;">
<center>
<h2>International Stations</h2>
<p>
<ul>
<li><a href="../%(index_filename)s">Observation</a> <a href="../../%(index_filename)s">Projects</a></li>
<li><a href="https://www.astron.nl/radio-observatory/observing-capabilities/depth-technical-information/data-quality-inspection/data-qu">What do I see here?</a></li>
</ul>
</p>
<table>
%(overview_rows_international)s
</table>
</center>
</div>
<script>
function openCity(evt, cityName) {
// Declare all variables
var i, tabcontent, tablinks;
// Get all elements with class="tabcontent" and hide them
tabcontent = document.getElementsByClassName("tabcontent");
for (i = 0; i < tabcontent.length; i++) {
tabcontent[i].style.display = "none";
}
// Get all elements with class="tablinks" and remove the class "active"
tablinks = document.getElementsByClassName("tablinks");
for (i = 0; i < tablinks.length; i++) {
tablinks[i].className = tablinks[i].className.replace(" active", "");
}
// Show the current tab, and add an "active" class to the link that opened the tab
document.getElementById(cityName).style.display = "block";
evt.currentTarget.className += " active";
}
</script>
</body>
</html>'''
return html_template % {
'index_filename' : INDEX_HTML_FILENAME,
'title' : parset['sas_id'],
'overview_rows_all': '\n'.join(overview_rows_all),
'overview_rows_core' : '\n'.join(overview_rows_core),
'overview_rows_remote': '\n'.join(overview_rows_remote),
'overview_rows_international': '\n'.join(overview_rows_international),
}
"""
Calculates statistics over the values in the given array, optionally after having applied clipping of these values.
Args:
in_values: 1-dimensional array containing values
in_thresholds: Tuple (lower, upper) used to determine the number of values (after any clipping) that are
below the lower-threshold and those that are above the upper-threshold. Either or both lower
and upper can be 'None', in which case the corresponding return value (n_lower and n_upper
respectively will hold 'None' as well)
in_clipping_range: Tuple (low, high) reflecting the range outside which clipping should be applied. When left
empty (None, None) is passed and no clipping is performed.
in_clip_values: Tuple (floor, ceiling) respectively reflecting the lowest and highest value allowed in the
array when clipping is enabled (see in_clipping_range).
Result:
A tuple (mean, standard_deviation, variance, (n_lower, n_upper)) calculated from the array after any clipping.
"""
def calculate_statistics(in_values=[], in_thresholds=(None, None), in_clipping_range=(None,None), in_clip_values=(None,None)):
lower =low = floor = 0
upper = high = ceiling = 1
n_lower = n_upper = None
do_floor = (in_clipping_range[low] != None)
do_ceiling = (in_clipping_range[high] != None)
do_lower_thresholding = (in_thresholds[lower] != None)
do_upper_thresholding = (in_thresholds[upper] != None)
if do_lower_thresholding: n_lower=0
if do_upper_thresholding: n_upper=0
# Perform sanity checking of arguments
if do_floor and in_clip_values[floor] == None: raise Exception("Invalid clip value provided (floor)")
if do_ceiling and in_clip_values[ceiling] == None: raise Exception("Invalid clip value provided (ceiling)")
if do_floor and do_ceiling and in_clip_values[floor] > in_clip_values[ceiling]: raise Exception("Ceiling is lower than floor")
# Perform a copy so that we leave in_values alone
aux_values=list(in_values)
for value in aux_values:
if do_floor and value < in_clipping_range[floor]: value = in_clip_values[floor]
if do_ceiling and value > in_clipping_range[ceiling]: value = in_clip_values[ceiling]
if do_lower_thresholding and (value < in_thresholds[lower]): n_lower += 1
if do_upper_thresholding and (value > in_thresholds[upper]): n_upper += 1
mean = statistics.mean(aux_values)
stdev = statistics.stdev(aux_values, mean)
variance = statistics.variance(aux_values, mean)
return mean, stdev, variance, (n_lower, n_upper)
def observation_table_row(parset, file_sizes_dict, f0seqnr_completeness_dict, html_root, ascii_table=False):
r'''
Return one row for the main index table, as HTML by default or as plain
text when ``ascii_table`` is True, summarising the observation described
by ``parset`` together with its completeness figures.
'''
percentage_complete = -1
if file_sizes_dict is not None:
percentage_complete = file_sizes_dict['percentage_complete']
# Alternative (more truthful) way of calculating output loss figure (for correlator data)
mean = stdev = variance = n_odd_sized_data = n_data_total = -1
if f0seqnr_completeness_dict is not None and len(f0seqnr_completeness_dict.values()) > 0:
n_data_total = len(f0seqnr_completeness_dict.values())
try:
# Apply a data completeness threshold (percentage) for calculating the number of odd data products and mean
th = 100*(1-DATA_INCOMPLETE_THRESHOLD)
mean, stdev, variance, (n_odd_sized_data, _) = calculate_statistics(f0seqnr_completeness_dict.values(), (th, None), (None, th), (None, 100.0))
except Exception as e:
logging.warning('SAS %r: calculate_statistics() gave exception: %s' % (parset['sas_id'], str(e)))
format_dict = parset.copy()
format_dict['index_filename'] = INDEX_HTML_FILENAME
format_dict['nr_subbands'] = len(parset['subband_list'])
format_dict['percentage_complete'] = int(round(percentage_complete))
format_dict['complete_f0seqnr_tooltip'] = "Average completeness percentage of data products\n(based on f0seqnr-sizes)"
if mean < 0:
format_dict['complete_f0seqnr_mean'] = "N.A."
else:
format_dict['complete_f0seqnr_tooltip'] += "\n\nData completeness statistics:\nmean = %0.2f\nstdev = %0.2f\nvariance = %0.2f\nodd size data ratio = %r/%r (= %0.2f%%)" % (mean, stdev, variance, n_odd_sized_data, n_data_total, 100*n_odd_sized_data/n_data_total)
format_dict['complete_f0seqnr_mean'] = str(int(floor(mean))) + "%"
if os.path.exists(html_root+'/'+parset['sas_id']+'/Stations/station_beamlets.html'):
format_dict['station_beamlet_link'] = '<a href="'+parset['sas_id']+'/Stations/station_beamlets.html">BST</a>'
else:
format_dict['station_beamlet_link'] = '---'
if parset['file_name']:
format_dict['parset_relpath'] = os.path.relpath(
parset['file_name'], html_root)
html = '''
<tr><th><a href="%(sas_id)s/%(index_filename)s">%(sas_prefix)s%(sas_id)s</a> </th>
<td>%(campaign_name)s </td> <td>%(target)s </td>
<td>%(station_beamlet_link)s </td>
<td align="right"><a href="%(sas_id)s/%(index_filename)s">%(percentage_complete)d%%</a></td>
<td align="right"><a href="%(sas_id)s/%(index_filename)s" title="%(complete_f0seqnr_tooltip)s">%(complete_f0seqnr_mean)s</a></td>
<td>%(antenna_set)s </td> <td>%(band_filter)s </td>
<td>%(start_time)s </td> <td>%(stop_time)s </td>
<td>%(clock_mhz)d </td> <td>%(nr_subbands)s </td>
<td><a href="%(parset_relpath)s">parset</a> </td>
</tr>
''' % format_dict
else:
html = '''
<tr><th><a href="%(sas_id)s/%(index_filename)s">%(sas_prefix)s%(sas_id)s</a> </th>
<td>%(campaign_name)s </td> <td>%(target)s </td>
<td align="right">????</td>
<td>%(antenna_set)s </td> <td>%(band_filter)s </td>
<td>%(start_time)s </td> <td>%(stop_time)s </td>
<td>%(clock_mhz)d </td> <td>%(nr_subbands)s </td>
<td>no parset </td>
</tr>
''' % format_dict
if ascii_table:
html = '''%(sas_prefix)s%(sas_id)s %(campaign_name)8s %(target)19s %(percentage_complete)d%% %(complete_f0seqnr_mean)s %(antenna_set)16s %(band_filter)11s %(start_time)s -- %(stop_time)s''' % format_dict
return html
def main_index_html(plot_root, html_root):
r'''
(Re)write the main index pages under ``html_root``: the short index
(observations of the last week), the full index, and the plain-text
index, covering every SAS ID that has both plots and an HTML directory.
'''
logging.debug('main_index_html(plot_root = %r, html_root = %r)', plot_root, html_root)
index_name = os.path.join(html_root, INDEX_HTML_FILENAME)
index_txt_name = os.path.join(html_root, INDEX_TXT_FILENAME)
long_index_name = os.path.join(html_root, FULLINDEX_HTML_FILENAME)
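# The short index only lists observations that started within the last week.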
beginning_of_short_index = datetime.datetime.now() - datetime.timedelta(7)
plot_sas_id_pattern5 = os.path.join(plot_root, '[0123456789]'*5)
plot_sas_id_pattern6 = os.path.join(plot_root, '[0123456789]'*6)
plot_sas_id_pattern7 = os.path.join(plot_root, '[0123456789]'*7)
sas_id_with_plot = [int(os.path.split(sas_id)[-1])
for sas_id in (glob.glob(plot_sas_id_pattern5)
+ glob.glob(plot_sas_id_pattern6)
+ glob.glob(plot_sas_id_pattern7))]
html_sas_id_pattern5 = os.path.join(html_root, '[0123456789]'*5)
html_sas_id_pattern6 = os.path.join(html_root, '[0123456789]'*6)
html_sas_id_pattern7 = os.path.join(html_root, '[0123456789]'*7)
sas_id_with_html = [int(os.path.split(sas_id)[-1])
for sas_id in (glob.glob(html_sas_id_pattern5)
+ glob.glob(html_sas_id_pattern6)
+ glob.glob(html_sas_id_pattern7))]
sas_ids = sorted(set(sas_id_with_html).intersection(set(sas_id_with_plot)),
reverse = True)
unsorted_parsets = [parset_summary(sas_id, find_parset(sas_id))
for sas_id in sas_ids]
parsets = sorted(unsorted_parsets,
key = lambda parset: parset['start_time'],
reverse = True)
logging.debug('In main_index_html(): gathered parsets')
file_sizes_array = []
f0seqnr_completeness_array = []
for parset in parsets:
obs_plot_root = os.path.join(plot_root, parset['sas_id'])
if parset['file_name']:
new_parset_name = os.path.join(obs_plot_root,
os.path.basename(parset['file_name']))
else:
new_parset_name = None
if parset['file_name'] \
and (':' in parset['file_name'] or os.path.exists(parset['file_name'])) \
and not os.path.exists(new_parset_name):
force_mkdir(obs_plot_root)
if ':' in parset['file_name']:
subprocess.check_call(['scp', parset['file_name'], obs_plot_root])
else:
shutil.copy2(parset['file_name'], obs_plot_root)
if new_parset_name and os.path.exists(new_parset_name):
parset['file_name'] = new_parset_name
else:
parset['file_name'] = None
file_size_data = None
file_sizes_txt = os.path.join(obs_plot_root, 'file-sizes.txt')
if os.path.exists(file_sizes_txt):
file_sizes_dict = parse_file_sizes(file_sizes_txt)
file_size_data = file_size_analysis(parset, file_sizes_dict)
file_sizes_array.append(file_size_data)
f0seqnr_completeness_dict = None
f0seqnr_sizes_txt = os.path.join(obs_plot_root, 'f0seqnr-sizes.txt')
if os.path.exists(f0seqnr_sizes_txt):
f0seqnr_sizes_dict = parse_file_sizes(f0seqnr_sizes_txt, os.path.dirname)
f0seqnr_completeness_dict = f0seqnr_size_analysis(parset, f0seqnr_sizes_dict)
f0seqnr_completeness_array.append(f0seqnr_completeness_dict)
logging.debug('In main_index_html() writing main html output files')
open(index_txt_name, 'w').write('\n'.join(
[observation_table_row(parset, file_size, f0seqnr_completeness_dict, html_root, ascii_table=True)
for parset, file_size, f0seqnr_completeness_dict in zip(parsets, file_sizes_array, f0seqnr_completeness_array)
if parset['start_time'].strip() != '']))
logging.debug('In main_index_html() %s has been written', index_txt_name)
data_completeness_threshold = 100*(1-DATA_INCOMPLETE_THRESHOLD)
open(index_name, 'w').write('''
<html>
<head>
<meta http-equiv="refresh" content="60">
<title>LOFAR Inspection plots</title>
</head>
<body>
<h1>LOFAR inspection plots</h1>
<p><i>Last modified: %s UTC</i> <a href="%s">Full list</a> <a href="%s">Ascii table</a></p>
<table>
<tr><th>SAS ID</th> <th>Campaign</th> <th>Target</th> <th>DynSpec</th> <th title="Percentage of good sized data products\n\nWhere 'good sized' is defined as:\nData products with more than %0.2f%% completeness.\n Datasize is determined as size of non-zero blocks.\n Completeness is compared to maximum file size.\nSAPs with a different number of subbands will therefore lead to lower reported completeness">Compl</th> <th title="Average completeness percentage of good sized (based on f0seqnr sizes) data products\n\nWhere 'good sized' is defined as:\nData products with more than %0.2f%% completeness">Compl*</th> <th>AntennaSet</th> <th>Band</th> <th>Start</th> <th>End</th> <th>Clock</th> <th>Subb</th> <th>Parset</th></tr>
%s
</table>
</body>
</html>
''' % ( time.asctime(time.gmtime()),
os.path.basename(long_index_name),
os.path.basename(index_txt_name),
data_completeness_threshold,
data_completeness_threshold,
'\n'.join([observation_table_row(parset, file_size, f0seqnr_completeness_dict, html_root)
for parset, file_size, f0seqnr_completeness_dict in zip(parsets, file_sizes_array, f0seqnr_completeness_array)
if parset['start_time'].strip() != '' and datetime.datetime.strptime(parset['start_time'], ('%Y-%m-%d %H:%M:%S.%f' if '.' in parset['start_time'] else '%Y-%m-%d %H:%M:%S')) > beginning_of_short_index]
)))
logging.debug('In main_index_html() %s has been written', index_name)
open(long_index_name, 'w').write('''
<html>
<head>
<meta http-equiv="refresh" content="60">
<title>LOFAR Inspection plots</title>
</head>
<body>
<h1>LOFAR inspection plots</h1>
<p><i>Last modified: %s UTC</i> <a href="%s">Short index</a></p>
<table>
<tr><th>SAS ID</th> <th>Campaign</th> <th>Target</th> <th>DynSpec</th> <th title="Percentage of good sized data products\n\nWhere 'good sized' is defined as:\nData products with more than %0.2f%% completeness">Compl</th> <th title="Average completeness percentage of good sized data products (based on f0seqnr sizes)\n\nWhere 'good sized' is defined as:\nData products with more than %0.2f%% completeness">Compl*</th> <th>AntennaSet</th> <th>Band</th> <th>Start</th> <th>End</th> <th>Clock</th> <th>Subb</th> <th>Parset</th></tr>
%s
</table>
</body>
</html>
''' % ( time.asctime(time.gmtime()),
os.path.basename(index_name),
data_completeness_threshold,
data_completeness_threshold,
'\n'.join([observation_table_row(parset, file_size, f0seqnr_completeness_dict, html_root)
for parset, file_size, f0seqnr_completeness_dict in zip(parsets, file_sizes_array, f0seqnr_completeness_array)]
)))
logging.debug('In main_index_html() %s has been written', long_index_name)
if __name__ == '__main__':
try:
sys.exit(create_html_main(sys.argv))
except SystemExit:
raise
except Exception:
    logging.error('%s: %s', sys.exc_info()[0].__name__, str(sys.exc_info()[1]))
raise #sys.exit(2)