Select Git revision
AdassChecks.py
-
John Swinbank authoredJohn Swinbank authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
create_add_virtual_instrument.sql.py 21.27 KiB
#!/usr/bin/python
# create_add_virtual_instrument.sql.py: generate add_virtual_instrument.sql
#
# Copyright (C) 2016, 2017
# ASTRON (Netherlands Institute for Radio Astronomy)
# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
#
# This file is part of the LOFAR software suite.
# The LOFAR software suite is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# The LOFAR software suite is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
#
# $Id$
with open("add_virtual_instrument.sql", 'w+') as output:
output.write("-- This file was generated by the \'create_add_virtual_instrument.sql.py\' script.\n")
output.write("-- Please do not modify this sql file, but modify and the python script if you want a different virtual instrument.\n\n")
output.write("-- resourceassignment password for testing on mcu005 is the same as the password on the president's luggage +6\n")
output.write("-- psql resourceassignment -U resourceassignment -f add_virtual_instrument.sql -W\n")
output.write("BEGIN;\n")
output.write("-- start with clearing the old virtual instrument model\n")
output.write("-- for some reason truncate takes a looooong time to complete, so use delete from\n")
output.write("DELETE FROM virtual_instrument.resource_group_to_resource_group CASCADE;\n")
output.write("DELETE FROM virtual_instrument.resource_to_resource_group CASCADE;\n")
output.write("DELETE FROM virtual_instrument.resource_group CASCADE;\n")
output.write("DELETE FROM virtual_instrument.resource_group_type CASCADE;\n")
output.write("DELETE FROM virtual_instrument.resource CASCADE;\n")
output.write("DELETE FROM virtual_instrument.resource_type CASCADE;\n")
output.write("DELETE FROM virtual_instrument.unit CASCADE;\n")
output.write("-- end of initial cleanup\n\n")
# NOTE: each id is fixed once used in operations. When adding ids, only assign unused numbers!
# Removing is only possible once no longer present in operational DBs. (Better comment it out and mark as deprecated.)
# Repurposing an id would require renumbering ids in DBs in operations on every software roll-out and roll-back...
def assert_unique_ids(d, msg):
""" checks if all d.values() are unique """
if len(d.values()) != len(set(d.values())):
raise Exception(msg)
def assert_unique_ids0(d, msg):
""" checks if all d.values()[0] are unique """
if len(d.values()) != len({v[0] for v in d.values()}):
raise Exception(msg)
class SqlKeyword:
""" SQL keywords like DEFAULT, NULL, TRUE, FALSE, ... need to be written without
Python str quotes when printed, also inside a list, tuple, set, dict, ...
Wrapping such keywords in this simple object does the trick.
E.g. in ipython:
str(['DEFAULT']) -> "['DEFAULT']"
str([SqlKeyword('DEFAULT')]) -> '[DEFAULT]'
"""
def __init__(self, s):
self.s = s
def __repr__(self):
return self.s
def format_inverse_dict(d):
""" {'foo': 1, 'bar': 2, ...} -> "(1, 'foo'), (2, 'bar'), ..." """
return str(sorted(zip(d.values(), d.keys())))[1:-1]
def format_inverse_dict2(d, out_sort_idx):
""" {'foo': (1, 10), 'bar': (2, 20), ...} -> "(1, 'foo', 10), (2, 'bar', 20), ..." """
return str(sorted([(x[1][0], x[0], x[1][1]) for x in d.items()], key=lambda v: v[out_sort_idx]))[1:-1]
def format_inverse_dict3(d, out_sort_idx):
""" {'foo': (1, 10, 100), 'bar': (2, 20, 200), ...} -> "(1, 'foo', 10, 100), (2, 'bar', 20, 200), ..." """
return str(sorted([(x[1][0], x[0], x[1][1], x[1][2]) for x in d.items()], key=lambda v: v[out_sort_idx]))[1:-1]
#----- resource unit -----
resource_units = {
# name : id
'rsp_channel_bit' : 0,
'bytes' : 1,
'rcu_board' : 2,
'bits/second' : 3,
'cores' : 4,
}
assert_unique_ids(resource_units, 'Error: Not all ids in resource_units are unique!')
output.write("INSERT INTO virtual_instrument.unit VALUES %s;\n" %
format_inverse_dict(resource_units))
#----- resource type -----
resource_types = {
# name : (id, unit id)
'rsp' : ( 0, resource_units['rsp_channel_bit']),
'tbb' : ( 1, resource_units['bytes']),
'rcu' : ( 2, resource_units['rcu_board']),
'bandwidth' : ( 3, resource_units['bits/second']),
'processor' : ( 4, resource_units['cores']),
'storage' : ( 5, resource_units['bytes']),
}
assert_unique_ids0(resource_types, 'Error: Not all ids in resource_types are unique!')
output.write("INSERT INTO virtual_instrument.resource_type VALUES %s;\n" %
format_inverse_dict2(resource_types, 0))
#----- resource_group type -----
resource_group_types = {
# name : id
'instrument' : 0,
'cluster' : 1,
'station_group' : 2,
'station' : 3,
# NOTE: Some resource groups physically contain >1 resource of the same (e.g. storage)
# type plus for each such resource another resource of type bandwidth to access it.
# In that case, we place each resource + its bandwidth under their own 'virtual'
# node group. The virtual node groups fall under the original resource group.
# This allows the Resource Assigner to produce a claim for a data product on one of
# the resources and *its* bandwidth (instead of another resource's bandwidth).
# A 'standalone' bandwidth resource is still possible (e.g. inter-cluster bandwidth).
# The 'virtual' name was generalized from 'node_group' carrying a node (sub-)set.
'virtual' : 4,
'node' : 5,
}
assert_unique_ids(resource_group_types, 'Error: Not all ids in resource_group_types are unique!')
output.write("INSERT INTO virtual_instrument.resource_group_type VALUES %s;\n" %
format_inverse_dict(resource_group_types))
#----- resource groups -----
resource_groups = {
# name : ( id, resource_group_type id)
'LOFAR' : ( 0, resource_group_types['instrument']),
'CEP4' : ( 1, resource_group_types['cluster']),
'COBALT' : ( 2, resource_group_types['cluster']),
'DRAGNET' : ( 63, resource_group_types['cluster']),
'computenodes' : ( 3, resource_group_types['virtual']), # CEP4
'gpunodes' : ( 4, resource_group_types['virtual']), # CEP4
'drgnodes' : ( 64, resource_group_types['virtual']), # DRAGNET
'dragproc' : ( 65, resource_group_types['node']), # DRAGNET
#stationgroups = ['STATIONS', 'SUPERTERP', 'CORE', 'REMOTE', 'INTERNATIONAL', 'LORA', 'AARTFAAC']
# 'stationgroup1' : ( ??, resource_group_types['station_group']),
}
## CEP4 cpu nodes (cpu01 - cpu50)
num_cpu_nodes = 50 # if we get more cpu nodes, do not incr this, but instead add new sequence(s) to avoid repurposing ids
cpu01_id = 5 # ids 5-54 assigned
for i in xrange(num_cpu_nodes):
resource_groups['cpunode%02d' % (i + 1)] = (cpu01_id + i, resource_group_types['node'])
## COBALT nodes (cbt001 - cbt008)
num_cbt_nodes = 8 # if we get more cbt nodes, do not incr this, but instead add new sequence(s) to avoid repurposing ids
cbt001_id = 55 # ids 55-62 assigned
for i in xrange(num_cbt_nodes):
resource_groups['cbt%03d' % (i + 1)] = (cbt001_id + i, resource_group_types['node'])
## DRAGNET nodes (drg01 - drg23)
num_drg_nodes = 23
drg01_id = 66 # ids 66-134 assigned (NOTE: 63,64,65 assigned to 'DRAGNET', 'drgnodes', 'dragproc')
for i in xrange(num_drg_nodes):
resource_groups['drg%02d' % (i + 1)] = (drg01_id + 3 * i + 0, resource_group_types['node'])
resource_groups['drg%02d-data1' % (i + 1)] = (drg01_id + 3 * i + 1, resource_group_types['virtual'])
resource_groups['drg%02d-data2' % (i + 1)] = (drg01_id + 3 * i + 2, resource_group_types['virtual'])
##station this list should be imported from a central location?
##We might need to switch to CS001HBA1 CS003LBA notation at some point? This could also be a specification vs resource allocation problem.
#stations = ['CS001', 'CS002', 'CS003', 'CS004', 'CS005', 'CS006', 'CS007', 'CS011', 'CS013', 'CS017',
#'CS021', 'CS024', 'CS026', 'CS028', 'CS030', 'CS031', 'CS032', 'CS101', 'CS103', 'CS201', 'CS301',
#'CS302', 'CS401', 'CS501', 'RS106', 'RS205', 'RS208', 'RS210', 'RS305', 'RS306', 'RS307', 'RS310', 'RS406',
#'RS407', 'RS408', 'RS409', 'RS503', 'RS508', 'RS509', 'DE601', 'DE602', 'DE603', 'DE604', 'DE605', 'FR606',
#'SE607', 'UK608','DE609','PL610','PL611','PL612']
assert_unique_ids0(resource_groups, 'Error: Not all ids in resource_groups are unique!')
output.write("INSERT INTO virtual_instrument.resource_group VALUES %s;\n" %
format_inverse_dict2(resource_groups, 0))
#----- resource -----
resources = {
# resource name : ( id, resource_type id)
'CEP4_bandwidth:/data' : (116, resource_types['bandwidth'][0]),
'CEP4_storage:/data' : (117, resource_types['storage'][0]),
'dragproc_bandwidth:/data' : (118, resource_types['bandwidth'][0]),
'dragproc_processors' : (119, resource_types['processor'][0]),
'dragproc_storage:/data' : (120, resource_types['storage'][0]),
}
## CEP4 cpunodes
cpu_node_resource_id0 = 0 # ids 0-99 assigned
for i in xrange(num_cpu_nodes):
resources['cpunode%02d_bandwidth' % (i + 1)] = (cpu_node_resource_id0 + 2 * i + 0, resource_types['bandwidth'][0])
resources['cpunode%02d_processors' % (i + 1)] = (cpu_node_resource_id0 + 2 * i + 1, resource_types['processor'][0])
## COBALT nodes
cbt_resource_id0 = 100 # ids 100-115 assigned
for i in xrange(num_cbt_nodes):
resources['cbt%03d_bandwidth' % (i + 1)] = (cbt_resource_id0 + 2 * i + 0, resource_types['bandwidth'][0])
resources['cbt%03d_processors' % (i + 1)] = (cbt_resource_id0 + 2 * i + 1, resource_types['processor'][0])
## DRAGNET nodes (except dragproc, listed above)
drg_resource_id0 = 121 # ids 121-235 assigned
for i in xrange(num_drg_nodes):
resources['drg%02d_bandwidth:/data1' % (i + 1)] = (drg_resource_id0 + 5 * i + 0, resource_types['bandwidth'][0])
resources['drg%02d_bandwidth:/data2' % (i + 1)] = (drg_resource_id0 + 5 * i + 1, resource_types['bandwidth'][0])
resources['drg%02d_processors' % (i + 1)] = (drg_resource_id0 + 5 * i + 2, resource_types['processor'][0])
resources['drg%02d_storage:/data1' % (i + 1)] = (drg_resource_id0 + 5 * i + 3, resource_types['storage'][0])
resources['drg%02d_storage:/data2' % (i + 1)] = (drg_resource_id0 + 5 * i + 4, resource_types['storage'][0])
assert_unique_ids0(resources, 'Error: Not all ids in resources are unique!')
output.write("INSERT INTO virtual_instrument.resource VALUES %s;\n" %
format_inverse_dict2(resources, 0))
#----- resource_to_resource_group -----
resource_to_resource_group_relations = {
# child resource id : (DEFAULT, parent resource_group id)
resources['CEP4_bandwidth:/data'][0] : (SqlKeyword('DEFAULT'), resource_groups['CEP4'][0]),
resources['CEP4_storage:/data'][0] : (SqlKeyword('DEFAULT'), resource_groups['CEP4'][0]),
resources['dragproc_bandwidth:/data'][0] : (SqlKeyword('DEFAULT'), resource_groups['dragproc'][0]),
resources['dragproc_processors'][0] : (SqlKeyword('DEFAULT'), resource_groups['dragproc'][0]),
resources['dragproc_storage:/data'][0] : (SqlKeyword('DEFAULT'), resource_groups['dragproc'][0]),
}
## CEP4 cpunodes
for i in xrange(num_cpu_nodes):
resource_to_resource_group_relations[ resources['cpunode%02d_bandwidth' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['cpunode%02d' % (i + 1)][0])
resource_to_resource_group_relations[ resources['cpunode%02d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['cpunode%02d' % (i + 1)][0])
## COBALT nodes
for i in xrange(num_cbt_nodes):
resource_to_resource_group_relations[ resources['cbt%03d_bandwidth' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['cbt%03d' % (i + 1)][0])
resource_to_resource_group_relations[ resources['cbt%03d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['cbt%03d' % (i + 1)][0])
## DRAGNET nodes (except dragproc, listed above)
for i in xrange(num_drg_nodes):
resource_to_resource_group_relations[ resources['drg%02d_bandwidth:/data1' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drg%02d-data1' % (i + 1)][0])
resource_to_resource_group_relations[ resources['drg%02d_bandwidth:/data2' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drg%02d-data2' % (i + 1)][0])
resource_to_resource_group_relations[ resources['drg%02d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drg%02d' % (i + 1)][0])
resource_to_resource_group_relations[ resources['drg%02d_storage:/data1' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drg%02d-data1' % (i + 1)][0])
resource_to_resource_group_relations[ resources['drg%02d_storage:/data2' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drg%02d-data2' % (i + 1)][0])
if len(resource_to_resource_group_relations) != len(resources):
raise Exception('Error: resource_to_resource_group_relations and resources must have the same size!')
output.write("INSERT INTO virtual_instrument.resource_to_resource_group VALUES %s;\n" %
format_inverse_dict2(resource_to_resource_group_relations, 1))
#----- resource_capacity -----
resource_capacities = {
# resource id : (DEFAULT, available cap, total cap)
# CEP4 total I/O bandwidth spec-ed and bench-ed at 80 GBytes/s. Safety guess: 75% achievable
resources['CEP4_bandwidth:/data'][0] : (SqlKeyword('DEFAULT'), int(0.75 * 80*1024*1024*1024 * 8), int(0.75 * 80*1024*1024*1024 * 8)), # see prev line; bits/second
resources['CEP4_storage:/data'][0] : (SqlKeyword('DEFAULT'), 3369564904320*1024, 3369564904320*1024), # from df(1) on /cep4-fs (~3.1 PiB)
resources['dragproc_bandwidth:/data'][0] : (SqlKeyword('DEFAULT'), 450*1024*1024 * 8, 450*1024*1024 * 8), # 450 MiB/s (dd(1): 490-530) disk write to /data
resources['dragproc_processors'][0] : (SqlKeyword('DEFAULT'), 16, 16), # dual 8 core (+ Hyperthr.) CPUs
resources['dragproc_storage:/data'][0] : (SqlKeyword('DEFAULT'), 23669957984256, 23669957984256), # ~22 TiB
}
## CEP4 cpunodes
for i in xrange(num_cpu_nodes):
# CEP4 nodes: bandwidth: FDR infiniband: iperf3: ~45.4 Gbit/s (tuned), 26 Gbit/s (untuned out of the box)
resource_capacities[ resources['cpunode%02d_bandwidth' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 26*1000*1000*1000, 26*1000*1000*1000) # see prev line; bits/second
resource_capacities[ resources['cpunode%02d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 24, 24) # dual 12 core (+ Hyperthr.) CPUs
## COBALT nodes
for i in xrange(num_cbt_nodes):
resource_capacities[ resources['cbt%03d_bandwidth' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 2 * 26*1000*1000*1000, 2 * 26*1000*1000*1000) # see CEP4 node, but dual i/f; bits/second
resource_capacities[ resources['cbt%03d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 24, 24) # dual 12 core (+ Hyperthr.) CPUs
## DRAGNET nodes (except dragproc, listed above)
for i in xrange(num_drg_nodes):
resource_capacities[ resources['drg%02d_bandwidth:/data1' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 225*1024*1024 * 8, 225*1024*1024 * 8) # 225 MiB/s (dd(1): 288, cp(1): 225-279)
resource_capacities[ resources['drg%02d_bandwidth:/data2' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 225*1024*1024 * 8, 225*1024*1024 * 8) # idem
resource_capacities[ resources['drg%02d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 16, 16) # dual 8 core (+ Hyperthr.) CPUs
resource_capacities[ resources['drg%02d_storage:/data1' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 7913168961536, 7913168961536) # ~7.2 TiB
resource_capacities[ resources['drg%02d_storage:/data2' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), 7913168961536, 7913168961536) # ~7.2 TiB
if len(resource_capacities) != len(resources):
raise Exception('Error: resource_capacities and resources must have the same size!')
output.write("INSERT INTO resource_monitoring.resource_capacity VALUES %s;\n" %
format_inverse_dict3(resource_capacities, 1))
#----- resource_availability -----
resource_availabilities = {
# resource id : (DEFAULT, is_available)
resources['CEP4_bandwidth:/data'][0] : (SqlKeyword('DEFAULT'), SqlKeyword('TRUE')),
resources['CEP4_storage:/data'][0] : (SqlKeyword('DEFAULT'), SqlKeyword('TRUE')),
resources['dragproc_bandwidth:/data'][0] : (SqlKeyword('DEFAULT'), SqlKeyword('TRUE')),
resources['dragproc_processors'][0] : (SqlKeyword('DEFAULT'), SqlKeyword('TRUE')),
resources['dragproc_storage:/data'][0] : (SqlKeyword('DEFAULT'), SqlKeyword('TRUE')),
}
## CEP4 cpunodes
for i in xrange(num_cpu_nodes):
resource_availabilities[ resources['cpunode%02d_bandwidth' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
resource_availabilities[ resources['cpunode%02d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
## COBALT nodes
for i in xrange(num_cbt_nodes):
resource_availabilities[ resources['cbt%03d_bandwidth' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
resource_availabilities[ resources['cbt%03d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
## DRAGNET nodes (except dragproc, listed above)
for i in xrange(num_drg_nodes):
resource_availabilities[ resources['drg%02d_bandwidth:/data1' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
resource_availabilities[ resources['drg%02d_bandwidth:/data2' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
resource_availabilities[ resources['drg%02d_processors' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
resource_availabilities[ resources['drg%02d_storage:/data1' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
resource_availabilities[ resources['drg%02d_storage:/data2' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), SqlKeyword('TRUE'))
if len(resource_availabilities) != len(resources):
raise Exception('Error: resource_availabilities and resources must have the same size!')
output.write("INSERT INTO resource_monitoring.resource_availability VALUES %s;\n" %
format_inverse_dict2(resource_availabilities, 1))
#----- resource_group_to_resource_group -----
resource_group_to_resource_group_relations = {
# child resource_group id : (DEFAULT, parent resource_group id)
resource_groups['LOFAR'][0] : (SqlKeyword('DEFAULT'), SqlKeyword('NULL')), # 'LOFAR' is the root, it has no parent
resource_groups['CEP4'][0] : (SqlKeyword('DEFAULT'), resource_groups['LOFAR'][0]),
resource_groups['COBALT'][0] : (SqlKeyword('DEFAULT'), resource_groups['LOFAR'][0]),
resource_groups['DRAGNET'][0] : (SqlKeyword('DEFAULT'), resource_groups['LOFAR'][0]),
resource_groups['computenodes'][0] : (SqlKeyword('DEFAULT'), resource_groups['CEP4'][0]),
resource_groups['gpunodes'][0] : (SqlKeyword('DEFAULT'), resource_groups['CEP4'][0]),
resource_groups['drgnodes'][0] : (SqlKeyword('DEFAULT'), resource_groups['DRAGNET'][0]),
resource_groups['dragproc'][0] : (SqlKeyword('DEFAULT'), resource_groups['DRAGNET'][0]),
}
## CEP4 cpunodes
for i in xrange(num_cpu_nodes):
resource_group_to_resource_group_relations[ resource_groups['cpunode%02d' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['computenodes'][0])
## COBALT nodes
for i in xrange(num_cbt_nodes):
resource_group_to_resource_group_relations[ resource_groups['cbt%03d' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['COBALT'][0])
## DRAGNET nodes (except dragproc, listed above)
for i in xrange(num_drg_nodes):
resource_group_to_resource_group_relations[ resource_groups['drg%02d' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drgnodes'][0])
resource_group_to_resource_group_relations[ resource_groups['drg%02d-data1' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drg%02d' % (i + 1)][0])
resource_group_to_resource_group_relations[ resource_groups['drg%02d-data2' % (i + 1)][0] ] = (SqlKeyword('DEFAULT'), resource_groups['drg%02d' % (i + 1)][0])
if len(resource_group_to_resource_group_relations) != len(resource_groups):
raise Exception('Error: resource_group_to_resource_group_relations and resource_groups must have the same size!')
output.write("INSERT INTO virtual_instrument.resource_group_to_resource_group VALUES %s;\n" %
format_inverse_dict2(resource_group_to_resource_group_relations, 1))
output.write("COMMIT;\n")