diff --git a/.gitattributes b/.gitattributes index 3a0e74b5f615806f88ddb81d93adce9850aacb07..c00d962d88fd525b33d9d0999ea2785bafe2b2c9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2452,6 +2452,7 @@ MAC/APL/PIC/RSP_Protocol/include/APL/RSP_Protocol/SDOSelection.h -text MAC/APL/PIC/RSP_Protocol/src/Bitmode.cc -text MAC/APL/PIC/RSP_Protocol/src/SDOMode.cc -text MAC/APL/PIC/RSP_Protocol/src/SDOSelection.cc -text +MAC/APL/PIC/RSP_Protocol/test/tCableDelaySettings.cc -text MAC/APL/PIC/RSP_Protocol/test/tRCUSettings.cc -text MAC/APL/PIC/RSP_Protocol/test/tRCUSettings.stdout -text MAC/APL/PIC/TBB_Driver/src/UdpIpTools.cc -text @@ -3930,6 +3931,38 @@ MAC/_System/lofar29.sysconf -text svneol=native#application/octet-stream MAC/_System/lofar30.journal -text svneol=native#application/octet-stream MAC/_System/lofar30.sysconf -text svneol=native#application/octet-stream MAC/doc/package.dox -text +QA/CMakeLists.txt -text +QA/QA_Common/CMakeLists.txt -text +QA/QA_Common/bin/CMakeLists.txt -text +QA/QA_Common/bin/find_hdf5 -text +QA/QA_Common/bin/show_hdf5_info -text +QA/QA_Common/lib/CMakeLists.txt -text +QA/QA_Common/lib/__init__.py -text +QA/QA_Common/lib/cep4_utils.py -text +QA/QA_Common/lib/geoconversions.py -text +QA/QA_Common/lib/hdf5_io.py -text +QA/QA_Common/test/CMakeLists.txt -text +QA/QA_Common/test/create_test_hypercube -text +QA/QA_Common/test/t_cep4_utils.py -text +QA/QA_Common/test/t_cep4_utils.run -text +QA/QA_Common/test/t_cep4_utils.sh -text +QA/QA_Common/test/t_hdf5_io.py -text +QA/QA_Common/test/t_hdf5_io.run -text +QA/QA_Common/test/t_hdf5_io.sh -text +QA/QA_Common/test/test_utils.py -text +QA/QA_Service/CMakeLists.txt -text +QA/QA_Service/bin/CMakeLists.txt -text +QA/QA_Service/bin/qa_service -text +QA/QA_Service/bin/qa_service.ini -text +QA/QA_Service/lib/CMakeLists.txt -text +QA/QA_Service/lib/QABusListener.py -text +QA/QA_Service/lib/__init__.py -text +QA/QA_Service/lib/config.py -text +QA/QA_Service/lib/qa_service.py -text +QA/QA_Service/test/CMakeLists.txt -text +QA/QA_Service/test/t_qa_service.py -text +QA/QA_Service/test/t_qa_service.run -text +QA/QA_Service/test/t_qa_service.sh -text RTCP/Cobalt/BrokenAntennaInfo/CMakeLists.txt -text RTCP/Cobalt/BrokenAntennaInfo/test/CMakeLists.txt -text RTCP/Cobalt/BrokenAntennaInfo/test/debugbeaminfo.py -text @@ -4650,6 +4683,7 @@ SAS/ResourceAssignment/ResourceAssigner/lib/resource_availability_checker.py -te SAS/ResourceAssignment/ResourceAssigner/lib/schedulechecker.py -text SAS/ResourceAssignment/ResourceAssigner/lib/schedulers.py -text SAS/ResourceAssignment/ResourceAssigner/test/CMakeLists.txt -text +SAS/ResourceAssignment/ResourceAssigner/test/radb_common_testing.py -text SAS/ResourceAssignment/ResourceAssigner/test/t_resource_availability_checker.py -text SAS/ResourceAssignment/ResourceAssigner/test/t_resource_availability_checker.run -text SAS/ResourceAssignment/ResourceAssigner/test/t_resource_availability_checker.sh -text @@ -4668,12 +4702,10 @@ SAS/ResourceAssignment/ResourceAssignmentDatabase/config.py -text SAS/ResourceAssignment/ResourceAssignmentDatabase/doc/ResourceAssignmentDatabase.md -text SAS/ResourceAssignment/ResourceAssignmentDatabase/doc/package.dox -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radb.py -text -SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/CMakeLists.txt -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/README -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_functions_and_triggers.sql -text 
SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_notifications.sql -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_resource_allocation_statics.sql -text -SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_triggers.sql -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_virtual_instrument.sql -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/create_add_notifications.sql.py -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/create_add_virtual_instrument.sql.py -text @@ -4684,6 +4716,7 @@ SAS/ResourceAssignment/ResourceAssignmentDatabase/radbpglistener -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radbpglistener.ini -text SAS/ResourceAssignment/ResourceAssignmentDatabase/radbpglistener.py -text SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/CMakeLists.txt -text +SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/radb_common_testing.py -text SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/radb_performance_test.py -text SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/t_radb.py -text SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/t_radb.run -text diff --git a/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt b/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt index 0b273a6cb9d98974144dd56a275f48c3fafcd6f1..17217dd8176aa5f5c8d1cbbe06316d268a01395d 100644 --- a/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt +++ b/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt @@ -35,4 +35,5 @@ python_install( support/utilities.py support/xmllogging.py support/usagestats.py + support/feedback_version.py DESTINATION lofarpipe) diff --git a/CEP/Pipeline/recipes/sip/bin/pulsar_pipeline.py b/CEP/Pipeline/recipes/sip/bin/pulsar_pipeline.py index 738a6ccb6775cec4c41d55481513398115646d8a..9edef4fddeb5a4e0467d3eb580da9198550761bd 100755 --- a/CEP/Pipeline/recipes/sip/bin/pulsar_pipeline.py +++ b/CEP/Pipeline/recipes/sip/bin/pulsar_pipeline.py @@ -189,7 +189,7 @@ class pulsar_pipeline(control): self.logger.error("Could not read feedback from %s: %s" % (metadata_file,e)) return 1 - self.send_feedback_processing(parameterset({'feedback_version': feedback_version})) + #self.send_feedback_processing(parameterset({'feedback_version': feedback_version})) self.send_feedback_dataproducts(metadata) return 0 diff --git a/CEP/Pipeline/recipes/sip/helpers/metadata.py b/CEP/Pipeline/recipes/sip/helpers/metadata.py index fdf83778f1ecbca87d9ab6073ebcef66cfe93387..128ec3cae75620be8ea3cf5e6f2144ad0f245593 100644 --- a/CEP/Pipeline/recipes/sip/helpers/metadata.py +++ b/CEP/Pipeline/recipes/sip/helpers/metadata.py @@ -236,10 +236,10 @@ class Correlated(DataProduct): 'duration' : endTime - startTime, 'integrationInterval' : exposure, 'centralFrequency' : spw.getcell('REF_FREQUENCY', 0), - 'channelWidth' : spw.getcell('RESOLUTION', [0])[0], + 'channelWidth' : spw.getcell('RESOLUTION', 0)[0], 'channelsPerSubband' : spw.getcell('NUM_CHAN', 0), # Assume subband name has format 'SB-nn' - 'subband' : int(spw.getcell('NAME', 'SB000')[3:]), + 'subband' : int(spw.getcell('NAME', 0)[3:]), 'stationSubband' : 0 ### NOT CORRECT! ### }) except Exception, error: diff --git a/CMake/LofarPackageList.cmake b/CMake/LofarPackageList.cmake index 3c531b6a1adc86e9eee5e58936aece1567ebedc1..ef02966164e359c9a5e26dae903fcc1045e50a7e 100644 --- a/CMake/LofarPackageList.cmake +++ b/CMake/LofarPackageList.cmake @@ -1,7 +1,7 @@ # - Create for each LOFAR package a variable containing the absolute path to # its source directory. 
# -# Generated by gen_LofarPackageList_cmake.sh at ma 16 apr 2018 13:39:17 CEST +# Generated by gen_LofarPackageList_cmake.sh at wo 18 jul 2018 15:04:44 CEST # # ---- DO NOT EDIT ---- # @@ -56,6 +56,7 @@ if(NOT DEFINED LOFAR_PACKAGE_LIST_INCLUDED) set(MAC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/MAC) set(LCU_SOURCE_DIR ${CMAKE_SOURCE_DIR}/LCU) set(LTA_SOURCE_DIR ${CMAKE_SOURCE_DIR}/LTA) + set(QA_SOURCE_DIR ${CMAKE_SOURCE_DIR}/QA) set(SubSystems_SOURCE_DIR ${CMAKE_SOURCE_DIR}/SubSystems) set(ALC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/LCS/ACC/ALC) set(PLC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/LCS/ACC/PLC) @@ -149,6 +150,8 @@ if(NOT DEFINED LOFAR_PACKAGE_LIST_INCLUDED) set(TaskManagementClient_SOURCE_DIR ${CMAKE_SOURCE_DIR}/MAC/Services/TaskManagement/Client) set(TaskManagementCommon_SOURCE_DIR ${CMAKE_SOURCE_DIR}/MAC/Services/TaskManagement/Common) set(TaskManagementServer_SOURCE_DIR ${CMAKE_SOURCE_DIR}/MAC/Services/TaskManagement/Server) + set(QA_Common_SOURCE_DIR ${CMAKE_SOURCE_DIR}/QA/QA_Common) + set(QA_Service_SOURCE_DIR ${CMAKE_SOURCE_DIR}/QA/QA_Service) set(Cobalt_SOURCE_DIR ${CMAKE_SOURCE_DIR}/RTCP/Cobalt) set(InputProc_SOURCE_DIR ${CMAKE_SOURCE_DIR}/RTCP/Cobalt/InputProc) set(OutputProc_SOURCE_DIR ${CMAKE_SOURCE_DIR}/RTCP/Cobalt/OutputProc) diff --git a/CMakeLists.txt b/CMakeLists.txt index ea914605bb0e6cfd80c3f8dcb4117ad3c8697aa1..b1cf98b456f67b28a88bb5432fc1dc90b0f38943 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ if(NOT DEFINED BUILD_PACKAGES) lofar_add_package(MAC) lofar_add_package(LCU) lofar_add_package(LTA) + lofar_add_package(QA) lofar_add_package(SubSystems) else(NOT DEFINED BUILD_PACKAGES) separate_arguments(BUILD_PACKAGES) diff --git a/LCS/PyCommon/datetimeutils.py b/LCS/PyCommon/datetimeutils.py index d21e9b36da20ed292b790f174ac43fea9b819d4c..038c880ea04a3f3b3027e62fc736835132c9c7f6 100644 --- a/LCS/PyCommon/datetimeutils.py +++ b/LCS/PyCommon/datetimeutils.py @@ -66,3 +66,37 @@ def format_timedelta(td): def parseDatetime(date_time): """ Parse the datetime format used in LOFAR parsets. """ return datetime.strptime(date_time, ('%Y-%m-%d %H:%M:%S.%f' if '.' 
in date_time else '%Y-%m-%d %H:%M:%S')) + +MDJ_EPOCH = datetime(1858, 11, 17, 0, 0, 0) + +def to_modified_julian_date(timestamp): + ''' + computes the modified_julian_date from a python datetime timestamp + :param timestamp: datetime a python datetime timestamp + :return: double, the modified_julian_date + ''' + return to_modified_julian_date_in_seconds(timestamp)/86400.0 + +def to_modified_julian_date_in_seconds(timestamp): + ''' + computes the modified_julian_date (in seconds as opposed to the official days) from a python datetime timestamp + :param timestamp: datetime a python datetime timestamp + :return: double, the modified_julian_date (fractional number of seconds since MJD_EPOCH) + ''' + return totalSeconds(timestamp - MDJ_EPOCH) + +def from_modified_julian_date(modified_julian_date): + ''' + computes the python datetime timestamp from a modified_julian_date + :param modified_julian_date: double, a timestamp expressed in modified_julian_date format (fractional number of days since MJD_EPOCH) + :return: datetime, the timestamp as python datetime + ''' + return from_modified_julian_date_in_seconds(modified_julian_date*86400.0) + +def from_modified_julian_date_in_seconds(modified_julian_date_secs): + ''' + computes the python datetime timestamp from a modified_julian_date (in seconds as opposed to the official days) + :param modified_julian_date: double, a timestamp expressed in modified_julian_date format (fractional number of seconds since MJD_EPOCH) + :return: datetime, the timestamp as python datetime + ''' + return MDJ_EPOCH + timedelta(seconds=modified_julian_date_secs) diff --git a/LCS/pyparameterset/src/__init__.py b/LCS/pyparameterset/src/__init__.py index 6a9810dff012da5b1ca6d4ac6e1abad14a118795..f84733f4f24f6da87d28f9eabac50e937f27f226 100755 --- a/LCS/pyparameterset/src/__init__.py +++ b/LCS/pyparameterset/src/__init__.py @@ -161,6 +161,16 @@ class parameterset(PyParameterSet): self.replace (str(k), str(v)) # k, v always type string + @staticmethod + def fromString(parset_string): + '''Create a parset from a plain text string. + Splits the string in lines, and parses each '=' seperated key/value pair. + ''' + lines = [l.strip() for l in parset_string.split('\n')] + kv_pairs = [tuple(l.split('=')) for l in lines if '=' in l] + parset_dict = dict(kv_pairs) + return parameterset(parset_dict) + def get(self, key): # type: (object) -> object """Get the parametervalue object of a parameter.""" diff --git a/LCU/checkhardware/check_hardware.py b/LCU/checkhardware/check_hardware.py index 333afef267e223f80d9237de9f207b7f94e252c8..06d18f2dc8e0897e5f088fd29a003909414f71f6 100755 --- a/LCU/checkhardware/check_hardware.py +++ b/LCU/checkhardware/check_hardware.py @@ -371,6 +371,11 @@ def safely_start_test_signal(start_cmd, stop_cmd): :param stop_cmd: the command to stop on exit as shell-executable string """ + # set things up sp signal is stopped when check_hardware terminates + register_signal_handlers(stop_cmd) + register_exit_handler(stop_cmd) + start_watchdog_daemon(os.getpid(), stop_cmd) # this alone would actually be sufficient + # start signal try: check_call(start_cmd, shell=True) @@ -378,11 +383,6 @@ def safely_start_test_signal(start_cmd, stop_cmd): logger.error("Could not start the test signal! Non-zero return code from start_cmd (%s)." 
% start_cmd, ex) raise - # set things up sp signal is stopped when check_hardware terminates - register_signal_handlers(stop_cmd) - register_exit_handler(stop_cmd) - start_watchdog_daemon(os.getpid(), stop_cmd) # this alone would actually be sufficient - def safely_start_test_signal_from_ParameterSet(settings): ''' diff --git a/LCU/checkhardware/checkhardware_lib/spectrum_checks/CMakeLists.txt b/LCU/checkhardware/checkhardware_lib/spectrum_checks/CMakeLists.txt index 2aaae58c10fc901e76fbb974969cc8e6e7155f17..291219377eeb9b62b637452e6851cdff46e7942a 100644 --- a/LCU/checkhardware/checkhardware_lib/spectrum_checks/CMakeLists.txt +++ b/LCU/checkhardware/checkhardware_lib/spectrum_checks/CMakeLists.txt @@ -2,7 +2,7 @@ set(_py_files __init__.py - cable_reflections.py + cable_reflection.py down.py down_old.py flat.py diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestmomadapter.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestmomadapter.py index edd7e331a0bbed13626e3d8ec2743dab5f017077..9f9117a575051ecfcdc33c9b739e05e59a6e2af4 100644 --- a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestmomadapter.py +++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestmomadapter.py @@ -56,6 +56,7 @@ class IngestBusListenerForMomAdapter(IngestBusListener): self._busname = busname self._broker = broker self._momrpc = momrpc + self._removed_export_ids = set() # keep track of which export_id's were removed, so we don't have to remove them again super(IngestBusListenerForMomAdapter, self).__init__(busname=busname, subjects=subjects, broker=broker, **kwargs) @@ -73,10 +74,16 @@ class IngestBusListenerForMomAdapter(IngestBusListener): export_id = int(job_id.split('_')[1]) if export_id and export_id not in self._momrpc.getObjectDetails(export_id): - logger.warn('Export job %s cannot be found (anymore) in mom. Removing export job from ingest queue', export_id) + if export_id not in self._removed_export_ids: + logger.warn('Export job %s cannot be found (anymore) in mom. 
Removing export job from ingest queue', export_id) + + # keep track of which export_id's were removed, so we don't have to remove them again + # this keeps stuff flowing faster + self._removed_export_ids.add(export_id) + + with IngestRPC(broker=self._broker) as ingest_rpc: + ingest_rpc.removeExportJob(export_id) - with IngestRPC(broker=self._broker) as ingest_rpc: - ingest_rpc.removeExportJob(export_id) return with ToBus(self._busname, broker=self._broker) as tobus: diff --git a/MAC/APL/PIC/RSP_Driver/src/RCURead.cc b/MAC/APL/PIC/RSP_Driver/src/RCURead.cc index 6fbaf0cbb2edb03413933139dcc3bcdba3287e08..ed623ebbf3147046bb052db15566a08eabf01523 100644 --- a/MAC/APL/PIC/RSP_Driver/src/RCURead.cc +++ b/MAC/APL/PIC/RSP_Driver/src/RCURead.cc @@ -81,15 +81,29 @@ GCFEvent::TResult RCURead::handleack(GCFEvent& event, GCFPortInterface& /*port*/ RCUSettings::Control& x = Cache::getInstance().getBack().getRCUSettings()()((global_blp * 2)); RCUSettings::Control& y = Cache::getInstance().getBack().getRCUSettings()()((global_blp * 2) + 1); + if (0 == GET_CONFIG("RSPDriver.LOOPBACK_MODE", i)) { - EPA_Protocol::RCUHandler cachedvalue = { x.getDelay(), 0, y.getDelay(), 0 }; + uint16 x_delay = x.getDelay(); + uint16 y_delay = y.getDelay(); + uint8 x_delay_low = x_delay << 9 >> 9; // leaves us with lower 7 bits + uint8 x_delay_high = x_delay >> 7; // removed the lower 7 bits + uint8 y_delay_low = y_delay << 9 >> 9; // leaves us with lower 7 bits + uint8 y_delay_high = y_delay >> 7; // removed the lower 7 bits + + EPA_Protocol::RCUHandler cachedvalue = { x_delay_low, + 0, + y_delay_low, + 0, + 0, + x_delay_high, + y_delay_high}; if (memcmp(&cachedvalue, &rcusettings.ap, sizeof(EPA_Protocol::RCUHandler))) { LOG_WARN("LOOPBACK CHECK FAILED: RCURead mismatch "); } } else { - x.setDelay(rcusettings.ap.input_delay_x); - y.setDelay(rcusettings.ap.input_delay_y); + x.setDelay(rcusettings.ap.input_delay_x + (rcusettings.ap.input_delay_x_high << 7)); + y.setDelay(rcusettings.ap.input_delay_y + (rcusettings.ap.input_delay_y_high << 7)); } return GCFEvent::HANDLED; diff --git a/MAC/APL/PIC/RSP_Driver/src/RCUWrite.cc b/MAC/APL/PIC/RSP_Driver/src/RCUWrite.cc index beb524875deaf3e000065c52fc974a3df308f3fe..271386be8ae5d05208cc5f578dd0b38494e81132 100644 --- a/MAC/APL/PIC/RSP_Driver/src/RCUWrite.cc +++ b/MAC/APL/PIC/RSP_Driver/src/RCUWrite.cc @@ -67,27 +67,38 @@ void RCUWrite::sendrequest() RCUSettings::Control& x = Cache::getInstance().getBack().getRCUSettings()()((global_blp * 2)); RCUSettings::Control& y = Cache::getInstance().getBack().getRCUSettings()()((global_blp * 2) + 1); - LOG_DEBUG(formatString("%d.X control=0x%08x", global_blp, x.getRaw())); - LOG_DEBUG(formatString("%d.Y control=0x%08x", global_blp, y.getRaw())); + LOG_DEBUG(formatString("%d.X control=0x%016llx", global_blp, x.getRaw())); + LOG_DEBUG(formatString("%d.Y control=0x%016llx", global_blp, y.getRaw())); EPARcuSettingsEvent rcusettings; rcusettings.hdr.set(MEPHeader::RCU_SETTINGS_HDR, 1 << (getCurrentIndex() / N_WRITES)); // also sets payload_length rcusettings.ap = EPA_Protocol::RCUHandler(); // new delay is active after datastream restart + uint16 x_delay = x.getDelay(); + uint16 y_delay = y.getDelay(); + uint8 x_delay_low = x_delay << 9 >> 9; // leaves us with lower 7 bits + uint8 x_delay_high = x_delay >> 7; // removed the lower 7 bits + uint8 y_delay_low = y_delay << 9 >> 9; // leaves us with lower 7 bits + uint8 y_delay_high = y_delay >> 7; // removed the lower 7 bits + switch (getCurrentIndex() % N_WRITES) { case 0: { - 
rcusettings.ap.input_delay_x = x.getDelay(); + rcusettings.ap.input_delay_x = x_delay_low; rcusettings.ap.enable_x = 0; - rcusettings.ap.input_delay_y = y.getDelay(); + rcusettings.ap.input_delay_y = y_delay_low; rcusettings.ap.enable_y = 0; + rcusettings.ap.input_delay_x_high = x_delay_high; + rcusettings.ap.input_delay_y_high = y_delay_high; } break; case 1: { - rcusettings.ap.input_delay_x = x.getDelay(); + rcusettings.ap.input_delay_x = x_delay_low; rcusettings.ap.enable_x = x.getEnable(); - rcusettings.ap.input_delay_y = y.getDelay(); + rcusettings.ap.input_delay_y = y_delay_low; rcusettings.ap.enable_y = y.getEnable(); + rcusettings.ap.input_delay_x_high = x_delay_high; + rcusettings.ap.input_delay_y_high = y_delay_high; } break; } diff --git a/MAC/APL/PIC/RSP_Driver/src/rspctl.cc b/MAC/APL/PIC/RSP_Driver/src/rspctl.cc index 4be6294fe610930715bb2ef1404e03f95941d515..a8fcd36f50e2a5afc71d10d4a97b3ae2d44e5454 100644 --- a/MAC/APL/PIC/RSP_Driver/src/rspctl.cc +++ b/MAC/APL/PIC/RSP_Driver/src/rspctl.cc @@ -486,8 +486,8 @@ void RCUCommand::send() setrcu.settings()(0) = m_control; for (int i = 0; i < setrcu.settings().extent(firstDim); i++) { - printf("control(%d) =0x%08x\n", i, setrcu.settings()(i).getRaw()); - printf("modified(%d)=0x%08x\n", i, setrcu.settings()(i).getModified()); + printf("control(%d) =0x%016llx\n", i, setrcu.settings()(i).getRaw()); + printf("modified(%d)=0x%016llx\n", i, setrcu.settings()(i).getModified()); } m_rspport.send(setrcu); @@ -505,7 +505,7 @@ GCFEvent::TResult RCUCommand::ack(GCFEvent& e) int rcuin = 0; for (int rcuout = 0; rcuout < get_ndevices(); rcuout++) { if (mask[rcuout]) { - logMessage(cout,formatString("RCU[%2d].control=0x%08x => %s, mode:%d, delay=%02d, att=%02d", + logMessage(cout,formatString("RCU[%2d].control=0x%016llx => %s, mode:%d, delay=%04d, att=%02d", rcuout, ack.settings()(rcuin).getRaw(), (ack.settings()(rcuin).getRaw() & 0x80) ? " ON" : "OFF", @@ -1333,7 +1333,7 @@ GCFEvent::TResult RSUCommand::ack(GCFEvent& e) for (int boardout = 0; boardout < get_ndevices(); boardout++) { if (mask[boardout]) { - logMessage(cout,formatString("RSU[%2d].control=0x%08x",boardout, ack.settings()(boardin++).getRaw())); + logMessage(cout,formatString("RSU[%2d].control=0x%016llx",boardout, ack.settings()(boardin++).getRaw())); } } } @@ -3786,39 +3786,39 @@ static void usage(bool exportMode) cout << "rspctl --rcu [--select=<set>] # show current rcu control setting" << endl; cout << "rspctl --rcu=0x00000000 [--select=<set>] # set the rcu control registers" << endl; cout << " mask value " << endl; - cout << " 0x0000007F INPUT_DELAY Sample delay for the data from the RCU." << endl; - cout << " 0x00000080 INPUT_ENABLE Enable RCU input." << endl; + cout << " 0x000000000080 INPUT_ENABLE Enable RCU input." 
<< endl; cout << endl; - cout << " 0x00000100 LBL-EN supply LBL antenna on (1) or off (0)" << endl; - cout << " 0x00000200 LBH-EN sypply LBH antenna on (1) or off (0)" << endl; - cout << " 0x00000400 HB-EN supply HB on (1) or off (0)" << endl; - cout << " 0x00000800 BANDSEL low band (1) or high band (0)" << endl; - cout << " 0x00001000 HB-SEL-0 HBA filter selection" << endl; - cout << " 0x00002000 HB-SEL-1 HBA filter selection" << endl; + cout << " 0x000000000100 LBL-EN supply LBL antenna on (1) or off (0)" << endl; + cout << " 0x000000000200 LBH-EN sypply LBH antenna on (1) or off (0)" << endl; + cout << " 0x000000000400 HB-EN supply HB on (1) or off (0)" << endl; + cout << " 0x000000000800 BANDSEL low band (1) or high band (0)" << endl; + cout << " 0x000000001000 HB-SEL-0 HBA filter selection" << endl; + cout << " 0x000000002000 HB-SEL-1 HBA filter selection" << endl; cout << " Options : HBA-SEL-0 HBA-SEL-1 Function" << endl; cout << " 0 0 210-270 MHz" << endl; cout << " 0 1 170-230 MHz" << endl; cout << " 1 0 110-190 MHz" << endl; cout << " 1 1 all off" << endl; - cout << " 0x00004000 VL-EN low band supply on (1) or off (0)" << endl; - cout << " 0x00008000 VH-EN high band supply on (1) or off (0)" << endl; + cout << " 0x000000004000 VL-EN low band supply on (1) or off (0)" << endl; + cout << " 0x000000008000 VH-EN high band supply on (1) or off (0)" << endl; cout << endl; - cout << " 0x00010000 VDIG-EN ADC supply on (1) or off (0)" << endl; - cout << " 0x00020000 LBL-LBH-SEL LB input selection 0=LBL, 1=LBH" << endl; - cout << " 0x00040000 LB-FILTER LB filter selection" << endl; - cout << " 0 10-90 MHz" << endl; - cout << " 1 30-80 MHz" << endl; - cout << " 0x00080000 ATT-CNT-4 on (1) is 1dB attenuation" << endl; - cout << " 0x00100000 ATT-CNT-3 on (1) is 2dB attenuation" << endl; - cout << " 0x00200000 ATT-CNT-2 on (1) is 4dB attenuation" << endl; - cout << " 0x00300000 ATT-CNT-1 on (1) is 8dB attenuation" << endl; - cout << " 0x00800000 ATT-CNT-0 on (1) is 16dB attenuation" << endl; + cout << " 0x000000010000 VDIG-EN ADC supply on (1) or off (0)" << endl; + cout << " 0x000000020000 LBL-LBH-SEL LB input selection 0=LBL, 1=LBH" << endl; + cout << " 0x000000040000 LB-FILTER LB filter selection" << endl; + cout << " 0 10-90 MHz" << endl; + cout << " 1 30-80 MHz" << endl; + cout << " 0x000000080000 ATT-CNT-4 on (1) is 1dB attenuation" << endl; + cout << " 0x000000100000 ATT-CNT-3 on (1) is 2dB attenuation" << endl; + cout << " 0x000000200000 ATT-CNT-2 on (1) is 4dB attenuation" << endl; + cout << " 0x000000300000 ATT-CNT-1 on (1) is 8dB attenuation" << endl; + cout << " 0x000000800000 ATT-CNT-0 on (1) is 16dB attenuation" << endl; cout << endl; - cout << " 0x01000000 PRSG pseudo random sequence generator on (1), off (0)" << endl; - cout << " 0x02000000 RESET on (1) hold board in reset" << endl; - cout << " 0x04000000 SPEC_INV Enable spectral inversion (1) if needed. see --specinv" << endl; - cout << " 0x08000000 TBD reserved" << endl; - cout << " 0xF0000000 RCU VERSION RCU version, read-only" << endl; + cout << " 0x000001000000 PRSG pseudo random sequence generator on (1), off (0)" << endl; + cout << " 0x000002000000 RESET on (1) hold board in reset" << endl; + cout << " 0x000004000000 SPEC_INV Enable spectral inversion (1) if needed. see --specinv" << endl; + cout << " 0x000008000000 TBD reserved" << endl; + cout << " 0x0000F0000000 RCU VERSION RCU version, read-only" << endl; + cout << " 0x7FF000000000 INPUT_DELAY Sample delay for the data from the RCU." 
<< endl; cout << endl; cout << "rspctl [ --rcumode |" << endl; cout << " --rcuprsg |" << endl; @@ -3840,7 +3840,7 @@ static void usage(bool exportMode) cout << " --rcuprsg[=0] # turn psrg on (or off)" << endl; cout << " --rcureset[=0] # hold rcu in reset (or take out of reset)" << endl; cout << " --rcuattenuation=[0..31] # set the RCU attenuation (steps of 0.25dB)" << endl; - cout << " --rcudelay=[0..127] # set the delay for rcu's (steps of 5ns or 6.25ns)" << endl; + cout << " --rcudelay=[0..4089] # set the delay for rcu's (steps of 5ns or 6.25ns)" << endl; cout << " --rcuenable[=0] # enable (or disable) input from RCU's" << endl; cout << endl; cout << "rspctl --specinv[=0] [--select=<set>] # enable (or disable) spectral inversion" << endl; @@ -4221,13 +4221,13 @@ Command* RSPCtl::parse_options(int argc, char** argv) case 'y': // --rcudelay controlopt = strtoul(optarg, 0, 0); - if (controlopt > 127) { - logMessage(cerr,"Error: --rcudelay value should be <= 127"); + if (controlopt > 4089) { + logMessage(cerr,"Error: --rcudelay value should be <= 4089"); rspctl_exit_code = EXIT_FAILURE; delete command; return 0; } - rcumodecommand->control().setDelay((uint8)controlopt); + rcumodecommand->control().setDelay((uint16)controlopt); break; case 'E': // --rcuenable diff --git a/MAC/APL/PIC/RSP_Protocol/CMakeLists.txt b/MAC/APL/PIC/RSP_Protocol/CMakeLists.txt index 0b4200f5fc9b1a66a7b095200f73aa5f5655fdb2..c270c7f5bacc3bb3879b0db30061f664418bdeb8 100644 --- a/MAC/APL/PIC/RSP_Protocol/CMakeLists.txt +++ b/MAC/APL/PIC/RSP_Protocol/CMakeLists.txt @@ -4,6 +4,7 @@ lofar_package(RSP_Protocol 6.0 DEPENDS Common MACIO RTCCommon) include(LofarFindPackage) lofar_find_package(Blitz REQUIRED) +lofar_find_package(UnitTest++) add_subdirectory(include/APL/RSP_Protocol) add_subdirectory(src) diff --git a/MAC/APL/PIC/RSP_Protocol/include/APL/RSP_Protocol/RCUSettings.h b/MAC/APL/PIC/RSP_Protocol/include/APL/RSP_Protocol/RCUSettings.h index 4dd92886410204f30e61f32b5838a0ad64340ab1..53985300253d9e8f141ee4de95f1bb30cd672268 100644 --- a/MAC/APL/PIC/RSP_Protocol/include/APL/RSP_Protocol/RCUSettings.h +++ b/MAC/APL/PIC/RSP_Protocol/include/APL/RSP_Protocol/RCUSettings.h @@ -56,7 +56,7 @@ public: class Control { public: - Control() : m_value(0x00000000), m_modified(0x00000000) {} + Control() : m_value(0x0000000000000000), m_modified(0x0000000000000000) {} // no virtual to prevent creation of virtual pointer table // which adds to the size of the struct @@ -153,10 +153,11 @@ public: // 0x04000000 free used to be SPEC_INV, SI now in DIAG/Bypass // 0x08000000 TBD reserved // 0xF0000000 VERSION RCU version //PD - void setRaw(uint32 raw) { m_value = raw; m_modified = 0xFFFFFFFF; } - uint32 getRaw() const { return m_value; } + // 0x7FFF00000000 INPUT_DELAY Sample delay for the data from the RCU. + void setRaw(uint64 raw) { m_value = raw; m_modified = 0xFFFFFFFFFFFFFFFF; } + uint64 getRaw() const { return m_value; } // set protocol part of the raw byte - void setProtocolRaw(uint32 raw) { + void setProtocolRaw(uint64 raw) { m_value = (m_value & RCU_HANDLER_MASK) | (raw & RCU_PROTOCOL_MASK); m_modified = RCU_PROTOCOL_MASK; } @@ -188,13 +189,16 @@ public: } uint8 getAttenuation() const { return (m_value & ATT_MASK) >> (11 + 8); } - // Set sample delay (true time delay). Valid values are 0..127 (7 bits) - void setDelay(uint8 value) { + // Set sample delay (true time delay). Valid values are 0..4089. 
+ // We will keep using bit mask for the m_modified but limit with and if + void setDelay(uint16 value) { + uint16 MAX_DELAY = 4089; + value = value < MAX_DELAY ? value : MAX_DELAY; m_value &= ~DELAY_MASK; - m_value |= (value & DELAY_MASK); + m_value |= ((uint64)value << 32) & DELAY_MASK; m_modified |= DELAY_MASK; } - uint8 getDelay() const { return m_value & DELAY_MASK; } + uint16 getDelay() const { return (m_value & DELAY_MASK) >> 32; } // Set rcu enable (0 = disable, 1 = enable) void setEnable(uint8 value) { @@ -219,12 +223,12 @@ public: // Reset value and modified mask. void reset() { - m_value = 0x00000000; - m_modified = 0x00000000; + m_value = 0x0000000000000000; + m_modified = 0x0000000000000000; } // Return modification mask - uint32 getModified() const { return m_modified; } + uint64 getModified() const { return m_modified; } // Assignment Control& operator=(const Control& rhs) { @@ -244,14 +248,14 @@ public: // print function for operator<< ostream& print (ostream& os) const - { os << formatString("%08X", m_value); return (os); } + { os << formatString("%016llx", m_value); return (os); } private: // constants used to set the appropriate mode static const uint32 m_mode[]; // masks used to set/get bits - static const uint32 DELAY_MASK = 0x0000007F; + static const uint64 DELAY_MASK = 0x7FFF00000000; static const uint32 ENABLE_MASK = 0x00000080; static const uint32 _30MHZ_MASK = 0x00040000; static const uint32 MODE_MASK = 0x0007FF00; @@ -273,8 +277,8 @@ public: static const uint32 RCU_PROTOCOL_MASK = 0xFFFFFF00; // ----- datamembers ----- - uint32 m_value; - uint32 m_modified; // mask of modified bits + uint64 m_value; + uint64 m_modified; // mask of modified bits }; // class Control /* get reference settings array */ diff --git a/MAC/APL/PIC/RSP_Protocol/src/EPA_Protocol.prot b/MAC/APL/PIC/RSP_Protocol/src/EPA_Protocol.prot index 5a010ffeaa89a0772d108761766ba5a908dc77fe..f71b6f2f1bff38a0a2ce1073b5e68a5006194c68 100644 --- a/MAC/APL/PIC/RSP_Protocol/src/EPA_Protocol.prot +++ b/MAC/APL/PIC/RSP_Protocol/src/EPA_Protocol.prot @@ -249,10 +249,13 @@ prelude = << PRELUDE_END struct RCUHandler { - uint8 input_delay_x:7; // input delay for X-receiver - uint8 enable_x :1; // enable X-receiver data output - uint8 input_delay_y:7; // input delay for Y-receiver - uint8 enable_y :1; // enable Y-receiver data output + uint8 input_delay_x:7; // input delay for X-receiver [6..0] + uint8 enable_x :1; // enable X-receiver data output + uint8 input_delay_y:7; // input delay for Y-receiver + uint8 enable_y :1; // enable Y-receiver data output + uint8 test_data; // register used verifying the RCU-RSP interface + uint8 input_delay_x_high; // the input delay for X-receiver [14..7] + uint8 input_delay_y_high; // the input delay for Y-receiver [14..7] }; struct CRControl diff --git a/MAC/APL/PIC/RSP_Protocol/test/CMakeLists.txt b/MAC/APL/PIC/RSP_Protocol/test/CMakeLists.txt index 5f69fe178c5a5df4eb7e20d0a8f406eabf334b60..93f46868405c89a96df527ac767252edb50d6f9d 100644 --- a/MAC/APL/PIC/RSP_Protocol/test/CMakeLists.txt +++ b/MAC/APL/PIC/RSP_Protocol/test/CMakeLists.txt @@ -6,6 +6,8 @@ include(LofarCTest) # files can be found. include_directories(${CMAKE_CURRENT_BINARY_DIR}) +lofar_add_test(tCableDelaySettings tCableDelaySettings.cc) + # Tests cannot be run at this stage, because they all need to connect to a # Service Broker, which has not been built yet. Therefore we will only build # the test programs. 
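The RCUWrite.cc, RCURead.cc and RCUSettings.h hunks above widen the RCU sample delay from 7 to 15 bits: the low 7 bits stay in the original register field, the remaining bits move to the new input_delay_*_high registers, and RCUSettings stores the clamped delay at bits 32..46 of the now 64-bit raw control word. A minimal Python sketch of that packing arithmetic follows; the names are illustrative and not part of the LOFAR code base.

# Illustrative sketch (not LOFAR code) of the delay packing used in the
# RCUWrite.cc / RCURead.cc / RCUSettings.h hunks above.

MAX_DELAY = 4089                 # clamp applied by RCUSettings::Control::setDelay()
DELAY_MASK = 0x7FFF00000000      # delay occupies bits 32..46 of the 64-bit raw word

def split_delay(delay):
    """Clamp a delay and split it into (low 7 bits, high bits), as RCUWrite.cc does."""
    delay = min(delay, MAX_DELAY)
    low = delay & 0x7F           # same result as (delay << 9) >> 9 on a uint16
    high = delay >> 7            # goes into the new input_delay_*_high register
    return low, high

def join_delay(low, high):
    """Reassemble the delay from the two register fields, as RCURead.cc does in loopback mode."""
    return low + (high << 7)

def delay_to_raw(delay):
    """Place the delay in the 64-bit raw control word, mirroring RCUSettings::Control::setDelay()."""
    return (min(delay, MAX_DELAY) << 32) & DELAY_MASK

if __name__ == '__main__':
    assert delay_to_raw(42) == 42 << 32  # same expectation as the tCableDelaySettings.cc test below
    for d in (0, 1, 127, 128, 4089, 4090):
        low, high = split_delay(d)
        assert join_delay(low, high) == min(d, MAX_DELAY)
        print('delay %4d -> low=0x%02x high=0x%02x' % (min(d, MAX_DELAY), low, high))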
diff --git a/MAC/APL/PIC/RSP_Protocol/test/tCableDelaySettings.cc b/MAC/APL/PIC/RSP_Protocol/test/tCableDelaySettings.cc new file mode 100644 index 0000000000000000000000000000000000000000..4ed0a344c22dd9139274587ee6eea374c1e6403b --- /dev/null +++ b/MAC/APL/PIC/RSP_Protocol/test/tCableDelaySettings.cc @@ -0,0 +1,108 @@ +//# Copyright (C) 2018 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, softwaresupport@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: $ + +//# Always #include <lofar_config.h> first! +#include <cstdio> +#include <iostream> +#include <lofar_config.h> +#include <Common/LofarLogger.h> +#include <APL/RSP_Protocol/RCUSettings.h> + +//# Includes +#include <UnitTest++.h> + +using namespace LOFAR; +using namespace std; +using namespace RSP_Protocol; + +SUITE(CableDelaysSettings) { + TEST(instantiates_delay_to_zero) { + RCUSettings rcu_settings; + rcu_settings().resize(1); + + uint32 delay = rcu_settings()(0).getDelay(); + CHECK_EQUAL(0, delay); + } + + TEST(get_delay_should_return_one_after_setting_to_one) { + RCUSettings rcu_settings; + rcu_settings().resize(1); + + rcu_settings()(0).setDelay(1); + uint32 delay = rcu_settings()(0).getDelay(); + CHECK_EQUAL(1, delay); + } + + TEST(get_delay_should_return_two_after_setting_to_two) { + RCUSettings rcu_settings; + rcu_settings().resize(1); + + rcu_settings()(0).setDelay(2); + uint32 delay = rcu_settings()(0).getDelay(); + CHECK_EQUAL(2, delay); + } + + TEST(get_delay_should_return_three_after_setting_to_three) { + RCUSettings rcu_settings; + rcu_settings().resize(1); + + rcu_settings()(0).setDelay(3); + uint32 delay = rcu_settings()(0).getDelay(); + CHECK_EQUAL(3, delay); + } + + TEST(get_delay_should_return_upper_limit_after_setting_to_upper_limit) { + // upper limit is 4089 + RCUSettings rcu_settings; + rcu_settings().resize(1); + + rcu_settings()(0).setDelay(4089); + uint32 delay = rcu_settings()(0).getDelay(); + CHECK_EQUAL(4089, delay); + } + + TEST(get_delay_should_return_upper_limit_after_setting_to_upper_limit_plus_one) { + // upper limit is 4089 + RCUSettings rcu_settings; + rcu_settings().resize(1); + + rcu_settings()(0).setDelay(4090); + uint32 delay = rcu_settings()(0).getDelay(); + CHECK_EQUAL(4089, delay); + } + + TEST(get_raw_should_return_set_delay_value_in_the_upper_half_of_the_bits) { + // upper limit is 4089 + RCUSettings rcu_settings; + rcu_settings().resize(1); + + rcu_settings()(0).setDelay(42); + uint64 raw = rcu_settings()(0).getRaw(); + CHECK_EQUAL((uint64)42 << 32, raw); + } + +} + +int main() { + INIT_LOGGER("tCableDelaySettings"); + + return UnitTest::RunAllTests() > 0; +} + diff --git a/MAC/Deployment/data/StaticMetaData/RSPConnections_local.dat b/MAC/Deployment/data/StaticMetaData/RSPConnections_local.dat index 
27f5bb66ca09145c25d78c525efc78642d40a02a..c4f3ee971564d58487312da919ac2db5afd257be 100644 --- a/MAC/Deployment/data/StaticMetaData/RSPConnections_local.dat +++ b/MAC/Deployment/data/StaticMetaData/RSPConnections_local.dat @@ -35,8 +35,8 @@ DE605_02 172.20.101.116 90:1b:0e:43:1b:d5 lofarD2-10-GbE DE605_03 172.20.101.114 90:1b:0e:43:1c:15 lofarD1-10-GbE FR606_00 10.211.6.2 00:25:90:92:58:CC ALLEGRO1-FR606 -FR606_01 10.212.6.2 00:25:90:61:77:40 ALLEGRO2-FR606 -FR606_02 10.213.6.2 00:25:90:61:78:14 ALLEGRO3-FR606 +FR606_01 10.212.6.2 0C:C4:7A:2B:02:8D ALLEGRO2-FR606-since-2018-07 +FR606_02 10.213.6.2 00:25:90:61:77:40 ALLEGRO3-FR606-since-2018-07 FR606_03 10.214.6.2 00:25:90:61:77:86 ALLEGRO4-FR606 SE607_00 10.211.7.2 00:60:dd:45:66:67 Dvalin-eth10 diff --git a/MAC/Services/src/ObservationControl2.py b/MAC/Services/src/ObservationControl2.py index 8e1de22dd474b5bebc09b95b880740aea2418ca8..e1a2d4c2581c681ad74177f38f2bf403bc44f2c6 100644 --- a/MAC/Services/src/ObservationControl2.py +++ b/MAC/Services/src/ObservationControl2.py @@ -24,8 +24,9 @@ from optparse import OptionParser from fabric.exceptions import NetworkError try: + # WARNING: This code only works with Fabric Version 1 from fabric import tasks - from fabric.api import env, run + from fabric.api import env, run, settings except ImportError as e: print str(e) print 'Please install python package fabric: sudo apt-get install fabric' @@ -63,15 +64,16 @@ class ObservationControlHandler(MessageHandlerInterface): killed = False - pid_line = run('pidof ObservationControl') - pids = pid_line.split(' ') + with settings(warn_only=True): + pid_line = run('pidof ObservationControl') + pids = pid_line.split(' ') - for pid in pids: - pid_sas_id = run("ps -p %s --no-heading -o command | awk -F[{}] '{ printf $2; }'" % pid) - if str(pid_sas_id) == str(sas_id): - logger.info("Killing ObservationControl with PID: %s for SAS ID: %s", pid, sas_id) - run('kill -SIGINT %s' % pid) - killed = True + for pid in pids: + pid_sas_id = run("ps -p %s --no-heading -o command | awk -F[{}] '{ printf $2; }'" % pid) + if str(pid_sas_id) == str(sas_id): + logger.info("Killing ObservationControl with PID: %s for SAS ID: %s", pid, sas_id) + run('kill -SIGINT %s' % pid) + killed = True return killed diff --git a/MAC/Services/test/tObservationControl2.py b/MAC/Services/test/tObservationControl2.py index fef8ecf3e145f27297d14169d4a1ea6323317a85..334cf8cdd5598cbd266fa73582e2863cd1845abe 100644 --- a/MAC/Services/test/tObservationControl2.py +++ b/MAC/Services/test/tObservationControl2.py @@ -40,6 +40,10 @@ class TestObservationControlHandler(unittest.TestCase): self.addCleanup(fabric_env_pathcher.stop) self.fabric_env_mock = fabric_env_pathcher.start() + fabric_settings_pathcher = mock.patch('lofar.mac.ObservationControl2.settings') + self.addCleanup(fabric_settings_pathcher.stop) + self.fabric_settings_mock = fabric_settings_pathcher.start() + logger_patcher = mock.patch('lofar.mac.ObservationControl2.logger') self.addCleanup(logger_patcher.stop) self.logger_mock = logger_patcher.start() @@ -55,15 +59,20 @@ class TestObservationControlHandler(unittest.TestCase): self.observation_control_handler._abort_observation_task(self.sas_id) self.fabric_run_mock.assert_any_call( - "ps -p %s --no-heading -o command | awk -F[{}] '{ print $2; }'" % self.pid1) + "ps -p %s --no-heading -o command | awk -F[{}] '{ printf $2; }'" % self.pid1) self.fabric_run_mock.assert_any_call( - "ps -p %s --no-heading -o command | awk -F[{}] '{ print $2; }'" % self.pid2) + "ps -p %s --no-heading -o command | 
awk -F[{}] '{ printf $2; }'" % self.pid2) def test_abort_observation_task_should_run_kill_when_sas_id_matches(self): self.observation_control_handler._abort_observation_task(self.sas_id) self.fabric_run_mock.assert_any_call('kill -SIGINT %s' % self.pid1) + def test_abort_observation_should_set_run_settings_with_warn_only_as_true(self): + self.observation_control_handler._abort_observation_task(self.sas_id) + + self.fabric_settings_mock.assert_called_with(warn_only=True) + @mock.patch.dict(os.environ, {'LOFARENV': 'TEST'}) def test_observation_control_should_select_test_host_if_lofar_environment_is_test(self): ObservationControlHandler() diff --git a/QA/CMakeLists.txt b/QA/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b17b024a033265167696087c70ebeab3a0ddd00 --- /dev/null +++ b/QA/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +lofar_add_package(QA_Common) +lofar_add_package(QA_Service) diff --git a/QA/QA_Common/CMakeLists.txt b/QA/QA_Common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5a4b1543df5a355492bffc8dc6407d8d1d89efa3 --- /dev/null +++ b/QA/QA_Common/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +lofar_package(QA_Common 1.0 DEPENDS pyparameterset PyCommon) + +include(PythonInstall) + +add_subdirectory(lib) +add_subdirectory(bin) +add_subdirectory(test) diff --git a/QA/QA_Common/bin/CMakeLists.txt b/QA/QA_Common/bin/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c72417466da349a4ec956415b4a2ac11ee0844ad --- /dev/null +++ b/QA/QA_Common/bin/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. 
+# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +lofar_add_bin_scripts(show_hdf5_info + find_hdf5) + diff --git a/QA/QA_Common/bin/find_hdf5 b/QA/QA_Common/bin/find_hdf5 new file mode 100755 index 0000000000000000000000000000000000000000..e5fe0540bda0cea0d7a245a06d3e886d36a4ac80 --- /dev/null +++ b/QA/QA_Common/bin/find_hdf5 @@ -0,0 +1,199 @@ +#!/usr/bin/env python + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +if __name__ == '__main__': + import logging + logger = logging.getLogger(__name__) + + import os + import os.path + import fnmatch + import glob + from optparse import OptionParser, OptionGroup + from datetime import datetime, timedelta + + from lofar.qa.hdf5_io import * + + # make sure we run in UTC timezone + os.environ['TZ'] = 'UTC' + + # Check the invocation arguments + parser = OptionParser(usage='find_hdf5 [options] <path_or_current_dir_if_omitted>', + description='find all h5 files in <path> matching the given filter options.') + + group = OptionGroup(parser, "Type", "Filter by observation/pipeline type. If all type options are omitted, then all types are selected") + group.add_option('-o', '--observation', dest='observation', action='store_true', default=False, + help='find observations. default: %default') + group.add_option('-p', '--pipeline', dest='pipeline', action='store_true', default=False, + help='find pipelines. default: %default') + parser.add_option_group(group) + + group = OptionGroup(parser, "Antenna", "Filter by antenna type (LBA/HBA). If all antenna options are omitted, then all types are selected. If an antenna option is given, then only observations are selected.") + group.add_option('--lba', dest='lba', action='store_true', default=False, + help='find LBA observations. default: %default') + group.add_option('--hba', dest='hba', action='store_true', default=False, + help='find HBA observations. default: %default') + parser.add_option_group(group) + + group = OptionGroup(parser, "Name/Project", "Filter by observation/project name. 
Wildcards are allowed.") + group.add_option('--name', dest='name', type='string', default=None, + help='find by observation name (use quotes when using wildcards). default: %default') + group.add_option('--project', dest='project', type='string', default=None, + help='find by project name/description (use quotes when using wildcards). default: %default') + parser.add_option_group(group) + + group = OptionGroup(parser, "Date/Duration", "Filter by starttime/endtime date and/or duration.") + group.add_option('-d', '--date', dest='date', type='string', default=None, + help='find by observations/pipelines by date (YYYY-MM-DD). default: %default') + group.add_option('--min_duration', dest='min_duration', type='string', default=None, + help='find by observations/pipelines which are at least this duration long (HH:MM). default: %default') + group.add_option('--max_duration', dest='max_duration', type='string', default=None, + help='find by observations/pipelines which are at most this duration long (HH:MM). default: %default') + parser.add_option_group(group) + + group = OptionGroup(parser, "Clusters", "Filter by cluster options.") + group.add_option('-c', '--clusters', dest='clusters', action="store_true", default=False, + help='find clustered h5 files. default: %default') + group.add_option('-C', '--no_clusters', dest='no_clusters', action="store_true", default=False, + help='find non-clustered h5 files. default: %default') + parser.add_option_group(group) + + group = OptionGroup(parser, "Output/Display", "Output format and display options. Display list of matching files by default.") + group.add_option('-i', '--info', dest='info', action='store_true', default=False, + help='show info for each matching file. default: %default') + group.add_option('-v', '--verbose', dest='verbose', action='store_true', default=False, + help='verbose logging. default: %default') + parser.add_option_group(group) + + (options, args) = parser.parse_args() + + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', + level=logging.DEBUG if options.verbose else logging.WARN) + + # parse the options; if no specific filter for either obs or pipeline was given, then search for both. + if not options.observation and not options.pipeline: + options.observation = True + options.pipeline = True + + # parse the options; if no specific filter for either lba or hba was given, then search for both. + if options.lba or options.hba: + options.observation = True + options.pipeline = False + + path = os.path.dirname(os.path.expanduser(args[0]) if len(args) == 1 else os.getcwd()) + + files = glob.glob(os.path.join(path, '*.h*5')) + + if path == os.getcwd(): + files = [os.path.basename(file) for file in files] + + files = sorted(files) + + info_dicts = {} + + # gather all info_dicts for all files... 
+ for file in files: + try: + info_dict = read_info_dict(file) + if info_dict: + info_dicts[file] = info_dict + except: + pass + + # ...and filter out the files that do not match the search filters + + if not (options.observation and options.pipeline): + if options.observation: + # keep observations + files = [f for f in files + if f in info_dicts and 'observation' in info_dicts[f].get('type', '').lower()] + + if options.pipeline: + # keep pipelines + files = [f for f in files + if f in info_dicts and 'pipeline' in info_dicts[f].get('type', '').lower()] + + + if not (options.lba and options.hba): + if options.lba: + # keep lba + files = [f for f in files + if f in info_dicts and 'lba' in info_dicts[f].get('antenna_array', '').lower()] + + if options.hba: + # keep hba + files = [f for f in files + if f in info_dicts and 'hba' in info_dicts[f].get('antenna_array', '').lower()] + + if options.name: + # keep matching task names + files = [f for f in files if f in info_dicts and + fnmatch.fnmatch(info_dicts[f].get('name', '').lower(), options.name.lower())] + + if options.project: + # keep matching project names + files = [f for f in files if f in info_dicts and + (fnmatch.fnmatch(info_dicts[f].get('project', '').lower(), options.project.lower()) or + fnmatch.fnmatch(info_dicts[f].get('project_description', '').lower(), options.project.lower()))] + + if options.date: + # keep matching date + options.date = datetime.strptime(options.date, '%Y-%m-%d').date() + files = [f for f in files if f in info_dicts and + 'start_time' in info_dicts[f] and info_dicts[f]['start_time'].date() == options.date] + + if options.min_duration: + # keep matching duration + hours, sep, minutes = options.min_duration.partition(':') + options.min_duration = timedelta(hours=int(hours), minutes=int(minutes)) + files = [f for f in files if f in info_dicts and + 'stop_time' in info_dicts[f] and info_dicts[f]['stop_time'].date() == options.date] + + if options.max_duration: + # keep matching duration + hours, sep, minutes = options.max_duration.partition(':') + options.max_duration = timedelta(hours=int(hours), minutes=int(minutes)) + files = [f for f in files + if f in info_dicts and info_dicts[f].get('duration', timedelta()) <= options.max_duration] + + if options.clusters or options.no_clusters: + # keep matching have/havenot clusters + def has_clusters(h5_path): + with h5py.File(h5_path, "r+") as file: + return len(file.get('clustering',{})) + + if options.clusters: + files = [f for f in files if has_clusters(f)] + + if options.no_clusters: + files = [f for f in files if not has_clusters(f)] + + # the final files list now contains only the files matching all given filters. + + # lastly, print the results... + if options.info: + # print the full file info + for file in files: + print read_info_from_hdf5(file, read_data_info=False) + else: + # just print the filtered filenames + print '\n'.join(files) + + + diff --git a/QA/QA_Common/bin/show_hdf5_info b/QA/QA_Common/bin/show_hdf5_info new file mode 100755 index 0000000000000000000000000000000000000000..b1ea563d55ea610cff6e9d5caf3fae79fcb21324 --- /dev/null +++ b/QA/QA_Common/bin/show_hdf5_info @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. 
+# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +if __name__ == '__main__': + import logging + logger = logging.getLogger(__name__) + + import os + import os.path + from optparse import OptionParser + + from lofar.qa.hdf5_io import * + + # make sure we run in UTC timezone + os.environ['TZ'] = 'UTC' + + # Check the invocation arguments + parser = OptionParser(usage='show_hdf5_info <input_MS_extract_hdf5_file> [options]', + description='show the meta data for the given MS_extract hdf5 file.') + parser.add_option('-d', '--data', dest='data', action='store_true', default=False, help='show data info (SAPs, #baselines, #subbands, #timeslots etc). (warning, slow!) default: %default') + + (options, args) = parser.parse_args() + + if len(args) != 1: + print parser.print_help() + exit(-1) + + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', + level=logging.WARN) + + hdf_path = os.path.expanduser(args[0]) + + print read_info_from_hdf5(hdf_path, read_data_info=options.data, read_parset_info=True) diff --git a/QA/QA_Common/lib/CMakeLists.txt b/QA/QA_Common/lib/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..01be7c1d72e33506e1a92e13b64096d1a8b54c93 --- /dev/null +++ b/QA/QA_Common/lib/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +python_install( + __init__.py + cep4_utils.py + hdf5_io.py + geoconversions.py + DESTINATION lofar/qa) + diff --git a/QA/QA_Common/lib/__init__.py b/QA/QA_Common/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6248f495c54e31977e3f61b2c86ebb9600501d21 --- /dev/null +++ b/QA/QA_Common/lib/__init__.py @@ -0,0 +1,16 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. 
+# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. diff --git a/QA/QA_Common/lib/cep4_utils.py b/QA/QA_Common/lib/cep4_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..66c4c6d44b190f0592084b7475cda16d3d9b8990 --- /dev/null +++ b/QA/QA_Common/lib/cep4_utils.py @@ -0,0 +1,209 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +from subprocess import check_output, Popen, PIPE +from random import randint + +import logging +logger = logging.getLogger(__name__) + +CEP4_HEAD_NODE = 'head.cep4.control.lofar' +LOFARSYS_AT_CEP4_HEAD_NODE = 'lofarsys@%s' % (CEP4_HEAD_NODE,) + +def ssh_cmd_list(host): + """ + returns a subprocess compliant command list to do an ssh call to the given node + uses ssh option -tt to force remote pseudo terminal + uses ssh option -q for ssh quiet mode (no ssh warnings/errors) + uses ssh option -o StrictHostKeyChecking=no to prevent prompts about host keys + :param host: the node name or ip address + :return: a subprocess compliant command list + """ + return ['ssh', '-T', '-q', '-o StrictHostKeyChecking=no', host] + + +def wrap_command_in_cep4_head_node_ssh_call(cmd): + """wrap the command in an ssh call to head.cep4 + :param list cmd: a subprocess cmd list + cpu node. Otherwise, the command is executed on the head node. 
+    :return: the same subprocess cmd list, but then wrapped with cep4 ssh calls +    """ +    return ssh_cmd_list(LOFARSYS_AT_CEP4_HEAD_NODE) + cmd + +def wrap_command_in_cep4_random_cpu_node_ssh_call(cmd, via_head=True): +    """wrap the command in an ssh call to an available random cep4 cpu node (via head.cep4) +    :param list cmd: a subprocess cmd list +    :param bool via_head: when True, route the cmd first via the cep4 head node +    :return: the same subprocess cmd list, but then wrapped with cep4 ssh calls +    """ +    # pick a random available cpu node +    node_nrs = get_cep4_available_cpu_nodes() +    node_nr = node_nrs[randint(0, len(node_nrs)-1)] +    return wrap_command_in_cep4_cpu_node_ssh_call(cmd, node_nr, via_head=via_head) + +def wrap_command_in_cep4_available_cpu_node_with_lowest_load_ssh_call(cmd, via_head=True): +    """wrap the command in an ssh call to the available cep4 cpu node with the lowest load (via head.cep4) +    :param list cmd: a subprocess cmd list +    :param bool via_head: when True, route the cmd first via the cep4 head node +    :return: the same subprocess cmd list, but then wrapped with cep4 ssh calls +    """ +    lowest_load_node_nr = get_cep4_available_cpu_node_with_lowest_load() +    return wrap_command_in_cep4_cpu_node_ssh_call(cmd, lowest_load_node_nr, via_head=via_head) + +def wrap_command_in_cep4_cpu_node_ssh_call(cmd, cpu_node_nr, via_head=True): +    """wrap the command in an ssh call to the given cep4 cpu node (via head.cep4) +    :param list cmd: a subprocess cmd list +    :param int cpu_node_nr: the number of the cpu node where to execute the command +    :param bool via_head: when True, route the cmd first via the cep4 head node +    :return: the same subprocess cmd list, but then wrapped with cep4 ssh calls +    """ +    # hard-coded cpu-node hostname. Might change for future clusters or cluster upgrades.
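+    # e.g. cpu_node_nr=1 resolves to 'lofarsys@cpu01.cep4' below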
+ lofarsys_at_cpu_node = 'lofarsys@cpu%02d.cep4' % (cpu_node_nr,) + remote_cmd = ssh_cmd_list(lofarsys_at_cpu_node) + cmd + if via_head: + return wrap_command_in_cep4_head_node_ssh_call(remote_cmd) + else: + return remote_cmd + +def wrap_command_for_docker(cmd, image_name, image_label=''): + """wrap the command to be run in a docker container for the lofarsys user and environment + :param list cmd: a subprocess cmd list + :param string image_name: the name of the docker image to run + :param string image_label: the optional label of the docker image to run + :return: the same subprocess cmd list, but then wrapped with docker calls + """ + #fetch the lofarsys user id and group id first from the cep4 head node + id_string = '%s:%s' % (check_output(wrap_command_in_cep4_head_node_ssh_call(['id', '-u'])).strip(), + check_output(wrap_command_in_cep4_head_node_ssh_call(['id', '-g'])).strip()) + + #return the docker run command for the lofarsys user and environment + return ['docker', 'run', '--rm', '--net=host', '-v', '/data:/data', + '-u', id_string, + '-v', '/etc/passwd:/etc/passwd:ro', + '-v', '/etc/group:/etc/group:ro', + '-v', '$HOME:$HOME', + '-e', 'HOME=$HOME', + '-e', 'USER=$USER', + '-w', '$HOME', + '%s:%s' % (image_name, image_label) if image_label else image_name] + cmd + +def get_cep4_available_cpu_nodes(): + """ + get a list of cep4 cpu nodes which are currently up and running according to slurm + :return: a list of cpu node numbers (ints) for the up and running cpu nodes + """ + available_cep4_nodes = [] + + try: + logger.debug('determining available cep4 cpu nodes') + + # find out which nodes are available + cmd = ['sinfo -p cpu -t idle,mix'] + cmd = wrap_command_in_cep4_head_node_ssh_call(cmd) + + logger.debug('executing command: %s', ' '.join(cmd)) + out = check_output(cmd) + lines = out.split('\n') + for state in ['idle', 'mix']: + try: + line = next(l for l in lines if state in l).strip() + # get nodes string part of line: + nodes_part = line.split(' ')[-1] + if '[' in nodes_part: + # example: line='cpu* up infinite 42 mix cpu[01-17,23-47]' + # then: nodes='01-17,23-47' + nodes = nodes_part[4:-1] + for part in nodes.split(','): + if '-' in part: + lower, sep, upper = part.partition('-') + available_cep4_nodes += list(range(int(lower), int(upper) + 1)) + else: + available_cep4_nodes.append(int(part)) + else: + # example: line='cpu* up infinite 42 mix cpu01' + # then: nodes='01' + node = int(nodes_part[3:]) + available_cep4_nodes += [node] + + except StopIteration: + pass # no line with state in line + + except Exception as e: + logger.exception(e) + + available_cep4_nodes = sorted(list(set(available_cep4_nodes))) + logger.debug('available cep4 cpu nodes: %s', ','.join(str(x) for x in available_cep4_nodes)) + if not available_cep4_nodes: + logger.warning('no cep4 cpu nodes available') + + return available_cep4_nodes + +def get_cep4_cpu_nodes_loads(node_nrs=None): + """ + get the 5min load for each given cep4 cpu node nr + :param node_nrs: optional list of node numbers to get the load for. If None, then all available nodes are queried. 
+ :return: dict with node_nr -> load mapping + """ + if node_nrs is None: + node_nrs = get_cep4_available_cpu_nodes() + + procs = {} + loads = {} + logger.debug('getting 5min load for cep4 cpu nodes %s', ', '.join((str(x) for x in node_nrs))) + # spawn load commands in parallel + for node_nr in node_nrs: + load_cmd = ['cat', '/proc/loadavg', '|', 'awk', "'{print $2}'"] + node_load_cmd = wrap_command_in_cep4_cpu_node_ssh_call(load_cmd, node_nr, via_head=True) + logger.debug('executing command: %s', ' '.join(node_load_cmd)) + + proc = Popen(node_load_cmd, stdout=PIPE, stderr=PIPE) + procs[node_nr] = proc + + # wait for procs to finish, and try to parse the resulting load value + for node_nr, proc in procs.items(): + out, err = proc.communicate() + try: + load = float(out.strip()) + except: + load = 1e10 + loads[node_nr] = load + + logger.debug('5min loads for cep4 cpu nodes: %s', loads) + return loads + +def get_cep4_available_cpu_nodes_sorted_ascending_by_load(): + """ + get the cep4 available cpu node numbers sorted ascending by load (5min). + :return: sorted list of node numbers. + """ + node_nrs = get_cep4_available_cpu_nodes() + loads = get_cep4_cpu_nodes_loads(node_nrs) + sorted_loads = sorted(loads.items(), key=lambda x: x[1]) + sorted_node_nrs = [item[0] for item in sorted_loads] + logger.debug('cep4 cpu nodes sorted (asc) by load: %s', sorted_node_nrs) + return sorted_node_nrs + +def get_cep4_available_cpu_node_with_lowest_load(): + """ + get the cep4 cpu node which is available and has the lowest (5min) load of them all. + :return: the node number (int) with the lowest load. + """ + node_nrs = get_cep4_available_cpu_nodes_sorted_ascending_by_load() + if node_nrs: + logger.debug('cep4 cpu node with lowest load: %s', node_nrs[0]) + return node_nrs[0] + return None diff --git a/QA/QA_Common/lib/geoconversions.py b/QA/QA_Common/lib/geoconversions.py new file mode 100644 index 0000000000000000000000000000000000000000..7c93b4a08d9438e98ed59d231dda418832187222 --- /dev/null +++ b/QA/QA_Common/lib/geoconversions.py @@ -0,0 +1,134 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +from numpy import sqrt, sin, cos, arctan2, array, cross, dot, ones +from numpy.linalg.linalg import norm +from scipy.interpolate import Rbf # Radial basis function interpolation. 
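# Illustrative usage sketch for the cep4_utils helpers above (not part of the patch;
# assumes the lofar.qa package from this patch is installed and slurm reports at least
# one available cpu node; the 'hostname' command is just an example payload):
from subprocess import check_output
from lofar.qa.cep4_utils import (get_cep4_available_cpu_node_with_lowest_load,
                                 wrap_command_in_cep4_cpu_node_ssh_call)

node_nr = get_cep4_available_cpu_node_with_lowest_load()
if node_nr is not None:
    cmd = wrap_command_in_cep4_cpu_node_ssh_call(['hostname'], node_nr, via_head=True)
    print check_output(cmd)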
+from numpy.linalg import lstsq + +__all__ = ['geographic_from_xyz', 'pqr_cs002_from_xyz'] + + +def normalized_earth_radius(latitude_rad): + wgs84_f = 1./298.257223563 + return 1.0/sqrt(cos(latitude_rad)**2 + ((1.0 - wgs84_f)**2)*(sin(latitude_rad)**2)) + + +def geographic_from_xyz(xyz_m): + """ + convert xyz coordinates to wgs84 coordinates + :param xyz_m: 1D array/list/tuple of x,y,z in meters + :return: tuple of lat_rad, lon_rad, height_m + """ + wgs84_a = 6378137.0 + wgs84_f = 1./298.257223563 + wgs84_e2 = wgs84_f*(2.0 - wgs84_f) + + x_m, y_m, z_m = xyz_m + lon_rad = arctan2(y_m, x_m) + r_m = sqrt(x_m**2 + y_m**2) + # Iterate to latitude solution + phi_previous = 1e4 + phi = arctan2(z_m, r_m) + while abs(phi -phi_previous) > 1.6e-12: + phi_previous = phi + phi = arctan2(z_m + wgs84_e2*wgs84_a*normalized_earth_radius(phi)*sin(phi), + r_m) + lat_rad = phi + height_m = r_m*cos(lat_rad) + z_m*sin(lat_rad) - wgs84_a*sqrt(1.0 - wgs84_e2*sin(lat_rad)**2) + return lat_rad, lon_rad, height_m + + +def xyz_from_geographic(lon_rad, lat_rad, height_m): + c = normalized_earth_radius(lat_rad) + wgs84_f = 1./298.257223563 + wgs84_a = 6378137.0 + s = c*((1 - wgs84_f)**2) + return array([ + ((wgs84_a*c) + height_m)*cos(lat_rad)*cos(lon_rad), + ((wgs84_a*c) + height_m)*cos(lat_rad)*sin(lon_rad), + ((wgs84_a*s) + height_m)*sin(lat_rad)]) + + + +def normal_vector_ellipsoid(lon_rad, lat_rad): + return array([cos(lat_rad)*cos(lon_rad), + cos(lat_rad)*sin(lon_rad), + sin(lat_rad)]) + +def normal_vector_meridian_plane(xyz_m): + x_m, y_m, _ = xyz_m + return array([y_m, -x_m, 0.0])/sqrt(x_m**2 + y_m**2) + +def projection_matrix(xyz0_m, normal_vector): + r_unit = normal_vector + meridian_normal = normal_vector_meridian_plane(xyz0_m) + q_unit = cross(meridian_normal, r_unit) + q_unit /= norm(q_unit) + p_unit = cross(q_unit, r_unit) + p_unit /= norm(p_unit) + return array([p_unit, q_unit, r_unit]).T + +def transform(xyz_m, xyz0_m, mat): + offsets = xyz_m - xyz0_m + return array([dot(mat, offset) for offset in offsets]) + +LOFAR_XYZ0_m = array([3826574.0, 461045.0, 5064894.5]) +LOFAR_REF_MERIDIAN_NORMAL = normal_vector_meridian_plane(LOFAR_XYZ0_m) +LOFAR_PQR_TO_ETRS_MATRIX = array([[ -1.19595105e-01, -7.91954452e-01, 5.98753002e-01], + [ 9.92822748e-01, -9.54186800e-02, 7.20990002e-02], + [ 3.30969000e-05, 6.03078288e-01, 7.97682002e-01]]) + + + +def pqr_from_xyz(xyz_m, xyz0_m=LOFAR_XYZ0_m, matrix=LOFAR_PQR_TO_ETRS_MATRIX): + return transform(xyz_m, xyz0_m, matrix.T) + +def interpolation_function(pqr): + """ + Return an interpolation function fn(x, y, z), which returns the value at x, y. 
+ """ + rbfi = Rbf(pqr[:,0], pqr[:,1], 0.0*pqr[:,2], pqr[:,2], function='linear') + def interpolator(x_m, y_m): + return rbfi(x_m, y_m, y_m*0.0) + return interpolator + + +def fit_plane(xyz): + # data_model z = ax +by +c + # M colvec(a, b, c) = colvec(z) + # M row i = (x_i, y_i, 1.0) + mean_position = xyz.mean(axis=0) + + mat = array([xyz[:,0]- mean_position[0], + xyz[:,1]- mean_position[1], + ones(len(xyz[:,2]))]).T + a, b, c = lstsq(mat, xyz[:,2] - mean_position[2])[0] + normal_vector = array([-a, -b, 1.0]) + normal_vector /= norm(normal_vector) + return {'mean': mean_position, 'normal': normal_vector} + + +def pqr_cs002_from_xyz(xyz_m): + """ + convert xyz coordinates to lofar pqr coordinates with origin in CS002 + :param xyz_m: 1D array/list/tuple of x,y,z in meters + :return: tuple of pqr coords in meters + """ + pqr = pqr_from_xyz(array([xyz_m]), + xyz0_m=array([ 3826577.462, 461022.624, 5064892.526])) + return pqr[0][0], pqr[0][1], pqr[0][2] diff --git a/QA/QA_Common/lib/hdf5_io.py b/QA/QA_Common/lib/hdf5_io.py new file mode 100644 index 0000000000000000000000000000000000000000..c405077ac7828748462b5fcf48d96190c6c54e9b --- /dev/null +++ b/QA/QA_Common/lib/hdf5_io.py @@ -0,0 +1,1160 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# TODO: refactor large functions into collections of smaller function calls and isolate behaviour. + +"""Module hdf5_io offers various methods to read/write/modify hdf5 files containing lofar measurement data. +Such an h5 file is usually generated from Lofar Measurement Sets (MS/casacore format) using the ms2hdf5 conversion tool. + +Since the data is stored in hdf (hierchical data format) and we use python, it makes sense that we use (nested) dicts as data holders. +The file contents is as follows: +- TODO + +External developers using this api whill primarily use the read_hypercube. +If you would like to do your own clustering, then use write_clusters and read_clusters as well. + +:Example: + + from lofar.qa.hdf5_io import * + + # read the data + h5_path = '/my/path/to/myfile.h5' + data = read_hypercube(h5_path, visibilities_in_dB=True, python_datetimes=False, read_flagging=False) + + # do your own processing, for example make clusters (see write_clusters for dict format) + my_clusters = .... #results of your algorithm + + # write your clusters into the same h5 file + # in this case they are stored under 'my_fancy_clustering_attempt_1', and a 'latest' symlink is made to these clustering results. + # multiple clustering results can all be stored in the same file, each with a different algo_name. 
+ write_clusters(h5_path, clusters, algo_name='my_fancy_clustering_attempt_1') +""" + +import os.path +from datetime import datetime, timedelta + +import warnings +with warnings.catch_warnings(): + import h5py + import numpy as np + +import logging +logger = logging.getLogger(__name__) + + +def write_hypercube(path, saps, parset=None, sas_id=None, wsrta_id=None, do_compress=True, **kwargs): + """ + write a hypercube of visibility/flagging data for all saps of an observation/pipeline. + + :param str path: full path of the resulting h5 file. By convention we advise to use <observation_id>.MS_exctract.h5 + where observation_id is L<sas_id> for lofar and WSRTA<wsrta_id> for apertif + :param dict saps: each key is the id of a sap, and holds per sap a dict with the following key/value pairs: + + baselines: [string], list of stationpairs (tuples) (these are the ticks on the baseline axis of the visibilities) + + timestamps: [np.double], list of Modified Julian Date (these are the ticks on the time axis of the visibilities) + + central_frequencies: [np.double], list of central frequencies of the subbands (these are the ticks on the frequency axis of the visibilities) + + subbands: [np.int], list of subbands numbers (each subband has a corresponding central_frequency) + + polarizations: [string], list of polarization, one up to four, any of 'XX', 'XY', 'YX', 'YY' + + visibilities: numpy.array, the 4D array of visibilities. In the file these are reduced from doubles to chars by taking the 10.log10 and normalizing the result to fit in the [-128..127] range. + + flagging: numpy.array, the 4D array of flagging booleans. + :param parameterset parset: the optional paramaterset with all the settings which were used for this observation/pipeline + :param int sas_id: the optional observation/pipeline sas_id (the main id to track lofar observations/pipelines) + :param int wsrta_id: the optional observation wsrta_id (the main id to track wsrt apertif observations) + :param bool do_compress: compress the visibilities and flagging data (with lzf compression, slower but smaller output size) + :param dict kwargs: optional extra arguments + :return None + """ + logger.info('writing hypercube to file: %s', path) + + save_dir = os.path.dirname(path) + if not os.path.isabs(save_dir): + save_dir = os.path.join(os.getcwd(), save_dir) + + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + with h5py.File(path, "w") as file: + version = '1.2' # 1.1 -> 1.2 change is not backwards compatible by design. + ds = file.create_dataset('version', (1,), h5py.special_dtype(vlen=str), version) + ds.attrs['description'] = 'version of this hdf5 MS extract file' + + measurement_group = file.create_group('measurement') + measurement_group.attrs['description'] = 'all data (visibilities, flagging, parset, ...) 
for this measurement (observation/pipeline)' + + if parset is not None: + parset_str = str(parset) + ds = file.create_dataset('measurement/parset', (1,), h5py.special_dtype(vlen=str), + [parset_str], + compression="lzf") + ds.attrs['description'] = 'the parset of this observation/pipeline with all settings how this data was created' + + if sas_id is not None: + ds = file.create_dataset('measurement/sas_id', data=[sas_id]) + ds.attrs['description'] = 'lofar observation/pipeline sas id' + + if wsrta_id is not None: + ds = file.create_dataset('measurement/wsrta_id', data=[wsrta_id]) + ds.attrs['description'] = 'apertif observation wsrta id' + + saps_group = file.create_group('measurement/saps') + saps_group.attrs['description'] = 'the data (visibilities, flagging, ...) is stored per sub-array-pointing (sap)' + + for sap_nr in sorted(saps.keys()): + sap_dict = saps[sap_nr] + baselines = sap_dict['baselines'] + timestamps = sap_dict['timestamps'] + central_frequencies = sap_dict['central_frequencies'] + subbands = sap_dict['subbands'] + polarizations = sap_dict['polarizations'] + visibilities = sap_dict['visibilities'] + flagging = sap_dict['flagging'] + antenna_locations = sap_dict.get('antenna_locations') + + sap_group = file.create_group('measurement/saps/%d' % sap_nr) + ds = sap_group.create_dataset('polarizations', (len(polarizations),), h5py.special_dtype(vlen=str), polarizations) + ds.attrs['description'] = 'polarizations of the visibilities' + + ds = sap_group.create_dataset('baselines', (len(baselines),2), h5py.special_dtype(vlen=str), + [[str(bl[0]), str(bl[1])] for bl in baselines]) + ds.attrs['description'] = 'pairs of baselines between stations' + + if any(isinstance(t, datetime) for t in timestamps): + # try to import lofar.common.datetimeutils here and not at the top of the file + # to make this hdf5_io module as loosly coupled to other lofar code as possible + # do raise the possible ImportError, because we cannot proceed without converted datetimes. 
+ from lofar.common.datetimeutils import to_modified_julian_date_in_seconds + timestamps = [to_modified_julian_date_in_seconds(t) if isinstance(t, datetime) else t for t in timestamps] + + ds = sap_group.create_dataset('timestamps', data=timestamps) + ds.attrs['units'] = 'modified julian date, (fractional) seconds since epoch 1858-11-17 00:00:00' + + ds = sap_group.create_dataset('central_frequencies', data=central_frequencies) + ds.attrs['units'] = 'Hz' + + ds = sap_group.create_dataset('subbands', data=subbands) + ds.attrs['description'] = 'subband number' + + if antenna_locations: + location_group = sap_group.create_group('antenna_locations') + location_group.attrs['description'] = 'the antenna locations in XYZ, PQR, WGS84 coordinates (units: meters and/or radians)' + + for ref_frame in ['XYZ', 'PQR', 'WGS84']: + location_sub_group = location_group.create_group(ref_frame) + location_sub_group.attrs['description'] = 'the antenna locations in %s coordinates (units: meters and/or radians)' % (ref_frame,) + + for antenna, location in antenna_locations[ref_frame].items(): + location_sub_group.create_dataset(antenna, data=location) + + logger.info("""flagging NaN's and zero's in visibilities for file %s""", path) + zero_or_nan = np.absolute(visibilities) == 0.0 + zero_or_nan[np.isnan(visibilities)] = True + flagging[zero_or_nan] = True + + #we'll scale the 10log10(visibilities) so the complex-float can be mapped onto 2*int8 + logger.info('normalizing visibilities for file %s', path) + #remove any NaN and/or 0 values in the visibilities? log(0) or log(nan) crashes, + # so determine smallest non-zero abs value, and fill that in for the flagged visibilities + try: + abs_non_zero_or_nan_visibilities = np.abs(visibilities)[zero_or_nan == False] + min_non_zero_or_nan_abs_value = max(1e-9, np.min(abs_non_zero_or_nan_visibilities)) + del abs_non_zero_or_nan_visibilities + except ValueError: + min_non_zero_or_nan_abs_value = 1e-12 + + # overwrite all visibilities values where flagging (or 0's or NaN's) occur with the min_non_flagged_value + # that enables us to take the log, and have good dynamic range when scaling to -128...127 + visibilities[zero_or_nan] = min_non_zero_or_nan_abs_value + del zero_or_nan + + visibilities_dB = 10.0*np.log10(visibilities) + abs_vis_dB = np.absolute(visibilities_dB) + + #compute scale factor per subband to map the visibilities_dB per subband from complex64 to 2xint8 + scale_factors = np.empty((len(subbands),), dtype=np.float32) + for sb_nr in range(len(subbands)): + #use 99.9 percentile instead if max to get rid of spikes + max_abs_vis_sb = np.percentile(abs_vis_dB[:,:,sb_nr,:], 99.9) + scale_factor = 127.0 / max_abs_vis_sb + scale_factors[sb_nr] = 1.0/scale_factor + + ds = sap_group.create_dataset('visibility_scale_factors', data=scale_factors) + ds.attrs['description'] = 'multiply real and imag parts of visibilities with this factor per subband to un-normalize them and get the 10log10 values of the real and imag parts of the visibilities' + ds.attrs['units'] = '-' + + #create a array with one extra dimension, so we can split the complex value into two scaled int8's for real and imag part + #looping in python is not the most cpu efficient way + #but is saves us extra copies of the large visibilities array, which might not fit in memory? 
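+        # the scaled array gets one extra trailing axis of length 2: index 0 holds the scaled real part, index 1 the scaled imaginary part (both as int8)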
+ logger.info('converting visibilities from complexfloat to 2xint8 for file %s', path) + extended_shape = visibilities_dB.shape[:] + (2,) + scaled_visibilities = np.empty(extended_shape, dtype=np.int8) + for sb_nr in range(len(subbands)): + scale_factor = 1.0 / scale_factors[sb_nr] + scaled_visibilities[:,:,sb_nr,:,0] = scale_factor*visibilities_dB[:,:,sb_nr,:].real + scaled_visibilities[:,:,sb_nr,:,1] = scale_factor*visibilities_dB[:,:,sb_nr,:].imag + + logger.info('reduced visibilities size from %s to %s bytes (factor %s)', + visibilities.nbytes, scaled_visibilities.nbytes, visibilities.nbytes/scaled_visibilities.nbytes) + + ds = sap_group.create_dataset('visibilities', data=scaled_visibilities, + compression="lzf" if do_compress else None) + ds.attrs['units'] = 'normalized dB within [-128..127]' + ds.attrs['dim[0]'] = 'baselines' + ds.attrs['dim[1]'] = 'timestamps' + ds.attrs['dim[2]'] = 'central_frequencies & subbands' + ds.attrs['dim[3]'] = 'polarizations' + ds.attrs['dim[4]'] = 'real part of normalized within [-128..127] 10log10(visibilities)' + ds.attrs['dim[5]'] = 'imag part of normalized within [-128..127] 10log10(visibilities)' + + ds = sap_group.create_dataset('flagging', data=flagging, + compression="lzf" if do_compress else None) + ds.attrs['units'] = 'bool (true=flagged)' + ds.attrs['dim[0]'] = 'baselines' + ds.attrs['dim[1]'] = 'timestamps' + ds.attrs['dim[2]'] = 'central_frequencies & subbands' + ds.attrs['dim[3]'] = 'polarizations' + ds.attrs['dim[4]'] = 'flagging values' + + if parset is not None: + fill_info_folder_from_parset(path) + + try: + # try to import the lofar.common.util.humanreadablesize here and not at the top of the file + # to make this hdf5_io module as loosly coupled to other lofar code as possible + from lofar.common.util import humanreadablesize + logger.info('finished writing %s hypercube to file: %s', humanreadablesize(os.path.getsize(path)), path) + except ImportError: + logger.info('finished writing hypercube to file: %s', path) + +def read_sap_numbers(path): + """ + read the sap numbers (keys) from the hypercube data from the hdf5 hypercube file given by path. + :param str path: path to the hdf5 file you want to read + :return list: list of sap numbers + """ + logger.info('reading sap numbers from from file: %s', path) + + with h5py.File(path, "r") as file: + version_str = file['version'][0] + + if version_str != '1.2': + raise ValueError('Cannot read version %s' % (version_str,)) + + return sorted([int(sap_nr) for sap_nr in file['measurement/saps'].keys()]) + +def read_hypercube(path, visibilities_in_dB=True, python_datetimes=False, read_visibilities=True, read_flagging=True, saps_to_read=None): + """ + read the hypercube data from the hdf5 hypercube file given by path. + + :param str path: path to the hdf5 file you want to read + :param bool visibilities_in_dB: return the in dB scale, or linear scale. + :param bool python_datetimes: return the timestamps as python datetime's when True (otherwise modified_julian_date/double) + :param bool read_visibilities: do/don't read visibilities (can save read-time and memory usage) + :param bool read_flagging: do/don't read flagging (can save read-time and memory usage) + :param list saps_to_read: only read these given SAPs (can save read-time and memory usage) + :return dict: same dict structure as in write_hypercube, parameter saps. 
+ seealso:: write_hypercube + """ + logger.info('reading hypercube from file: %s', path) + + with h5py.File(path, "r") as file: + result = {} + + version_str = file['version'][0] + + if version_str != '1.2': + raise ValueError('Cannot read version %s' % (version_str,)) + + if 'measurement/parset' in file: + parset = read_hypercube_parset(path) + if parset: + result['parset'] = parset + + if 'measurement/sas_id' in file: + result['sas_id'] = file['measurement/sas_id'][0] + + if 'measurement/wsrta_id' in file: + result['wsrta_id'] = file['measurement/wsrta_id'][0] + + result['saps'] = {} + + for sap_nr, sap_dict in file['measurement/saps'].items(): + sap_nr = int(sap_nr) + if saps_to_read and sap_nr not in saps_to_read: + continue + + sap_result = {} + result['saps'][sap_nr] = sap_result + + polarizations = list(sap_dict['polarizations']) + sap_result['polarizations'] = polarizations + + baselines = sap_dict['baselines'][:] + baselines = [(bl[0], bl[1]) for bl in baselines] + sap_result['baselines'] = baselines + + timestamps = sap_dict['timestamps'][:] + if python_datetimes: + try: + # try to import lofar.common.datetimeutils here and not at the top of the file + # to make this hdf5_io module as loosly coupled to other lofar code as possible + from lofar.common.datetimeutils import from_modified_julian_date_in_seconds + timestamps = [from_modified_julian_date_in_seconds(t) for t in timestamps] + except ImportError as e: + logger.warning("Could not convert timestamps from modified julian date to python datetimes.") + + sap_result['timestamps'] = timestamps + + central_frequencies = sap_dict['central_frequencies'][:] + sap_result['central_frequencies'] = central_frequencies + + subbands = sap_dict['subbands'][:] + sap_result['subbands'] = subbands + + sap_result['antenna_locations'] = {} + if 'antenna_locations' in sap_dict: + location_group = sap_dict['antenna_locations'] + for ref_frame, location_sub_group in location_group.items(): + sap_result['antenna_locations'][ref_frame] = {} + for antenna, location in location_sub_group.items(): + sap_result['antenna_locations'][ref_frame][antenna] = tuple(location) + + if read_flagging: + flagging = sap_dict['flagging'][:] + sap_result['flagging'] = flagging + + if read_visibilities: + # read the visibility_scale_factors and (scaled_)visibilities + # denormalize them and convert back to complex + scale_factors = sap_dict['visibility_scale_factors'][:] + normalized_visibilities = sap_dict['visibilities'][:] + + logger.info('denormalizing and converting real/imag to complex visibilities for file sap %s in %s', sap_nr, path) + reduced_shape = normalized_visibilities.shape[:-1] + visibilities = np.empty(reduced_shape, dtype=np.complex64) + + for sb_nr, scale_factor in enumerate(scale_factors): + visibilities[:,:,sb_nr,:].real = scale_factor*normalized_visibilities[:,:,sb_nr,:,0] + visibilities[:,:,sb_nr,:].imag = scale_factor*normalized_visibilities[:,:,sb_nr,:,1] + + if not visibilities_in_dB: + logger.info('converting visibilities from dB to raw for file sap %s in %s', sap_nr, path) + visibilities = np.power(10, 0.1*visibilities) + + #HACK: explicitely set non-XX-polarizations to 0 for apertif + if 'measurement/wsrta_id' in file: + visibilities[:,:,:,1:] = 0 + + if 'flagging' in sap_result: + #explicitely set flagged visibilities to 0 + visibilities[sap_result['flagging']] = 0.0 + + sap_result['visibilities'] = visibilities + sap_result['visibilities_in_dB'] = visibilities_in_dB + + antennae = set([bl[0] for bl in sap_result['baselines']] + [bl[1] for 
bl in sap_result['baselines']]) + + logger.info('sap: %s, #subbands: %s, #timestamps: %s, #baselines: %s, #antennae: %s, #polarizations: %s', + sap_nr, + len(sap_result['subbands']), + len(sap_result['timestamps']), + len(sap_result['baselines']), + len(antennae), + len(sap_result['polarizations'])) + + logger.info('finished reading hypercube from file: %s', path) + + return result + +def add_parset_to_hypercube(h5_path, otdbrpc): + """ + helper method which tries to get the parset for the sas_id in the h5 file from otdb via the otdbrpc, and add it to the h5 file. + + :param str h5_path: path to the hdf5 file + :param lofar.sas.otdb.otdbrpc.OTDBRPC otdbrpc: an instance of a OTDBPC client + """ + try: + with h5py.File(h5_path, "r+") as file: + if 'measurement/parset' in file: + return + + if 'measurement/sas_id' in file: + sas_id = file['measurement/sas_id'][0] + + logger.info('trying to get the parset for sas_id %s', sas_id) + parset = otdbrpc.taskGetSpecification(otdb_id=sas_id)["specification"] + + if parset: + logger.info('adding parset for sas_id %s to %s hdf5 file', sas_id, os.path.basename(h5_path)) + parset_str = '\n'.join(['%s=%s'%(k,parset[k]) for k in sorted(parset.keys())]) + ds = file.create_dataset('measurement/parset', (1,), h5py.special_dtype(vlen=str), parset_str, + compression="lzf") + ds.attrs['description'] = 'the parset of this observation/pipeline with all settings how this data was created' + logger.info('added parset for sas_id %s to %s hdf5 file', sas_id, os.path.basename(h5_path)) + except Exception as e: + logger.error(e) + + +def read_hypercube_parset(h5_path, as_string=False): + """ + read the measurement parset from the given hdf5 hypercube file + + :param str h5_path: path to the hdf5 file + :param bool as_string: return the parset as string instead of as parameterset object if true + :return parameterset/string: the parset (as string or as parameterset) if any, else None + """ + logger.info('reading parset from %s hdf5 file', os.path.basename(h5_path)) + with h5py.File(h5_path, "r") as file: + if 'measurement/parset' in file: + parset_str = file['measurement/parset'][0] + if as_string: + return '\n'.join(sorted(line.strip() for line in parset_str.split('\n'))) + + # try to import the lofar.parameterset here and not at the top of the file + # to make this hdf5_io module as loosly coupled to other lofar code as possible + try: + from lofar.parameterset import parameterset + parset = parameterset.fromString(parset_str) + return parset + except ImportError as e: + logger.info("could not import parset because the parameterset module cannot be imported: %s", e) + +def get_observation_id_str(data): + if 'sas_id' in data: + return 'L%d' % data['sas_id'] + if 'wsrta_id' in data: + return 'WSRTA%d' % data['wsrta_id'] + return 'unknown_id' + +def get_default_h5_filename(data, timestamped_if_unknown=True): + obs_id = get_observation_id_str(data) + if 'unknown' in obs_id and timestamped_if_unknown: + return datetime.utcnow().strftime('%Y%m%d%H%M%s') + '.MS_extract.h5' + return obs_id + '.MS_extract.h5' + +def combine_hypercubes(input_paths, output_dir, output_filename=None, do_compress=True): + """ + combine list of hypercubes into one file, for example when you created many h5 file in parallel with one subband per file. + :param [str] input_paths: paths of the hdf5 files you want to read and combine + :param str output_dir: directory where to save the resulting combined h5 file + :param str output_filename: optional output filename. 
if None, then <get_observation_id_str>.MS_extract.h5 is used + :param bool do_compress: compress the visibilities and flagging data (with lzf compression, slower but smaller output size) + """ + input_files = [] + output_path = None + try: + input_paths = sorted(input_paths) + existing_paths = [p for p in input_paths if os.path.exists(p)] + if not existing_paths: + raise ValueError('No input h5 files with valid paths given: %s' % (', '.join(input_paths),)) + + input_files = [h5py.File(p, "r") for p in existing_paths] + + versions = set([file['version'][0] for file in input_files]) + + if len(versions) != 1: + raise ValueError('Cannot combine h5 files of multiple versions: %s' % (', '.join(versions),)) + + version_str = list(versions)[0] + + if version_str != '1.2': + raise ValueError('Cannot read version %s' % (version_str,)) + + sas_ids = set([file['measurement/sas_id'][0] for file in input_files if 'measurement/sas_id' in file]) + if len(sas_ids) > 1: + raise ValueError('Cannot combine h5 files of multiple observations with multiple sas_ids: %s' % (', '.join(sas_ids),)) + sas_id = list(sas_ids)[0] if sas_ids else None + + wsrta_ids = set([file['measurement/wsrta_id'][0] for file in input_files if 'measurement/wsrta_id' in file]) + if len(wsrta_ids) > 1: + raise ValueError('Cannot combine h5 files of multiple observations with multiple wsrta_ids: %s' % (', '.join(wsrta_ids),)) + wsrta_id = list(wsrta_ids)[0] if wsrta_ids else None + + if output_filename is None: + output_filename = get_default_h5_filename({'sas_id':sas_id} if sas_id else + {'wsrta_id': wsrta_id} if wsrta_id else None) + + output_path = os.path.join(output_dir, output_filename) + logger.info('combine_hypercubes: combining %s h5 files into %s', len(input_paths), output_path) + + with h5py.File(output_path, "w") as output_file: + version = '1.2' + ds = output_file.create_dataset('version', (1,), h5py.special_dtype(vlen=str), version) + ds.attrs['description'] = 'version of this hdf5 MS extract file' + + measurement_group = output_file.create_group('measurement') + measurement_group.attrs['description'] = 'all data (visibilities, flagging, parset, ...) for this measurement (observation/pipeline)' + + if sas_id is not None: + ds = output_file.create_dataset('measurement/sas_id', data=[sas_id]) + ds.attrs['description'] = 'observation/pipeline sas id' + + #copy parset from the first input file containing one. assume parset is equal in all input files. + try: + input_file = next(f for f in input_files if 'measurement/parset' in f) + h5py.h5o.copy(input_file.id, 'measurement/parset', output_file.id, 'measurement/parset') + except StopIteration: + pass #no input file with parset, so nothing to copy. + + #make saps group and description + saps_group = output_file.create_group('measurement/saps') + saps_group.attrs['description'] = 'the data (visibilities, flagging, ...) 
is stored per sub-array-pointing (sap)' + + #rest of the items are multi dimensional, and may have different dimensions across the input files (only along the subband axis) + #gather item values of all files, per sap, then combine, then write in output_file + value_dicts_per_sap = {} + for input_file in input_files: + logger.info('combine_hypercubes: parsing file %s', input_file.filename) + + for sap_nr, sap_dict in input_file['measurement/saps'].items(): + sap_nr = int(sap_nr) + logger.info('combine_hypercubes: parsing sap %d in file %s', sap_nr, input_file.filename) + + #gather all items of one sap of one file in one dict + file_sap_value_dict = {} + + for item in sap_dict.keys(): + key = 'measurement/saps/%s/%s' % (sap_nr, item) + if item == 'antenna_locations': + file_sap_value_dict[key] = {} + location_group = sap_dict['antenna_locations'] + for ref_frame, location_sub_group in location_group.items(): + file_sap_value_dict[key][ref_frame] = {} + for antenna, location in location_sub_group.items(): + file_sap_value_dict[key][ref_frame][antenna] = location + else: + file_sap_value_dict[key] = input_file[key][:] + + #now, all items of this sap in input_file have been gathered into file_sap_value_dict + #this sap of this input file may contain mutiple subbands + #split out file_value_dict per subband + if sap_nr not in value_dicts_per_sap: + #per sap we make lists of value_dicts (one value_dict per file) + #we'll sort and combine them later + value_dicts_per_sap[sap_nr] = [] + + num_subbands_in_sap_in_input_file = len(file_sap_value_dict['measurement/saps/%s/subbands' % (sap_nr,)]) + logger.info('combine_hypercubes: num_subbands=%d in sap %d in file %s', num_subbands_in_sap_in_input_file, sap_nr, input_file.filename) + + for sb_cntr in range(num_subbands_in_sap_in_input_file): + value_dict = {} + for key,data in file_sap_value_dict.items(): + if 'visibilities' in key: + value_dict[key] = data[:,:,sb_cntr,:,:] + elif 'flagging' in key: + value_dict[key] = data[:,:,sb_cntr,:] + elif any(item in key for item in ['baselines', 'polarizations', 'timestamps', 'antenna_locations']): + value_dict[key] = data + else: + value_dict[key] = data[sb_cntr] + + #append the value_dict holding the items of a single subband to the subband list of this sap + value_dicts_per_sap[sap_nr].append(value_dict) + + logger.info('combine_hypercubes: sorting and combining all subbands and saps into one output file: %s', output_path) + + #all saps and all subbands have been parsed and put into value_dicts_per_sap + #sort and combine them + for sap_nr,sap_value_dicts in value_dicts_per_sap.items(): + num_subbands = len(sap_value_dicts) + logger.info('combine_hypercubes: sorting and combining %d subbands for sap %d', num_subbands, sap_nr) + #sort the sap_value_dicts by subband + sap_value_dicts = sorted(sap_value_dicts, key=lambda x: x['measurement/saps/%s/subbands' % (sap_nr,)]) + + #combine all seperate subbands + if sap_value_dicts: + combined_value_dict = {} + #setup numpy arrays based on shape and type of first value_dict, extend sb dimension to num_subbands + for key,data in sap_value_dicts[0].items(): + if 'visibilities' in key or 'flagging' in key: + shape = list(data.shape) + shape.insert(2, num_subbands) + shape = tuple(shape) + else: + shape = (num_subbands,) + + if 'antenna_locations' not in key: + combined_value_dict[key] = np.empty(shape=shape, dtype=data.dtype) + + #now loop over all value_dicts and copy data to it's subband slice in the just created empty numpy arrays + for sb_cntr, value_dict in 
enumerate(sap_value_dicts): + for key,data in value_dict.items(): + if 'visibilities' in key: + combined_value_dict[key][:,:,sb_cntr,:,:] = data + elif 'flagging' in key: + combined_value_dict[key][:,:,sb_cntr,:] = data + elif any(item in key for item in ['baselines', 'polarizations', 'timestamps', 'antenna_locations']): + combined_value_dict[key] = data + else: + combined_value_dict[key][sb_cntr] = data + + for key,data in combined_value_dict.items(): + logger.info('combine_hypercubes: storing %s in %s', key, output_filename) + ds_out = None + if 'visibilities' in key or 'flagging' in key: + ds_out = output_file.create_dataset(key, data=data, + compression="lzf" if do_compress else None) + elif 'antenna_locations' in key: + location_group = output_file.create_group(key) + location_group.attrs['description'] = 'the antenna locations in XYZ, PQR, WGS84 coordinates (units: meters and/or radians)' + for ref_frame, antenna_locations in data.items(): + location_sub_group = location_group.create_group(ref_frame) + location_sub_group.attrs['description'] = 'the antenna locations in %s coordinates (units: meters and/or radians)' % (ref_frame,) + + for antenna, location in antenna_locations.items(): + location_sub_group.create_dataset(antenna, data=location) + else: + ds_out = output_file.create_dataset(key, data=data) + + #search first input_file containing this keys + #and copy all dataset attributes from the input_file to the output_file + try: + if ds_out: + input_file = next(f for f in input_files if key in f) + ds_in = input_file[key] + + for attr_key, attr_value in ds_in.attrs.items(): + ds_out.attrs[attr_key] = attr_value + except StopIteration: + pass #no input file with key, so nothing to copy. + + except Exception as e: + logger.exception('combine_hypercubes: %s', e) + finally: + for h5file in input_files: + h5file.close() + + logger.info('combine_hypercubes: finished combining %s h5 files into %s', len(input_paths), output_path) + return output_path + +DEFAULT_ALGO_NAME='scipy.cluster.hierarchical.single on visibility distance v1' + +def _write_common_clustering_groups(h5_path, saps_dict, label=DEFAULT_ALGO_NAME): + """ + helper method to write some common groups when writing clustering results into the h5_path + + :param str h5_path: path to the hdf5 file + :param dict saps_dict: clustering results dict, see clusters parameter in write_clusters. + :param str label: A name/label for this clustering result, for example 'my_clusterer_run_3'. + Multiple clustering results can be stored in the same h5 file, as long as the label is unique. + If the label was already present in the file, then it is overwritten. + The always present symlink 'latest' is updated to this clustering result. + :return str: the name of the saps_group into which the non-common results can be written. 
+ """ + with h5py.File(h5_path, "r+") as file: + if 'clustering' in file: + clustering_group = file['clustering'] + else: + clustering_group = file.create_group('clustering') + clustering_group.attrs['description'] = 'clustering results' + + if label == 'latest': + raise ValueError('\'latest\' is a reserved label for a symlink to the actual latest clustering result.') + + if label in clustering_group: + algo_group = clustering_group[label] + else: + algo_group = clustering_group.create_group(label) + algo_group.attrs['description'] = 'clustering results for cluster method: %s' % label + + # always set/update the timestamp of this result + algo_group.attrs['timestamp'] = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + + # update the 'latest' symlink to this label + try: + symlink = h5py.SoftLink('/clustering/' + label) + if 'latest' in clustering_group: + del clustering_group['latest'] + clustering_group['latest'] = symlink + except RuntimeError: + #softlink was already present, just continue. + pass + + if 'saps' in algo_group: + saps_group = algo_group['saps'] + else: + saps_group = algo_group.create_group('saps') + saps_group.attrs['description'] = 'clustering results are stored per sub array pointing' + + for sap_nr, sap_item in saps_dict.items(): + if str(sap_nr) not in saps_group: + sap_group = saps_group.create_group(str(sap_nr)) + sap_group.attrs['description'] = 'clustering results for sub array pointing %d' % sap_nr + + return saps_group.name + + +def _delete_clustering_group_if_empty(h5_path, label): + """ + helper method to delete an empty clustering group + + :param str h5_path: path to the hdf5 file + :param str label: The name/label of the clustering group, for example 'my_clusterer_run_3'. + The always present symlink 'latest' is updated to the next latest clustering group result. + """ + with h5py.File(h5_path, "r+") as file: + if 'clustering' in file: + clustering_group = file['clustering'] + + if label in clustering_group: + algo_group = clustering_group[label] + + if not algo_group.keys(): #the algo groups is empty..., so delete it + del clustering_group[label] + + timestamped_algo_groups = [algo_group for algo_group in clustering_group.values() if 'timestamp' in algo_group.attrs] + + # update the 'latest' symlink to the latest result + latest = datetime(0, 0, 0) + for algo_group in timestamped_algo_groups: + if algo_group.attrs['timestamp'] >= latest: + clustering_group["latest"] = h5py.SoftLink('/clustering/' + algo_group.name) + +def write_clusters(h5_path, clusters, label=DEFAULT_ALGO_NAME): + """ + write the clusters into an h5 file. + :param str h5_path: path to the h5 file + :param dict clusters: the clusters results dict. + { <sapnr>: { 'clusters': { <nr>: <list_of_baselines>, # for example: [('CS001', 'CS002), ('CS001', 'CS003')] + ... }, + ... }, + ... } + :param str label: A name/label for this clustering result, for example 'my_clusterer_run_3'. + Multiple clustering results can be stored in the same h5 file, as long as the label is unique. + If the label was already present in the file, then it is overwritten. + The always present symlink 'latest' is updated to this clustering result. 
+ """ + logger.info('writing clusters to %s under label \'%s\'', h5_path, label) + saps_group_name = _write_common_clustering_groups(h5_path, clusters, label=label) + + #add indirection level: cluster method (including run-timestamp) + #include parameters and description + with h5py.File(h5_path, "r+") as file: + saps_group = file[saps_group_name] + for sap_nr, sap_clusters_dict in clusters.items(): + sap_group = saps_group[str(sap_nr)] + + clusters_group = sap_group.create_group('clusters') + clusters_group.attrs['description'] = 'the clusters' + + sap_clusters = sap_clusters_dict['clusters'] + for cluster_nr in sorted(sap_clusters.keys()): + cluster_baselines = sorted(sap_clusters[cluster_nr]) + logger.debug('writing %d baselines in cluster %s for sap %d to %s', len(cluster_baselines), cluster_nr, sap_nr, h5_path) + + ds = clusters_group.create_dataset(str(cluster_nr), data=cluster_baselines) + ds.attrs['description'] = '%d baselines in cluster %d in sap %d' % (len(cluster_baselines), cluster_nr, sap_nr) + logger.info('finished writing clusters to %s', h5_path) + + +def read_clusters(h5_path, label='latest'): + """ + read the clusters from an h5 file. + :param str h5_path: path to the h5 file + :param str label: A name/label for this clustering result, for example 'my_clusterer_run_3', or the always present 'latest'. + :return (dict, list): the clustering_results dict, and the clustering_results annotations list. + + clustering_results = { <sapnr>: { 'clusters': { <nr>: <list_of_baselines>, # for example: [('CS001', 'CS002), ('CS001', 'CS003')] + ... }, + 'annotations': { <cluster_nr> : { 'annotation': <text>, + 'user': <user>, + 'timestamp: <datetime> }, + ... } + ... }, + ... } + + annotations list = [ { 'annotation': <text>, 'user': <user>, 'timestamp: <datetime> }, + { 'annotation': <text>, 'user': <user>, 'timestamp: <datetime> }, + .... 
] + + + """ + result_clusters = {} + result_annotations = [] + + with h5py.File(h5_path, "r") as file: + if 'clustering' not in file: + logger.debug('could not find any clustering results in %s', h5_path) + return result_clusters, result_annotations + + clustering_group = file['clustering'] + + if label not in clustering_group: + logger.debug('could not find clusters for algorithm \'%s\' for in %s', label, h5_path) + return result_clusters, result_annotations + + algo_group = clustering_group[label] + + logger.info('reading annotations for algorithm \'%s\', timestamp=\'%s\' from %s', label, algo_group.attrs.get('timestamp', '<unknown>'), h5_path) + + if 'annotations' in algo_group: + for anno_nr, anno_ds in algo_group['annotations'].items(): + annotation = anno_ds[0] + cluster_nr = anno_ds.attrs.get('cluster_nr') + user = anno_ds.attrs.get('user') + timestamp = anno_ds.attrs.get('timestamp') + + result_annotations.append({'annotation': annotation, + 'user': user, + 'timestamp': datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')}) + + saps_group = algo_group['saps'] + + logger.info('reading clusters for algorithm \'%s\', timestamp=\'%s\' from %s', label, algo_group.attrs.get('timestamp', '<unknown>'), h5_path) + + for sap_nr, sap_dict in saps_group.items(): + sap_nr = int(sap_nr) + sap_clusters_result = {} + sap_clusters_annotations = {} + sap_result = {'clusters': sap_clusters_result, + 'annotations': sap_clusters_annotations } + + if 'clusters' in sap_dict: + logger.debug('reading clusters for sap %d in %s', sap_nr, h5_path) + + result_clusters[sap_nr] = sap_result + + for cluster_nr in sorted(sap_dict['clusters'].keys()): + baselines = sap_dict['clusters'][cluster_nr][:] + cluster_nr = int(cluster_nr) + baselines = [(bl[0], bl[1]) for bl in baselines] + sap_clusters_result[cluster_nr] = baselines + logger.debug('read %d baselines in cluster %d in sap %d', len(baselines), cluster_nr, sap_nr) + else: + logger.debug('could not find clusters for sap %d in %s', sap_nr, h5_path) + + if 'annotations' in sap_dict: + logger.debug('reading cluster annotations for sap %d in %s', sap_nr, h5_path) + + for anno_nr, anno_ds in sap_dict['annotations'].items(): + try: + annotation = anno_ds[0] + cluster_nr = int(anno_ds.attrs.get('cluster_nr')) + logger.debug("%s %s", cluster_nr, type(cluster_nr)) + user = anno_ds.attrs.get('user') + timestamp = anno_ds.attrs.get('timestamp') + + if cluster_nr not in sap_clusters_annotations: + sap_clusters_annotations[cluster_nr] = [] + + sap_clusters_annotations[cluster_nr].append({'annotation': annotation, + 'user': user, + 'timestamp': datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')}) + except: + pass + + for cluster_nr, sap_clusters_annotation_list in sap_clusters_annotations.items(): + logger.debug('read %d cluster annotations for cluster %d in sap %d', len(sap_clusters_annotation_list), cluster_nr, sap_nr) + else: + logger.debug('could not find cluster annotations for sap %d in %s', sap_nr, h5_path) + + logger.info('read %d clusters for sap %d from %s', len(sap_result), sap_nr, h5_path) + logger.info('finised reading clusters from %s', h5_path) + + return result_clusters, result_annotations + + +def delete_clusters(h5_path, label=DEFAULT_ALGO_NAME): + """ + delete the clustering results with the given label from the h5 file. + :param str h5_path: h5_path to the h5 file + :param str label: the name/label for of the clustering result, for example 'my_clusterer_run_3'. + The always present symlink 'latest' is updated to the next latest clustering result. 
+ """ + with h5py.File(h5_path, "r+") as file: + if 'clustering' in file: + for name, group in file['clustering'].items(): + if label is None or name==label: + for sap_nr, sap_dict in group['saps'].items(): + if 'clusters' in sap_dict: + logger.info('deleting clusters for sap %s in %s', sap_nr, h5_path) + del sap_dict['clusters'] + + _delete_clustering_group_if_empty(h5_path, label) + + +def _add_annotation_to_group(annotations__parent_group, annotation, user=None, **kwargs): + """ + add an annotation to the cluster in the file at h5_path, given by the clustering label, sap_nr, cluster_nr. + :param str h5_path: h5_path to the h5 file + :param str label: the label of the clustering results group + :param int sap_nr: the sap number withing the clustering results group + :param int cluster_nr: the cluster number withing the sap within the clustering results group + :param str annotation: the annotation for this cluster (can be any free form text) + :param str user: an optional user name + """ + if 'annotations' in annotations__parent_group: + annotations_group = annotations__parent_group['annotations'] + else: + annotations_group = annotations__parent_group.create_group('annotations') + annotations_group.attrs['description'] = 'annotations on this cluster' + + seq_nr = len(annotations_group) + ds = annotations_group.create_dataset(str(seq_nr), (1,), h5py.special_dtype(vlen=str), annotation) + ds.attrs['user'] = user if user else 'anonymous' + ds.attrs['timestamp'] = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + + for key, value in kwargs.items(): + ds.attrs[key] = value + + +def annotate_cluster(h5_path, label, sap_nr, cluster_nr, annotation, user=None): + """ + add an annotation to the cluster in the file at h5_path, given by the clustering label, sap_nr, cluster_nr. + :param str h5_path: h5_path to the h5 file + :param str label: the label of the clustering results group + :param int sap_nr: the sap number withing the clustering results group + :param int cluster_nr: the cluster number withing the sap within the clustering results group + :param str annotation: the annotation for this cluster (can be any free form text) + :param str user: an optional user name + """ + with h5py.File(h5_path, "r+") as file: + if 'clustering' in file: + clustering_group = file['clustering'] + + if label in clustering_group: + algo_group = clustering_group[label] + saps_group = algo_group['saps'] + + if str(sap_nr) in saps_group: + sap_group = saps_group[str(sap_nr)] + _add_annotation_to_group(sap_group, annotation, user, cluster_nr=cluster_nr) + + +def annotate_clustering_results(h5_path, label, annotation, user=None): + """ + add an annotation at top level for the entire file at h5_path. + :param str h5_path: h5_path to the h5 file + :param str label: the label of the clustering results group + :param str annotation: the annotation for this cluster (can be any free form text) + :param str user: an optional user name + """ + with h5py.File(h5_path, "r+") as file: + if 'clustering' in file: + clustering_group = file['clustering'] + + if label in clustering_group: + algo_group = clustering_group[label] + _add_annotation_to_group(algo_group, annotation, user) + + +def annotate_file(h5_path, annotation, user=None): + """ + add an annotation at top level for the entire file at h5_path. 
+ :param str h5_path: h5_path to the h5 file + :param str annotation: the annotation for this cluster (can be any free form text) + :param str user: an optional user name + """ + with h5py.File(h5_path, "r+") as file: + _add_annotation_to_group(file, annotation, user) + + +def read_file_annotations(h5_path): + """ + read the top level annotations on this file as a whole. + :param str h5_path: path to the h5 file + :return list: an annotations list with the top level annotations on this file as a whole. + + annotations list = [ { 'annotation': <text>, 'user': <user>, 'timestamp: <datetime> }, + { 'annotation': <text>, 'user': <user>, 'timestamp: <datetime> }, + .... ] + + + """ + result_annotations = [] + + with h5py.File(h5_path, "r") as file: + if 'annotations' in file: + for anno_nr, anno_ds in file['annotations'].items(): + annotation = anno_ds[0] + cluster_nr = anno_ds.attrs.get('cluster_nr') + user = anno_ds.attrs.get('user') + timestamp = anno_ds.attrs.get('timestamp') + + result_annotations.append({'annotation': annotation, + 'user': user, + 'timestamp': datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')}) + return result_annotations + + +def read_info_from_hdf5(h5_path, read_data_info=True, read_parset_info=True): + """ + Read basic info like Project, start/stoptime, stations, etc from h5 file. + :param str h5_path: h5_path to the h5 file + :param bool read_data_info: do/don't read data info (how many sap's, baselines, timestamps, subbands). + :param bool read_parset_info: do/don't read info from the parset (Project, PI, name, start/stop time, etc). + :return str: A human readable string with the requested info. + """ + info = '' + result = {} + + if read_data_info: + result = read_hypercube(h5_path, read_visibilities=False, read_flagging=False) + + if read_parset_info: + parset = read_hypercube_parset(h5_path) + if parset: + result['parset'] = parset + + file_annotations = read_file_annotations(h5_path) + clusters, clustering_algorithm_annotations = read_clusters(h5_path) + + return create_info_string(result, h5_path, file_annotations, clusters, clustering_algorithm_annotations) + + +def create_info_string(data, h5_path=None, file_annotations=None, clusters=None, cluster_annotations=None): + info = '' + + try: + parset = data['parset'] + if h5_path: + info += 'File : ' + os.path.basename(h5_path) + '\n' + info += 'Project : ' + parset.getString('ObsSW.Observation.Campaign.name') + '\n' + info += 'Project description : ' + parset.getString('ObsSW.Observation.Campaign.title') + '\n' + info += 'Project PI : ' + parset.getString('ObsSW.Observation.Campaign.PI') + '\n' + info += 'Type : ' + parset.getString('ObsSW.Observation.processSubtype') + '\n' + info += 'SAS id : ' + parset.getString('ObsSW.Observation.otdbID') + '\n' + info += 'name : ' + parset.getString('ObsSW.Observation.Scheduler.taskName') + '\n' + info += 'start time (UTC) : ' + parset.getString('ObsSW.Observation.startTime') + '\n' + info += 'stop time (UTC) : ' + parset.getString('ObsSW.Observation.stopTime') + '\n' + + try: + # try to import lofar.common.datetimeutils here and not at the top of the file + # to make this hdf5_io module as loosly coupled to other lofar code as possible + from lofar.common.datetimeutils import format_timedelta, parseDatetime + info += 'duration : ' + format_timedelta(parseDatetime(parset.getString('ObsSW.Observation.stopTime')) - + parseDatetime(parset.getString('ObsSW.Observation.startTime'))) + '\n' + except ImportError: + pass #just continue + + if 'observation' in 
parset.getString('ObsSW.Observation.processSubtype','').lower(): + info += '#Stations : ' + str(len(parset.getStringVector('ObsSW.Observation.VirtualInstrument.stationList'))) + '\n' + info += 'Stations : ' + ','.join(sorted(parset.getStringVector('ObsSW.Observation.VirtualInstrument.stationList'))) + '\n' + info += 'antenna array : ' + parset.getString('ObsSW.Observation.antennaArray') + '\n' + except: + #parset info not available + pass + + if file_annotations: + for i, anno in enumerate(file_annotations): + info += 'annotation[%02d] : \'%s\', by \'%s\' at \'%s\'\n' % (i, anno['annotation'], anno['user'], anno['timestamp'].strftime('%Y-%m-%d %H:%M:%S')) + + if 'saps' in data: + for sap_nr, sap_dict in data['saps'].items(): + info += 'data : sap: %s, #baselines: %s, #timestamps: %s, #subbands: %s, #polarizations: %s' % ( + sap_nr, len(sap_dict['baselines']), len(sap_dict['timestamps']), len(sap_dict['subbands']), len(sap_dict['polarizations'])) + '\n' + + if clusters: + for sap_nr in sorted(clusters.keys()): + sap_dict = clusters[sap_nr] + sap_cluster_dict = sap_dict['clusters'] + info += 'clusters : sap: %s, #clusters: %s, cluster sizes: %s' % ( + sap_nr, len(sap_cluster_dict), ', '.join([str(len(sap_cluster_dict[c_nr])) for c_nr in sorted(sap_cluster_dict.keys())])) + '\n' + + sap_cluster_annotation_dict = sap_dict.get('annotations', {}) + for sap_cluster_nr in sorted(sap_cluster_annotation_dict.keys()): + sap_cluster_annotations = sap_cluster_annotation_dict[sap_cluster_nr] + for sap_cluster_annotation in sap_cluster_annotations: + info += 'annotations : sap: %d cluster: %d : %s %s "%s"\n' % (sap_nr, sap_cluster_nr, + sap_cluster_annotation.get('user', '<unknown>'), + sap_cluster_annotation.get('timestamp', '<unknown>'), + sap_cluster_annotation.get('annotation', '<unknown>')) + + return info + + +def fill_info_folder_from_parset(h5_path): + try: + parset = read_hypercube_parset(h5_path) + + with h5py.File(h5_path, "r+") as file: + info_group = file.create_group('measurement/info') + info_group.attrs['description'] = 'Meta information about the measurement' + + if parset: + for name, key in [('project', 'Campaign.name'), + ('project_description', 'Campaign.title'), + ('PI', 'Campaign.PI'), + ('type', 'processType'), + ('subtype', 'processSubtype'), + ('SAS_id', 'Campaign.otdbID'), + ('antenna_array', 'antennaArray'), + ('name', 'Scheduler.taskName')]: + ps_key = 'ObsSW.Observation.' + key + ps_value = parset.getString(ps_key, '<unknown>') + info_group.create_dataset(name, (1,), h5py.special_dtype(vlen=str), [ps_value]) + + try: + # try to import lofar.common.datetimeutils here and not at the top of the file + # to make this hdf5_io module as loosly coupled to other lofar code as possible + from lofar.common.datetimeutils import format_timedelta, parseDatetime, totalSeconds + start_time = parset.getString('ObsSW.Observation.startTime') + stop_time = parset.getString('ObsSW.Observation.stopTime') + duration = parseDatetime(stop_time) - parseDatetime(start_time) + info_group.create_dataset('start_time', (1,), h5py.special_dtype(vlen=str), [start_time]) + info_group.create_dataset('stop_time', (1,), h5py.special_dtype(vlen=str), [stop_time]) + ds = info_group.create_dataset('duration', data=[totalSeconds(duration)]) + ds.attrs['description'] = 'duration in seconds' + except (ImportError, RuntimeError, ValueError) as e: + logger.warning('Could not convert start/end time and/or duration in fill_info_folder_from_parset for %s. 
error: %s', h5_path, e) + except Exception as e: + logger.error('Error whle running fill_info_folder_from_parset: %s', e) + +def read_info_dict(h5_path): + """ read the info about the observation/pipeline from the h5 file given by h5_path. + :param str h5_path: h5_path to the h5 file + :return: a dict with the info about the observation/pipeline in native python types, like: + {'PI': 'my_PI', + 'SAS_id': 'my_id', + 'duration': datetime.timedelta(0, 3600), + 'name': 'my_observation_name', + 'project': 'my_project_name', + 'project_description': 'my_project_description', + 'antenna_array': 'LBA', + 'start_time': datetime.datetime(2018, 6, 11, 11, 0), + 'stop_time': datetime.datetime(2018, 6, 11, 12, 0), + 'type': 'my_process_subtype'} """ + with h5py.File(h5_path, "r+") as file: + if not 'measurement/info' in file: + # try to convert old style file with parsets only into new files with info. + fill_info_folder_from_parset(h5_path) + + info_dict = {} + if 'measurement/info' in file: + for k, v in file['measurement/info'].items(): + k = str(k) + v = v[0] + info_dict[k] = v + + if k == 'start_time' or k == 'stop_time': + # try to import lofar.common.datetimeutils here and not at the top of the file + # to make this hdf5_io module as loosly coupled to other lofar code as possible + try: + from lofar.common.datetimeutils import parseDatetime + info_dict[k] = parseDatetime(v) + except ImportError: + pass + elif k == 'duration': + info_dict[k] = timedelta(seconds=v) + + return info_dict + diff --git a/QA/QA_Common/test/CMakeLists.txt b/QA/QA_Common/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5b0d0bfca76035e2a1fdbea9e79f4e12d7f769bb --- /dev/null +++ b/QA/QA_Common/test/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. 
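Before moving on to the QA_Common test CMakeLists below, a minimal usage sketch (editorial illustration, not part of the patch) of the hdf5_io annotation and info helpers added above. The h5 file path and the annotation text are hypothetical; the import path follows the lofar.qa install destination used by this package.

from lofar.qa.hdf5_io import annotate_file, read_file_annotations, read_info_dict

def summarize_qa_file(h5_path):
    # attach a free-form remark to the file as a whole
    annotate_file(h5_path, 'checked by hand, looks fine', user='inspector')

    # read all file-level annotations back; timestamps come back as datetime objects
    for anno in read_file_annotations(h5_path):
        print('%s by %s: %s' % (anno['timestamp'], anno['user'], anno['annotation']))

    # fetch the parset-derived info dict (filled on demand for older files without measurement/info)
    info = read_info_dict(h5_path)
    print('%s (SAS id %s), duration %s' % (info.get('name'), info.get('SAS_id'), info.get('duration')))

# hypothetical path under the default qa_base_dir used by the QA service
summarize_qa_file('/data/qa/ms_extract/L123456.MS_extract.h5')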
+ +# $Id$ +include(LofarCTest) + +IF(BUILD_TESTING) + # deploy test modules and scripts + python_install(test_utils.py + DESTINATION lofar/qa/test) + + lofar_add_bin_scripts(create_test_hypercube) +ENDIF(BUILD_TESTING) + +lofar_add_test(t_cep4_utils) +lofar_add_test(t_hdf5_io) + + diff --git a/QA/QA_Common/test/create_test_hypercube b/QA/QA_Common/test/create_test_hypercube new file mode 100755 index 0000000000000000000000000000000000000000..1d368470961a24869562a2443f60ec92a57127df --- /dev/null +++ b/QA/QA_Common/test/create_test_hypercube @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +import os +from optparse import OptionParser +from lofar.qa.test.test_utils import * +from lofar.qa.hdf5_io import write_hypercube + +import logging +logger = logging.getLogger(__name__) + +def main(): + # make sure we run in UTC timezone + os.environ['TZ'] = 'UTC' + + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', + level=logging.INFO) + + ## Check the invocation arguments + parser = OptionParser(usage='create_test_hypercube [options] <path_to_new_h5_file>', + description='creates a test h5 hypercube with random data for the given number of stations, saps, subbands, timestamps.') + parser.add_option('-s', '--stations', dest='stations', type='int', default=3, help='number of stations to create, default: %default') + parser.add_option('-S', '--subbands', dest='subbands', type='int', default=244, help='number of subbands (per sap) to create, default: %default') + parser.add_option('-t', '--timestamps', dest='timestamps', type='int', default=128, help='number of timestamps to create, default: %default') + parser.add_option('--saps', dest='saps', type='int', default=1, help='number of saps to create, default: %default') + parser.add_option('-o', '--otdb_id', dest='otdb_id', type='int', default=None, help='optional (fake/test) otdb id, default: %default') + + (options, args) = parser.parse_args() + + if len(args) != 1: + print 'Please provide a file name for the h5 file which you want to create...' + print + parser.print_help() + exit(1) + + cube = create_hypercube(num_stations=options.stations, + num_saps=options.saps, + num_subbands_per_sap={sap:options.subbands for sap in range(options.saps)}, + num_timestamps=options.timestamps) + write_hypercube(args[0], cube, sas_id=options.otdb_id) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/QA/QA_Common/test/t_cep4_utils.py b/QA/QA_Common/test/t_cep4_utils.py new file mode 100755 index 0000000000000000000000000000000000000000..1a0b970a515f042e558c5506c90f53d3bab9fbdb --- /dev/null +++ b/QA/QA_Common/test/t_cep4_utils.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. 
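The t_cep4_utils.py module below exercises the cep4_utils command wrappers, which are not themselves shown in this diff. As a rough, simplified sketch of the idea those tests rely on (an assumption, not the real lofar.qa.cep4_utils code): a wrapper just prepends an argv prefix to an existing command list, which is why the docker and ssh wrappers can be nested.

# simplified illustration only; the host name, user and docker flags are assumptions
def sketch_wrap_command_in_ssh_call(cmd, user='lofarsys', host='head.cep4.control.lofar'):
    return ['ssh', '-T', '%s@%s' % (user, host)] + list(cmd)

def sketch_wrap_command_for_docker(cmd, image='adder', tag='latest'):
    return ['docker', 'run', '--rm', '%s:%s' % (image, tag)] + list(cmd)

# run 'true' inside the docker image on the remote head node: wrap innermost first
nested_cmd = sketch_wrap_command_in_ssh_call(sketch_wrap_command_for_docker(['true']))
assert nested_cmd == ['ssh', '-T', 'lofarsys@head.cep4.control.lofar',
                      'docker', 'run', '--rm', 'adder:latest', 'true']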
+ +import unittest +from subprocess import call + +import logging +from lofar.qa.cep4_utils import * + +logger = logging.getLogger(__name__) + +class TestCep4Utils(unittest.TestCase): + def test_01_wrap_command_in_cep4_head_node_ssh_call(self): + cmd = wrap_command_in_cep4_head_node_ssh_call(['true']) + logger.info('executing command: %s', ' '.join(cmd)) + self.assertEqual(0, call(cmd)) + + def test_02_get_cep4_available_cpu_nodes(self): + node_nrs = get_cep4_available_cpu_nodes() + self.assertTrue(isinstance(node_nrs, list)) + self.assertTrue(len(node_nrs) > 0) + + def test_03_wrap_command_in_cep4_random_cpu_node_ssh_call(self): + """ + this test calls and tests the functionality of the following methods via + wrap_command_in_cep4_random_cpu_node_ssh_call: get_cep4_available_cpu_nodes, wrap_command_in_cep4_cpu_node_ssh_call + """ + cmd = wrap_command_in_cep4_random_cpu_node_ssh_call(['true'], via_head=True) + logger.info('executing command: %s', ' '.join(cmd)) + self.assertEqual(0, call(cmd)) + + def test_04_wrap_command_in_cep4_available_cpu_node_with_lowest_load_ssh_call(self): + """ + this test calls and tests the functionality of the following methods via + wrap_command_in_cep4_random_cpu_node_ssh_call: + get_cep4_available_cpu_nodes, get_cep4_cpu_nodes_loads, + get_cep4_available_cpu_nodes_sorted_ascending_by_load, wrap_command_in_cep4_cpu_node_ssh_call + """ + cmd = wrap_command_in_cep4_available_cpu_node_with_lowest_load_ssh_call(['true'], via_head=True) + logger.info('executing command: %s', ' '.join(cmd)) + self.assertEqual(0, call(cmd)) + + def test_05_wrap_command_for_docker_in_cep4_head_node_ssh_call(self): + """ + this test calls and tests the functionality of wrap_command_for_docker and + wrap_command_in_cep4_head_node_ssh_call. + It is assumed that a docker image is available on head.cep4. + """ + #wrap the command in a docker call first, and then in an ssh call + cmd = wrap_command_for_docker(['true'], 'adder', 'latest') + cmd = wrap_command_in_cep4_head_node_ssh_call(cmd) + logger.info('executing command: %s', ' '.join(cmd)) + self.assertEqual(0, call(cmd)) + + def test_06_get_slurm_info_from_within_docker_via_cep4_head(self): + """ + test to see if we can execute a command via ssh on the head node, + from within a docker container, started via ssh on the head node (yes, that's multiple levels of indirection) + """ + # use the slurm sinfo command (because it's available on the head nodes only)... + cmd = ['sinfo'] + # ...called on cep4 headnode... + cmd = wrap_command_in_cep4_head_node_ssh_call(cmd) + # ...from with the docker container... + cmd = wrap_command_for_docker(cmd, 'adder', 'latest') + # ...which is started on the cep4 head node + cmd = wrap_command_in_cep4_head_node_ssh_call(cmd) + logger.info('executing command: %s', ' '.join(cmd)) + + #redirect stdout/stderr to /dev/null + with open('/dev/null', 'w') as dev_null: + self.assertEqual(0, call(cmd, stdout=dev_null, stderr=dev_null)) + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.DEBUG) + + # first try if we can reach cep4 + # this assumes the code in wrap_command_in_cep4_head_node_ssh_call is correct and working + # (which is also being tested in the unittests) + # if and only if the command to the head node succeeds, then we can do the tests + # otherwise, for whatever reason the ssh call fails, we skip the tests because we cannot reach cep4 head node. 
+ cep4_true_cmd = wrap_command_in_cep4_head_node_ssh_call(['true']) + + if call(cep4_true_cmd) == 0: + logger.info('We can reach the CEP4 head node. Continuing with tests...') + unittest.main() + else: + logger.warning('Cannot reach the CEP4 head node. skipping tests...') + #exit with special 'skipped' exit-code + exit(3) + diff --git a/QA/QA_Common/test/t_cep4_utils.run b/QA/QA_Common/test/t_cep4_utils.run new file mode 100755 index 0000000000000000000000000000000000000000..4b5b4ef354d88b60635eb68d21c1e52f18e169ef --- /dev/null +++ b/QA/QA_Common/test/t_cep4_utils.run @@ -0,0 +1,23 @@ +#!/bin/bash + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# Run the unit test +source python-coverage.sh +python_coverage_test "*QA*" t_cep4_utils.py + diff --git a/QA/QA_Common/test/t_cep4_utils.sh b/QA/QA_Common/test/t_cep4_utils.sh new file mode 100755 index 0000000000000000000000000000000000000000..044222a17bf7ba7886b928ada2660a61de01bf41 --- /dev/null +++ b/QA/QA_Common/test/t_cep4_utils.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +./runctest.sh t_cep4_utils diff --git a/QA/QA_Common/test/t_hdf5_io.py b/QA/QA_Common/test/t_hdf5_io.py new file mode 100755 index 0000000000000000000000000000000000000000..7c8ef4498331036a77c66971d54abe82e32fc4f9 --- /dev/null +++ b/QA/QA_Common/test/t_hdf5_io.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +import unittest +import logging +import tempfile +import os +import random +import numpy as np +from datetime import datetime, timedelta + +from lofar.qa.hdf5_io import * +from lofar.parameterset import * +from lofar.common.datetimeutils import to_modified_julian_date_in_seconds + +from lofar.qa.test.test_utils import * + +logger = logging.getLogger(__name__) + +class TestHdf5_IO(unittest.TestCase): + def test_write_and_read_again(self): + logger.info('test_write_and_read_again') + + path = tempfile.mkstemp()[1] + try: + logger.info('generating test data') + num_saps=3 + num_stations=7 + num_timestamps=11 + saps_in = create_hypercube(num_saps=num_saps, num_stations=num_stations, num_timestamps=num_timestamps) + + parset=parameterset() + parset.adoptArgv(['foo=bar']) + + write_hypercube(path, saps_in, parset) + + parset2 = read_hypercube_parset(path) + self.assertEqual(str(parset), str(parset2)) + + result = read_hypercube(path, visibilities_in_dB=False, python_datetimes=True) + + self.assertTrue(result['saps']) + self.assertEqual(num_saps, len(result['saps'])) + + for sap_nr, sap_out in result['saps'].items(): + sap_in = saps_in[sap_nr] + + self.assertTrue(sap_out['timestamps']) + for t_in, t_out in zip(sap_in['timestamps'], sap_out['timestamps']): + self.assertEqual(t_in, t_out) + + self.assertFalse(sap_out['visibilities_in_dB']) + self.assertEqual(sap_in['visibilities'].shape, sap_out['visibilities'].shape) + + diff = sap_in['visibilities'] - sap_out['visibilities'] + error = np.absolute(diff/sap_in['visibilities']) + + median_error = np.median(error) + self.assertTrue(median_error < 0.05) + logger.info('median error %s < threshold %s', median_error, 0.05) + + self.assertTrue('antenna_locations' in sap_out) + for coords_type in ['XYZ', 'PQR', 'WGS84']: + self.assertTrue(coords_type in sap_out['antenna_locations']) + self.assertEqual(7, len(sap_out['antenna_locations'][coords_type])) + + #test the file annotations + annotate_file(path, 'This file was recorded in front of a live audience ;-)', 'test_user') + file_annotations = read_file_annotations(path) + + self.assertEqual(1, len(file_annotations)) + self.assertEqual('This file was recorded in front of a live audience ;-)', file_annotations[0]['annotation']) + self.assertEqual('test_user', file_annotations[0]['user']) + + finally: + logger.info('removing test file: %s', path) + os.remove(path) + + def test_combine_hypercubes(self): + logger.info('test_combine_hypercubes') + + paths = [] + try: + logger.info('generating test data') + num_saps=3 + num_stations=7 + num_timestamps=11 + saps_in = create_hypercube(num_saps=num_saps, num_stations=num_stations, num_timestamps=num_timestamps) + + #write each sap to a seperate file + for sap_nr, sap_in in saps_in.items(): + path = tempfile.mkstemp()[1] + paths.append(path) + logger.info('writing sap %d to %s', sap_nr, path) + write_hypercube(path, {sap_nr:sap_in}, sas_id=999999) + + combined_filepath = combine_hypercubes(paths, output_dir='/tmp', output_filename=os.path.basename(tempfile.mkstemp()[1])) + + if combined_filepath: + paths.append(combined_filepath) + + result = 
read_hypercube(combined_filepath, visibilities_in_dB=False, python_datetimes=True) + + self.assertTrue(result['saps']) + self.assertEqual(num_saps, len(result['saps'])) + + for sap_nr, sap_out in result['saps'].items(): + sap_in = saps_in[sap_nr] + + self.assertTrue(sap_out['timestamps']) + for t_in, t_out in zip(sap_in['timestamps'], sap_out['timestamps']): + self.assertEqual(t_in, t_out) + + self.assertFalse(sap_out['visibilities_in_dB']) + self.assertEqual(sap_in['visibilities'].shape, sap_out['visibilities'].shape) + + diff = sap_in['visibilities'] - sap_out['visibilities'] + error = np.absolute(diff/sap_in['visibilities']) + + median_error = np.median(error) + self.assertTrue(median_error < 0.05) + logger.info('median error %s < threshold %s', median_error, 0.05) + + finally: + for path in paths: + logger.info('removing test file: %s', path) + os.remove(path) + + def test_common_info_from_parset(self): + logger.info('test_common_info_from_parset') + + logger.info('generating test data') + num_saps=1 + num_stations=2 + num_timestamps=3 + saps_in = create_hypercube(num_saps=num_saps, num_stations=num_stations, num_timestamps=num_timestamps) + + parset = parameterset.fromString("""ObsSW.Observation.Campaign.PI="my_PI" + ObsSW.Observation.Campaign.name="my_project_name" + ObsSW.Observation.Campaign.title="my_project_description" + ObsSW.Observation.processType="my_process_type" + ObsSW.Observation.processSubtype="my_process_subtype" + ObsSW.Observation.Campaign.otdbID="my_id" + ObsSW.Observation.antennaArray="LBA" + ObsSW.Observation.Scheduler.taskName="my_task_name" + ObsSW.Observation.startTime="2018-06-11 11:00:00" + ObsSW.Observation.stopTime="2018-06-11 12:00:00" + foo="bar" """) + + path = tempfile.mkstemp()[1] + try: + write_hypercube(path, saps_in, parset) + + # make sure the info folder is in the file, + # and delete it so we can test fill_info_folder_from_parset later on + with h5py.File(path, "r+") as file: + self.assertTrue('measurement/info' in file) + del file['measurement/info'] + + with h5py.File(path, "r") as file: + self.assertFalse('measurement/info' in file) + + # call the actual method under test, fill_info_folder_from_parset + fill_info_folder_from_parset(path) + + with h5py.File(path, "r") as file: + self.assertTrue('measurement/info' in file) + + info = read_info_dict(path) + self.assertEqual('my_PI', info['PI']) + self.assertEqual('my_id', info['SAS_id']) + self.assertEqual('my_task_name', info['name']) + self.assertEqual('my_project_name', info['project']) + self.assertEqual('my_project_description', info['project_description']) + self.assertEqual('my_process_type', info['type']) + self.assertEqual('my_process_subtype', info['subtype']) + self.assertEqual('LBA', info['antenna_array']) + self.assertEqual(datetime(2018, 6, 11, 11, 0), info['start_time']) + self.assertEqual(datetime(2018, 6, 11, 12, 0), info['stop_time']) + self.assertEqual(timedelta(0, 3600), info['duration']) + finally: + os.remove(path) + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO) + unittest.main() diff --git a/QA/QA_Common/test/t_hdf5_io.run b/QA/QA_Common/test/t_hdf5_io.run new file mode 100755 index 0000000000000000000000000000000000000000..91d84bee6d0d464367988c328389f67ec7d0e59d --- /dev/null +++ b/QA/QA_Common/test/t_hdf5_io.run @@ -0,0 +1,23 @@ +#!/bin/bash + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. 
Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# Run the unit test +source python-coverage.sh +python_coverage_test "*hdf5_io*" t_hdf5_io.py + diff --git a/QA/QA_Common/test/t_hdf5_io.sh b/QA/QA_Common/test/t_hdf5_io.sh new file mode 100755 index 0000000000000000000000000000000000000000..62cd139a238a701b6de2dde05de60af164de5e6e --- /dev/null +++ b/QA/QA_Common/test/t_hdf5_io.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +./runctest.sh t_hdf5_io diff --git a/QA/QA_Common/test/test_utils.py b/QA/QA_Common/test/test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f91b6f456742dfd25fbe13b941d1bd1b72824ff4 --- /dev/null +++ b/QA/QA_Common/test/test_utils.py @@ -0,0 +1,79 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. 
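Looking back at the t_hdf5_io tests above: they accept a median relative error below 5% on the visibilities instead of exact equality, presumably because the round-trip through write_hypercube/read_hypercube is lossy. A small self-contained sketch of that acceptance check:

import numpy as np

def median_relative_error(original, roundtripped):
    # median of the element-wise relative error between two complex arrays
    diff = original - roundtripped
    return np.median(np.absolute(diff / original))

a = np.array([1 + 1j, 2 + 2j, 4 + 0j], dtype=np.complex64)
b = (a * 1.01).astype(np.complex64)        # simulate a ~1% round-trip loss
assert median_relative_error(a, b) < 0.05  # same threshold as used in the tests above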
+ +import numpy as np +from datetime import datetime, timedelta + +from lofar.common.datetimeutils import to_modified_julian_date_in_seconds +from lofar.qa.geoconversions import * + +def create_hypercube(num_saps=3, num_stations=5, num_timestamps=11, num_subbands_per_sap=None): + data = {} + + if num_subbands_per_sap is None: + num_subbands_per_sap = {} + for sap_nr in range(num_saps): + num_subbands_per_sap[sap_nr] = 13*(sap_nr+1) + + stations = ['CS%03d' % (i + 1) for i in range(num_stations)] + baselines = [] + for idx, station1 in enumerate(stations): + for station2 in stations[idx:]: + baselines.append((station1, station2)) + + num_baselines = len(baselines) + + for sap_nr in range(num_saps): + #generate nice test visibilities + num_subbands = num_subbands_per_sap[sap_nr] + + #generate 'ticks' along the polarization-axes + polarizations = ['xx', 'xy', 'yx', 'yy'] + + visibilities = np.empty((num_baselines, num_timestamps, num_subbands, len(polarizations)), dtype=np.complex64) + visibilities.real = np.random.random(visibilities.shape) + visibilities.imag = np.random.random(visibilities.shape) + + #and some flagging + flagging = np.zeros(visibilities.shape, dtype=np.bool) + + now = datetime.utcnow() + timestamps = [now+timedelta(seconds=i) for i in range(num_timestamps)] + timestamps_mjds = np.array([to_modified_julian_date_in_seconds(t) for t in timestamps]) + + #generate 'ticks' along the central_frequencies-axes + central_frequencies = [1e11+i*1e10 for i in range(num_subbands)] + sb_offset = sum([len(sap['subbands']) for sap in data.values()]) + subbands = ['SB%03d'% i for i in range(sb_offset, sb_offset+num_subbands)] + + antenna_locations = {'XYZ': {}, 'PQR': {}, 'WGS84' : {}} + for station in stations: + xyz_pos = (0,0,0) + antenna_locations['XYZ'][station] = xyz_pos + antenna_locations['PQR'][station] = pqr_cs002_from_xyz(xyz_pos) + antenna_locations['WGS84'][station] = geographic_from_xyz(xyz_pos) + + data[sap_nr] = { 'baselines':baselines, + 'timestamps':timestamps, + 'central_frequencies':central_frequencies, + 'subbands':subbands, + 'polarizations':polarizations, + 'visibilities':visibilities, + 'flagging':flagging, + 'antenna_locations': antenna_locations} + return data + diff --git a/QA/QA_Service/CMakeLists.txt b/QA/QA_Service/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..99225b799351411d067de8c4ee195a1375af9131 --- /dev/null +++ b/QA/QA_Service/CMakeLists.txt @@ -0,0 +1,24 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. 
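A quick sanity sketch of the per-SAP layout produced by the create_hypercube helper above (assuming it is importable as lofar.qa.test.test_utils, the install destination from the test CMakeLists): visibilities are shaped (baselines, timestamps, subbands, polarizations), and the baseline list includes auto-correlations, so n stations yield n*(n+1)/2 baselines.

from lofar.qa.test.test_utils import create_hypercube

cube = create_hypercube(num_saps=1, num_stations=3, num_timestamps=4,
                        num_subbands_per_sap={0: 2})
sap = cube[0]
assert len(sap['baselines']) == 6                 # 3 stations -> 3*4/2 = 6 baselines (autos included)
assert sap['visibilities'].shape == (6, 4, 2, 4)  # (baselines, timestamps, subbands, polarizations)
assert sap['flagging'].shape == sap['visibilities'].shape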
+ +# $Id$ + +lofar_package(QA_Service 1.0 DEPENDS QA_Common PyMessaging OTDB_Services) + +add_subdirectory(lib) +add_subdirectory(bin) +add_subdirectory(test) diff --git a/QA/QA_Service/bin/CMakeLists.txt b/QA/QA_Service/bin/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..51b7f46c1124c5dd50fe2c9a9650c16ed262f2f6 --- /dev/null +++ b/QA/QA_Service/bin/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +lofar_add_bin_scripts(qa_service) + +# supervisord config files +install(FILES + qa_service.ini + DESTINATION etc/supervisord.d) + diff --git a/QA/QA_Service/bin/qa_service b/QA/QA_Service/bin/qa_service new file mode 100755 index 0000000000000000000000000000000000000000..64c50190c7bba7fba57d69ace8e5975a4f160f79 --- /dev/null +++ b/QA/QA_Service/bin/qa_service @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +from lofar.qa.service.qa_service import main + +if __name__ == '__main__': + main() diff --git a/QA/QA_Service/bin/qa_service.ini b/QA/QA_Service/bin/qa_service.ini new file mode 100644 index 0000000000000000000000000000000000000000..1ba42b547425ffff84f7544b51e5392525dd6b67 --- /dev/null +++ b/QA/QA_Service/bin/qa_service.ini @@ -0,0 +1,8 @@ +[program:qa_service] +command=/bin/bash -c 'source $LOFARROOT/lofarinit.sh;exec qa_service' +user=lofarsys +stopsignal=INT ; KeyboardInterrupt +stopasgroup=true ; bash does not propagate signals +stdout_logfile=%(program_name)s.log +redirect_stderr=true +stderr_logfile=NONE diff --git a/QA/QA_Service/lib/CMakeLists.txt b/QA/QA_Service/lib/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed5533b3b616680fa748d52b95f6807fc68a7ae5 --- /dev/null +++ b/QA/QA_Service/lib/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. 
Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +python_install( + __init__.py + config.py + qa_service.py + QABusListener.py + DESTINATION lofar/qa/service) + diff --git a/QA/QA_Service/lib/QABusListener.py b/QA/QA_Service/lib/QABusListener.py new file mode 100644 index 0000000000000000000000000000000000000000..e6b8710a3cc2aa08da7a93c9a0aafb4812a96e1c --- /dev/null +++ b/QA/QA_Service/lib/QABusListener.py @@ -0,0 +1,85 @@ +# Copyright (C) 2015 +# ASTRON (Netherlands Institute for Radio Astronomy) +# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +# +# $Id$ + +""" +QABusListener listens on the lofar qa message bus and calls (empty) on<SomeMessage> methods when such a message is received. +Typical usage is to derive your own subclass from QABusListener and implement the specific on<SomeMessage> methods that you are interested in. +""" + +from lofar.messaging.messagebus import AbstractBusListener +from lofar.qa.service.config import DEFAULT_QA_NOTIFICATION_BUSNAME, DEFAULT_QA_NOTIFICATION_SUBJECT_PREFIX + +import qpid.messaging +import logging +from datetime import datetime + +logger = logging.getLogger(__name__) + + +class QABusListener(AbstractBusListener): + def __init__(self, busname=DEFAULT_QA_NOTIFICATION_BUSNAME, subject=DEFAULT_QA_NOTIFICATION_SUBJECT_PREFIX, broker=None, **kwargs): + """ + QABusListener listens on the lofar qa message bus and calls (empty) on<SomeMessage> methods when such a message is received. + Typical usage is to derive your own subclass from QABusListener and implement the specific on<SomeMessage> methods that you are interested in. 
+ :param address: valid Qpid address (default: lofar.otdb.status) + :param broker: valid Qpid broker host (default: None, which means localhost) + additional parameters in kwargs: + options= <dict> Dictionary of options passed to QPID + exclusive= <bool> Create an exclusive binding so no other services can consume duplicate messages (default: False) + numthreads= <int> Number of parallel threads processing messages (default: 1) + verbose= <bool> Output extra logging over stdout (default: False) + """ + address = "%s/%s.#" % (busname, subject) + super(QABusListener, self).__init__(address, broker, **kwargs) + + def _handleMessage(self, msg): + logger.debug("QABusListener.handleMessage: %s" %str(msg)) + + logger.info("received message subject=%s content=%s", msg.subject, msg.content) + subject_suffix = msg.subject.split('.')[-1] + + if subject_suffix == 'ConvertedMS2Hdf5': + self.onConvertedMS2Hdf5(msg.content) + elif subject_suffix == 'CreatedInspectionPlots': + self.onCreatedInspectionPlots(msg.content) + elif subject_suffix == 'Clustered': + self.onClustered(msg.content) + elif subject_suffix == 'Finished': + self.onFinished(msg.content) + elif subject_suffix == 'Error': + self.onError(msg.content) + + def onConvertedMS2Hdf5(self, msg_content): + pass + + def onClustered(self, msg_content): + pass + + def onCreatedInspectionPlots(self, msg_content): + pass + + def onFinished(self, msg_content): + pass + + def onError(self, msg_content): + pass + + +__all__ = ["QABusListener"] diff --git a/QA/QA_Service/lib/__init__.py b/QA/QA_Service/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..491a020ffc2353c60f86a2d734eb9d846bd22054 --- /dev/null +++ b/QA/QA_Service/lib/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + diff --git a/QA/QA_Service/lib/config.py b/QA/QA_Service/lib/config.py new file mode 100644 index 0000000000000000000000000000000000000000..dc54af1997319e566524eeefb75fec188cdd2618 --- /dev/null +++ b/QA/QA_Service/lib/config.py @@ -0,0 +1,23 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +from lofar.messaging import adaptNameToEnvironment + +DEFAULT_QA_NOTIFICATION_BUSNAME= adaptNameToEnvironment('lofar.qa.notification') +DEFAULT_QA_NOTIFICATION_SUBJECT_PREFIX='QA' diff --git a/QA/QA_Service/lib/qa_service.py b/QA/QA_Service/lib/qa_service.py new file mode 100644 index 0000000000000000000000000000000000000000..c8fcbba860301db6bdb68c3fa813edaf4e21c323 --- /dev/null +++ b/QA/QA_Service/lib/qa_service.py @@ -0,0 +1,273 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ + +import os.path +import logging +from subprocess import call, Popen, PIPE, STDOUT +from optparse import OptionParser, OptionGroup +from lofar.common.util import waitForInterrupt +from lofar.sas.otdb.OTDBBusListener import OTDBBusListener +from lofar.sas.otdb.config import DEFAULT_OTDB_NOTIFICATION_BUSNAME, DEFAULT_OTDB_NOTIFICATION_SUBJECT +from lofar.messaging.messagebus import ToBus +from lofar.messaging.messages import EventMessage +from lofar.qa.service.config import DEFAULT_QA_NOTIFICATION_BUSNAME, DEFAULT_QA_NOTIFICATION_SUBJECT_PREFIX +from lofar.qa.cep4_utils import * + +logger = logging.getLogger(__name__) + +#TODO: idea: convert periodically while observing? + +class QAService(OTDBBusListener): + """ + QAService listens on the lofar otdb message bus for NotificationMessages and starts qa processes + upon observation/pipeline completion. The qa processes convert MS (measurement sets) to hdf5 qa files, + and then starts generating plots from the hdf5 file. + """ + def __init__(self, + qa_notification_busname=DEFAULT_QA_NOTIFICATION_BUSNAME, + qa_notification_subject_prefix=DEFAULT_QA_NOTIFICATION_SUBJECT_PREFIX, + otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME, + otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT, + broker=None, + qa_base_dir = '/data/qa', + **kwargs): + """ + Instantiate a QAService which listens on the given messagebus for Completion messages. + See also the superclass, OTDBBusListener. + :param string qa_notification_busname: valid Qpid address (default: DEFAULT_QA_NOTIFICATION_BUSNAME) + :param string qa_notification_subject: the subject to listen for. (default: DEFAULT_QA_NOTIFICATION_SUBJECT) + :param string otdb_notification_busname: valid Qpid address (default: DEFAULT_OTDB_NOTIFICATION_BUSNAME) + :param string otdb_notification_subject: the subject to listen for. 
(default: DEFAULT_OTDB_NOTIFICATION_SUBJECT) + :param broker: valid Qpid broker host (default: None, which means localhost) + """ + super(QAService, self).__init__(busname=otdb_notification_busname, + subject=otdb_notification_subject, + broker=broker, + **kwargs) + + self._qa_notification_subject_prefix = qa_notification_subject_prefix + self._send_bus = ToBus(qa_notification_busname, broker=broker) + self.qa_base_dir = qa_base_dir + + def start_listening(self, numthreads=None): + """ + start listening and open event _send_bus. This method is called in __enter__ when using 'with' context. + """ + super(QAService, self).start_listening(numthreads=numthreads) + self._send_bus.open() + + def stop_listening(self): + """ + stop listening and close event _send_bus. This method is called in __exit__ when using 'with' context. + """ + super(QAService, self).stop_listening() + self._send_bus.close() + + def onObservationCompleting(self, otdb_id, modificationTime): + """ + this mehod is called automatically upon receiving a Completion NotificationMessage + :param int otdb_id: the task's otdb database id + :param datetime modificationTime: timestamp when the task's status changed to completing + :return: None + """ + logger.info("task with otdb_id %s completed.", otdb_id) + self.do_qa(otdb_id=otdb_id) + + def do_qa(self, otdb_id): + """ + try to do all qa (quality assurance) steps for the given otdb_id + resulting in an h5 MS-extract file and inspection plots + :param int otdb_id: observation/pipeline otdb id for which the conversion needs to be done. + :return: None + """ + hdf5_file_path = self._convert_ms2hdf5(otdb_id) + if hdf5_file_path: + self._cluster_h5_file(hdf5_file_path, otdb_id) + + plot_dir_path = self._create_plots_for_h5_file(hdf5_file_path, otdb_id) + + if plot_dir_path: + self._send_event_message('Finished', {'otdb_id': otdb_id, + 'hdf5_file_path': hdf5_file_path, + 'plot_dir_path': plot_dir_path}) + + def _send_event_message(self, subject_suffix, content): + try: + subject = '%s.%s' % (self._qa_notification_subject_prefix, subject_suffix) + msg = EventMessage(context=subject, content=content) + logger.info('sending event message %s: %s', subject, content) + self._send_bus.send(msg) + except Exception as e: + logger.error('Could not send event message: %s', e) + + def _convert_ms2hdf5(self, otdb_id): + """ + convert the MS for the given otdb_id to an h5 MS-extract file. + The conversion will run via ssh on cep4 with massive parellelization. + When running on cep4, it is assumed that a docker image called adder exists on head.cep4 + When running locally, it is assumed that ms2hdf5 is installed locally. + :param int otdb_id: observation/pipeline otdb id for which the conversion needs to be done. + :return string: path to the generated h5 file. 
+ """ + try: + logger.info('trying to convert MS uv dataset with otdb_id %s if any', otdb_id) + + cmd = ['ms2hdf5', '-o', str(otdb_id), '--cep4', '-p', '-20'] + + # define default h5 filename use default cep4 qa output dir + h5_filename = 'L%s.MS_extract.h5' % otdb_id + h5_dir_path = os.path.join(self.qa_base_dir, 'ms_extract') + cmd += ['--output_dir', h5_dir_path] + cmd += ['--output_filename', h5_filename] + + # wrap the command in a cep4 docker ssh call + cmd = wrap_command_for_docker(cmd, 'adder', 'latest') + cmd = wrap_command_in_cep4_available_cpu_node_with_lowest_load_ssh_call(cmd) + + logger.info('starting ms2hdf5, executing: %s', ' '.join(cmd)) + + if call(cmd) == 0: + hdf5_path = os.path.join(h5_dir_path, h5_filename) + logger.info('converted uv dataset with otdb_id %s to hdf5 file %s', otdb_id, hdf5_path) + self._send_event_message('ConvertedMS2Hdf5', {'otdb_id': otdb_id, 'hdf5_file_path': hdf5_path}) + return hdf5_path + else: + msg = 'could not convert dataset with otdb_id %s' % otdb_id + logger.error(msg) + self._send_event_message('Error', {'otdb_id': otdb_id, 'message': msg}) + + except Exception as e: + logging.exception('error in _convert_ms2hdf5: %s', e) + self._send_event_message('Error', {'otdb_id': otdb_id, 'message': e.message}) + return None + + def _create_plots_for_h5_file(self, hdf5_path, otdb_id=None): + """ + create plots for the given h5 file. The plots are created via an ssh call to cep4 + where the plots are created in parallel in the docker image. + :param hdf5_path: the full path to the hdf5 file for which we want the plots. + :param otdb_id: the otdb_id of the converted observation/pipeline (is used for logging only) + :return: the full directory path to the directory containing the created plots. + """ + try: + #use default cep4 qa output dir. + plot_dir_path = os.path.join(self.qa_base_dir, 'inspectionplots') + + cmd = ['plot_hdf5_dynamic_spectra', + '-o %s' % (plot_dir_path,), + '--mixed', + '-n', '0', + '--force', + hdf5_path] + + # wrap the command in a cep4 ssh call to docker container + cmd = wrap_command_for_docker(cmd, 'adder', 'latest') + cmd = wrap_command_in_cep4_available_cpu_node_with_lowest_load_ssh_call(cmd) + + logger.info('generating plots for otdb_id %s, executing: %s', otdb_id, ' '.join(cmd)) + + if call(cmd) == 0: + plot_dir_path = os.path.join(plot_dir_path, 'L%s' % otdb_id) + logger.info('generated plots for otdb_id %s in %s', otdb_id, plot_dir_path) + + self._send_event_message('CreatedInspectionPlots', {'otdb_id': otdb_id, + 'hdf5_file_path': hdf5_path, + 'plot_dir_path': plot_dir_path}) + + return plot_dir_path + else: + msg = 'could not generate plots for otdb_id %s' % otdb_id + logger.error(msg) + self._send_event_message('Error', {'otdb_id': otdb_id, 'message': msg}) + except Exception as e: + logging.exception('error in _create_plots_for_h5_file: %s', e) + self._send_event_message('Error', {'otdb_id': otdb_id, 'message': e.message}) + return None + + + def _cluster_h5_file(self, hdf5_path, otdb_id=None): + """ + Try to cluster the baselines based on visibilities in the h5 file + using the clustering docker image developed by e-science. + This method assumes the adder_clustering docker image is available on cep4. If not, or if anything else + goes wrong, then the qa steps can just continue on the un-clustered h5 file. + The docker image can be build from the source on github: + https://github.com/NLeSC/lofar-predictive-maintenance + This is a private repo until the project has been published. At astron, jorrit has access. 
+ In the future, we might incorporate the clustering code from the github repo in to the LOFAR source tree. + :param hdf5_path: the full path to the hdf5 file for which we want the plots. + :param otdb_id: the otdb_id of the converted observation/pipeline (is used for logging only) + :return: None + """ + try: + # the command to cluster the given h5 file (executed in the e-science adder docker image) + cmd = ['cluster_this.py', hdf5_path] + cmd = wrap_command_for_docker(cmd, 'adder_clustering', 'latest') + cmd = wrap_command_in_cep4_head_node_ssh_call(cmd) + + logger.info('clustering hdf5 file %s otdb_id %s, executing: %s', hdf5_path, otdb_id, ' '.join(cmd)) + + if call(cmd) == 0: + logger.info('clustered hdf5 file %s otdb_id %s', hdf5_path, otdb_id) + + self._send_event_message('Clustered', {'otdb_id': otdb_id, + 'hdf5_file_path': hdf5_path}) + else: + msg = 'could not cluster hdf5 file %s otdb_id %s' % (hdf5_path, otdb_id) + logger.error(msg) + self._send_event_message('Error', {'otdb_id': otdb_id, 'message': msg}) + except Exception as e: + logging.exception('error in _cluster_h5_file: %s', e) + self._send_event_message('Error', {'otdb_id': otdb_id, 'message': e.message}) + return None + + +def main(): + """ + Run the qa service program with commandline arguments. + """ + + # Check the invocation arguments + parser = OptionParser("%prog [options]", + description='run the qa_service which listens for observations/pipelines finished events on ' + 'the bus and then starts the QA (Quality Assurance) processes to convert MS to ' + 'hdf5 files and generate inspection plots.') + group = OptionGroup(parser, 'QPid Messaging options') + group.add_option('-q', '--broker', dest='broker', type='string', default='localhost', help='Address of the qpid broker, default: %default') + group.add_option("--otdb_notification_busname", dest="otdb_notification_busname", type="string", + default=DEFAULT_OTDB_NOTIFICATION_BUSNAME, + help="Bus or queue where the OTDB notifications are published. [default: %default]") + group.add_option("--otdb_notification_subject", dest="otdb_notification_subject", type="string", + default=DEFAULT_OTDB_NOTIFICATION_SUBJECT, + help="Subject of OTDB notifications on otdb_notification_busname. [default: %default]") + parser.add_option_group(group) + (options, args) = parser.parse_args() + + #config logging + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO) + + #start the qa service + with QAService(otdb_notification_busname=options.otdb_notification_busname, + otdb_notification_subject=options.otdb_notification_subject, + broker=options.broker): + #loop and wait for messages or interrupt. + waitForInterrupt() + +if __name__ == '__main__': + main() diff --git a/QA/QA_Service/test/CMakeLists.txt b/QA/QA_Service/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d5201dab79068a633d73ef93fd0f87e0a00302b --- /dev/null +++ b/QA/QA_Service/test/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ +include(LofarCTest) + +lofar_add_test(t_qa_service) + + diff --git a/QA/QA_Service/test/t_qa_service.py b/QA/QA_Service/test/t_qa_service.py new file mode 100755 index 0000000000000000000000000000000000000000..98ded88a4c58fdc82cd0e86ba5014ee694f6c29b --- /dev/null +++ b/QA/QA_Service/test/t_qa_service.py @@ -0,0 +1,475 @@ +#!/usr/bin/env python + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +try: + from qpid.messaging import Connection + from qpid.messaging.exceptions import * + from qpidtoollibs import BrokerAgent +except ImportError: + print 'Cannot run test without qpid tools' + print 'Please source qpid profile' + exit(3) + +import unittest +import uuid +from threading import Event +import shutil +import mock + +import logging +logger = logging.getLogger(__name__) + +from lofar.qa.service.qa_service import QAService +from lofar.qa.service.QABusListener import * +from lofar.qa.hdf5_io import * +from lofar.messaging.messagebus import ToBus +from lofar.messaging.messages import EventMessage +from lofar.sas.otdb.config import DEFAULT_OTDB_NOTIFICATION_SUBJECT + + +# the tests below test is multi threaded (even multi process) +# define a QABusListener-derivative to handle synchronization (set the *_events) +class SynchronizingQABusListener(QABusListener): + """ + the tests below test is multi threaded (even multi process) + this QABusListener-derivative handles synchronization (set the *_events) + and stores the msg_content results for expected result checking + """ + def __init__(self, busname): + super(SynchronizingQABusListener, self).__init__(busname=busname) + self.converted_event = Event() + self.clustered_event = Event() + self.plotted_event = Event() + self.finished_event = Event() + self.error_event = Event() + + def onConvertedMS2Hdf5(self, msg_content): + self.converted_msg_content = msg_content + self.converted_event.set() + + def onCreatedInspectionPlots(self, msg_content): + self.plotted_msg_content = msg_content + self.plotted_event.set() + + def onFinished(self, msg_content): + self.finished_msg_content = msg_content + self.finished_event.set() + + def onClustered(self, msg_content): + self.clustered_msg_content = msg_content + self.clustered_event.set() + + def onError(self, msg_content): + self.error_msg_content = msg_content + self.error_event.set() + + 
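The SynchronizingQABusListener above turns asynchronous bus callbacks into something the test thread can wait on. A minimal stand-alone sketch of that pattern (illustration only, no messaging involved): each callback stores its payload and sets a threading.Event, and the test waits on the event with a timeout so a lost message fails the test quickly instead of hanging it.

from threading import Event

class ExampleSyncListener(object):
    def __init__(self):
        self.finished_event = Event()
        self.finished_msg_content = None

    def onFinished(self, msg_content):
        # in the real listener this is invoked from the message handler thread
        self.finished_msg_content = msg_content
        self.finished_event.set()

listener = ExampleSyncListener()
listener.onFinished({'otdb_id': 999999})    # simulate the incoming notification
assert listener.finished_event.wait(30)     # wait() returns True only if set within the timeout
assert listener.finished_msg_content['otdb_id'] == 999999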
+class TestQAService(unittest.TestCase): + """ + Tests for the QAService class + """ + def setUp(self): + """ + quite complicated setup to setup test qpid exhanges + and mock away ssh calls to cep4 + and mock away dockerized commands + """ + # setup broker connection + self.connection = Connection.establish('127.0.0.1') + self.broker = BrokerAgent(self.connection) + + # add test service exchange + self.TEST_UUID = uuid.uuid1() + self.busname = 'test-lofarbus-%s' % (self.TEST_UUID) + self.broker.addExchange('topic', self.busname) + + self.TEST_OTDB_ID = 999999 + + # where to store the test results + self.TEST_DIR = '/tmp/qa_service_%s' % self.TEST_UUID + self.TEST_H5_FILE = 'L%s.MS_extract.h5' % (self.TEST_OTDB_ID,) + self.TEST_H5_PATH = os.path.join(self.TEST_DIR, 'ms_extract', self.TEST_H5_FILE) + + # mock the calls to ssh cep4 and docker + def mocked_wrap_command_for_docker(cmd, image_name=None, image_label=None): + logger.info('mocked_wrap_command_for_docker returning original command: %s', ' '.join(cmd)) + return cmd + + def mocked_wrap_command_in_cep4_head_node_ssh_call(cmd): + logger.info('mocked_wrap_command_in_cep4_head_node_ssh_call returning original command: %s', ' '.join(cmd)) + return cmd + + def mocked_wrap_command_in_cep4_cpu_node_ssh_call(cmd, cpu_node_nr, via_head): + logger.info('mocked_wrap_command_in_cep4_cpu_node_ssh_call for cpu node nr %s via head=%s ' \ + 'returning original command: %s', cpu_node_nr, via_head, ' '.join(cmd)) + return cmd + + def mocked_get_cep4_available_cpu_nodes(): + logger.info('mocked_get_cep4_available_cpu_nodes for returning empty node list') + return [] + + # we need to patch the wrap_command_in_cep4_head_node_ssh_call function from module lofar.qa.service.qa_service, + # because that's were it's imported and used. + # (and not the original lofar.qa.cep4_utils.wrap_command_for_docker) + wrap_command_for_docker_patcher = mock.patch('lofar.qa.service.qa_service.wrap_command_for_docker') + self.addCleanup(wrap_command_for_docker_patcher.stop) + self.wrap_command_for_docker_mock = wrap_command_for_docker_patcher.start() + self.wrap_command_for_docker_mock.side_effect = mocked_wrap_command_for_docker + + wrap_command_in_cep4_head_node_ssh_call_patcher = mock.patch('lofar.qa.service.qa_service.wrap_command_in_cep4_head_node_ssh_call') + self.addCleanup(wrap_command_in_cep4_head_node_ssh_call_patcher.stop) + self.wrap_command_in_cep4_head_node_ssh_call_mock = wrap_command_in_cep4_head_node_ssh_call_patcher.start() + self.wrap_command_in_cep4_head_node_ssh_call_mock.side_effect = mocked_wrap_command_in_cep4_head_node_ssh_call + + wrap_command_in_cep4_cpu_node_ssh_call_patcher = mock.patch('lofar.qa.cep4_utils.wrap_command_in_cep4_cpu_node_ssh_call') + self.addCleanup(wrap_command_in_cep4_cpu_node_ssh_call_patcher.stop) + self.wrap_command_in_cep4_cpu_node_ssh_call_mock = wrap_command_in_cep4_cpu_node_ssh_call_patcher.start() + self.wrap_command_in_cep4_cpu_node_ssh_call_mock.side_effect = mocked_wrap_command_in_cep4_cpu_node_ssh_call + + get_cep4_available_cpu_nodes_patcher = mock.patch('lofar.qa.cep4_utils.get_cep4_available_cpu_nodes') + self.addCleanup(get_cep4_available_cpu_nodes_patcher.stop) + self.get_cep4_available_cpu_nodes_mock = get_cep4_available_cpu_nodes_patcher.start() + self.get_cep4_available_cpu_nodes_mock.side_effect = mocked_get_cep4_available_cpu_nodes + + # mock the ssh_cmd_list function, and check in each test if it was NOT called, + # because that is what we are trying to prevent by mocking the other methods. 
+ # So, in principle it should not be needed to mock it, + # but when there is some error in the code/test/mock we would like to prevent + # an accidental ssh call to cep4 + ssh_cmd_list_patcher = mock.patch('lofar.qa.cep4_utils.ssh_cmd_list') + self.addCleanup(ssh_cmd_list_patcher.stop) + self.ssh_cmd_list_mock = ssh_cmd_list_patcher.start() + + def tearDown(self): + logger.info('removing test dir: %s', self.TEST_DIR) + shutil.rmtree(self.TEST_DIR, ignore_errors=True) + + # cleanup test bus and exit + if self.broker: + logger.info('removing test bus: %s', self.busname) + self.broker.delExchange(self.busname) + if self.connection: + self.connection.close() + + def send_otdb_task_completing_event(self): + """helper method: create a ToBus and send a completing EventMessage""" + with ToBus(self.busname) as sender: + msg = EventMessage(context=DEFAULT_OTDB_NOTIFICATION_SUBJECT, + content={"treeID": self.TEST_OTDB_ID, + "state": 'completing', + "time_of_change": datetime.utcnow()}) + sender.send(msg) + + def test_01_qa_service_for_expected_behaviour(self): + """ + This test starts a QAService, triggers a test observation completing event, + and tests if the generated h5 file and plots are as expected. + It is an end-to-end test which does not check the intermediate results. It is assumed that + the intermediate steps are tested in other tests/modules. + """ + + logger.info(' -- test_01_qa_service_for_expected_behaviour -- ') + + # override the mock behaviour from setUp for this specific test + def mocked_wrap_command_for_docker(cmd, image_name=None, image_label=None): + # replace the ms2hdf5 command which runs normally in the docker container + # by a call to the create_test_hypercube which fakes the ms2hdf5 conversion for this test. + if 'ms2hdf5' in cmd: + # the create_test_hypercube executable should be available in the PATH environment + create_test_hypercube_path = 'create_test_hypercube' + + mocked_cmd = [create_test_hypercube_path, '-s 4', '-S 8', '-t 16', + '-o', str(self.TEST_OTDB_ID), self.TEST_H5_PATH] + logger.info("""mocked_wrap_command_for_docker returning mocked command to create test h5 file: '%s', instead of original command: '%s' """, + ' '.join(mocked_cmd), ' '.join(cmd)) + return mocked_cmd + + if 'cluster_this.py' in cmd or 'plot_hdf5_dynamic_spectra' in cmd: + # replace the cluster command which runs normally in the docker container + # by a call to bash true, so the 'cluster_this' call returns 0 exit code + mocked_cmd = ['true'] + logger.info("""mocked_wrap_command_for_docker returning mocked command: '%s', instead of original command: '%s' """, + ' '.join(mocked_cmd), ' '.join(cmd)) + return mocked_cmd + + logger.info("""mocked_wrap_command_for_docker returning original command: '%s' """, ' '.join(cmd)) + return cmd + + self.wrap_command_for_docker_mock.side_effect = mocked_wrap_command_for_docker + + # start the QAService (the object under test) + with QAService(qa_notification_busname=self.busname, + otdb_notification_busname=self.busname, + qa_base_dir=self.TEST_DIR): + + # start listening for QA event messages from the QAService + with SynchronizingQABusListener(self.busname) as qa_listener: + # trigger a qa process by sending otdb task completing event + # this will result in the QAService actually doing its magic + self.send_otdb_task_completing_event() + + # start waiting until ConvertedMS2Hdf5 event message received (or timeout) + qa_listener.converted_event.wait(30) + + # ConvertedMS2Hdf5 event message should have been sent, so converted_event should have been 
set + self.assertTrue(qa_listener.converted_event.is_set()) + + # check the converted_msg_content + self.assertTrue('otdb_id' in qa_listener.converted_msg_content) + self.assertTrue('hdf5_file_path' in qa_listener.converted_msg_content) + + + # start waiting until Clustered event message received (or timeout) + qa_listener.clustered_event.wait(30) + + + # Clustered event message should have been sent, so clustered_event should have been set + self.assertTrue(qa_listener.clustered_event.is_set()) + + # check the clustered_msg_content + self.assertTrue('otdb_id' in qa_listener.clustered_msg_content) + self.assertTrue('hdf5_file_path' in qa_listener.clustered_msg_content) + + + # start waiting until CreatedInspectionPlots event message received (or timeout) + qa_listener.plotted_event.wait(30) + + # CreatedInspectionPlots event message should have been sent, so plotted_event should have been set + self.assertTrue(qa_listener.plotted_event.is_set()) + + # check the plotted_msg_content + self.assertTrue('otdb_id' in qa_listener.plotted_msg_content) + self.assertTrue('hdf5_file_path' in qa_listener.plotted_msg_content) + self.assertTrue('plot_dir_path' in qa_listener.plotted_msg_content) + + # check if the output dirs/files exist + self.assertTrue(os.path.exists(qa_listener.plotted_msg_content['hdf5_file_path'])) + logger.info(qa_listener.plotted_msg_content['plot_dir_path']) + + #DISABLED checks for plots because we mock plot_hdf5_dynamic_spectra + #self.assertTrue(os.path.exists(qa_listener.plotted_msg_content['plot_dir_path'])) + #plot_file_names = [f for f in os.listdir(qa_listener.plotted_msg_content['plot_dir_path']) + #if f.endswith('png')] + #self.assertEqual(10, len(plot_file_names)) + + #auto_correlation_plot_file_names = [f for f in plot_file_names + #if 'auto' in f] + #self.assertEqual(4, len(auto_correlation_plot_file_names)) + + #complex_plot_file_names = [f for f in plot_file_names + #if 'complex' in f] + #self.assertEqual(6, len(complex_plot_file_names)) + + # start waiting until QAFinished event message received (or timeout) + qa_listener.finished_event.wait(30) + + # QAFinished event message should have been sent, so finished_event should have been set + self.assertTrue(qa_listener.finished_event.is_set()) + + # check the result_msg_content + self.assertTrue('otdb_id' in qa_listener.finished_msg_content) + self.assertTrue('hdf5_file_path' in qa_listener.finished_msg_content) + self.assertTrue('plot_dir_path' in qa_listener.finished_msg_content) + + self.wrap_command_for_docker_mock.assert_called() + self.wrap_command_in_cep4_cpu_node_ssh_call_mock.assert_called() + self.wrap_command_in_cep4_head_node_ssh_call_mock.assert_called() + self.get_cep4_available_cpu_nodes_mock.assert_called() + self.ssh_cmd_list_mock.assert_not_called() + + def test_02_qa_service_for_error_in_ms2hdf5(self): + """ + This test starts a QAService, triggers a test observation completing event, + and tests if the conversion from MS to hdf5 fails (by intention). + It is an end-to-end test which does not check the intermediate results. It is assumed that + the intermediate steps are tested in other tests/modules. 
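A sketch of the side_effect override used by the tests in this file: setUp installs a harmless pass-through mock, and an individual test swaps in its own behaviour to force a specific scenario. The wrapper name below is an illustrative stand-in.

try:
    import mock                  # Python 2, as used in this test
except ImportError:
    from unittest import mock    # Python 3

# the mock created once in setUp(), with a pass-through default behaviour
wrap_command_mock = mock.Mock(side_effect=lambda cmd: cmd)
assert wrap_command_mock(['echo', 'hello']) == ['echo', 'hello']

# a specific test then overrides the behaviour to force a failure scenario
def failing_wrapper(cmd):
    # substitute the real command with one that exits non-zero
    return ['false']

wrap_command_mock.side_effect = failing_wrapper
assert wrap_command_mock(['ms2hdf5', '--some', 'args']) == ['false']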
+ """ + + logger.info(' -- test_02_qa_service_for_error_in_ms2hdf5 -- ') + + def mocked_wrap_command_for_docker(cmd, image_name=None, image_label=None): + if 'ms2hdf5' in cmd: + # replace the ms2hdf5 command which runs normally in the docker container + # by a call to bash false, so the 'ms2hdf5' call returns non-0 exit code + mocked_cmd = ['false'] + logger.info('mocked_wrap_command_for_docker returning mocked erroneous command: %s', mocked_cmd) + return mocked_cmd + + logger.info('mocked_wrap_command_for_docker returning original command: %s', cmd) + return cmd + + self.wrap_command_for_docker_mock.side_effect = mocked_wrap_command_for_docker + + # start the QAService (the object under test) + with QAService(qa_notification_busname=self.busname, + otdb_notification_busname=self.busname, + qa_base_dir=self.TEST_DIR): + # start listening for QA event messages from the QAService + with SynchronizingQABusListener(self.busname) as qa_listener: + # trigger a qa process by sending otdb task completing event + # this will result in the QAService actually doing its magic + self.send_otdb_task_completing_event() + + # start waiting until QAFinished event message received (or timeout) + qa_listener.error_event.wait(30) + + # ------------ + # Error event message should have been sent, so error_event should have been set + self.assertTrue(qa_listener.error_event.is_set()) + + self.assertTrue('otdb_id' in qa_listener.error_msg_content) + self.assertTrue('message' in qa_listener.error_msg_content) + + self.wrap_command_for_docker_mock.assert_called() + self.wrap_command_in_cep4_cpu_node_ssh_call_mock.assert_called() + self.get_cep4_available_cpu_nodes_mock.assert_called() + self.ssh_cmd_list_mock.assert_not_called() + + def test_03_qa_service_for_error_in_creating_plots(self): + """ + This test starts a QAService, triggers a test observation completing event, + and tests if the conversion from MS to hdf5 works, + but the plot generation fails (by intention). + It is an end-to-end test which does not check the intermediate results. It is assumed that + the intermediate steps are tested in other tests/modules. + """ + + logger.info(' -- test_03_qa_service_for_error_in_creating_plots -- ') + + # mock the calls to ssh cep4 and docker + def mocked_wrap_command_for_docker(cmd, image_name=None, image_label=None): + if 'ms2hdf5' in cmd: + # replace the ms2hdf5 command which runs normally in the docker container + # by a call to the create_test_hypercube which fakes the ms2hdf5 conversion for this test. 
+ create_test_hypercube_path = os.path.normpath(os.path.join(os.getcwd(), '../../../bin/create_test_hypercube')) + mocked_cmd = [create_test_hypercube_path, '-s 4', '-S 8', '-t 16', + '-o', str(self.TEST_OTDB_ID), self.TEST_H5_PATH] + logger.info('mocked_wrap_command_for_docker returning mocked command to create test h5 file: %s', + ' '.join(mocked_cmd)) + return mocked_cmd + + if 'cluster_this.py' in cmd: + # replace the cluster command which runs normally in the docker container + # by a call to bash true, so the 'cluster_this' call returns 0 exit code + mocked_cmd = ['true'] + logger.info('mocked_wrap_command_for_docker returning mocked command: %s', mocked_cmd) + return mocked_cmd + + + if 'plot_hdf5_dynamic_spectra' in cmd: + # replace the ms2hdf5 command which runs normally in the docker container + # by a call to bash false, so the 'ms2hdf5' call returns non-0 exit code + mocked_cmd = ['false'] + logger.info('mocked_wrap_command_for_docker returning mocked erroneous command: %s', mocked_cmd) + return mocked_cmd + + logger.info('mocked_wrap_command_for_docker returning original command: %s', ' '.join(cmd)) + return cmd + + self.wrap_command_for_docker_mock.side_effect = mocked_wrap_command_for_docker + + # start the QAService (the object under test) + with QAService(qa_notification_busname=self.busname, + otdb_notification_busname=self.busname, + qa_base_dir=self.TEST_DIR): + # start listening for QA event messages from the QAService + with SynchronizingQABusListener(self.busname) as qa_listener: + # trigger a qa process by sending otdb task completing event + # this will result in the QAService actually doing its magic + self.send_otdb_task_completing_event() + + # start waiting until ConvertedMS2Hdf5 event message received (or timeout) + qa_listener.converted_event.wait(30) + + # ConvertedMS2Hdf5 event message should have been sent, so converted_event should have been set + self.assertTrue(qa_listener.converted_event.is_set()) + + # check the result_msg_content + self.assertTrue('otdb_id' in qa_listener.converted_msg_content) + self.assertTrue('hdf5_file_path' in qa_listener.converted_msg_content) + + # start waiting until Error event message received (or timeout) + qa_listener.error_event.wait(30) + + # Error event message should have been sent, so error_event should have been set + self.assertTrue(qa_listener.error_event.is_set()) + + # check the result_msg_content + self.assertTrue('otdb_id' in qa_listener.error_msg_content) + self.assertTrue('message' in qa_listener.error_msg_content) + + self.wrap_command_for_docker_mock.assert_called() + self.wrap_command_in_cep4_cpu_node_ssh_call_mock.assert_called() + self.get_cep4_available_cpu_nodes_mock.assert_called() + self.ssh_cmd_list_mock.assert_not_called() + + def test_04_qa_service_for_error_ssh(self): + """ + This test starts a QAService, triggers a test observation completing event, + and tests if conversion fails due to an intentionally failing (mocked) ssh call. + It is an end-to-end test which does not check the intermediate results. It is assumed that + the intermediate steps are tested in other tests/modules. 
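The real wrap_command_in_cep4_*_ssh_call helpers are not part of this patch; judging from the ssh prefixes added in the DataManagement changes further below, they presumably compose a command list along these lines (host name illustrative), which is also why the mocks above can simply return the command unchanged:

def wrap_command_in_ssh_call(cmd, host='lofarsys@somehost'):
    # presumable shape of such a wrapper: prefix the argument list so the
    # command runs on a remote host instead of on the local machine
    return ['ssh', host] + cmd

print(' '.join(wrap_command_in_ssh_call(['du', '-bcs', '/data/projects'])))
# -> ssh lofarsys@somehost du -bcs /data/projects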
+ """ + + logger.info(' -- test_04_qa_service_for_error_ssh -- ') + + def mocked_wrap_command_in_cep4_cpu_node_ssh_call(cmd, cpu_node_nr, via_head): + logger.info('mocked_wrap_command_in_cep4_cpu_node_ssh_call for cpu node nr %s via head=%s ' \ + 'returning call to bash false', cpu_node_nr, via_head) + return ['false', ';'] + + self.wrap_command_in_cep4_cpu_node_ssh_call_mock.side_effect = mocked_wrap_command_in_cep4_cpu_node_ssh_call + + # start the QAService (the object under test) + with QAService(qa_notification_busname=self.busname, + otdb_notification_busname=self.busname, + qa_base_dir=self.TEST_DIR): + # start listening for QA event messages from the QAService + with SynchronizingQABusListener(self.busname) as qa_listener: + # trigger a qa process by sending otdb task completing event + # this will result in the QAService actually doing its magic + self.send_otdb_task_completing_event() + + # start waiting until Error event message received (or timeout) + qa_listener.error_event.wait(30) + + # Error event message should have been sent, so error_event should have been set + self.assertTrue(qa_listener.error_event.is_set()) + + # check the result_msg_content + self.assertTrue('otdb_id' in qa_listener.error_msg_content) + self.assertTrue('message' in qa_listener.error_msg_content) + + self.wrap_command_for_docker_mock.assert_called() + self.wrap_command_in_cep4_cpu_node_ssh_call_mock.assert_called() + self.get_cep4_available_cpu_nodes_mock.assert_called() + self.ssh_cmd_list_mock.assert_not_called() + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO) + + try: + Connection.establish('127.0.0.1') + except ConnectError: + logger.warning("cannot connect to qpid broker. skipping test...") + exit(3) + + #run the unit tests + unittest.main() diff --git a/QA/QA_Service/test/t_qa_service.run b/QA/QA_Service/test/t_qa_service.run new file mode 100755 index 0000000000000000000000000000000000000000..e70f75e609424c6bdbd89ca326982b2c61312297 --- /dev/null +++ b/QA/QA_Service/test/t_qa_service.run @@ -0,0 +1,23 @@ +#!/bin/bash + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# Run the unit test +source python-coverage.sh +python_coverage_test "*QA*" t_qa_service.py + diff --git a/QA/QA_Service/test/t_qa_service.sh b/QA/QA_Service/test/t_qa_service.sh new file mode 100755 index 0000000000000000000000000000000000000000..5276ddeed8424eec743b96a02ae6e3c47a87b848 --- /dev/null +++ b/QA/QA_Service/test/t_qa_service.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. 
+# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +./runctest.sh t_qa_service diff --git a/SAS/DataManagement/DataManagementCommon/path.py b/SAS/DataManagement/DataManagementCommon/path.py index fa801d79c7fc0dcf3406360836de40eaeff05bdf..8c721c964b8ca50e7d864d5351d8024978629241 100644 --- a/SAS/DataManagement/DataManagementCommon/path.py +++ b/SAS/DataManagement/DataManagementCommon/path.py @@ -167,8 +167,8 @@ class PathResolver: # get the subdirectories of the given path cmd = ['lfs', 'find', '--type', 'd', '--maxdepth', '1', path.rstrip('/')] hostname = socket.gethostname() - if not 'mgmt0' in hostname: - cmd = ['ssh', 'lofarsys@mgmt01.cep4.control.lofar'] + cmd + if not 'head' in hostname: + cmd = ['ssh', 'lofarsys@head.cep4.control.lofar'] + cmd logger.debug(' '.join(cmd)) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = proc.communicate() @@ -189,8 +189,8 @@ class PathResolver: def pathExists(self, path): cmd = ['lfs', 'ls', path] hostname = socket.gethostname() - if not 'mgmt0' in hostname: - cmd = ['ssh', 'lofarsys@mgmt01.cep4.control.lofar'] + cmd + if not 'head' in hostname: + cmd = ['ssh', 'lofarsys@head.cep4.control.lofar'] + cmd logger.debug(' '.join(cmd)) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = proc.communicate() diff --git a/SAS/DataManagement/StorageQueryService/cache.py b/SAS/DataManagement/StorageQueryService/cache.py index 45b61063f80c212cfe989cfdcae17201b022bc45..7892a26440c7f705368c7e275c7a07459b6f8f41 100644 --- a/SAS/DataManagement/StorageQueryService/cache.py +++ b/SAS/DataManagement/StorageQueryService/cache.py @@ -16,6 +16,7 @@ from lofar.messaging import EventMessage, ToBus from lofar.common.util import humanreadablesize from lofar.common.datetimeutils import format_timedelta from lofar.sas.datamanagement.storagequery.diskusage import getDiskUsageForPath as du_getDiskUsageForPath +from lofar.sas.datamanagement.storagequery.diskusage import getOTDBIdFromPath from lofar.sas.datamanagement.storagequery.diskusage import DiskUsage from lofar.sas.datamanagement.common.datamanagementbuslistener import DataManagementBusListener from lofar.sas.otdb.OTDBBusListener import OTDBBusListener @@ -29,7 +30,7 @@ from lofar.mom.momqueryservice.config import DEFAULT_MOMQUERY_BUSNAME, DEFAULT_M logger = logging.getLogger(__name__) -MAX_CACHE_ENTRY_AGE = datetime.timedelta(hours=8) +MAX_CACHE_ENTRY_AGE = datetime.timedelta(hours=3*24) class CacheManager: def __init__(self, @@ -50,7 +51,7 @@ class CacheManager: self.otdb_listener = OTDBBusListener(busname=otdb_notification_busname, subject=otdb_notification_subject, broker=broker, - numthreads=2) + numthreads=1) self.otdb_listener.onObservationAborted = self.onObservationAborted self.otdb_listener.onObservationFinished = self.onObservationFinished @@ -58,7 +59,7 @@ class CacheManager: self.dm_listener = 
DataManagementBusListener(busname=dm_notification_busname, subjects=dm_notification_prefix + '*', broker=broker, - numthreads=2) + numthreads=1) self.dm_listener.onTaskDeleted = self.onTaskDeleted @@ -99,8 +100,9 @@ class CacheManager: try: if os.path.exists(self._cache_path): with open(self._cache_path, 'r') as file: + cache_from_disk = eval(file.read().strip()) #slow! with self._cacheLock: - self._cache = eval(file.read().strip()) + self._cache = cache_from_disk if not isinstance(self._cache, dict): self._cache = {'path_du_results': {}, 'otdb_id2path': {} } if 'path_du_results' not in self._cache: @@ -116,7 +118,7 @@ class CacheManager: def _writeCacheToDisk(self): try: # only persist (a subset of) the cache to disk every once in a while. - if datetime.datetime.utcnow() - self._last_cache_write_timestamp > datetime.timedelta(minutes=0.2): + if datetime.datetime.utcnow() - self._last_cache_write_timestamp > datetime.timedelta(minutes=5): tmp_path = '/tmp/tmp_storagequery_cache.py' cache_str = '' with self._cacheLock: @@ -127,7 +129,7 @@ class CacheManager: # Furthermore, once a deeper level du results is stored in the memory cache, then it is also available for fast lookup. # We just don't store these deep levels on disk. sub_cache = { path:du_result for path,du_result in self._cache['path_du_results'].items() - if self.getDepthToProjectsDir(path) <= 1 } + if self.getDepthToProjectsDir(path) <= 1 and du_result.get('found') } cache_str = str(sub_cache) with open(tmp_path, 'w') as file: @@ -153,10 +155,9 @@ class CacheManager: if path in path_cache: otdb_id = du_result.get('otdb_id') - if not du_result['found']: - #make sure disk_usage is set when not found - du_result['disk_usage'] = 0 - du_result['disk_usage_readable'] = '0B' + # if still None, try to get the id from the path + if otdb_id is None: + otdb_id = getOTDBIdFromPath(path) if not path in path_cache or path_cache[path]['disk_usage'] != du_result['disk_usage']: # update the cache entry, even when no du result found, @@ -164,13 +165,20 @@ class CacheManager: logger.info('updating cache entry: %s', du_result) path_cache[path] = du_result - path_cache[path]['cache_timestamp'] = datetime.datetime.utcnow() - path_cache[path]['needs_update'] = False - if otdb_id != None: otdb_id2path_cache[otdb_id] = path - self._writeCacheToDisk() + if not du_result['found']: + # even when the du for the path is not found, + # keep a copy in the cache for fast lookup by clients + # Make sure the size is 0 + du_result['disk_usage'] = 0 + du_result['disk_usage_readable'] = humanreadablesize(0) + + path_cache[path]['cache_timestamp'] = datetime.datetime.utcnow() + path_cache[path]['needs_update'] = False + + self._writeCacheToDisk() self._sendDiskUsageChangedNotification(path, du_result['disk_usage'], otdb_id) @@ -199,28 +207,34 @@ class CacheManager: try: def addSubDirectoriesToCache(directory): depth = self.getDepthToProjectsDir(directory) + MAX_SCAN_DEPTH=2 #depth=0 : projects #depth=1 : projects/<project> #depth=2 : projects/<project>/<obs> #depth=3 : projects/<project>/<obs>/<sub_dir> - if depth > 3: + if depth > MAX_SCAN_DEPTH: return + add_empty_du_result_to_cache = False with self._cacheLock: path_cache = self._cache['path_du_results'] - if not directory in path_cache: - logger.info('tree scan: adding \'%s\' with empty disk_usage to cache which will be du\'ed later', directory) - empty_du_result = {'found': True, 'disk_usage': None, 'path': directory, 'name': directory.split('/')[-1]} - self._updateCache(empty_du_result) + 
add_empty_du_result_to_cache = not directory in path_cache - if directory in path_cache: - # mark cache entry for directory to be updated - path_cache[directory]['needs_update'] = True + if add_empty_du_result_to_cache: + logger.info('tree scan: adding \'%s\' with empty disk_usage to cache which will be du\'ed later', directory) + empty_du_result = {'found': True, 'disk_usage': None, 'path': directory, 'name': directory.split('/')[-1]} + self._updateCache(empty_du_result) + + with self._cacheLock: + path_cache = self._cache['path_du_results'] + if directory in path_cache: + # mark cache entry for directory to be updated + path_cache[directory]['needs_update'] = True if not self._cacheThreadsRunning: return - if depth < 3: + if depth < MAX_SCAN_DEPTH: logger.info('tree scan: scanning \'%s\'', directory) sd_result = self.disk_usage.path_resolver.getSubDirectories(directory) @@ -251,12 +265,12 @@ class CacheManager: updateable_entries = old_entries + needs_update_entries - if updateable_entries: - logger.info('%s old cache entries need to be updated, #age:%s #needs_update:%s', - len(updateable_entries), - len(old_entries), - len(needs_update_entries)) + logger.info('%s old cache entries need to be updated, #age:%s #needs_update:%s', + len(updateable_entries), + len(old_entries), + len(needs_update_entries)) + if updateable_entries: # sort them oldest to newest, 'needs_update' paths first def compareFunc(entry1, entry2): if entry1.get('needs_update') and not entry2.get('needs_update'): @@ -264,6 +278,13 @@ class CacheManager: if not entry1.get('needs_update') and entry2.get('needs_update'): return 1 + depth1 = self.getDepthToProjectsDir(entry1['path']) + depth2 = self.getDepthToProjectsDir(entry2['path']) + + if depth1 != depth2: + # lower level dirs are sorted in front of higher level dirs + return depth2 - depth1 + if entry1['cache_timestamp'] < entry2['cache_timestamp']: return -1 if entry1['cache_timestamp'] > entry2['cache_timestamp']: @@ -274,34 +295,45 @@ class CacheManager: cacheUpdateStart = datetime.datetime.utcnow() - for i, cache_entry in enumerate(updateable_entries): + #do a quick update of each entry by applying the sum of the subdirs to the path's du result... + #this make a best guess immediately available... + for cache_entry in updateable_entries: try: path = cache_entry.get('path') if path: - logger.info('_updateOldEntriesInCache: examining entry %s/%s. timestamp:%s age:%s needs_update:%s path: \'%s\'', - i, - len(updateable_entries), - cache_entry['cache_timestamp'], - format_timedelta(now - cache_entry['cache_timestamp']), - cache_entry.get('needs_update', False), - path) - - #do a quick update of the entry sy applying the sum of the subdirs to the path's du result... - #this make a best guess immedeiately available... self._updatePathCacheEntryToSubDirTotal(path, False) + except Exception as e: + logger.error(str(e)) - #...and in the mean time, du a full update from disk, which might be (really) slow. - result = du_getDiskUsageForPath(path) - logger.debug('trying to update old entry in cache: %s', result) - self._updateCache(result) + for i, cache_entry in enumerate(updateable_entries): + try: + # it might be that the cache_entry was already updated via another way + # so only update it if still to old or needs_update + now = datetime.datetime.utcnow() + if now - cache_entry['cache_timestamp'] > MAX_CACHE_ENTRY_AGE or cache_entry.get('needs_update', False): + path = cache_entry.get('path') + if path: + logger.info('_updateOldEntriesInCache: examining entry %s/%s. 
timestamp:%s age:%s needs_update:%s path: \'%s\'', + i, + len(updateable_entries), + cache_entry['cache_timestamp'], + format_timedelta(now - cache_entry['cache_timestamp']), + cache_entry.get('needs_update', False), + path) + + #du a full update from disk, which might be (really) slow. + result = du_getDiskUsageForPath(path) + logger.debug('trying to update old entry in cache: %s', result) + self._updateCache(result) except Exception as e: logger.error(str(e)) if not self._cacheThreadsRunning: + logger.info('exiting _updateCacheThread') return if datetime.datetime.utcnow() - cacheUpdateStart > datetime.timedelta(minutes=10): - # break out of cache update loop if full update takes more than 10min + # break out of cache update loop if full update takes more than 1min # next loop we'll start with the oldest cache entries again logger.info('skipping remaining %s old cache entries updates, they will be updated next time', len(updateable_entries)-i) break @@ -309,31 +341,35 @@ class CacheManager: #update the CEP4 capacities in the RADB once in a while... self._updateCEP4CapacitiesInRADB() - #sleep for a minute, (or stop if requested) - for i in range(60): + #sleep for a while, (or stop if requested) + for i in range(10): sleep(1) if not self._cacheThreadsRunning: + logger.info('exiting _updateCacheThread') return except Exception as e: - logger.error(str(e)) + logger.exception(str(e)) def _updatePathCacheEntryToSubDirTotal(self, path, force_update=False): - sd_result = self.disk_usage.path_resolver.getSubDirectories(path) + with self._cacheLock: + path_cache_result = self._cache['path_du_results'].get(path) - if sd_result['found']: - subdir_paths = [os.path.join(path, sd) for sd in sd_result['sub_directories']] + if path_cache_result: + path_depth = path.count('/') + all_dirs = self._cache['path_du_results'].keys() + subdir_paths = [sdp for sdp in all_dirs + if sdp.startswith(path) and sdp.count('/') == path_depth+1] - subdir_du_results = [self.getDiskUsageForPath(sd, force_update=force_update) for sd in subdir_paths] - sum_du = sum([du['disk_usage'] for du in subdir_du_results]) + subdir_du_results = [self.getDiskUsageForPath(sd, force_update=force_update) for sd in subdir_paths] + valid_subdir_du_results = [du for du in subdir_du_results if du.get('disk_usage')] + sum_du = sum([du['disk_usage'] for du in valid_subdir_du_results]) - with self._cacheLock: - if path in self._cache['path_du_results']: - path_result = self._cache['path_du_results'][path] - path_result['disk_usage'] = sum_du - path_result['disk_usage_readable'] = humanreadablesize(sum_du) - path_result['needs_update'] = True - self._updateCache(path_result) + if sum_du > 0: + logger.info('_updatePathCacheEntryToSubDirTotal: setting disk usage for %s to sum of %s known cached subdirs of %s', + path, len(valid_subdir_du_results), humanreadablesize(sum_du)) + path_cache_result['disk_usage'] = sum_du + path_cache_result['disk_usage_readable'] = humanreadablesize(sum_du) def _updateCEP4CapacitiesInRADB(self): try: @@ -464,10 +500,8 @@ class CacheManager: scratch_path_du_result = self.getDiskUsageForPath(scratch_path, force_update=force_update) path_du_result['scratch_paths'][scratch_path] = scratch_path_du_result - self._updateCache(path_du_result) return path_du_result - self._updateCache(path_result) return {'found': False, 'path': path_result['path']} def getDiskUsageForTasks(self, radb_ids=None, mom_ids=None, otdb_ids=None, include_scratch_paths=True, force_update=False): @@ -491,8 +525,9 @@ class CacheManager: def 
getDiskUsageForPath(self, path, force_update=False): logger.info("cache.getDiskUsageForPath('%s', force_update=%s)", path, force_update) needs_cache_update = False - with self._cacheLock: - needs_cache_update |= path not in self._cache['path_du_results'] + if not force_update: + with self._cacheLock: + needs_cache_update |= path not in self._cache['path_du_results'] if needs_cache_update or force_update: logger.info("cache update needed for %s", path) diff --git a/SAS/DataManagement/StorageQueryService/diskusage.py b/SAS/DataManagement/StorageQueryService/diskusage.py index 67a96da340ce7dc21ccf8ff24d68f5cbf7526e49..cf8f15d507bf2b6f4e9a0d1d82b6f1e1f0ea6f7b 100644 --- a/SAS/DataManagement/StorageQueryService/diskusage.py +++ b/SAS/DataManagement/StorageQueryService/diskusage.py @@ -17,14 +17,26 @@ from lofar.mom.momqueryservice.config import DEFAULT_MOMQUERY_BUSNAME, DEFAULT_M logger = logging.getLogger(__name__) def getDiskUsageForPath(path): - logger.info('getDiskUsageForPath(\'%s\')', path) + # 20180829: until lustre has been updated and robinhood has been switched back on (in october) use normal du + return getDiskUsageForPath_du(path) + + result = getDiskUsageForPath_rbh_du(path) + + if not result.get('found') or result.get('nr_of_files', None) is None: + logger.info('getDiskUsageForPath(\'%s\') could not obtain valid robinhood result, trying normal du.', path) + result = getDiskUsageForPath_du(path) + + return result + +def getDiskUsageForPath_rbh_du(path): + logger.info('getDiskUsageForPath_rbh_du(\'%s\')', path) result = {'found': False, 'path': path, 'disk_usage': None, 'name': path.split('/')[-1] } cmd = ['rbh-du', '-bd', path] hostname = socket.gethostname() - if not 'mgmt0' in hostname: - cmd = ['ssh', 'lofarsys@mgmt01.cep4.control.lofar'] + cmd + if not 'head' in hostname: + cmd = ['ssh', 'lofarsys@head.cep4.control.lofar'] + cmd logger.info(' '.join(cmd)) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -56,24 +68,71 @@ def getDiskUsageForPath(path): if dir_lines: result['found'] = True result['disk_usage'] = 0 - result['nr_of_files'] = 0 - - try: - path_items = path.rstrip('/').split('/') - if len(path_items) >=3 and path_items[-1].startswith('L') and path_items[-1][1:].isdigit() and 'projects' in path_items[-3]: - logger.info('found path for otdb_id %s %s', path_items[-1][1:], path) - result['otdb_id'] = int(path_items[-1][1:]) - except Exception as e: - logger.error('Could not parse otdb_id from path %s %s', path, e) + result['nr_of_files'] = None else: logger.error(out + err) result['message'] = out result['disk_usage_readable'] = humanreadablesize(result['disk_usage']) - logger.info('getDiskUsageForPath(\'%s\') returning: %s', path, result) + otdb_id = getOTDBIdFromPath(path) + if otdb_id: + result['otdb_id'] = otdb_id + + logger.info('getDiskUsageForPath_rbh_du(\'%s\') returning: %s', path, result) return result +def getDiskUsageForPath_du(path): + logger.info('getDiskUsageForPath_du(\'%s\')', path) + + result = {'found': False, 'path': path, 'disk_usage': None, 'name': path.split('/')[-1] } + + cmd = ['du', '-bcs', path] + hostname = socket.gethostname() + if not 'head' in hostname: + cmd = ['ssh', 'lofarsys@head.cep4.control.lofar'] + cmd + logger.info(' '.join(cmd)) + + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = proc.communicate() + + if proc.returncode == 0: + # example of out + # 7025510839 /data/projects/HOLOG_WINDMILL_TESTS/L662734/uv/ + # 7025510839 total + + #parse out + lines = 
[l.strip() for l in out.split('\n')] + total_lines = [l for l in lines if 'total' in l] + if total_lines: + parts = [p.strip() for p in total_lines[0].split()] + if len(parts) == 2: + result['found'] = True + result['disk_usage'] = int(parts[0]) + result['nr_of_files'] = None + else: + logger.error(out + err) + result['message'] = out + + result['disk_usage_readable'] = humanreadablesize(result['disk_usage']) + + otdb_id = getOTDBIdFromPath(path) + if otdb_id: + result['otdb_id'] = otdb_id + + logger.info('getDiskUsageForPath_du(\'%s\') returning: %s', path, result) + return result + +def getOTDBIdFromPath(path): + try: + path_items = path.rstrip('/').split('/') + if len(path_items) >=3 and path_items[-1].startswith('L') and path_items[-1][1:].isdigit() and 'projects' in path_items[-3]: + logger.info('found path for otdb_id %s %s', path_items[-1][1:], path) + return int(path_items[-1][1:]) + except Exception as e: + logger.error('Could not parse otdb_id from path %s %s', path, e) + return None + def getDiskFreeSpaceForMountpoint(mountpoint=CEP4_DATA_MOUNTPOINT): logger.info('getDiskFreeSpaceForMountpoint(\'%s\')', mountpoint) @@ -81,8 +140,8 @@ def getDiskFreeSpaceForMountpoint(mountpoint=CEP4_DATA_MOUNTPOINT): cmd = ['df', mountpoint] hostname = socket.gethostname() - if not 'mgmt0' in hostname: - cmd = ['ssh', 'lofarsys@mgmt01.cep4.control.lofar'] + cmd + if not 'head' in hostname: + cmd = ['ssh', 'lofarsys@head.cep4.control.lofar'] + cmd logger.info(' '.join(cmd) + ' ...waiting for result...') proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/SAS/DataManagement/StorageQueryService/rpc.py b/SAS/DataManagement/StorageQueryService/rpc.py index 7192032deae37ba58db65d1d46a9bbf7f079d97e..e79c158746b2c2f64bea9d258c14e2c89249784e 100644 --- a/SAS/DataManagement/StorageQueryService/rpc.py +++ b/SAS/DataManagement/StorageQueryService/rpc.py @@ -12,8 +12,9 @@ logger = logging.getLogger(__name__) class StorageQueryRPC(RPCWrapper): def __init__(self, busname=DEFAULT_BUSNAME, servicename=DEFAULT_SERVICENAME, + timeout=18000, broker=None): - super(StorageQueryRPC, self).__init__(busname, servicename, broker, timeout=18000) + super(StorageQueryRPC, self).__init__(busname, servicename, broker, timeout=timeout) def _convertTimestamps(self, result): if isinstance(result, dict): diff --git a/SAS/DataManagement/StorageQueryService/service.py b/SAS/DataManagement/StorageQueryService/service.py index e027fec29e2a9ac92b22c044a4824b66dad57ee3..10f83a442c6a872d4e751c06ff47ac7217ee46de 100644 --- a/SAS/DataManagement/StorageQueryService/service.py +++ b/SAS/DataManagement/StorageQueryService/service.py @@ -67,7 +67,7 @@ def createService(busname=DEFAULT_BUSNAME, servicename=DEFAULT_SERVICENAME, brok busname=busname, broker=broker, use_service_methods=True, - numthreads=8, + numthreads=4, verbose=verbose, handler_args={'mountpoint': mountpoint, 'radb_busname':RADB_BUSNAME, diff --git a/SAS/ResourceAssignment/Common/CMakeLists.txt b/SAS/ResourceAssignment/Common/CMakeLists.txt index b6fd18fadafe49ea3687574c6f4541bfe2503267..7b3ba62ad2e7440917b29732cb594d0d79bf7d35 100644 --- a/SAS/ResourceAssignment/Common/CMakeLists.txt +++ b/SAS/ResourceAssignment/Common/CMakeLists.txt @@ -1,6 +1,6 @@ # $Id: CMakeLists.txt 30355 2014-11-04 13:46:05Z loose $ -lofar_package(RACommon 0.1 DEPENDS pyparameterset MoMQueryService ResourceAssignmentService ResourceAssigner) +lofar_package(RACommon 0.1 DEPENDS pyparameterset MoMQueryService ResourceAssignmentService ) include(PythonInstall) 
set(USE_PYTHON_COMPILATION Off) diff --git a/SAS/ResourceAssignment/Common/lib/specification.py b/SAS/ResourceAssignment/Common/lib/specification.py index da8ed2655143532eca4b3475b48ddea6afe902c2..611a04a1062822f00afe15ebc34e91cadb281d7c 100644 --- a/SAS/ResourceAssignment/Common/lib/specification.py +++ b/SAS/ResourceAssignment/Common/lib/specification.py @@ -255,11 +255,11 @@ class Specification: storagemanager = self.momquery.get_storagemanager(self.mom_id) if storagemanager: self.storagemanager = storagemanager - self.logger.info("Found a task mom_id=%s with storagemanager=%s from MoM: %s", + self.logger.info("Found a task mom_id=%s with storagemanager=%s from MoM", self.mom_id, self.storagemanager) except KeyError as ke: # set default - self.logger.exception("read_storagemanager_from_mom: " + str(ke), exc_info=False) + # self.logger.exception("read_storagemanager_from_mom: " + str(ke), exc_info=False) self.logger.info("Storagemanager not found in MoM") # If the key exists in the VIC tree in OTDB, we use that instead if read_from_otdb has been called. diff --git a/SAS/ResourceAssignment/Common/test/test_specification.py b/SAS/ResourceAssignment/Common/test/test_specification.py index 719e3b7516de22e950353048384973355e3b76b9..c9d7f826f73a841724c912f729897a7e92a3da03 100755 --- a/SAS/ResourceAssignment/Common/test/test_specification.py +++ b/SAS/ResourceAssignment/Common/test/test_specification.py @@ -648,8 +648,9 @@ class General(unittest.TestCase): self.assertEqual(self.specification.cluster, 'CEP4') self.otdbrpc_mock.taskGetSpecification.assert_any_call(otdb_id=559779) - def test_read_from_otdb_with_storagewriter(self): - """ Verify that _get_parset_from_OTDB gets the partset for a interferometer_observation task """ + def test_read_from_otdb_with_get_storagewriter_mocked(self): + """ Verify that _get_parset_from_OTDB gets the partset for a + preprocessing pipeline task if get_storage_writer returns a storagemanager """ # Arrange input_parset_file = os.path.join(self.data_sets_dir, "tSpecification.in_preprocessing") pipeline_specification_tree = parameterset(input_parset_file).dict() @@ -675,6 +676,31 @@ class General(unittest.TestCase): self.assertEqual(call_prefix, 'ObsSW.') self.assertEqual(self.specification.storagemanager, "dysco") + + def test_read_from_otdb_with_storagewriter(self): + """ Verify that _get_parset_from_OTDB gets the partset for a for a + preprocessing pipeline task with a storagemanager defined """ + # Arrange + input_parset_file = os.path.join(self.data_sets_dir, "tSpecification.in_preprocessing") + parset_file = open(input_parset_file) + pipeline_specification_tree = {} + for line in parset_file.readlines(): + if '=' in line: + key, value = line.split('=') + pipeline_specification_tree[key.strip()] = value.strip() + self.otdbrpc_mock.taskGetSpecification.return_value = {'otdb_id': 562063, 'specification': pipeline_specification_tree} + self.radbrpc_mock.getResourceGroupNames.return_value = [{'name': 'CEP4'}] + + # Act + predecessors = self.specification.read_from_otdb(562063) + + # Assert + #TODO not sure what more to assert here + self.assertEqual(predecessors, [{'source': 'otdb', 'id': 562059}]) + self.assertEqual(self.specification.cluster, 'CEP4') + self.otdbrpc_mock.taskGetSpecification.assert_any_call(otdb_id=562063) + self.assertEqual(self.specification.storagemanager, "dysco") + def test_convert_id_to_otdb_ids_other(self): """ Verify that _get_parset_from_OTDB gets the partset for a interferometer_observation task """ diff --git 
a/SAS/ResourceAssignment/RATaskSpecifiedService/lib/RATaskSpecified.py b/SAS/ResourceAssignment/RATaskSpecifiedService/lib/RATaskSpecified.py index ddf092625bd4d8bc295fd0e7b1599966a34deab8..5de6a6f8f5ba3003d353669dd46764df6ecbbe23 100755 --- a/SAS/ResourceAssignment/RATaskSpecifiedService/lib/RATaskSpecified.py +++ b/SAS/ResourceAssignment/RATaskSpecifiedService/lib/RATaskSpecified.py @@ -80,6 +80,12 @@ class RATaskSpecified(OTDBBusListener): self.momrpc.close() self.otdbrpc.close() + # This is mainly to trigger the propagation of misc field values through read_from_mom + # and then sending them to the RA to OTDB Service in the resource assigner. + # Might need to be a separate service if we take on more mom-otdb-adapter function. + def onObservationApproved(self, main_id, modificationTime): + self.createAndSendSpecifiedTask(main_id, 'approved') + def onObservationPrescheduled(self, main_id, modificationTime): self.createAndSendSpecifiedTask(main_id, 'prescheduled') diff --git a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py index dd83184f0620934c0637b5153778e5cdb659bbb8..7992fb2fbe1b3fed5b84473e5a4bbc107bd66f9a 100755 --- a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py +++ b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py @@ -503,7 +503,7 @@ class RAtoOTDBTranslator(): ## control software, giving everyone a puzzled expression on their face and a big headache when figuring ## out why the system was sometimes behaving so funny... # FIXME: please find a better way to do this or remove this hack when not necessary any more! - if project_name in ['IPS_Commissioning', 'LC6_001', 'LC7_001', 'LC8_001', 'LC9_001']: + if project_name in ['IPS_Commissioning', 'LC6_001', 'LC7_001', 'LC8_001', 'LC9_001', 'LT10_001']: logging.info("CreateParset: Overwriting inspectionProgram parset key for dynspec") parset[PREFIX+'ObservationControl.OnlineControl.inspectionProgram'] = '/data/home/lofarsys/dynspec/scripts/inspection-dynspec-observation.sh' @@ -512,6 +512,6 @@ class RAtoOTDBTranslator(): storagemanager = mom_info.storagemanager if storagemanager is not None: # should be "" or "dysco" logging.info("Adding storagemanager to parset: %s" % storagemanager) - parset[PREFIX+"ObservationControl.PythonControl.DPPP.storagemanager.name"] = storagemanager + parset[PREFIX+"ObservationControl.PythonControl.DPPP.msout.storagemanager.name"] = storagemanager return parset diff --git a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/test/t_translator.py b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/test/t_translator.py index c24f3cb7ffaed2af3c0426655c3e3b0bd0165db8..f71fc532fcba5f915d595d65c8035a807ccd4673 100644 --- a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/test/t_translator.py +++ b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/test/t_translator.py @@ -29,7 +29,7 @@ class RAtoOTDBPropagatorTest(unittest.TestCase): value = RAtoOTDBTranslator().CreateParset(otdb_id, ra_info, project_name, mom_info) # assert: - self.assertEqual(value[PREFIX+"ObservationControl.PythonControl.DPPP.storagemanager.name"], storagemanager) + self.assertEqual(value[PREFIX+"ObservationControl.PythonControl.DPPP.msout.storagemanager.name"], storagemanager) if __name__ == "__main__": diff --git a/SAS/ResourceAssignment/ResourceAssigner/lib/rabuslistener.py b/SAS/ResourceAssignment/ResourceAssigner/lib/rabuslistener.py index 
bbeec2501294045f96587ce043fdb7ec67768db6..74bfd9330f42de6d85ab074670be2a4c4cceb39b 100644 --- a/SAS/ResourceAssignment/ResourceAssigner/lib/rabuslistener.py +++ b/SAS/ResourceAssignment/ResourceAssigner/lib/rabuslistener.py @@ -64,11 +64,18 @@ class RABusListener(AbstractBusListener): self.onTaskScheduled(msg.content) elif msg.subject == '%sTaskConflict' % self.subject_prefix: self.onTaskConflict(msg.content) + elif msg.subject == '%sTaskApproved' % self.subject_prefix: + self.onTaskApproved(msg.content) elif msg.subject == '%sTaskError' % self.subject_prefix: self.onTaskError(msg.content) else: logger.error("RABusListener.handleMessage: unknown subject: %s" %str(msg.subject)) + def onTaskApproved(self, task_ids): + '''onTaskApproved is called upon receiving a TaskApproved message. + :param task_ids: a dict containing radb_id, mom_id and otdb_id''' + pass + def onTaskScheduled(self, task_ids): '''onTaskScheduled is called upon receiving a TaskScheduled message. :param task_ids: a dict containing radb_id, mom_id and otdb_id''' diff --git a/SAS/ResourceAssignment/ResourceAssigner/lib/resource_availability_checker.py b/SAS/ResourceAssignment/ResourceAssigner/lib/resource_availability_checker.py index bb2ff9118223e7202cb3834bbeb75458ab1ce6eb..067d2fcbcc55a2ca2eea1e63d523ac993c5de29d 100644 --- a/SAS/ResourceAssignment/ResourceAssigner/lib/resource_availability_checker.py +++ b/SAS/ResourceAssignment/ResourceAssigner/lib/resource_availability_checker.py @@ -377,11 +377,12 @@ class ResourceAvailabilityChecker(object): res_group = self.resource_group_relations[gids[i]] for rid in res_group['resource_ids']: if rid in available_recources: - type_id = available_recources[rid]['type_id'] - if type_id in needed_resources_by_type_id and available_recources[rid]['active'] and \ - available_recources[rid]['available_capacity'] > 0: - resources[type_id] = available_recources[rid] - type_ids_seen.add(type_id) + available_recource = available_recources[rid] + type_id = available_recource['type_id'] + if type_id in needed_resources_by_type_id and available_recource['active']: + if available_recource['available_capacity'] > 0: + resources[type_id] = available_recource + type_ids_seen.add(type_id) else: logger.debug("requested resource id %s is not available/claimable", rid) diff --git a/SAS/ResourceAssignment/ResourceAssigner/lib/schedulers.py b/SAS/ResourceAssignment/ResourceAssigner/lib/schedulers.py index 79fbe86715533d1d27506e5cbddd6e06ad21ae64..d522baeee0d0f58af80130bd77054d11b466ed78 100644 --- a/SAS/ResourceAssignment/ResourceAssigner/lib/schedulers.py +++ b/SAS/ResourceAssignment/ResourceAssigner/lib/schedulers.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +from copy import deepcopy from lofar.common.cache import cache @@ -78,9 +79,9 @@ class BasicScheduler(object): # Any resources that we cannot schedule on for some reason self.unusable_resources = [] - # Duration must be non-negative or weird stuff will happen - if self.starttime > self.endtime: - raise ValueError('BasicScheduler, starttime=%s should be >= endtime=%s', starttime, endtime) + # Duration must be positive or weird stuff will happen + if self.starttime >= self.endtime: + raise ValueError('BasicScheduler, starttime=%s should be >= endtime=%s', self.starttime, self.endtime) def allocate_resources(self): """ @@ -111,8 +112,11 @@ class BasicScheduler(object): allocation_successful = True except ScheduleException, e: - logger.exception("BasicScheduler: scheduling threw exception: %s", e) + logger.exception("%s: scheduling threw 
ScheduleException: %s", self.__class__.__name__, e) self._handle_schedule_exception() + except Exception, e: + logger.exception("%s: scheduling threw unhandled exception: %s", self.__class__.__name__, e) + raise return allocation_successful @@ -414,13 +418,17 @@ class StationScheduler(BasicScheduler): wanted_estimates = self._get_station_estimates() # Try to schedule all of the stations. - remaining_estimates = self._schedule_resources(wanted_estimates, available_resources, need_all=False) + # make a (deep)copy of available_resources and use that, + # because _schedule_resources modifies the available_capacity of the tested wanted stations. + # we rollback the radb later in this method, so we should keep the original available_resources intact. + available_resources_copy = deepcopy(available_resources) + remaining_estimates = self._schedule_resources(wanted_estimates, available_resources_copy, need_all=False) # See if our allocation meets the minimal criteria. Note that some stations may be partially allocated, # we do not count those as claimable. unclaimable_stations = set([e["station"] for e in remaining_estimates]) if not self._requirements_satisfied_without(expanded_requirements, unclaimable_stations): - raise ScheduleException("Could not allocate enough stations") + raise ScheduleException("Could not allocate enough stations. unclaimable_stations=%s" % (unclaimable_stations,)) allocated_stations = set([e["station"] for e in wanted_estimates if e not in remaining_estimates]) @@ -542,7 +550,21 @@ class PriorityScheduler(StationScheduler): logger.debug("kill_task_in_radb: task: %s", task) new_endtime = max(task['starttime'], datetime.utcnow()) - self.radb.updateTaskAndResourceClaims(task_id=task['id'], task_status='aborted', endtime=new_endtime, commit=False) + self.radb.updateTaskAndResourceClaims(task_id=task['id'], task_status='aborted', + endtime=new_endtime, commit=False) + + def _unschedule_task_in_radb(self, task): + """ + unschedule the task by setting its status to approved in RADB + and by releasing the task's claims (set them to tentative) + + :param task: task to 'set' to approved + """ + + logger.info("_unschedule_task_in_radb: task: %s", task) + + self.radb.updateTaskAndResourceClaims(task_id=task['id'], task_status='approved', + claim_status='tentative', commit=False) def _propose_potential_starttime(self, newstarttime): """ @@ -565,24 +587,30 @@ class PriorityScheduler(StationScheduler): """ # try to resolve the conflict, and mark any resources unavailable if resolution fails - tasks_to_kill = self._get_resolvable_conflicting_tasks(conflict_claim) - - for t in tasks_to_kill: - logger.info("_resolve_conflict: found task %s to kill for conflict_claim: %s", t, conflict_claim) - - # add it to the list to actually kill later - self.tasks_to_kill.append(t) - - self._kill_task_in_radb(t) + tasks_to_move_out_of_the_way = self._get_resolvable_conflicting_tasks(conflict_claim) + now = datetime.utcnow() + + for t in tasks_to_move_out_of_the_way: + logger.info("_resolve_conflict: found task %s to move out of the way for claim in conflict: %s", t, conflict_claim) + + # kill running task, unschedule otherwise in order to move the blocking task out of the way + if (t['starttime'] <= now and t['endtime'] >= now) or t['status'] == 'active': + # add it to the list to actually kill later + self.tasks_to_kill.append(t) + # and update the administration in the radb + self._kill_task_in_radb(t) + else: + # move the blocking task out of the way + self._unschedule_task_in_radb(t) - if not 
tasks_to_kill: + if not tasks_to_move_out_of_the_way: logger.info("_resolve_conflict: no tasks to kill for conflict_claim %s", conflict_claim) # record which resources cannot be used anymore, because we can't kill anything on it self.unusable_resources.append(conflict_claim["resource_id"]) # Return True if we killed anything - return tasks_to_kill != [] + return tasks_to_move_out_of_the_way != [] def _get_conflicting_claims_and_tasks(self, conflict_claim): """ @@ -615,7 +643,7 @@ class PriorityScheduler(StationScheduler): """ if conflict_claim["resource_type_id"] == self.resource_availability_checker.resource_types['storage']: - raise ScheduleException("Could not resolve conflict on storage resource") + raise ScheduleException("Cannot resolve conflict on storage resource") # find all conflicting claims & which tasks they belong to conflicting_claims, conflicting_tasks = self._get_conflicting_claims_and_tasks(conflict_claim) diff --git a/SAS/ResourceAssignment/ResourceAssigner/test/radb_common_testing.py b/SAS/ResourceAssignment/ResourceAssigner/test/radb_common_testing.py new file mode 120000 index 0000000000000000000000000000000000000000..83b3ca170d204c92cc70aa0f8d716393976b2899 --- /dev/null +++ b/SAS/ResourceAssignment/ResourceAssigner/test/radb_common_testing.py @@ -0,0 +1 @@ +../../ResourceAssignmentDatabase/tests/radb_common_testing.py \ No newline at end of file diff --git a/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulers.py b/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulers.py index 94a43b5c872a56fec7f6466aa01b92b79baad94c..a4d2b439f8380849e9358e5ccb54b7077ab318d0 100755 --- a/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulers.py +++ b/SAS/ResourceAssignment/ResourceAssigner/test/t_schedulers.py @@ -21,11 +21,9 @@ import unittest import mock - import datetime -from copy import deepcopy -from lofar.sas.resourceassignment.resourceassigner.resource_availability_checker import CouldNotFindClaimException +from lofar.sas.resourceassignment.resourceassigner.resource_availability_checker import ResourceAvailabilityChecker, CouldNotFindClaimException from lofar.sas.resourceassignment.resourceassigner.schedulers import ScheduleException from lofar.sas.resourceassignment.resourceassigner.schedulers import BasicScheduler @@ -33,375 +31,228 @@ from lofar.sas.resourceassignment.resourceassigner.schedulers import StationSche from lofar.sas.resourceassignment.resourceassigner.schedulers import PriorityScheduler from lofar.sas.resourceassignment.resourceassigner.schedulers import DwellScheduler +from lofar.sas.resourceassignment.database.radb import _FETCH_ONE + import logging logger = logging.getLogger(__name__) -class FakeRADatabase(object): - """ Mimic an RA Database, assuming claims overlap fully or not at all. 
""" - - def __init__(self, resource_capacity): - # database - self.tasks = {} - self.claims = {} - self.next_claim_id = 0 - - # cache committed state here - self.committed_tasks = {} - self.committed_claims = {} - - # maximum capacity of our resource - self.resource_capacity = resource_capacity - - self.resources = [{"id": x} for x in xrange(6)] - - def addTask(self, id, task): - self.tasks[id] = task - self.tasks[id]["id"] = id - self.tasks[id]["specification_id"] = id - self.claims[id] = [] - - self.committed = False - self.rolled_back = False - - def _fits(self, claim): - usage = 0 - resource_id = claim["resource_id"] - - for claims in self.claims.values(): - for c in claims: - overlap_in_time = claim["starttime"] < c["endtime"] and claim["endtime"] > c["starttime"] - overlap_in_resource = c["resource_id"] == resource_id - - if c["status"] != "conflict" and \ - c["id"] != claim.get("id",None) and \ - overlap_in_resource and \ - overlap_in_time: - usage += c["claim_size"] - - return usage + claim["claim_size"] <= self.resource_capacity - - """ Methods to mock radb. """ - - def getTask(self, id): - return self.tasks[id] - - def getTasks(self, task_ids): - return [t for id, t in self.tasks.iteritems() if id in task_ids] - - def updateSpecification(self, specification_id, starttime=None, endtime=None, content=None, cluster=None, - commit=True): - - for task_id, task in self.tasks.iteritems(): - if self.tasks[task_id]["specification_id"] == specification_id: - if starttime is not None: - self.tasks[task_id]["starttime"] = starttime - if endtime is not None: - self.tasks[task_id]["endtime"] = endtime - - return True - - def getResources(self, *args, **kwargs): - # we model six resources, can expand if needed - return self.resources - - def getResourceGroupMemberships(self): - # We model 4 stations: 2 remote, and 2 core - station_groups = { - 100: { - "resource_group_id": 100, - "resource_group_name": "ALL", - "resource_group_type": "", - "child_ids": [101, 102] - }, - 101: { - "resource_group_id": 101, - "resource_group_name": "CORE", - "resource_group_type": "", - "child_ids": [1, 2] - }, - 102: { - "resource_group_id": 102, - "resource_group_name": "REMOTE", - "resource_group_type": "", - "child_ids": [3, 4] - } - } - - def station_name(nr): - if nr < 3: - return "CS%03d" % nr - else: - return "RS%03d" % nr - - stations = { - station_nr: { - "resource_group_id": station_nr, - "resource_group_name": station_name(station_nr), - "resource_group_type": "station", - "child_ids": [], - } for station_nr in xrange(1,5) - } - - resources = station_groups; - resources.update(stations) - - return {"groups": resources} - - def getResourceClaims(self, task_ids, status, extended): - for tid in task_ids: - assert tid in self.tasks - assert tid in self.claims - - return [claim for tid in task_ids for claim in self.claims[tid] if claim["status"] == status] - - def deleteResourceClaims(self, claim_ids, commit): - logger.info("Deleting claims %s", claim_ids) - - for tid in self.claims: - self.claims[tid] = [c for c in self.claims[tid] if c["id"] not in claim_ids] - - def updateResourceClaims(self, where_task_ids, status, commit): - # this is what we support - assert status == "claimed" - - for task_id in where_task_ids: - # can't update conflict claims to claimed - for c in self.claims[task_id]: - if c["status"] != "tentative": - return False - - # update statusses - for c in self.claims[task_id]: - c["status"] = "claimed" - - return True - - def updateTaskAndResourceClaims(self, task_id, starttime=None, 
endtime=None, **kwargs): - if starttime: - logger.info("Setting starttime of task %s to %s", task_id, starttime) - - self.tasks[task_id]["starttime"] = starttime - - for c in self.claims[task_id]: - c["starttime"] = starttime - - if endtime: - logger.info("Setting endtime of task %s to %s", task_id, endtime) - - self.tasks[task_id]["endtime"] = endtime - - for c in self.claims[task_id]: - c["endtime"] = endtime - - def insertResourceClaims(self, task_id, claims, *args, **kwargs): - for c in claims: - # check whether tasks do not get two claims of the same resource - assert c["resource_id"] not in [d["resource_id"] for d in self.claims[task_id]], "Resource %s claimed twice by task %s" % (c["resource_id"], task_id) - - # derive claim status - c["status"] = "tentative" if self._fits(c) else "conflict" - - # assign ids - c["task_id"] = task_id - c["id"] = self.next_claim_id - self.next_claim_id += 1 - - # add it to our claim list - self.claims[task_id].append(c) - - claim_ids = [c["id"] for c in claims] - logger.info("Added claims %s", claim_ids) - - return claim_ids - - def get_overlapping_claims(self, claim_id, claim_status="claimed"): - overlapping_claims = [] - - logger.info('get_overlapping_claims(claim_id=%s, claim_status=%s) self.claims content:', claim_id, claim_status) - for claim_id, claim_value in self.claims.iteritems(): - logger.info('%s: %s', claim_id, claim_value) - - # all claims overlap - claims_for_id = self.claims[claim_id] - for claim in claims_for_id: - overlapping_claims += [c for _, claims in self.claims.iteritems() for c in claims if - # overlap in space - c["resource_id"] == claim["resource_id"] and - # "conflict" claims do not actually claim resources - c["status"] == claim_status and - # be antireflexive - c["id"] != claim["id"]] - - return overlapping_claims - - def commit(self): - logger.info("Commit") - - self.rolled_back = False - self.committed = True - self.committed_claims = deepcopy(self.claims) - self.committed_tasks = deepcopy(self.tasks) - - def rollback(self): - logger.info("Rollback") - - self.rolled_back = True - self.claims = deepcopy(self.committed_claims) - self.tasks = deepcopy(self.committed_tasks) - +import radb_common_testing -class FakeResourceAvailabilityChecker(object): - resource_types = { - "storage": 0, - "bandwidth": 1} +def setUpModule(): + return radb_common_testing.setUpModule() - requested_resources = [] - available_resources = [] +def tearDownModule(): + return radb_common_testing.tearDownModule() - def get_is_claimable(self, requested_resources, available_resources): - self.requested_resources = requested_resources - self.available_resources = available_resources +class SchedulerTest(radb_common_testing.RADBCommonTest): + """ create test radb postgres instance, and use that in a ResourceAvailabilityChecker""" - if not available_resources: - raise CouldNotFindClaimException + def setUp(self): + super(SchedulerTest, self).setUp() + self.resource_availability_checker = ResourceAvailabilityChecker(self.radb) + self._enforce_limited_station_group_list() - # fullfil one request at a time to keep the code simple. We map it on - # the first available resource - r = requested_resources[0] + def _enforce_limited_station_group_list(self): + # for test simplicity, create a simple virtual instrument which makes debugging easier. 
+ # this is safe, because we are working on a test database - # use resource 0, or resource #stationnr - rid = int(r["station"][2:]) if "station" in r else available_resources[0]["id"] - if rid not in [x["id"] for x in available_resources]: - raise CouldNotFindClaimException + LIMITED_STATION_GROUP_LIST = ('CS001', 'CS002', 'RS106', 'RS205') - rtype = r["resource_types"].keys()[0] - return [{ - 'requested_resources': [r], - 'claim_size': r["resource_types"][rtype], - 'resource_id': rid, - 'resource_type_id': self.resource_types[rtype] - }] + unwanted_resource_group_ids = [rg['id'] for rg in self.radb.getResourceGroups() + if rg['type'] == 'station' and rg['name'] not in LIMITED_STATION_GROUP_LIST] + self.radb._executeQuery("DELETE FROM virtual_instrument.resource_group rg WHERE rg.id in (%s)" % ( + ', '.join([str(id) for id in unwanted_resource_group_ids])),) + self.radb.commit() -class SchedulerTest(unittest.TestCase): - """ Setup mechanics to use a FakeRADatabase and FakeResourceAvailabilityChecker to simulate a system with - one resource at one point in time. """ - def mock_ra_database(self): - self.fake_ra_database = FakeRADatabase(resource_capacity=1024) +class BasicSchedulerTest(SchedulerTest): + def new_task(self, mom_otdb_id=0, starttime=None, endtime=None): + """ + insert a new test specification and task into the testing radb + :param mom_otdb_id: optional mom/otdb id + :param starttime: optional starttime if None, then datetime(2017, 1, 1, 1, 0, 0) is used + :param endtime: optional endtime if None, then datetime(2017, 1, 1, 2, 0, 0) is used + :return: the new radb's task id + """ - ra_database_patcher = mock.patch('lofar.sas.resourceassignment.resourceassigner.schedulers.RADatabase') - self.addCleanup(ra_database_patcher.stop) - self.ra_database_mock = ra_database_patcher.start() - self.ra_database_mock.return_value = self.fake_ra_database + if starttime is None: + starttime = datetime.datetime(2017, 1, 1, 1, 0, 0) - def mock_resource_availability_checker(self): - self.fake_resource_availability_checker = FakeResourceAvailabilityChecker() + if endtime is None: + endtime = datetime.datetime(2017, 1, 1, 2, 0, 0) - def setUp(self): - self.mock_ra_database() - self.mock_resource_availability_checker() + return self.radb.insertSpecificationAndTask(mom_id=mom_otdb_id, + otdb_id=mom_otdb_id, + task_status='approved', + task_type='observation', + starttime=starttime, + endtime=endtime, + content='', + cluster='CEP4')['task_id'] + def get_specification_tree(self, task_id): + return {} -class BasicSchedulerTest(SchedulerTest): - def new_task(self, task_id): - self.fake_ra_database.addTask(task_id, { - "starttime": datetime.datetime(2017, 1, 1, 1, 0, 0), - "endtime": datetime.datetime(2017, 1, 1, 2, 0, 0), - }) + def new_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """factory method returning a scheduler class specific for this test class. 
+ In this case, in the BasicSchedulerTest class, it returns a new BasicScheduler.""" + return self.new_basic_scheduler(task_id, resource_estimator, specification_tree) - self.fake_ra_database.commit() - self.fake_ra_database.committed = False # dont confuse subsequent checks on whether the scheduler committed + def new_basic_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """return a new BasicScheduler""" + return BasicScheduler(task_id, + specification_tree if specification_tree else self.get_specification_tree(task_id), + resource_estimator if resource_estimator else lambda _:[], + self.resource_availability_checker, self.radb.dbcreds) - def get_specification_tree(self, task_id): - return {} + def get_station_bandwidth_max_capacity(self): + resource_CS001bw0 = [r for r in self.radb.getResources(resource_types="bandwidth", include_availability=True) + if r['name']=='CS001bw0'][0] + return resource_CS001bw0['total_capacity'] - def new_scheduler(self, task_id, resource_estimator): - return BasicScheduler(task_id, self.get_specification_tree(task_id), resource_estimator, self.fake_resource_availability_checker, None) + def get_CEP4_storage_max_capacity(self): + resource_cep4_storage = [r for r in self.radb.getResources(resource_types="storage", include_availability=True) + if r['name']=='CEP4_storage:/data'][0] + return resource_cep4_storage['total_capacity'] def test_schedule_task(self): """ Whether a task (that fits) can be scheduled. """ # Resources we need - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + task_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': 512}, + "root_resource_group": "CS001", + "resource_count": 1 } ] + scheduler = self.new_scheduler(task_id, lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() # Allocation must succeed and be committed self.assertTrue(allocation_succesful) - self.assertTrue(self.fake_ra_database.committed) - self.assertFalse(self.fake_ra_database.rolled_back) + self.assertTrue(scheduler.radb.committed) + self.assertFalse(scheduler.radb.rolled_back) # Claim must be present in database - claims = self.fake_ra_database.claims[0] + claims = self.radb.getResourceClaims(task_ids=task_id, extended=True) self.assertTrue(claims) self.assertEqual(len(claims), 1) # Claim must be valid claim = claims[0] - task = self.fake_ra_database.tasks[0] + task = self.radb.getTask(task_id) - self.assertEqual(claim["status"], "claimed") - self.assertEqual(claim["starttime"], task["starttime"]) - self.assertEqual(claim["endtime"], task["endtime"]) - self.assertEqual(claim["claim_size"], 512) - self.assertEqual(claim["resource_type_id"], FakeResourceAvailabilityChecker.resource_types["bandwidth"]) + self.assertEqual(claim["status"], "claimed") + self.assertEqual(claim["starttime"], task["starttime"]) + self.assertEqual(claim["endtime"], task["endtime"]) + self.assertEqual(claim["claim_size"], 512) + self.assertEqual(claim["resource_type_name"], "bandwidth") def test_multiple_resources(self): """ Whether a task (that fits) can be scheduled. 
""" # Resources we need - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 512} }, - { 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + task_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': 512}, + "root_resource_group": "CS001", + "resource_count": 1 }, + {'resource_types': {'bandwidth': 512}, + "root_resource_group": "CS002", + "resource_count": 1} ] + + scheduler = self.new_scheduler(task_id, lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() + self.assertTrue(scheduler.radb.committed) + self.assertFalse(scheduler.radb.rolled_back) # Allocation must succeed self.assertTrue(allocation_succesful) + # Claim must be present in database + claims = self.radb.getResourceClaims(task_ids=task_id, extended=True) + self.assertTrue(claims) + self.assertEqual(len(claims), 2) + def test_schedule_too_large_task(self): """ Whether a task with too large claims will be rejected by the scheduler. """ # Resources we need - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 2048} }] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + task_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': 1e99}, + "root_resource_group": "CS001", + "resource_count": 1 } ] + scheduler = self.new_scheduler(task_id, lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() if self.__class__ == BasicSchedulerTest: # This inheritence of test is not ideal # Allocation must fail, and commit called so we get a conflicted state self.assertFalse(allocation_succesful) - self.assertTrue(self.fake_ra_database.committed) - self.assertFalse(self.fake_ra_database.rolled_back) + self.assertTrue(scheduler.radb.committed) + self.assertFalse(scheduler.radb.rolled_back) else: # Allocation must fail, and rollback called self.assertFalse(allocation_succesful) - self.assertFalse(self.fake_ra_database.committed) - self.assertTrue(self.fake_ra_database.rolled_back) + self.assertFalse(scheduler.radb.committed) + self.assertTrue(scheduler.radb.rolled_back) def test_schedule_two_tasks_too_large_task(self): """ Whether two tasks that fit individually but not together will be rejected by the scheduler. """ + max_bw_cap = self.get_station_bandwidth_max_capacity() + # First task must succeed - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + # we claim two bandwidth resources because CS001 has two network lines + # they should both be claimed, so that the next task cannot just take the other free line. + task_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1 }, + {'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1} ] + scheduler = self.new_scheduler(task_id, lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) - # Second task must fail - self.new_task(1) - estimates = [{ 'resource_types': {'bandwidth': 513} }] - allocation_succesful = self.new_scheduler(1, lambda _: estimates).allocate_resources() + # Second task must fail, because both network lines were already filled. 
+ task2_id = self.new_task(1) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1 }, + {'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1} ] + scheduler = self.new_scheduler(task2_id, lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertFalse(allocation_succesful) + class StationSchedulerTest(BasicSchedulerTest): # The StationScheduler must not regress on the BasicScheduler, so we inherit all its tests def get_specification_tree(self, task_id): - return { "task_type": "observation", "specification": { "Observation.VirtualInstrument.stationList": [] }, "station_requirements": [] } + return { "task_type": "observation", + "specification": { "Observation.VirtualInstrument.stationList": [] }, + "station_requirements": [] } + + def new_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """overridden factory method returning a scheduler class specific for this test class. + In this case, in the StationSchedulerTest class, it returns a new StationScheduler. + + Please note that in most/all of the tests in this StationSchedulerTest test class + we explicitly use the new_station_scheduler factory method to get the specific + StationScheduler. In derived test classes, this means that we then still use a StationScheduler + and not another scheduler type via a overridden new_scheduler method. + """ + return self.new_station_scheduler(task_id, resource_estimator, specification_tree) - def new_scheduler(self, task_id, resource_estimator): - return StationScheduler(task_id, self.get_specification_tree(task_id), resource_estimator, self.fake_resource_availability_checker, None) + def new_station_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """factory method returning a StationScheduler. + Can be overridden in derived test classes.""" + return StationScheduler(task_id, + specification_tree if specification_tree else self.get_specification_tree(task_id), + resource_estimator if resource_estimator else self.fake_resource_estimator, + self.resource_availability_checker, self.radb.dbcreds) def fake_resource_estimator(self, specification_tree): """ Return an estimate for each station, plus a fixed storage claim of half the available storage capacity. """ @@ -411,32 +262,31 @@ class StationSchedulerTest(BasicSchedulerTest): # We don't get here without requesting stations assert stations + max_bw_cap = self.get_station_bandwidth_max_capacity() + max_storage_cap = self.get_CEP4_storage_max_capacity() + return [ - { "resource_types": {"bandwidth": 1024}, + { "resource_types": {"bandwidth": max_bw_cap }, "resource_count": 1, "station": station_name, - "root_resource_group": "%sLBA" % (station_name,) + "root_resource_group": station_name } for station_name in stations ] + [ - { "resource_types": {"storage": 512}, + { "resource_types": {"storage": 0.4*max_storage_cap}, "resource_count": 1, - } + "root_resource_group": "CEP4" + } ] - def new_station_scheduler(self, task_id, specification_tree): - """ A new scheduler for station-specific tests. """ - - return StationScheduler(task_id, specification_tree, self.fake_resource_estimator, FakeResourceAvailabilityChecker(), None) - def test_expand_station_list(self): """ Test whether _expand_station_list correctly expands the station sets we defined in our FakeRADatabase. 
""" - self.new_task(0) - scheduler = self.new_station_scheduler(0, self.get_specification_tree(0)) + task_id = self.new_task(0) + scheduler = self.new_station_scheduler(task_id, specification_tree=self.get_specification_tree(0)) - self.assertEqual(sorted(scheduler._expand_station_list("ALL")), ["CS001","CS002","RS003","RS004"]) + self.assertEqual(sorted(scheduler._expand_station_list("ALL")), ["CS001","CS002","RS106","RS205"]) self.assertEqual(sorted(scheduler._expand_station_list("CORE")), ["CS001","CS002"]) - self.assertEqual(sorted(scheduler._expand_station_list("REMOTE")), ["RS003","RS004"]) + self.assertEqual(sorted(scheduler._expand_station_list("REMOTE")), ["RS106","RS205"]) self.assertEqual(sorted(scheduler._expand_station_list("CS002")), ["CS002"]) with self.assertRaises(ScheduleException): @@ -459,16 +309,17 @@ class StationSchedulerTest(BasicSchedulerTest): """ Test whether a requirement for a single station can be satisfied. """ specification_tree = self.get_specification_tree(0) - specification_tree["station_requirements"] = [ ("RS003", 1), ] + specification_tree["station_requirements"] = [ ("RS106", 1), ] - self.new_task(0) - allocation_succesful = self.new_station_scheduler(0, specification_tree).allocate_resources() + task_id = self.new_task(0) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() # Allocation must succeed self.assertTrue(allocation_succesful) # The specified station must be allocated, plus storage claim - self.assertTrue(len(self.fake_ra_database.claims[0]) == 2) + self.assertTrue(len(self.radb.getResourceClaims(task_ids=task_id, status='claimed')) == 2) def test_find_any_station(self): """ Test whether a requirement for a single station can be satisfied. """ @@ -476,39 +327,43 @@ class StationSchedulerTest(BasicSchedulerTest): specification_tree = self.get_specification_tree(0) specification_tree["station_requirements"] = [ ("ALL", 1), ] - self.new_task(0) - allocation_succesful = self.new_station_scheduler(0, specification_tree).allocate_resources() + task_id = self.new_task(0) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() # Allocation must succeed self.assertTrue(allocation_succesful) # All 4 stations must be allocated (allocation is greedy), plus storage claim - self.assertTrue(len(self.fake_ra_database.claims[0]) == 5) + self.assertTrue(len(self.radb.getResourceClaims(task_ids=task_id, status='claimed')) == 5) def test_find_zero_stations(self): """ Test whether a requirement for a zero station cannot be satisfied if no stations are left. 
""" + # preparation: do a first scheduling, which should succeed and claim the station specification_tree = self.get_specification_tree(0) - specification_tree["station_requirements"] = [ ("CS001", 0), ] + specification_tree["station_requirements"] = [ ("RS106", 1), ] + task_id = self.new_task(0) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() - self.new_task(0) - task = self.fake_ra_database.tasks[0] + self.assertTrue(allocation_succesful) + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) - # allocate CS001 by hand - self.fake_ra_database.claims["hidden"] = [{ - "id": "hidden", - "resource_id": 1, - "claim_size": 1024, - "starttime": task["starttime"], - "endtime": task["endtime"], - "status": "claimed", - "task_id": "hidden" - }] + # real test, try to claim same station again. Should fail now. + specification_tree = self.get_specification_tree(0) + specification_tree["station_requirements"] = [ ("RS106", 0), ] - allocation_succesful = self.new_station_scheduler(0, specification_tree).allocate_resources() + task_id = self.new_task(1) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() - # Allocation must succeed + # Allocation must fail self.assertFalse(allocation_succesful) + self.assertEqual(0, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) + self.assertFalse(scheduler.radb.committed) + self.assertTrue(scheduler.radb.rolled_back) + def test_find_overlap_stations(self): """ Test whether requirements for overlapping station sets can be satisfied. """ @@ -516,14 +371,15 @@ class StationSchedulerTest(BasicSchedulerTest): specification_tree = self.get_specification_tree(0) specification_tree["station_requirements"] = [ ("CORE", 2), ("ALL", 4), ] - self.new_task(0) - allocation_succesful = self.new_station_scheduler(0, specification_tree).allocate_resources() + task_id = self.new_task(0) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() # Allocation must succeed self.assertTrue(allocation_succesful) # All 4 stations must be allocated (allocation is greedy), plus storage claim - self.assertTrue(len(self.fake_ra_database.claims[0]) == 5) + self.assertTrue(len(self.radb.getResourceClaims(task_ids=task_id, status='claimed')) == 5) def test_require_too_many_stations(self): """ Test whether requiring too many stations (than exist) fails. """ @@ -531,71 +387,68 @@ class StationSchedulerTest(BasicSchedulerTest): specification_tree = self.get_specification_tree(0) specification_tree["station_requirements"] = [ ("CORE", 3), ] - self.new_task(0) - allocation_succesful = self.new_station_scheduler(0, specification_tree).allocate_resources() + task_id = self.new_task(0) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() # Allocation must fail self.assertFalse(allocation_succesful) - self.assertFalse(self.fake_ra_database.committed) - self.assertTrue(self.fake_ra_database.rolled_back) + self.assertFalse(scheduler.radb.committed) + self.assertTrue(scheduler.radb.rolled_back) def test_require_more_stations_than_available(self): """ Test whether requiring too many stations (than are available) fails. 
""" specification_tree = self.get_specification_tree(0) - specification_tree["station_requirements"] = [ ("CORE", 2), ] - - self.new_task(0) - task = self.fake_ra_database.tasks[0] + specification_tree["station_requirements"] = [ ("REMOTE", 2), ] - # allocate CS001 by hand - self.fake_ra_database.claims["hidden"] = [{ - "id": "hidden", - "resource_id": 1, - "claim_size": 1024, - "starttime": task["starttime"], - "endtime": task["endtime"], - "status": "claimed", - "task_id": "hidden" - }] + # preparation: do a first scheduling, which should succeed and claim the two remote stations + task_id = self.new_task(0) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() - self.fake_ra_database.commit() - self.fake_ra_database.committed = False # dont confuse subsequent checks on whether the scheduler committed + self.assertTrue(allocation_succesful) + self.assertEqual(3, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) - # try to allocate our task - allocation_succesful = self.new_station_scheduler(0, specification_tree).allocate_resources() + # real test, try to claim the two remote stations again. Should fail now. + task_id = self.new_task(1) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() - # Allocation must fail self.assertFalse(allocation_succesful) - self.assertFalse(self.fake_ra_database.committed) - self.assertTrue(self.fake_ra_database.rolled_back) + self.assertEqual(0, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) + self.assertFalse(scheduler.radb.committed) + self.assertTrue(scheduler.radb.rolled_back) + def test_2obs_coexist(self): """ Test whether 2 obs requiring different station sets can be scheduled in parallel. """ - for task_id in (0,1): - station_set = "CORE" if task_id == 0 else "REMOTE" - specification_tree = self.get_specification_tree(task_id) + for mom_id in (0,1): + station_set = "CORE" if mom_id == 0 else "REMOTE" + specification_tree = self.get_specification_tree(mom_id) specification_tree["station_requirements"] = [ (station_set, 2), ] - self.new_task(task_id) - allocation_succesful = self.new_station_scheduler(task_id, specification_tree).allocate_resources() + task_id = self.new_task(mom_id) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful = scheduler.allocate_resources() # Allocation must succeed self.assertTrue(allocation_succesful) - self.assertTrue(len(self.fake_ra_database.claims[task_id]) == 3) # 2 stations + 1 storage claim + self.assertTrue(len(self.radb.getResourceClaims(task_ids=task_id, status='claimed')) == 3) # 2 stations + 1 storage claim def test_2obs_no_fit(self): """ Test whether 2 obs requiring station sets from the same set will conflict. 
""" allocation_succesful = {} # Two observations both requesting 2 core stations - for task_id in (0,1): - specification_tree = self.get_specification_tree(task_id) - specification_tree["station_requirements"] = [ ("CORE", 2), ] + for mom_id in (0,1): + specification_tree = self.get_specification_tree(mom_id) + specification_tree["station_requirements"] = [ ("REMOTE", 2), ] - self.new_task(task_id) - allocation_succesful[task_id] = self.new_station_scheduler(task_id, specification_tree).allocate_resources() + task_id = self.new_task(mom_id) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful[mom_id] = scheduler.allocate_resources() # Second allocation must fail self.assertTrue(allocation_succesful[0]) @@ -606,13 +459,14 @@ class StationSchedulerTest(BasicSchedulerTest): allocation_succesful = {} # Two observations both requesting 2 core stations - for task_id in (0,1,2): - station_name = { 0: "CS001", 1: "CS002", 2: "RS003" }[task_id] - specification_tree = self.get_specification_tree(task_id) + for mom_id in (0,1,2): + station_name = { 0: "CS001", 1: "CS002", 2: "RS106" }[mom_id] + specification_tree = self.get_specification_tree(mom_id) specification_tree["station_requirements"] = [ (station_name, 1), ] - self.new_task(task_id) - allocation_succesful[task_id] = self.new_station_scheduler(task_id, specification_tree).allocate_resources() + task_id = self.new_task(mom_id) + scheduler = self.new_station_scheduler(task_id, specification_tree=specification_tree) + allocation_succesful[mom_id] = scheduler.allocate_resources() # Second allocation must fail self.assertTrue(allocation_succesful[0]) @@ -643,10 +497,11 @@ class PrioritySchedulerTest(StationSchedulerTest): def mock_datetime(self): datetime_patcher = mock.patch('lofar.sas.resourceassignment.resourceassigner.schedulers.datetime') self.addCleanup(datetime_patcher.stop) - datetime_mock = datetime_patcher.start() + self.datetime_mock = datetime_patcher.start() - datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 0, 0, 0) - datetime_mock.max = datetime.datetime.max + # utcnow lies before the tasks we are scheduling (the tasks lie in the future) + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 0, 0, 0) + self.datetime_mock.max = datetime.datetime.max def setUp(self): super(PrioritySchedulerTest, self).setUp() @@ -655,225 +510,457 @@ class PrioritySchedulerTest(StationSchedulerTest): self.mock_obscontrol() self.mock_datetime() - def new_task(self, task_id): - self.fake_ra_database.addTask(task_id, { - "mom_id": 1000 + task_id, - "otdb_id": 2000 + task_id, - "type": "observation", - "starttime": datetime.datetime(2017, 1, 1, 1, 0, 0), - "endtime": datetime.datetime(2017, 1, 1, 2, 0, 0), - }) - - self.fake_ra_database.commit() - self.fake_ra_database.committed = False # dont confuse subsequent checks on whether the scheduler committed - - def new_task_without_momid(self, task_id): - self.fake_ra_database.addTask(task_id, { - "mom_id": None, - "otdb_id": 2000 + task_id, - "type": "observation", - "starttime": datetime.datetime(2017, 1, 1, 1, 0, 0), - "endtime": datetime.datetime(2017, 1, 1, 2, 0, 0), - }) - - self.fake_ra_database.commit() - self.fake_ra_database.committed = False # dont confuse subsequent checks on whether the scheduler committed - - def new_scheduler(self, task_id, resource_estimator): - return PriorityScheduler(task_id, self.get_specification_tree(task_id), resource_estimator, self.fake_resource_availability_checker, None) - - 
def new_station_scheduler(self, task_id, specification_tree): - return PriorityScheduler(task_id, specification_tree, self.fake_resource_estimator, FakeResourceAvailabilityChecker(), None) - - def test_kill_lower_priority(self): + def new_task_without_momid(self, otdb_id): + return self.radb.insertSpecificationAndTask(mom_id=None, + otdb_id=otdb_id, + task_status='approved', + task_type='observation', + starttime=datetime.datetime(2017, 1, 1, 1, 0, 0), + endtime=datetime.datetime(2017, 1, 1, 2, 0, 0), + content='', + cluster='CEP4')['task_id'] + + def new_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """overridden factory method returning a scheduler class specific for this test class. + In this case, in the PrioritySchedulerTest class, it returns a new PriorityScheduler.""" + return self.new_priority_scheduler(task_id, resource_estimator, specification_tree) + + def new_station_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """overridden factory method returning a scheduler class specific for this test class. + In this case, in the PrioritySchedulerTest class, it returns a new PriorityScheduler.""" + return self.new_priority_scheduler(task_id, resource_estimator, specification_tree) + + def new_priority_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + return PriorityScheduler(task_id, + specification_tree if specification_tree else self.get_specification_tree(task_id), + resource_estimator if resource_estimator else self.fake_resource_estimator, + self.resource_availability_checker, self.radb.dbcreds) + + def test_unschedule_lower_priority_future_task(self): """ - Whether two tasks that fit individually but not together will be accepted by the scheduler by killing the + Whether two future tasks that fit individually but not together will be accepted by the scheduler by unscheduling the lower-priority task. 
""" + # utcnow lies before the tasks we are scheduling (the tasks lie in the future) + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 0, 0, 0) + + max_bw_cap = self.get_station_bandwidth_max_capacity() + + # First task must succeed (for the test the mom_id determines the prio) + task_id = self.new_task(0) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() + self.assertTrue(allocation_succesful) + + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + self.radb.updateTask(task_id, task_status='scheduled') + self.assertEqual('scheduled', self.radb.getTask(task_id)['status']) + + # Second task must succeed as it has a higher priority (for the test the mom_id determines the prio) + task2_id = self.new_task(1000) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task2_id, status='claimed'))) + + # First task must have been unscheduled + # as a result, it should not have any claimed claims anymore + self.assertEqual(0, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task_id, status='conflict'))) + # and the low-prio task should now have conflict state (cause the high-prio task claimed the resources) + self.assertEqual('conflict', self.radb.getTask(task_id)['status']) + + + def test_kill_lower_priority_running_task(self): + """ + Whether two tasks that fit individually but not together will be accepted by the scheduler by killing the + running lower-priority task. 
+ """ + + # utcnow lies before the tasks we are scheduling (the tasks lie in the future) + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 0, 0, 0) + + max_bw_cap = self.get_station_bandwidth_max_capacity() + # First task must succeed - self.new_task(0) - estimates = [{'resource_types': {'bandwidth': 512}}] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + # (for the test the mom_id determines the prio) + task_id = self.new_task(0, starttime=datetime.datetime(2017, 1, 1, 12, 0, 0), + endtime=datetime.datetime(2017, 1, 1, 13, 0, 0)) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "RS106", + "resource_count": 1 } ] + scheduler = self.new_scheduler(task_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + self.radb.updateTask(task_id, task_status='scheduled') + self.assertEqual('scheduled', self.radb.getTask(task_id)['status']) + self.assertEqual(datetime.datetime(2017, 1, 1, 12, 0, 0), self.radb.getTask(task_id)['starttime']) + self.assertEqual(datetime.datetime(2017, 1, 1, 13, 0, 0), self.radb.getTask(task_id)['endtime']) + + # shift utcnow and fake that the task is running + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 12, 10, 0) + self.radb.updateTask(task_id, task_status='active') + self.assertEqual('active', self.radb.getTask(task_id)['status']) + # Second task must succeed as it has a higher priority - self.new_task(1000) - estimates = [{'resource_types': {'bandwidth': 513}}] - allocation_succesful = self.new_scheduler(1000, lambda _: estimates).allocate_resources() + # start it in a minute after now + # (or else it will still have overlap and conflicts with beginning of just-aborted running task) + # (for the test the mom_id determines the prio) + task2_id = self.new_task(1000, starttime=datetime.datetime(2017, 1, 1, 12, 11, 0), + endtime=datetime.datetime(2017, 1, 1, 13, 11, 0)) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "RS106", + "resource_count": 1 } ] + scheduler = self.new_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) # First task must have been killed - otdb_id = self.fake_ra_database.tasks[0]["otdb_id"] + otdb_id = self.radb.getTask(task_id)["otdb_id"] self.obscontrol_mock.assert_called_with(otdb_id) - # First task must have its endtime cut short to utcnow or starttime - my_starttime = self.fake_ra_database.tasks[1000]["starttime"] - for c in self.fake_ra_database.claims[0]: - self.assertLessEqual(c["endtime"], my_starttime) + # First task must have its endtime cut short to utcnow + # and all claims should be ended (but still claimed) as well. 
+ self.assertEqual(datetime.datetime(2017, 1, 1, 12, 10, 0), self.radb.getTask(task_id)['endtime']) + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_id))) + for claim in self.radb.getResourceClaims(task_ids=task_id): + self.assertLessEqual(claim["endtime"], datetime.datetime(2017, 1, 1, 12, 10, 0)) + self.assertEqual('claimed', claim["status"]) + + # and the starttime should still be the original + self.assertEqual(datetime.datetime(2017, 1, 1, 12, 0, 0), self.radb.getTask(task_id)['starttime']) + # and status should be aborted + self.assertEqual('aborted', self.radb.getTask(task_id)['status']) + + + def test_do_not_unschedule_higher_priority_future_task(self): + # utcnow lies before the tasks we are scheduling (the tasks lie in the future) + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 0, 0, 0) + + max_bw_cap = self.get_station_bandwidth_max_capacity() + + # First task must succeed (for the test the mom_id determines the prio) + task_id = self.new_task(1000) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() + self.assertTrue(allocation_succesful) - def test_not_kill_higher_priority(self): - """ Whether two tasks that fit individually but not together get rejected priorities do not allow an override. """ + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + self.radb.updateTask(task_id, task_status='scheduled') + self.assertEqual('scheduled', self.radb.getTask(task_id)['status']) + + # Second task must succeed as it has a higher priority (for the test the mom_id determines the prio) + task2_id = self.new_task(0) #(for the test the mom_id determines the prio) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() + self.assertFalse(allocation_succesful) - # First task must succeed - self.new_task(1000) - estimates = [{ 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_scheduler(1000, lambda _: estimates).allocate_resources() + # the second (low-prio) task could not be scheduled + # as a result there are no claims allocated and the task stays in approved state. + # Thought by JS: I think that's wrong, and does not give the proper feedback to the user. + # I think that the claims and task should go to conflict to make it clear to the user what happened. 
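# The comments in these tests say the task's mom_id determines its priority: mom_id=1000 outranks
# mom_id=0, while mom_id=0 and mom_id=1 rank equally (see test_not_unschedule_equal_priority below).
# The actual priority lookup is not part of this diff; a purely hypothetical mapping that is merely
# consistent with those assertions would be:
def assumed_test_priority(mom_id):
    # assumption for illustration only, not the real implementation
    return 1 if mom_id is not None and mom_id >= 1000 else 0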
+ self.assertEqual('approved', self.radb.getTask(task2_id)['status']) + self.assertEqual(0, len(self.radb.getResourceClaims(task_ids=task2_id))) + + # First task must NOT have been unscheduled + self.assertEqual('scheduled', self.radb.getTask(task_id)['status']) + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) + + + def test_do_not_kill_higher_priority_running_task(self): + + # utcnow lies before the tasks we are scheduling (the tasks lie in the future) + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 0, 0, 0) + + max_bw_cap = self.get_station_bandwidth_max_capacity() + + # First (task must succeed) + task_id = self.new_task(1000) #(for the test the mom_id determines the prio) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) - # Second task must fail as it has a lower priority - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 513} }] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + self.radb.updateTask(task_id, task_status='scheduled') + self.assertEqual('scheduled', self.radb.getTask(task_id)['status']) + + # shift utcnow and fake that the task is running + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 1, 10, 0) + self.radb.updateTask(task_id, task_status='active') + self.assertEqual('active', self.radb.getTask(task_id)['status']) + + # Second task must succeed as it has a higher priority + # start it in a minute after now + # (or else it will still have overlap and conflicts with beginning of just-aborted running task) + # (for the test the mom_id determines the prio) + task2_id = self.new_task(0, starttime=datetime.datetime(2017, 1, 1, 1, 11, 0)) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertFalse(allocation_succesful) + # the second (low-prio) task could not be scheduled + # as a result there are no claims allocated and the task stays in approved state. + # Thought by JS: I think that's wrong, and does not give the proper feedback to the user. + # I think that the claims and task should go to conflict to make it clear to the user what happened. + self.assertEqual('approved', self.radb.getTask(task2_id)['status']) + self.assertEqual(0, len(self.radb.getResourceClaims(task_ids=task2_id))) + # First task must NOT have been killed - otdb_id = self.fake_ra_database.tasks[1000]["otdb_id"] - with self.assertRaises(AssertionError): - self.obscontrol_mock.assert_called_with(otdb_id) + self.assertEqual('active', self.radb.getTask(task_id)['status']) + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) - def test_not_kill_equal_priority(self): + def test_not_unschedule_equal_priority(self): """ Whether two tasks that fit individually but not together get rejected priorities do not allow an override. 
""" + max_bw_cap = self.get_station_bandwidth_max_capacity() + # First task must succeed - self.new_task(1) - estimates = [{ 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_scheduler(1, lambda _: estimates).allocate_resources() + task1_id = self.new_task(1) #mom_id=1 and mom_id=0 yield equal priorities + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task1_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) + self.assertEqual('approved', self.radb.getTask(task1_id)['status']) + self.radb.updateTask(task1_id, task_status='scheduled') + self.assertEqual('scheduled', self.radb.getTask(task1_id)['status']) + # Second task must fail as it has a lower priority - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 513} }] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + task2_id = self.new_task(0) #mom_id=1 and mom_id=0 yield equal priorities + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 } ] + scheduler = self.new_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertFalse(allocation_succesful) + self.assertEqual('scheduled', self.radb.getTask(task1_id)['status']) + # Thought by JS: I think it's wrong that task2 has approved status, and does not give the proper feedback to the user. + # I think that the claims and task should go to conflict to make it clear to the user what happened. + self.assertEqual('approved', self.radb.getTask(task2_id)['status']) + def test_partial_conflict(self): """ Whether a task gets scheduled correctly if it has a partial conflict after the first fit. 
""" - # First task must succeed - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 512} }, - { 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + + # utcnow lies before the tasks we are scheduling (the tasks lie in the future) + self.datetime_mock.utcnow.return_value = datetime.datetime(2017, 1, 1, 0, 0, 0) + + max_bw_cap = self.get_station_bandwidth_max_capacity() + + # First task must succeed (for the test the mom_id determines the prio) + task_id = self.new_task(0) + estimates = [{'resource_types': {'bandwidth': 0.25*max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1 }, + {'resource_types': {'bandwidth': 0.25 * max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1} + ] + + scheduler = self.new_scheduler(task_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) - # Second task must succeed as it has a higher priority - self.new_task(1000) - estimates = [{ 'resource_types': {'bandwidth': 512} }, - { 'resource_types': {'bandwidth': 513} }] - allocation_succesful = self.new_scheduler(1000, lambda _: estimates).allocate_resources() + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + self.radb.updateTask(task_id, task_status='scheduled') + self.assertEqual('scheduled', self.radb.getTask(task_id)['status']) + + # Second task must succeed as it has a higher priority (for the test the mom_id determines the prio) + task2_id = self.new_task(1000) + estimates = [{'resource_types': {'bandwidth': 0.25*max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1 }, + {'resource_types': {'bandwidth': 0.95 * max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 1} + ] + scheduler = self.new_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task2_id, status='claimed'))) - # First task must have been killed - otdb_id = self.fake_ra_database.tasks[0]["otdb_id"] - self.obscontrol_mock.assert_called_with(otdb_id) + # First task must have been unscheduled + # as a result, it should not have any claimed claims anymore + self.assertEqual(0, len(self.radb.getResourceClaims(task_ids=task_id, status='claimed'))) + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_id, status='tentative'))) + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_id, status='conflict'))) + # and the low-prio task should now have conflict state (cause the high-prio task claimed the resources) + self.assertEqual('conflict', self.radb.getTask(task_id)['status']) def test_should_not_kill_a_task_without_a_mom_id(self): + max_bw_cap = self.get_station_bandwidth_max_capacity() + # First task must succeed - self.new_task_without_momid(0) - estimates = [{'resource_types': {'bandwidth': 512}}] - allocation_succesful = self.new_scheduler(0, lambda _: estimates).allocate_resources() + task_id = self.new_task_without_momid(0) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "RS106", + "resource_count": 1 }] + scheduler = self.new_scheduler(task_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) - self.new_task(1000) - estimates = [{'resource_types': {'bandwidth': 513}}] - 
allocation_succesful = self.new_scheduler(1000, lambda _: estimates).allocate_resources() + task2_id = self.new_task(1000) + estimates = [{'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "RS106", + "resource_count": 1 }] + scheduler = self.new_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertFalse(allocation_succesful) - otdb_id = self.fake_ra_database.tasks[0]["otdb_id"] self.obscontrol_mock.assert_not_called() class DwellSchedulerTest(PrioritySchedulerTest): # The DwellScheduler must not regress on the PriorityScheduler, so we inherit all its tests - def new_task(self, task_id): - self.fake_ra_database.addTask(task_id, { - "mom_id": 1000 + task_id, - "otdb_id": 2000 + task_id, - "type": "observation", - "starttime": datetime.datetime(2017, 1, 1, 1, 0, 0), - "endtime": datetime.datetime(2017, 1, 1, 2, 0, 0), - }) - - self.fake_ra_database.commit() - self.fake_ra_database.committed = False # dont confuse subsequent checks on whether the scheduler committed - - def new_scheduler(self, task_id, resource_estimator): - return DwellScheduler(task_id, self.get_specification_tree(task_id), resource_estimator, - datetime.datetime(2017, 1, 1, 1, 0, 0), # minstarttime - datetime.datetime(2017, 1, 1, 1, 0, 0), # maxstarttime - datetime.timedelta(hours=1), # duration - self.fake_resource_availability_checker, None) - - def new_station_scheduler(self, task_id, specification_tree): - return DwellScheduler(task_id, specification_tree, self.fake_resource_estimator, - datetime.datetime(2017, 1, 1, 1, 0, 0), # minstarttime - datetime.datetime(2017, 1, 1, 1, 0, 0), # maxstarttime - datetime.timedelta(hours=1), # duration - FakeResourceAvailabilityChecker(), None) - - def new_dwell_scheduler(self, task_id, resource_estimator): - return DwellScheduler(task_id, self.get_specification_tree(task_id), resource_estimator, - datetime.datetime(2017, 1, 1, 1, 0, 0), # minstarttime - datetime.datetime(2017, 1, 2, 1, 0, 0), # maxstarttime - datetime.timedelta(hours=1), # duration - self.fake_resource_availability_checker, None) + class TestResourceAvailabilityChecker(ResourceAvailabilityChecker): + """Helper class to keep track of arguments in calls to get_is_claimable""" + def get_is_claimable(self, requested_resources, available_resources): + self.last_requested_resources = requested_resources + self.last_available_resources = available_resources + return super(DwellSchedulerTest.TestResourceAvailabilityChecker, self).get_is_claimable(requested_resources, + available_resources) + + def setUp(self): + super(DwellSchedulerTest, self).setUp() + self.resource_availability_checker = DwellSchedulerTest.TestResourceAvailabilityChecker(self.radb) + + def new_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """overridden factory method returning a scheduler class specific for this test class. + In this case, in the DwellSchedulerTest class, it returns a new DwellScheduler.""" + return self.new_dwell_scheduler(task_id, resource_estimator, specification_tree, allow_dwelling=False) + + def new_station_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """overridden factory method returning a scheduler class specific for this test class. 
+ In this case, in the DwellSchedulerTest class, it returns a new DwellScheduler.""" + return self.new_dwell_scheduler(task_id, resource_estimator, specification_tree, allow_dwelling=False) + + def new_priority_scheduler(self, task_id, resource_estimator=None, specification_tree=None): + """overridden factory method returning a scheduler class specific for this test class. + In this case, in the DwellSchedulerTest class, it returns a new DwellScheduler.""" + return self.new_dwell_scheduler(task_id, resource_estimator, specification_tree, allow_dwelling=False) + + def new_dwell_scheduler(self, task_id, resource_estimator=None, specification_tree=None, allow_dwelling=True): + if allow_dwelling: + min_starttime = datetime.datetime(2017, 1, 1, 1, 0, 0) + max_starttime = datetime.datetime(2017, 1, 2, 1, 0, 0) + else: + # we do not want dwelling, so limit the dwell starttime window to the task's actual starttime. + min_starttime = self.radb.getTask(task_id)['starttime'] + max_starttime = min_starttime + + return DwellScheduler(task_id, + specification_tree if specification_tree else self.get_specification_tree(task_id), + resource_estimator if resource_estimator else self.fake_resource_estimator, + min_starttime, + max_starttime, + datetime.timedelta(hours=1), # duration + self.resource_availability_checker, self.radb.dbcreds) def test_no_dwell(self): """ Whether a task will not dwell unnecessarily on an empty system. """ # Task must succeed - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_dwell_scheduler(0, lambda _: estimates).allocate_resources() + task_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': 512}, + "root_resource_group": "CS001", + "resource_count": 1 } ] + scheduler = self.new_dwell_scheduler(task_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) - # Task must NOT have been moved - self.assertEqual(self.fake_ra_database.tasks[0]["starttime"], datetime.datetime(2017, 1, 1, 1, 0, 0)) + # Task must be positioned at start of dwelling period. + task = self.radb.getTask(task_id) + self.assertEqual(scheduler.min_starttime, task["starttime"]) + self.assertEqual(scheduler.min_starttime+scheduler.duration, task["endtime"]) def test_dwell(self): """ Whether a task will dwell after an existing task. 
""" + max_bw_cap = self.get_station_bandwidth_max_capacity() + # First task must succeed - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_dwell_scheduler(0, lambda _: estimates).allocate_resources() + task1_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 }] + scheduler = self.new_dwell_scheduler(task1_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) # Second task must also succeed - self.new_task(1) - estimates = [{ 'resource_types': {'bandwidth': 513} }] - allocation_succesful = self.new_dwell_scheduler(1, lambda _: estimates).allocate_resources() + task2_id = self.new_task(1) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 }] + scheduler = self.new_dwell_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) # Second task must have been moved, first task not - self.assertEqual(self.fake_ra_database.tasks[0]["starttime"], datetime.datetime(2017, 1, 1, 1, 0, 0)) - self.assertEqual(self.fake_ra_database.tasks[0]["endtime"], datetime.datetime(2017, 1, 1, 2, 0, 0)) - self.assertEqual(self.fake_ra_database.tasks[1]["starttime"], datetime.datetime(2017, 1, 1, 2, 1, 0)) - self.assertEqual(self.fake_ra_database.tasks[1]["endtime"], datetime.datetime(2017, 1, 1, 3, 1, 0)) + self.assertEqual(self.radb.getTask(task1_id)["starttime"], datetime.datetime(2017, 1, 1, 1, 0, 0)) + self.assertEqual(self.radb.getTask(task1_id)["endtime"], datetime.datetime(2017, 1, 1, 2, 0, 0)) + self.assertEqual(self.radb.getTask(task2_id)["starttime"], datetime.datetime(2017, 1, 1, 2, 1, 0)) + self.assertEqual(self.radb.getTask(task2_id)["endtime"], datetime.datetime(2017, 1, 1, 3, 1, 0)) def test_dwell_respect_claim_endtime(self): """ Whether a dwelling task will honour the claim endtimes, instead of the task endtime. """ + max_bw_cap = self.get_station_bandwidth_max_capacity() + # First task must succeed - self.new_task(0) - estimates = [{ 'resource_types': {'bandwidth': 512} }] - allocation_succesful = self.new_dwell_scheduler(0, lambda _: estimates).allocate_resources() + task1_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 }] + # use normal basic scheduler for first normal task, which we want to schedule in a normal (non-dwell) way. 
+ scheduler = self.new_basic_scheduler(task1_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task1_id, status='claimed'))) # Extend claim - self.fake_ra_database.claims[0][0]["endtime"] += datetime.timedelta(hours=1) + task = self.radb.getTask(task1_id) + self.radb.updateResourceClaims(where_task_ids=task1_id, endtime=task["endtime"] + datetime.timedelta(hours=1)) + self.assertEqual(2, len(self.radb.getResourceClaims(task_ids=task1_id, status='claimed'))) # Second task must also succeed - self.new_task(1) - estimates = [{ 'resource_types': {'bandwidth': 513} }] - allocation_succesful = self.new_dwell_scheduler(1, lambda _: estimates).allocate_resources() + task2_id = self.new_task(1) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 }] + scheduler = self.new_dwell_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) - # Second task must have been moved beyond claim endtime - self.assertEqual(self.fake_ra_database.tasks[1]["starttime"], datetime.datetime(2017, 1, 1, 3, 1, 0)) - self.assertEqual(self.fake_ra_database.tasks[1]["endtime"], datetime.datetime(2017, 1, 1, 4, 1, 0)) + # Second task must have been moved beyond 1st claim endtime, first task not + self.assertEqual(self.radb.getTask(task1_id)["starttime"], datetime.datetime(2017, 1, 1, 1, 0, 0)) + self.assertEqual(self.radb.getTask(task1_id)["endtime"], datetime.datetime(2017, 1, 1, 2, 0, 0)) + self.assertEqual(self.radb.getTask(task2_id)["starttime"], datetime.datetime(2017, 1, 1, 3, 1, 0)) + self.assertEqual(self.radb.getTask(task2_id)["endtime"], datetime.datetime(2017, 1, 1, 4, 1, 0)) def test_dwellScheduler_should_give_all_available_resources_on_second_pass(self): """ @@ -884,23 +971,30 @@ class DwellSchedulerTest(PrioritySchedulerTest): that list get subtracted from the list handed to the resource_availability checker. This test verifies that the complete list should be provided on the second try. """ + max_bw_cap = self.get_station_bandwidth_max_capacity() # First task must succeed - self.new_task(0) - estimates = [{'resource_types': {'bandwidth': 512}}] - allocation_succesful = self.new_dwell_scheduler(0, lambda _: estimates).allocate_resources() + task1_id = self.new_task(0) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 }] + scheduler = self.new_dwell_scheduler(task1_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) # Second task must also succeed - self.new_task(1) - estimates = [{ 'resource_types': {'bandwidth': 513} }] - allocation_succesful = self.new_dwell_scheduler(1, lambda _: estimates).allocate_resources() + task2_id = self.new_task(1) + estimates = [{ 'resource_types': {'bandwidth': max_bw_cap}, + "root_resource_group": "CS001", + "resource_count": 2 }] + scheduler = self.new_dwell_scheduler(task2_id, resource_estimator=lambda _: estimates) + allocation_succesful = scheduler.allocate_resources() self.assertTrue(allocation_succesful) # avialable resources can be limited by tracking unkillable resources. They should be # cleared on the second try like in this test. 
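# The assertion below compares sets of resource names rather than the raw dicts. A likely reason
# (inferred by the editor, not stated in the diff) is that getResources(include_availability=True)
# also returns availability/usage fields that can differ between calls, so only the stable 'name'
# field is compared. A tiny helper expressing that comparison:
def resource_names(resources):
    return set(r['name'] for r in resources)
# e.g.: resource_names(checker.last_available_resources) == resource_names(radb.getResources(include_availability=True))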
- self.assertEqual(self.fake_resource_availability_checker.available_resources, - self.fake_ra_database.resources) + self.assertEqual(set(r['name'] for r in self.resource_availability_checker.last_available_resources), + set(r['name'] for r in self.radb.getResources(include_availability=True))) if __name__ == '__main__': logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.DEBUG) diff --git a/SAS/ResourceAssignment/ResourceAssignmentDatabase/CMakeLists.txt b/SAS/ResourceAssignment/ResourceAssignmentDatabase/CMakeLists.txt index a26dc94c2967c204374d4866ba1f3a9c5446bf95..ddd09281e123d0d2d105162e2e041508292438b8 100644 --- a/SAS/ResourceAssignment/ResourceAssignmentDatabase/CMakeLists.txt +++ b/SAS/ResourceAssignment/ResourceAssignmentDatabase/CMakeLists.txt @@ -22,10 +22,8 @@ install(FILES radbpglistener.ini DESTINATION etc/supervisord.d) -add_subdirectory(radb/sql) add_subdirectory(tests) -# symmetric install of sql with symlinks in build share/... and normal install in installed/share/... set(sql_files radb/sql/add_notifications.sql radb/sql/add_functions_and_triggers.sql radb/sql/add_resource_allocation_statics.sql diff --git a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb.py b/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb.py index a160d325768cf6d9ee9d73281fbb76361175b357..e84053fdb515b1c66421a592e9999f6ae8051574 100644 --- a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb.py +++ b/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb.py @@ -39,6 +39,9 @@ _FETCH_NONE=0 _FETCH_ONE=1 _FETCH_ALL=2 +class RADBError(Exception): + pass + class RADatabase: def __init__(self, dbcreds=None, log_queries=False): self.dbcreds = dbcreds @@ -50,6 +53,10 @@ class RADatabase: self._claimStatusName2IdCache = {} self._claimStatusId2NameCache = {} + # keep track if last/current transaction was already committed or rolled_back + self.committed = False + self.rolled_back = False + def _connect(self): self.conn = None self.cursor = None @@ -75,6 +82,9 @@ class RADatabase: for i in range(5): try: start = datetime.utcnow() + # keep track if last/current transaction was already committed or rolled_back + self.committed = False + self.rolled_back = False self.cursor.execute(query, qargs) if self.log_queries: elapsed = datetime.utcnow() - start @@ -98,6 +108,9 @@ class RADatabase: logger.error("Rolling back query=\'%s\' due to error: \'%s\'" % (self._queryAsSingleLine(query, qargs), e)) self.rollback() return [] + # TODO: instead of doing a "silent" rollback and continue, we should raise an RADBError. + # We cannot oversee the impact of such a change at this moment, so let's investigate that later. 
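[Editor's aside, not part of the patch: a minimal sketch of what the TODO above (and the commented-out raise that follows) could look like if the query helper propagated failures instead of silently returning []. It assumes a plain psycopg2 connection; the helper name execute_or_raise and its exact signature are illustrative only, not the radb.py API.]

    import logging
    import psycopg2

    logger = logging.getLogger(__name__)

    class RADBError(Exception):
        """Raised when a radb query fails and its transaction has been rolled back."""

    def execute_or_raise(connection, query, qargs=None):
        """Execute a query; on failure, roll back and raise RADBError instead of returning []."""
        cursor = connection.cursor()
        try:
            cursor.execute(query, qargs)
            # only SELECT-like statements have a result description to fetch
            return cursor.fetchall() if cursor.description else None
        except psycopg2.Error as e:
            logger.error("Rolling back query='%s' due to error: '%s'", query, e)
            connection.rollback()
            # propagate, so callers can tell 'no results' apart from 'query failed'
            raise RADBError(str(e))

Callers that today check for an empty result would then need a try/except around the call, which is exactly the impact the TODO says still has to be investigated.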
+ # raise RADBError(e.message) self._log_database_notifications() @@ -116,10 +129,14 @@ class RADatabase: def commit(self): logger.info('commit') self.conn.commit() + # keep track if last/current transaction was already committed or rolled_back + self.committed = True def rollback(self): logger.info('rollback') self.conn.rollback() + # keep track if last/current transaction was already committed or rolled_back + self.rolled_back = True def getTaskStatuses(self): query = '''SELECT * from resource_allocation.task_status;''' @@ -412,7 +429,7 @@ class RADatabase: VALUES (%s, %s, %s, %s, %s) RETURNING id;''' - id = self._executeQuery(query, (mom_id, otdb_id, task_status, task_type, specification_id), fetch=_FETCH_ONE)['id'] + id = self._executeQuery(query, (mom_id, otdb_id, task_status, task_type, specification_id), fetch=_FETCH_ONE).get('id') if commit: self.commit() return id @@ -808,7 +825,15 @@ class RADatabase: claim_status_id = claim_status query = '''SELECT * from resource_allocation.get_current_resource_usage(%s, %s)''' - return self._executeQuery(query, (resource_id, claim_status_id), fetch=_FETCH_ONE) + result = self._executeQuery(query, (resource_id, claim_status_id), fetch=_FETCH_ONE) + + if result is None or result.get('resource_id') is None: + result = { 'resource_id': resource_id, + 'status_id': claim_status_id, + 'as_of_timestamp': datetime.utcnow(), + 'usage': 0 } + + return result def get_resource_usage_at_or_before(self, resource_id, timestamp, claim_status='claimed', exactly_at=False, only_before=False): if isinstance(claim_status, basestring): @@ -817,7 +842,14 @@ class RADatabase: claim_status_id = claim_status query = '''SELECT * from resource_allocation.get_resource_usage_at_or_before(%s, %s, %s, %s, %s, %s)''' - return self._executeQuery(query, (resource_id, claim_status_id, timestamp, exactly_at, only_before, False), fetch=_FETCH_ONE) + result = self._executeQuery(query, (resource_id, claim_status_id, timestamp, exactly_at, only_before, False), fetch=_FETCH_ONE) + + if result is None or result.get('resource_id') is None: + result = { 'resource_id': resource_id, + 'status_id': claim_status_id, + 'as_of_timestamp': timestamp, + 'usage': 0 } + return result def updateResourceAvailability(self, resource_id, active=None, available_capacity=None, total_capacity=None, commit=True): if active is not None: @@ -942,7 +974,7 @@ class RADatabase: r_items[r_item_id] = r_item parent_id = relation['resource_group_parent_id'] - if parent_id != None: + if parent_id != None and parent_id in rg_items: r_items[r_item_id]['parent_group_ids'].append(parent_id) rg_items[parent_id]['resource_ids'].append(r_item_id) @@ -1260,6 +1292,7 @@ class RADatabase: result = self.insertResourceClaims(task_id, [claim], username, user_id, commit) if result: return result[0] + return None def insertResourceClaims(self, task_id, claims, username, user_id, commit=True): '''bulk insert of a list of resource claims for a task(_id). All claims are inserted with status tentative. 
@@ -1280,12 +1313,12 @@ class RADatabase: ''' logger.info('insertResourceClaims for task_id=%d with %d claim(s)' % (task_id, len(claims))) - status_strings = set([c['status'] for c in claims if isinstance(c['status'], basestring)]) + status_strings = set([c.get('status', 'tentative') for c in claims if isinstance(c.get('status', 'tentative'), basestring)]) if status_strings: status_string2id = {s:self.getResourceClaimStatusId(s) for s in status_strings} for c in claims: - if isinstance(c['status'], basestring): - c['status_id'] = status_string2id[c['status']] + if isinstance(c.get('status', 'tentative'), basestring): + c['status_id'] = status_string2id[c.get('status', 'tentative')] elif isinstance(c['status'], int): c['status_id'] = c['status'] @@ -1590,6 +1623,8 @@ class RADatabase: if commit: self.commit() return {'inserted': True, 'specification_id': specId, 'task_id': taskId} + else: + self.rollback() except Exception as e: logger.error(e) self.rollback() diff --git a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/CMakeLists.txt b/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/CMakeLists.txt deleted file mode 100644 index d0443398bad2a8ca891f2d43956776ce0dcfcf8e..0000000000000000000000000000000000000000 --- a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -# $Id: CMakeLists.txt 32341 2015-08-28 11:59:26Z schaap $ - -set(sql_files add_notifications.sql - add_functions_and_triggers.sql - add_resource_allocation_statics.sql - add_virtual_instrument.sql - create_database.sql - create_and_populate_database.sql - README) - -install_files(/share/radb/sql FILES ${sql_files}) - diff --git a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_functions_and_triggers.sql b/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_functions_and_triggers.sql index 9fa8caae0f520853b0a5dfb243f92e8ebcb5dffd..56504320b0c87b8c630ea7f6b6fd668908fef162 100644 --- a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_functions_and_triggers.sql +++ b/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_functions_and_triggers.sql @@ -12,10 +12,13 @@ DECLARE claim_tentative_status_id int := 0; --beware: hard coded instead of lookup for performance claim_claimed_status_id int := 1; --beware: hard coded instead of lookup for performance task_approved_status_id int := 300; --beware: hard coded instead of lookup for performance + task_conflict_status_id int := 335; --beware: hard coded instead of lookup for performance BEGIN IF NEW.status_id <> OLD.status_id THEN - IF NEW.status_id = task_approved_status_id THEN - UPDATE resource_allocation.resource_claim rc SET status_id=claim_tentative_status_id WHERE rc.task_id=NEW.id AND rc.status_id <> claim_tentative_status_id; + IF NEW.status_id = task_approved_status_id OR NEW.status_id = task_conflict_status_id THEN + UPDATE resource_allocation.resource_claim + SET status_id=claim_tentative_status_id + WHERE (task_id=NEW.id AND status_id = claim_claimed_status_id); ELSIF NEW.status_id = ANY(ARRAY[400, 500, 600, 900, 1000, 1100]) THEN --prevent task status to be upgraded to scheduled (or beyond) when not all its claims are claimed IF EXISTS (SELECT id FROM resource_allocation.resource_claim WHERE task_id = NEW.id AND status_id <> claim_claimed_status_id) THEN @@ -232,37 +235,13 @@ CREATE TRIGGER T_specification_insertupdate_check_startendtimes 
--------------------------------------------------------------------------------------------------------------------- -CREATE OR REPLACE FUNCTION resource_allocation.on_claim_insertupdate_check_startendtimes() - RETURNS trigger AS -$BODY$ -BEGIN - IF NEW.starttime > NEW.endtime THEN - RAISE EXCEPTION 'claim starttime > endtime: %', NEW; - END IF; -RETURN NEW; -END; -$BODY$ - LANGUAGE plpgsql VOLATILE - COST 100; -ALTER FUNCTION resource_allocation.on_claim_insertupdate_check_startendtimes() - OWNER TO resourceassignment; - -DROP TRIGGER IF EXISTS T_claim_insertupdate_check_startendtimes ON resource_allocation.resource_claim; -CREATE TRIGGER T_claim_insertupdate_check_startendtimes - BEFORE INSERT OR UPDATE - ON resource_allocation.resource_claim - FOR EACH ROW - EXECUTE PROCEDURE resource_allocation.on_claim_insertupdate_check_startendtimes(); - ---------------------------------------------------------------------------------------------------------------------- - CREATE OR REPLACE FUNCTION resource_allocation.process_new_claim_into_resource_usages(new_claim resource_allocation.resource_claim) RETURNS void AS $$ DECLARE - usage_at_or_before_start RECORD; - usage_at_or_before_end RECORD; - intermediate_usage RECORD; + usage_at_or_before_start resource_allocation.resource_usage; + usage_at_or_before_end resource_allocation.resource_usage; + intermediate_usage resource_allocation.resource_usage; BEGIN -- find resource_usage at claim starttime SELECT * FROM resource_allocation.get_resource_usage_at_or_before(new_claim.resource_id, new_claim.status_id, new_claim.starttime, false, false, false) into usage_at_or_before_start; @@ -292,6 +271,7 @@ BEGIN INSERT INTO resource_allocation.resource_usage (resource_id, status_id, as_of_timestamp, usage) VALUES (new_claim.resource_id, new_claim.status_id, new_claim.endtime, usage_at_or_before_end.usage); END IF; + --TODO: 20180709; why no else with an update? ELSE -- no previous usage known, so insert 0 as the last usage INSERT INTO resource_allocation.resource_usage (resource_id, status_id, as_of_timestamp, usage) @@ -316,6 +296,16 @@ COMMENT ON FUNCTION resource_allocation.process_new_claim_into_resource_usages(n --------------------------------------------------------------------------------------------------------------------- +-- 20180903: brainstorm with AK & JS: the resource_usages table is useful because it makes lookups faster. However, +-- there are known bugs in inserting/updating the resource_usages table upon changes in resource_claims. +-- We discussed the idea of using an additional deltas helper table: claims -> deltas -> usages. +-- The current implementation goes directly from claims -> usages, and loops over claims "opening" and "closing" in the usage table. +-- Introducing the intermediate deltas table has the benefit of using simple sql sums, and not keeping track of opening/closing claims. +-- Highly recommended to give this a try in JIRA SW-35.
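[Editor's aside, not part of the patch: a toy Python sketch of the claims -> deltas -> usages idea described in the comment above, assuming a 'delta' is simply +claim_size at a claim's starttime and -claim_size at its endtime, so the usage timeline is the running sum of those deltas. The function usages_from_claims and the dict keys used here are illustrative, not the actual SW-35 design.]

    from collections import defaultdict
    from datetime import datetime

    def usages_from_claims(claims):
        """Derive a usage timeline from claims via per-timestamp deltas (a running sum)."""
        deltas = defaultdict(int)
        for claim in claims:
            deltas[claim['starttime']] += claim['claim_size']   # claim opens: usage goes up
            deltas[claim['endtime']] -= claim['claim_size']     # claim closes: usage goes down

        usages, running_total = [], 0
        for timestamp in sorted(deltas):
            running_total += deltas[timestamp]
            usages.append({'as_of_timestamp': timestamp, 'usage': running_total})
        return usages

    # two overlapping claims of size 10 and 5: usage ramps 10 -> 15 -> 5 -> 0
    claims = [{'starttime': datetime(2017, 1, 1, 1), 'endtime': datetime(2017, 1, 1, 3), 'claim_size': 10},
              {'starttime': datetime(2017, 1, 1, 2), 'endtime': datetime(2017, 1, 1, 4), 'claim_size': 5}]
    print(usages_from_claims(claims))

The single sorted pass with a running sum is the "simple sql sums" benefit the comment refers to: there is no bookkeeping of which claims are opening or closing at each timestamp.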
+ +--------------------------------------------------------------------------------------------------------------------- + + CREATE OR REPLACE FUNCTION resource_allocation.rebuild_resource_usages_from_claims() RETURNS void AS $$ @@ -602,6 +592,7 @@ BEGIN IF usage_at_end.usage = 0 THEN --usage_at_end was 'caused' by this deleted claim only, so delete it + --TODO:20180704 do not delete if another claim with this status and timestamp also causes this 0 DELETE FROM resource_allocation.resource_usage ru WHERE ru.id = usage_at_end.id; END IF; @@ -658,15 +649,7 @@ BEGIN -- try again, but now without the option to rebuild_usage_when_not_found (to prevent endless recursion) SELECT * FROM resource_allocation.get_resource_usage_at_or_before(_resource_id, _claim_status_id, _timestamp, exactly_at, only_before, false) INTO result; - RAISE NOTICE 'get_resource_usage_at_or_before(_resource_id=%, status_id=%, timestamp=%, exactly_at=%, only_before=%, rebuild_usage_when_not_found=%): after rebuild, result=%.', _resource_id, _claim_status_id, _timestamp, exactly_at, only_before, rebuild_usage_when_not_found, result; - END IF; - - IF result IS NULL THEN - -- if result is still null (after possible rebuild etc), then return a 'default' usage of 0 - result.resource_id = _resource_id; - result.status_id = _claim_status_id; - result.as_of_timestamp = _timestamp; - result.usage = 0; + RAISE NOTICE 'get_resource_usage_at_or_before(_resource_id=%, status_id=%, timestamp=%, exactly_at=%, only_before=%, rebuild_usage_when_not_found=%): after rebuild, result=%.', _resource_id, _claim_status_id, _timestamp, exactly_at, only_before, false, result; END IF; RETURN result; @@ -711,8 +694,8 @@ CREATE OR REPLACE FUNCTION resource_allocation.get_max_resource_usage_between(_r RETURNS resource_allocation.resource_usage AS $$ DECLARE - max_resource_usage_in_time_window record; - max_resource_at_or_before_starttime record; + max_resource_usage_in_time_window resource_allocation.resource_usage; + max_resource_at_or_before_starttime resource_allocation.resource_usage; BEGIN SELECT * FROM resource_allocation.get_resource_usage_at_or_before(_resource_id, _claim_status_id, _lower, false, false, false) into max_resource_at_or_before_starttime; @@ -725,10 +708,14 @@ BEGIN LIMIT 1 INTO max_resource_usage_in_time_window; IF max_resource_usage_in_time_window IS NOT NULL THEN - IF max_resource_usage_in_time_window.usage > max_resource_at_or_before_starttime.usage THEN - RETURN max_resource_usage_in_time_window; + IF max_resource_at_or_before_starttime IS NULL THEN + RETURN max_resource_usage_in_time_window; ELSE - RETURN max_resource_at_or_before_starttime; + IF max_resource_usage_in_time_window.usage > max_resource_at_or_before_starttime.usage THEN + RETURN max_resource_usage_in_time_window; + ELSE + RETURN max_resource_at_or_before_starttime; + END IF; END IF; ELSE -- could also be NULL but that is checked for elsewhere @@ -783,7 +770,7 @@ BEGIN END IF; END; $$ LANGUAGE plpgsql; -ALTER FUNCTION resource_allocation.get_resource_claimable_capacity_between(_resource_id int, _lower timestamp, _upper timestamp) +ALTER FUNCTION resource_allocation.get_resource_claimable_capacity_between(_resource_id int, _lower timestamp, _upper timestamp) OWNER TO resourceassignment; COMMENT ON FUNCTION resource_allocation.get_resource_claimable_capacity_between(_resource_id int, _lower timestamp, _upper timestamp) IS 'get the maximum resource usage for the given _resource_id for claims with given _claim_status_id in the period between the given _lower and 
_upper timestamps'; @@ -867,6 +854,14 @@ DECLARE BEGIN --order of following steps is important, do not reorder the steps + IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN + IF NEW.starttime >= NEW.endtime THEN + -- Conceptually, you can't claim and release a resource at the same timestamp. + -- Nor can you claim a resource for a negative timespan. + RAISE EXCEPTION 'claim starttime >= endtime: %', NEW; + END IF; + END IF; + -- bounce any inserted claim which is not tentative IF TG_OP = 'INSERT' THEN IF NEW.status_id <> claim_tentative_status_id THEN @@ -901,7 +896,9 @@ BEGIN --update the resource usages affected by this claim --do this before we check for conflicts, because this claim might be shifted for example --which might influence the resource_usages which determine wheter a claim fits. - PERFORM resource_allocation.process_old_claim_outof_resource_usages(OLD); + IF OLD.resource_id <> 117 THEN --20180903: skip checking of cep4 storage until JIRA SW-35 is solved. + PERFORM resource_allocation.process_old_claim_outof_resource_usages(OLD); + END IF; END IF; --only check claim if status and/or claim_size and/or start/end time changed @@ -909,26 +906,33 @@ BEGIN OLD.claim_size <> NEW.claim_size OR OLD.starttime <> NEW.starttime OR OLD.endtime <> NEW.endtime)) THEN - --check if claim fits or has conflicts - SELECT * FROM resource_allocation.has_conflict_with_overlapping_claims(NEW) INTO claim_has_conflicts; - - IF claim_has_conflicts THEN - IF NEW.status_id <> claim_conflict_status_id THEN - -- only set claims to conflict if task status <= queued - -- when a claim goes to conflict, then so does it's task, and we don't want that for running/finished/aborted tasks - IF EXISTS (SELECT 1 FROM resource_allocation.task - WHERE id=NEW.task_id - AND status_id = ANY(ARRAY[300, 335, 350, 400, 500])) THEN -- hardcoded tasks statuses <= queued - -- conflict with others, so set claim status to conflict - NEW.status_id := claim_conflict_status_id; + IF NEW.resource_id <> 117 THEN --20180903: skip checking of cep4 storage until JIRA SW-35 is solved. 
+ --check if claim fits or has conflicts + SELECT * FROM resource_allocation.has_conflict_with_overlapping_claims(NEW) INTO claim_has_conflicts; + + IF claim_has_conflicts THEN + IF NEW.status_id <> claim_conflict_status_id THEN + -- only set claims to conflict if task status <= queued + -- when a claim goes to conflict, then so does it's task, and we don't want that for running/finished/aborted tasks + IF EXISTS (SELECT 1 FROM resource_allocation.task + WHERE id=NEW.task_id + AND status_id = ANY(ARRAY[300, 335, 350, 400, 500])) THEN -- hardcoded tasks statuses <= queued + -- conflict with others, so set claim status to conflict + NEW.status_id := claim_conflict_status_id; + END IF; + END IF; + ELSE + -- no conflict (anymore) with others, so set claim status to tentative if currently in conflict + IF NEW.status_id = claim_conflict_status_id THEN + NEW.status_id := claim_tentative_status_id; END IF; - END IF; - ELSE - -- no conflict (anymore) with others, so set claim status to tentative if currently in conflict - IF NEW.status_id = claim_conflict_status_id THEN - NEW.status_id := claim_tentative_status_id; END IF; END IF; + + IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN + --update the resource usages affected by this claim + PERFORM resource_allocation.process_new_claim_into_resource_usages(NEW); + END IF; END IF; IF TG_OP = 'DELETE' THEN @@ -966,14 +970,6 @@ DECLARE affected_claim resource_allocation.resource_claim; claim_has_conflicts boolean; BEGIN - --do not process_old_claim_outof_resource_usages(OLD) - --because that has been done already in before_claim_insertupdatedelete - - IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN - --update the resource usages affected by this claim - PERFORM resource_allocation.process_new_claim_into_resource_usages(NEW); - END IF; - -- in the before trigger function, everything on the claim has been checked and adapted. -- now (in the after trigger, when all claims were inserted/updated in the database), let's check if the task should also be updated (to conflict status for example) -- only if claim status was changed or inserted... @@ -982,8 +978,14 @@ BEGIN --if claim status went to conflict, then set the task status to conflict as well UPDATE resource_allocation.task SET status_id=task_conflict_status_id WHERE id=NEW.task_id AND status_id <> task_conflict_status_id; ELSIF NEW.status_id = claim_tentative_status_id THEN - IF NOT EXISTS (SELECT id FROM resource_allocation.resource_claim WHERE task_id = NEW.task_id AND status_id = claim_conflict_status_id) THEN - UPDATE resource_allocation.task SET status_id=task_approved_status_id WHERE id=NEW.task_id AND status_id <> task_approved_status_id; + IF NOT EXISTS (SELECT id FROM resource_allocation.resource_claim + WHERE task_id = NEW.task_id + AND status_id = claim_conflict_status_id) THEN + IF NOT EXISTS (SELECT id FROM resource_allocation.task + WHERE id = NEW.task_id + AND status_id = task_approved_status_id) THEN + UPDATE resource_allocation.task SET status_id=task_approved_status_id WHERE id=NEW.task_id AND status_id <> task_approved_status_id; + END IF; END IF; END IF; END IF; @@ -1000,12 +1002,14 @@ BEGIN AND rc.endtime >= OLD.starttime AND rc.starttime < OLD.endtime LOOP - --check if claim fits or has conflicts - SELECT * FROM resource_allocation.has_conflict_with_overlapping_claims(affected_claim) INTO claim_has_conflicts; + IF affected_claim.resource_id <> 117 THEN --20180903: skip checking of cep4 storage until JIRA SW-35 is solved. 
+ --check if claim fits or has conflicts + SELECT * FROM resource_allocation.has_conflict_with_overlapping_claims(affected_claim) INTO claim_has_conflicts; - IF NOT claim_has_conflicts THEN - -- no conflict (anymore) with others, so set claim status to tentative - UPDATE resource_allocation.resource_claim SET status_id=claim_tentative_status_id WHERE id = affected_claim.id; + IF NOT claim_has_conflicts THEN + -- no conflict (anymore) with others, so set claim status to tentative + UPDATE resource_allocation.resource_claim SET status_id=claim_tentative_status_id WHERE id = affected_claim.id; + END IF; END IF; END LOOP; END IF; @@ -1021,16 +1025,18 @@ BEGIN AND rc.endtime >= NEW.starttime AND rc.starttime < NEW.endtime LOOP - --check if claim fits or has conflicts - SELECT * FROM resource_allocation.has_conflict_with_overlapping_claims(affected_claim) INTO claim_has_conflicts; + IF affected_claim.resource_id <> 117 THEN --20180903: skip checking of cep4 storage until JIRA SW-35 is solved. + --check if claim fits or has conflicts + SELECT * FROM resource_allocation.has_conflict_with_overlapping_claims(affected_claim) INTO claim_has_conflicts; - IF claim_has_conflicts THEN - -- new conflict for affected_claim because this NEW claim is now claimed - UPDATE resource_allocation.resource_claim SET status_id=claim_conflict_status_id WHERE id = affected_claim.id; + IF claim_has_conflicts THEN + -- new conflict for affected_claim because this NEW claim is now claimed + UPDATE resource_allocation.resource_claim SET status_id=claim_conflict_status_id WHERE id = affected_claim.id; + END IF; END IF; END LOOP; END IF; - + IF TG_OP = 'DELETE' THEN RETURN OLD; END IF; diff --git a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_triggers.sql b/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_triggers.sql deleted file mode 100644 index 5686fc819241bcf8fefbc933697abc7436956c89..0000000000000000000000000000000000000000 --- a/SAS/ResourceAssignment/ResourceAssignmentDatabase/radb/sql/add_triggers.sql +++ /dev/null @@ -1,240 +0,0 @@ ---add triggers and trigger functions to radb (note, there are also the notification triggers in the add_notifications.sql file) - -BEGIN; - --- only issue >warnings log messages. 
(only during this transaction) -SET LOCAL client_min_messages=warning; - -DROP TRIGGER IF EXISTS T_delete_resource_claims_for_approved_task ON resource_allocation.task CASCADE; -DROP FUNCTION IF EXISTS resource_allocation.delete_resource_claims_for_approved_task(); - -CREATE OR REPLACE FUNCTION resource_allocation.delete_resource_claims_for_approved_task() - RETURNS trigger AS -$BODY$ -BEGIN - IF NEW.status_id <> OLD.status_id AND NEW.status_id = 300 THEN - DELETE FROM resource_allocation.resource_claim rc WHERE rc.task_id = NEW.id; - END IF; -RETURN NEW; -END; -$BODY$ - LANGUAGE plpgsql VOLATILE - COST 100; -ALTER FUNCTION resource_allocation.delete_resource_claims_for_approved_task() - OWNER TO resourceassignment; -COMMENT ON FUNCTION resource_allocation.delete_resource_claims_for_approved_task() - IS 'function which is called by task table update trigger, which deletes all the tasks resource claims.'; - -CREATE TRIGGER T_delete_resource_claims_for_approved_task - AFTER UPDATE - ON resource_allocation.task - FOR EACH ROW - EXECUTE PROCEDURE resource_allocation.delete_resource_claims_for_approved_task(); -COMMENT ON TRIGGER T_delete_resource_claims_for_approved_task ON resource_allocation.task - IS 'task table update trigger, calls the resource_allocation.delete_resource_claims_for_approved_task() function.'; - ---------------------------------------------------------------------------------------------------------------------- - -DROP TRIGGER IF EXISTS T_delete_conflict_reasons_after_resource_claim_update ON resource_allocation.resource_claim CASCADE; -DROP FUNCTION IF EXISTS resource_allocation.delete_conflict_reasons_after_resource_claim_update(); - -CREATE OR REPLACE FUNCTION resource_allocation.delete_conflict_reasons_after_resource_claim_update() - RETURNS trigger AS -$BODY$ -BEGIN - IF OLD.status_id = 2 AND NEW.status_id <> 2 THEN --new status is not conflict - DELETE FROM resource_allocation.resource_claim_conflict_reason rccr WHERE rccr.resource_claim_id = NEW.id; - END IF; -RETURN NEW; -END; -$BODY$ - LANGUAGE plpgsql VOLATILE - COST 100; -ALTER FUNCTION resource_allocation.delete_conflict_reasons_after_resource_claim_update() - OWNER TO resourceassignment; -COMMENT ON FUNCTION resource_allocation.delete_conflict_reasons_after_resource_claim_update() - IS 'function which is called by resource_claim table update trigger, which deletes resource_claim_conflict_reasons when the claim status is updated to !conflict.'; - -CREATE TRIGGER T_delete_conflict_reasons_after_resource_claim_update - AFTER UPDATE - ON resource_allocation.resource_claim - FOR EACH ROW - EXECUTE PROCEDURE resource_allocation.delete_conflict_reasons_after_resource_claim_update(); - ---------------------------------------------------------------------------------------------------------------------- - -DROP TRIGGER IF EXISTS T_before_insert_conflict_reason_do_resource_claim_status_check ON resource_allocation.resource_claim_conflict_reason CASCADE; -DROP FUNCTION IF EXISTS resource_allocation.before_insert_conflict_reason_do_resource_claim_status_check(); - -CREATE OR REPLACE FUNCTION resource_allocation.before_insert_conflict_reason_do_resource_claim_status_check() - RETURNS trigger AS -$BODY$ -BEGIN - -- check if referred resource_claim is in conflict status, else raise - IF (SELECT COUNT(id) FROM resource_allocation.resource_claim rc WHERE rc.id = NEW.resource_claim_id AND rc.status_id = 2) = 0 THEN - RAISE EXCEPTION 'resource_claim has no conflict status'; - END IF; -RETURN NEW; -END; -$BODY$ - LANGUAGE 
plpgsql VOLATILE - COST 100; -ALTER FUNCTION resource_allocation.before_insert_conflict_reason_do_resource_claim_status_check() - OWNER TO resourceassignment; -COMMENT ON FUNCTION resource_allocation.before_insert_conflict_reason_do_resource_claim_status_check() - IS 'check if referred resource_claim is in conflict status, else raise'; - -CREATE TRIGGER T_before_insert_conflict_reason_do_resource_claim_status_check - BEFORE INSERT - ON resource_allocation.resource_claim_conflict_reason - FOR EACH ROW - EXECUTE PROCEDURE resource_allocation.before_insert_conflict_reason_do_resource_claim_status_check(); - ---------------------------------------------------------------------------------------------------------------------- - -DROP TRIGGER IF EXISTS T_delete_conflict_reasons_after_task_update ON resource_allocation.task CASCADE; -DROP FUNCTION IF EXISTS resource_allocation.delete_conflict_reasons_after_task_update(); - -CREATE OR REPLACE FUNCTION resource_allocation.delete_conflict_reasons_after_task_update() - RETURNS trigger AS -$BODY$ -BEGIN - IF OLD.status_id = 335 AND NEW.status_id <> 335 THEN --new status is not conflict - DELETE FROM resource_allocation.task_conflict_reason tcr WHERE tcr.task_id = NEW.id; - END IF; -RETURN NEW; -END; -$BODY$ - LANGUAGE plpgsql VOLATILE - COST 100; -ALTER FUNCTION resource_allocation.delete_conflict_reasons_after_task_update() - OWNER TO resourceassignment; -COMMENT ON FUNCTION resource_allocation.delete_conflict_reasons_after_task_update() - IS 'function which is called by task table update trigger, which deletes task_conflict_reasons when the task status is updated to !conflict.'; - -CREATE TRIGGER T_delete_conflict_reasons_after_task_update - AFTER UPDATE - ON resource_allocation.task - FOR EACH ROW - EXECUTE PROCEDURE resource_allocation.delete_conflict_reasons_after_task_update(); - ---------------------------------------------------------------------------------------------------------------------- - -DROP TRIGGER IF EXISTS T_before_insert_conflict_reason_do_task_status_check ON resource_allocation.task_conflict_reason CASCADE; -DROP FUNCTION IF EXISTS resource_allocation.before_insert_conflict_reason_do_task_status_check(); - -CREATE OR REPLACE FUNCTION resource_allocation.before_insert_conflict_reason_do_task_status_check() - RETURNS trigger AS -$BODY$ -BEGIN - -- check if referred task is in conflict status, else raise - IF (SELECT COUNT(id) FROM resource_allocation.task task WHERE task.id = NEW.task_id AND task.status_id = 335) = 0 THEN - RAISE EXCEPTION 'task has no conflict status'; - END IF; -RETURN NEW; -END; -$BODY$ - LANGUAGE plpgsql VOLATILE - COST 100; -ALTER FUNCTION resource_allocation.before_insert_conflict_reason_do_task_status_check() - OWNER TO resourceassignment; -COMMENT ON FUNCTION resource_allocation.before_insert_conflict_reason_do_task_status_check() - IS 'check if referred task is in conflict status, else raise'; - -CREATE TRIGGER T_before_insert_conflict_reason_do_task_status_check - BEFORE INSERT - ON resource_allocation.task_conflict_reason - FOR EACH ROW - EXECUTE PROCEDURE resource_allocation.before_insert_conflict_reason_do_task_status_check(); - ---------------------------------------------------------------------------------------------------------------------- - -DROP TRIGGER IF EXISTS T_specification_insertupdate_check_startendtimes ON resource_allocation.specification; -DROP FUNCTION IF EXISTS resource_allocation.on_insertupdate_check_specification_startendtimes(); - -CREATE OR REPLACE FUNCTION 
resource_allocation.on_insertupdate_check_specification_startendtimes() - RETURNS trigger AS -$BODY$ -DECLARE -task RECORD; -pred_task RECORD; -suc_task RECORD; -predecessor_task_id int; -successor_task_id int; -moved_seconds double precision; -duration double precision; -max_pred_endtime timestamp := '1900-01-01 00:00:00'; -tmp_time timestamp; -min_starttime timestamp; -min_inter_task_delay int; -BEGIN - --swap start/end time if needed - IF NEW.starttime > NEW.endtime THEN - RAISE NOTICE 'NEW.starttime > NEW.endtime'; - tmp_time := NEW.starttime; - NEW.starttime := NEW.endtime; - NEW.endtime := tmp_time; - END IF; - - --store task duration - SELECT EXTRACT(epoch FROM age(NEW.endtime, NEW.starttime)) INTO duration; - - --deterimine max_pred_endtime - FOR task IN SELECT * FROM resource_allocation.task_view tv WHERE tv.specification_id = NEW.id LOOP - IF task.predecessor_ids IS NOT NULL THEN - FOREACH predecessor_task_id IN ARRAY task.predecessor_ids LOOP - FOR pred_task IN SELECT * FROM resource_allocation.task_view tv WHERE tv.id = predecessor_task_id LOOP - IF pred_task.endtime > max_pred_endtime THEN - max_pred_endtime := pred_task.endtime; - END IF; - END LOOP; - END LOOP; - END IF; - END LOOP; - - --check if spec is before max_pred_endtime, correct if needed. - IF max_pred_endtime > '1900-01-01 00:00:00' THEN - SELECT c.value::integer INTO min_inter_task_delay FROM resource_allocation.config c WHERE c.name = 'min_inter_task_delay'; - IF min_inter_task_delay IS NULL THEN - min_inter_task_delay := 0; - END IF; - min_starttime := max_pred_endtime + min_inter_task_delay * interval '1 second'; - IF min_starttime > NEW.starttime THEN - NEW.starttime := min_starttime; - NEW.endtime := min_starttime + duration * interval '1 second'; - END IF; - END IF; - - --move successor tasks by same amount if needed - IF TG_OP = 'UPDATE' THEN - IF NEW.endtime <> OLD.endtime THEN - SELECT EXTRACT(epoch FROM age(NEW.endtime, OLD.endtime)) INTO moved_seconds; - FOR task IN SELECT * FROM resource_allocation.task_view tv WHERE tv.specification_id = NEW.id LOOP - IF task.successor_ids IS NOT NULL THEN - FOREACH successor_task_id IN ARRAY task.successor_ids LOOP - FOR suc_task IN SELECT * FROM resource_allocation.task_view tv WHERE tv.id = successor_task_id LOOP - UPDATE resource_allocation.specification SET (starttime, endtime) = (starttime + moved_seconds * interval '1 second', endtime + moved_seconds * interval '1 second') WHERE id = suc_task.specification_id; - END LOOP; - END LOOP; - END IF; - END LOOP; - END IF; - END IF; - -RETURN NEW; -END; -$BODY$ - LANGUAGE plpgsql VOLATILE - COST 100; -ALTER FUNCTION resource_allocation.on_insertupdate_check_specification_startendtimes() - OWNER TO resourceassignment; - -CREATE TRIGGER T_specification_insertupdate_check_startendtimes - BEFORE INSERT OR UPDATE - ON resource_allocation.specification - FOR EACH ROW - EXECUTE PROCEDURE resource_allocation.on_insertupdate_check_specification_startendtimes(); - ---------------------------------------------------------------------------------------------------------------------- - -COMMIT; diff --git a/SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/radb_common_testing.py b/SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/radb_common_testing.py new file mode 100755 index 0000000000000000000000000000000000000000..d4dd75f3121cfea14743be254e291b751a3dc3fb --- /dev/null +++ b/SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/radb_common_testing.py @@ -0,0 +1,171 @@ +#!/usr/bin/python + +# Copyright (C) 
2012-2015 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# $Id$ +import unittest +import psycopg2 +import os +from datetime import datetime, timedelta +from dateutil import parser +import logging + +logger = logging.getLogger(__name__) + +try: + import testing.postgresql +except ImportError as e: + print str(e) + print 'Please install python package testing.postgresql: sudo pip install testing.postgresql' + exit(3) # special lofar test exit code: skipped test + +from lofar.common.dbcredentials import Credentials +from lofar.sas.resourceassignment.database.radb import RADatabase + + +# Create shared test database for better performance +database_credentials = None +Postgresql = None + +def setUpModule(): + global database_credentials, Postgresql + database_credentials = Credentials() + Postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True) + + +def tearDownModule(): + # clear cached database at end of tests + logger.info('tearDownModule') + Postgresql.clear_cache() + + +class RADBCommonTest(unittest.TestCase): + + def setUp(self): + logger.info('setting up test RA database...') + # connect to shared test db + self.postgresql = Postgresql() # fresh db instead of shared one: self.postgresql = testing.postgresql.Postgresql() + + # set up fixtures + # Note: In theory, this can be moved to the PostgresqlFactory call as kwarg 'on_initialized=populatedb' + # ...but for some reason that was much slower than keeping it here. + self._setup_database() + + # update credentials (e.g. port changes for each test) + database_credentials.host = self.postgresql.dsn()['host'] + database_credentials.database = self.postgresql.dsn()['database'] + database_credentials.port = self.postgresql.dsn()['port'] + + # connect with useradministration role for tests + self.connection = psycopg2.connect(host=database_credentials.host, + user=database_credentials.user, + password=database_credentials.password, + dbname=database_credentials.database, + port=database_credentials.port) + + # set up radb python module + self.radb = RADatabase(database_credentials, log_queries=True) + logger.info('...finished setting up test RA database') + + def tearDown(self): + logger.info('removing test RA database...') + self.connection.close() + # self.Postgresql.clear_cache() # for fresh db during setUp, do instead: + self.postgresql.stop() + + def _setup_database(self): + + # connect to db as root + conn = psycopg2.connect(**self.postgresql.dsn()) + cursor = conn.cursor() + + # set credentials to be used during tests + database_credentials.user = 'resourceassignment' + database_credentials.password = 'blabla' # cannot be empty... 
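[Editor's aside, not part of the patch: the intent of this new radb_common_testing module is that other test modules reuse the shared PostgreSQL fixture instead of duplicating it; the t_radb.py changes later in this diff do exactly that. A minimal usage sketch follows, with an illustrative module/test name.]

    import unittest
    import radb_common_testing

    def setUpModule():
        return radb_common_testing.setUpModule()

    def tearDownModule():
        return radb_common_testing.tearDownModule()

    class MyRADBTest(radb_common_testing.RADBCommonTest):
        def test_task_table_exists(self):
            # self.radb and self.connection are provided by RADBCommonTest.setUp()
            fetch = self._execute_query("SELECT table_name FROM information_schema.tables", fetch=True)
            self.assertTrue('task' in str(fetch))

    if __name__ == '__main__':
        unittest.main()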
+ + # create user role + # Note: NOSUPERUSER currently raises "permission denied for schema virtual_instrument" + # Maybe we want to sort out user creation and proper permissions in the sql scripts? + query = "CREATE USER %s WITH SUPERUSER PASSWORD '%s'" % ( + database_credentials.user, + database_credentials.password) + cursor.execute(query) + + # populate db tables + # These are applied in given order to set up test db + # Note: cannot use create_and_populate_database.sql since '\i' is not understood by cursor.execute() + sql_basepath = os.environ['LOFARROOT'] + "/share/radb/sql/" + sql_createdb_paths = [sql_basepath + "create_database.sql", + sql_basepath + "/add_resource_allocation_statics.sql", + sql_basepath + "/add_virtual_instrument.sql", + sql_basepath + "/add_notifications.sql", + sql_basepath + "/add_functions_and_triggers.sql"] + + for sql_path in sql_createdb_paths: + logger.debug("setting up database. applying sql file: %s", sql_path) + with open(sql_path) as sql: + cursor.execute(sql.read()) + + cursor.close() + conn.commit() + conn.close() + + def _execute_query(self, query, fetch=False): + cursor = self.connection.cursor() + cursor.execute(query) + ret = None + if fetch: + ret = cursor.fetchall() + cursor.close() + self.connection.commit() + return ret + + # --- tests start here + + # integrity tests of postgres database itself + # + # Note: These are meant to make sure the setup generally works and all sql scripts were applied. + # I don't see much benefit in full coverage here since it should be all be tested through RADataBase functionality. + # Of course new tests can be added here where db functionality like triggers should be tested separately from the + # Python part of the job. + + # database created? + def test_select_tables_contains_tables_for_each_schema(self): + query = "SELECT table_schema,table_name FROM information_schema.tables" + fetch = self._execute_query(query, fetch=True) + self.assertTrue('resource_allocation' in str(fetch)) + self.assertTrue('resource_monitoring' in str(fetch)) + self.assertTrue('virtual_instrument' in str(fetch)) + + # resource allocation_statics there? + def test_select_task_types_contains_obervation(self): + query = "SELECT * FROM resource_allocation.task_type" + fetch = self._execute_query(query, fetch=True) + self.assertTrue('observation' in str(fetch)) + + # virtual instrument there? 
+ def test_select_virtualinstrument_units_contain_rcuboard(self): + query = "SELECT * FROM virtual_instrument.unit" + fetch = self._execute_query(query, fetch=True) + self.assertTrue('rcu_board' in str(fetch)) + + +if __name__ == "__main__": + os.environ['TZ'] = 'UTC' + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO) + unittest.main() diff --git a/SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/t_radb.py b/SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/t_radb.py index 9a8a714739b1ce7e9b5c7677311a6d3227ad5823..98170ed7993acf3371c59071338484814d4f1259 100755 --- a/SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/t_radb.py +++ b/SAS/ResourceAssignment/ResourceAssignmentDatabase/tests/t_radb.py @@ -19,7 +19,6 @@ # $Id: $ import unittest -import testing.postgresql import psycopg2 import os from datetime import datetime, timedelta @@ -36,30 +35,15 @@ except ImportError as e: print 'Please install python package mock: sudo pip install mock' exit(3) # special lofar test exit code: skipped test -try: - import testing.postgresql -except ImportError as e: - print str(e) - print 'Please install python package testing.postgresql: sudo pip install testing.postgresql' - exit(3) # special lofar test exit code: skipped test - -from lofar.common.dbcredentials import Credentials -from lofar.sas.resourceassignment.database.radb import RADatabase -from lofar.common.postgres import PostgresListener - - -# Create shared test database for better performance -database_credentials = Credentials() -Postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True) +import radb_common_testing +def setUpModule(): + return radb_common_testing.setUpModule() def tearDownModule(): - # clear cached database at end of tests - logger.info('tearDownModule') - Postgresql.clear_cache() - + return radb_common_testing.tearDownModule() -class ResourceAssignmentDatabaseTest(unittest.TestCase): +class ResourceAssignmentDatabaseTest(radb_common_testing.RADBCommonTest): class test_task: """ A lot of tests involve manipulation of a task (and its corresponding specification) in the RADB. A test task @@ -72,122 +56,6 @@ class ResourceAssignmentDatabaseTest(unittest.TestCase): content = "" cluster = "CEP4" - def setUp(self): - logger.info('setting up test RA database...') - # connect to shared test db - self.postgresql = Postgresql() # fresh db instead of shared one: self.postgresql = testing.postgresql.Postgresql() - - # set up fixtures - # Note: In theory, this can be moved to the PostgresqlFactory call as kwarg 'on_initialized=populatedb' - # ...but for some reason that was much slower than keeping it here. - self.populate_db() - - # update credentials (e.g. 
port changes for each test) - database_credentials.host = self.postgresql.dsn()['host'] - database_credentials.database = self.postgresql.dsn()['database'] - database_credentials.port = self.postgresql.dsn()['port'] - - # connect with useradministration role for tests - self.connection = psycopg2.connect(host=database_credentials.host, - user=database_credentials.user, - password=database_credentials.password, - dbname=database_credentials.database, - port=database_credentials.port) - - # set up PostgresListener for notifications: - self.listener = PostgresListener(host=database_credentials.host, - username=database_credentials.user, - password=database_credentials.password, - database=database_credentials.database, - port=database_credentials.port) - - # set up radb python module - self.radb = RADatabase(database_credentials, log_queries=True) - logger.info('...finished setting up test RA database') - - def tearDown(self): - logger.info('removing test RA database...') - self.connection.close() - # self.Postgresql.clear_cache() # for fresh db during setUp, do instead: - self.postgresql.stop() - - def populate_db(self): - - # connect to db as root - conn = psycopg2.connect(**self.postgresql.dsn()) - cursor = conn.cursor() - - # set credentials to be used during tests - database_credentials.user = 'resourceassignment' - database_credentials.password = 'blabla' # cannot be empty... - - # create user role - # Note: NOSUPERUSER currently raises "permission denied for schema virtual_instrument" - # Maybe we want to sort out user creation and proper permissions in the sql scripts? - query = "CREATE USER %s WITH SUPERUSER PASSWORD '%s'" % ( - database_credentials.user, - database_credentials.password) - cursor.execute(query) - - # populate db tables - # These are applied in given order to set up test db - # Note: cannot use create_and_populate_database.sql since '\i' is not understood by cursor.execute() - sql_basepath = os.environ['LOFARROOT'] + "/share/radb/sql/" - sql_createdb_paths = [sql_basepath + "create_database.sql", - sql_basepath + "/add_resource_allocation_statics.sql", - sql_basepath + "/add_virtual_instrument.sql", - sql_basepath + "/add_notifications.sql", - sql_basepath + "/add_functions_and_triggers.sql" - ] - - for sql_path in sql_createdb_paths: - with open(sql_path) as sql: - cursor.execute(sql.read()) - - cursor.close() - conn.commit() - conn.close() - - def _execute_query(self, query, fetch=False): - cursor = self.connection.cursor() - cursor.execute(query) - ret = None - if fetch: - ret = cursor.fetchall() - cursor.close() - self.connection.commit() - return ret - - # --- tests start here - - - # integrity tests of postgres database itself - # - # Note: These are meant to make sure the setup generally works and all sql scripts were applied. - # I don't see much benefit in full coverage here since it should be all be tested through RADataBase functionality. - # Of course new tests can be added here where db functionality like triggers should be tested separately from the - # Python part of the job. - - # database created? - def test_select_tables_contains_tables_for_each_schema(self): - query = "SELECT table_schema,table_name FROM information_schema.tables" - fetch = self._execute_query(query, fetch=True) - self.assertTrue('resource_allocation' in str(fetch)) - self.assertTrue('resource_monitoring' in str(fetch)) - self.assertTrue('virtual_instrument' in str(fetch)) - - # resource allocation_statics there? 
- def test_select_task_types_contains_obervation(self): - query = "SELECT * FROM resource_allocation.task_type" - fetch = self._execute_query(query, fetch=True) - self.assertTrue('observation' in str(fetch)) - - # virtual instrument there? - def test_select_virtualinstrument_units_contain_rcuboard(self): - query = "SELECT * FROM virtual_instrument.unit" - fetch = self._execute_query(query, fetch=True) - self.assertTrue('rcu_board' in str(fetch)) - def _insert_test_spec(self, starttime='2017-05-10 13:00:00', endtime='2017-05-10 14:00:00', @@ -650,8 +518,8 @@ class ResourceAssignmentDatabaseTest(unittest.TestCase): 'starttime': parser.parse(sample_starttime), 'endtime': parser.parse(sample_endtime), 'cluster': 'CEP4', - 'status': 'conflict', - 'status_id': 335, + 'status': 'approved', + 'status_id': 300, 'type': 'observation', 'type_id': 0, 'mom_id': 0, @@ -2005,8 +1873,13 @@ class ResourceAssignmentDatabaseTest(unittest.TestCase): 'claim_size': 96 } claim2_id = self.radb.insertResourceClaims(task2_id, [claim2], 'foo', 1, 1)[0] - self.radb.updateResourceClaims(claim2_id, status='claimed') + # task1 is partially in the way, so claim2 and task2 should have conflict status + self.assertEqual('conflict', self.radb.getResourceClaim(claim2_id)['status']) + self.assertEqual('conflict', self.radb.getTask(task2_id)['status']) + # updating claim2's status to claimed should not succeed + self.radb.updateResourceClaims(claim2_id, status='claimed') + self.assertEqual('conflict', self.radb.getResourceClaim(claim2_id)['status']) self.assertEqual('conflict', self.radb.getTask(task2_id)['status']) def test_double_claim_should_result_in_conflict_overlap_in_the_past_and_future(self): @@ -2169,6 +2042,441 @@ class ResourceAssignmentDatabaseTest(unittest.TestCase): self.assertEqual('approved', self.radb.getTask(task2_id)['status']) + def test_dwellscheduler_high_low_priority_scenario(self): + """special test case to prove and solve bug: https://support.astron.nl/jira/browse/SW-426 + """ + #start with clean database + for spec in self.radb.getSpecifications(): + self.radb.deleteSpecification(spec['id']) # cascades into tasks and claims + + ###################################################################################### + # setup phase, create tasks and claims. should just work. + # we replay a responsive telescope trigger event, as performed by the dwellscheduler. + # We have two tasks, one with high prio, and one with low. + # the high prio tasks will have a conflict with the low one. + ###################################################################################### + + base_time = datetime.utcnow() + # round to current full hour (for readability in logging) + base_time = base_time - timedelta(minutes=base_time.minute, seconds=base_time.second, microseconds=base_time.microsecond) + + RESOURCE_ID = 252 + resource_max_cap = self.radb.get_resource_claimable_capacity(RESOURCE_ID, base_time, base_time) + + # insert the 'low prio' spec, task... + spec_task_low = self.radb.insertSpecificationAndTask(1, 1, 'prescheduled', 'observation', + base_time + timedelta(minutes=5), + base_time + timedelta(minutes=10), 'foo', 'CEP4') + task_low_id = spec_task_low['task_id'] + task_low = self.radb.getTask(task_low_id) + + + # the dwellscheduler inserts the claim(s)... + self.radb.insertResourceClaims(task_low_id, [{ 'resource_id': RESOURCE_ID, + 'starttime': task_low['starttime'], + 'endtime': task_low['endtime'], + 'status': 'tentative', + 'claim_size': resource_max_cap }], + 'user', 1) + + # ... 
and then the dwellscheduler sets the claims status to claimed... + self.radb.updateResourceClaims(where_task_ids=[task_low_id], status="claimed") + + logger.info("task_low's claims: %s", self.radb.getResourceClaims(task_ids=task_low_id)) + + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_low_id))) + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_low_id, status='claimed'))) + + # ... and updates the spec's start and endtime to the already specified start and endtime + # (why? not needed, but should not do any harm either) + self.radb.updateSpecification(task_low['specification_id'], + starttime=task_low['starttime'], + endtime=task_low['endtime']) + + # finally make the task scheduled. Should still work. + self.radb.updateTask(task_low_id, task_status='scheduled') + + # so far so good. Everything should be normal and fine. Let's check. + self.assertEqual('scheduled', self.radb.getTask(id=task_low_id)['status']) + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_low_id, status='claimed'))) + + # now insert a second task, the so-called high priority task, + # overlapping with the beginning of task_low + # so, the dwellscheduler finds task_low in task_high's highway + # so, task_low is aborted by the dwellscheduler (later in the code). + spec_task_high1 = self.radb.insertSpecificationAndTask(2, 2, 'approved', 'observation', + base_time, + base_time + timedelta(minutes=7), 'foo', 'CEP4') + task_high1_id = spec_task_high1['task_id'] + task_high1 = self.radb.getTask(task_high1_id) + + # the dwellscheduler inserts the claim(s)... + self.radb.insertResourceClaims(task_high1_id, [{ 'resource_id': RESOURCE_ID, + 'starttime': task_high1['starttime'], + 'endtime': task_high1['endtime'], + 'status': 'tentative', + 'claim_size': resource_max_cap }], + 'user', 1) + + logger.info("task_high1's claims: %s", self.radb.getResourceClaims(task_ids=task_high1_id)) + + # we expect task_high1 to have one claim in conflict (with the claim of task_low) + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_high1_id))) + self.assertEqual(0, len(self.radb.getResourceClaims(task_ids=task_high1_id, status='claimed'))) + self.assertEqual(1, len(self.radb.getResourceClaims(task_ids=task_high1_id, status='conflict'))) + + claim_in_conflict = self.radb.getResourceClaims(task_ids=task_high1_id, status='conflict')[0] + overlapping_claims = self.radb.get_overlapping_claims(claim_id=claim_in_conflict['id']) + logger.info('claim_in_conflict: %s', claim_in_conflict) + logger.info('overlapping_claims: %s', overlapping_claims) + self.assertEqual(1, len(overlapping_claims)) + self.assertEqual(task_low_id, overlapping_claims[0]['task_id']) + + ######################################################################## + # end of setup phase, now let's (try to) reproduce the bug... + # the dwellscheduler tries to abort task_low, to make room for task_high + # this caused an erroneous database exception on the production system + # but strangely enough we cannot repeat it here, + # even though we follow the same code path. + # + # This leads us to the conclusion that there was a strange set of + # circumstances in the data in the resource_usage table causing the bug in production.
+ # + # While examining the bug we did discover some errors in the sql code, + # for which we added additional tests: + # - test_task_releases_claims_when_set_to_approved + # - test_task_in_conflict_releases_claimed_claims + # - test_duplicate_full_claims_on_one_resource + # - test_task_and_claim_with_zero_duration + # - test_are_claims_in_conflict_released_by_removing_conclict_causing_claims + # + # Even though this test could not reproduce the error as it happened on production, + # we'll keep it for future reference, and as future proof that the code still works. + # + ######################################################################## + + with mock.patch('lofar.sas.resourceassignment.database.radb.logger') as mocked_logger: + self.radb.updateTaskAndResourceClaims(task_id=task_low_id, task_status='aborted', + endtime=task_low['starttime']) # yes, the endtime is set to the starttime + + # on production the above call produced the following log line: + # 2018-06-29 09:46:16,240 ERROR Rolling back query='UPDATE resource_allocation.resource_claim SET (endtime) = (2018-06-29 11:59:17) WHERE task_id = 148052' due to error: 'duplicate key value violates unique constraint "usage_unique" + # but unfortunately this error is not reproduced here, + # the only thing we can test for is if a rollback occurs + + # test if there was a log line containing the database log message for 'claim starttime >= endtime' + self.assertTrue(len([ca for ca in mocked_logger.error.call_args_list + if 'Rolling back' in ca[0][0] + and 'claim starttime >= endtime' in ca[0][0]]) > 0) + + + def test_task_releases_claims_when_set_to_approved(self): + now = datetime.utcnow() + now -= timedelta(minutes=now.minute, seconds=now.second, microseconds=now.microsecond) # round to full hour + + result = self.radb.insertSpecificationAndTask(0, 0, 'approved', 'observation', + now, now+timedelta(hours=1), 'foo', 'CEP4') + self.assertTrue(result['inserted']) + self.assertIsNotNone(result['task_id']) + task_id = result['task_id'] + task = self.radb.getTask(task_id) + self.assertEqual('approved', task['status']) + + # select first (arbitrary) resource + resource = self.radb.getResources(include_availability=True)[0] + + self.radb.insertResourceClaim(resource['id'], task_id, task['starttime'], task['endtime'], + 0.5*resource['available_capacity'], 'foo', 1) + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + self.assertEqual(1, len(tentative_claims)) + + # set status to claimed + self.radb.updateResourceClaims(where_task_ids=task_id, status='claimed') + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + claimed_claims = self.radb.getResourceClaims(task_ids=task_id, status='claimed') + self.assertEqual(0, len(tentative_claims)) + self.assertEqual(1, len(claimed_claims)) + + # when setting the task to prescheduled and back to approved, all claimed claims should be released + self.radb.updateTask(task_id=task_id, task_status='prescheduled') + self.radb.updateTask(task_id=task_id, task_status='approved') + task = self.radb.getTask(task_id) + self.assertEqual('approved', task['status']) + + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + claimed_claims = self.radb.getResourceClaims(task_ids=task_id, status='claimed') + self.assertEqual(1, len(tentative_claims)) + self.assertEqual(0, len(claimed_claims)) + + + def test_task_in_conflict_releases_claimed_claims(self): + """tests whether a task with multiple claims releases the
claimed claims when the task goes to conflict. + This is wanted behaviour, because when a single claim goes to conflict, then the task cannot be scheduled. + So, it makes sense to release the other already claimed claims for other tasks. + """ + now = datetime.utcnow() + now -= timedelta(minutes=now.minute, seconds=now.second, microseconds=now.microsecond) # round to full hour + + result = self.radb.insertSpecificationAndTask(0, 0, 'approved', 'observation', + now, now+timedelta(hours=1), 'foo', 'CEP4') + self.assertTrue(result['inserted']) + self.assertIsNotNone(result['task_id']) + task_id = result['task_id'] + task = self.radb.getTask(task_id) + self.assertEqual('approved', task['status']) + + # select first two (arbitrary) resources + resources = self.radb.getResources(include_availability=True) + resource1 = resources[0] + resource2 = resources[1] + + # and insert a claim for each resource. + # one claim should fit and be set to claimed... + self.radb.insertResourceClaim(resource1['id'], task_id, task['starttime'], task['endtime'], + 0.5*resource1['available_capacity'], 'foo', 1) + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + self.assertEqual(1, len(tentative_claims)) + + # set status to claimed + self.radb.updateResourceClaims(where_task_ids=task_id, status='claimed') + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + conflict_claims = self.radb.getResourceClaims(task_ids=task_id, status='conflict') + claimed_claims = self.radb.getResourceClaims(task_ids=task_id, status='claimed') + self.assertEqual(0, len(tentative_claims)) + self.assertEqual(0, len(conflict_claims)) + self.assertEqual(1, len(claimed_claims)) + + # the other claim should not fit and cause a conflict... + self.radb.insertResourceClaim(resource2['id'], task_id, task['starttime'], task['endtime'], + 2.0*resource2['available_capacity'], 'foo', 1) + + # ... result should be that the task also goes to conflict ... + task = self.radb.getTask(task_id) + self.assertEqual('conflict', task['status']) + + # ... 
and that all the task's claimed claims should be released + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + conflict_claims = self.radb.getResourceClaims(task_ids=task_id, status='conflict') + claimed_claims = self.radb.getResourceClaims(task_ids=task_id, status='claimed') + self.assertEqual(1, len(tentative_claims)) + self.assertEqual(1, len(conflict_claims)) + self.assertEqual(0, len(claimed_claims)) + conflict_claim = conflict_claims[0] + + # a user/operator action could be to set the task back to approved + # all claimed claims which were already set back to tentative should still be tentative + # and claims in conflict should remain in conflict + self.radb.updateTask(task_id=task_id, task_status='approved') + task = self.radb.getTask(task_id) + self.assertEqual('approved', task['status']) + + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + conflict_claims = self.radb.getResourceClaims(task_ids=task_id, status='conflict') + claimed_claims = self.radb.getResourceClaims(task_ids=task_id, status='claimed') + self.assertEqual(1, len(tentative_claims)) + self.assertEqual(1, len(conflict_claims)) + self.assertEqual(0, len(claimed_claims)) + self.assertEqual(conflict_claim['id'], conflict_claims[0]['id']) + + def test_duplicate_full_claims_on_one_resource(self): + """special test case to prove and solve bug: https://support.astron.nl/jira/browse/SW-426 + We found out that inserting two duplicate claims for one resource does not result in the two claims + having the conflict status, even though at least one of them should have it. + """ + # start with clean database + for spec in self.radb.getSpecifications(): + self.radb.deleteSpecification(spec['id']) # cascades into tasks and claims + + now = datetime.utcnow() + # round to next full hour (for readability in logging) + now = now - timedelta(minutes=now.minute, seconds=now.second, microseconds=now.microsecond) + now = now + timedelta(hours=1) + + spec_task = self.radb.insertSpecificationAndTask(0, 0, 'approved', 'observation', + now, now + timedelta(minutes=10), + 'foo', 'CEP4') + + task_id = spec_task['task_id'] + task = self.radb.getTask(task_id) + + RESOURCE_ID = 252 + resource_max_cap = self.radb.get_resource_claimable_capacity(RESOURCE_ID, now, now) + + # create one claim, with claim_size of max capacity + claim = {'resource_id': RESOURCE_ID, + 'starttime': task['starttime'], + 'endtime': task['endtime'], + 'status': 'tentative', + 'claim_size': resource_max_cap} + + # insert the same claim twice, so two times the maxcap should not fit in total, + # but should fit if only one is claimed + self.radb.insertResourceClaims(task_id, [claim, claim], 'user', 1) + + # get the claims from the db, and check if there are 2, and check their status. + # Both should have tentative status, and not conflict status, + # because we did not claim anything yet. + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + conflict_claims = self.radb.getResourceClaims(task_ids=task_id, status='conflict') + self.assertEqual(2, len(tentative_claims)) + self.assertEqual(0, len(conflict_claims)) + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + + # let's try to claim them both in one call. 
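+ # (note: where_task_ids selects all claims belonging to the given task(s), so this single call updates both claims)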
+ self.radb.updateResourceClaims(where_task_ids=[task_id], status='claimed') + + # Get the claims again from the db, and check if there are 2: + # one was successfully claimed, but put back to tentative, + # because for the other there was no room, so it should be in conflict. + # As a result of the claim in conflict, the task is in conflict as well. + # And as a result of the task in conflict, all claimed claims are released and put back to tentative. + # And because the claimed claim was put back to tentative, this frees up room for the claim in conflict, + # which should not be in conflict anymore, but also tentative. + # (Yes, this is quite confusing, but correct.) + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + claimed_claims = self.radb.getResourceClaims(task_ids=task_id, status='claimed') + conflict_claims = self.radb.getResourceClaims(task_ids=task_id, status='conflict') + self.assertEqual(2, len(tentative_claims)) + self.assertEqual(0, len(claimed_claims)) + self.assertEqual(0, len(conflict_claims)) + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + + # let's try to claim only one. + # One should fit, but as a result the other won't fit anymore and will go to conflict, + # which causes the task to go to conflict, which causes the claimed claim to be released, + # which frees up space for the other, which will be put back to tentative after being in conflict. + # (Yes, this is also quite confusing, but correct.) + self.radb.updateResourceClaim(tentative_claims[0]['id'], status='claimed') + tentative_claims = self.radb.getResourceClaims(task_ids=task_id, status='tentative') + claimed_claims = self.radb.getResourceClaims(task_ids=task_id, status='claimed') + conflict_claims = self.radb.getResourceClaims(task_ids=task_id, status='conflict') + self.assertEqual(2, len(tentative_claims)) + self.assertEqual(0, len(claimed_claims)) + self.assertEqual(0, len(conflict_claims)) + self.assertEqual('approved', self.radb.getTask(task_id)['status']) + + + def test_task_and_claim_with_zero_duration(self): + """claims which claim a resource and release it at the same moment are not allowed (it's a paradox). + """ + # start with clean database + for spec in self.radb.getSpecifications(): + self.radb.deleteSpecification(spec['id']) # cascades into tasks and claims + + now = datetime.utcnow() + + spec_task = self.radb.insertSpecificationAndTask(0, 0, 'approved', 'observation', + now, now, # tasks can have zero duration + 'foo', 'CEP4') + + task_id = spec_task['task_id'] + task = self.radb.getTask(task_id) + self.assertIsNotNone(task) + self.assertEqual(now, task['starttime']) + self.assertEqual(now, task['endtime']) + + with mock.patch('lofar.sas.resourceassignment.database.radb.logger') as mocked_logger: + RESOURCE_ID = 252 + inserted_claim_id = self.radb.insertResourceClaim(RESOURCE_ID, task_id, + now, now, # claims cannot have zero duration, test that! + 1, 'foo', 1) + self.assertIsNone(inserted_claim_id) + mocked_logger.error.assert_any_call('One or more claims could not be inserted. 
Rolling back.') + # test if there was a log line containing the database log message for 'claim starttime >= endtime' + self.assertTrue( + len([ca for ca in mocked_logger.error.call_args_list if 'claim starttime >= endtime' in ca[0][0]]) > 0) + + with mock.patch('lofar.sas.resourceassignment.database.radb.logger') as mocked_logger: + # try again, with multi-claim insert + inserted_claim_ids = self.radb.insertResourceClaims(task_id, [{'resource_id': RESOURCE_ID, + 'starttime': now, + 'endtime': now, + 'status': 'tentative', + 'claim_size': 1}], + 'foo', 1) + self.assertEqual([], inserted_claim_ids) + mocked_logger.error.assert_any_call('One or more claims could not be inserted. Rolling back.') + # test if there was a log line containing the database log message for 'claim starttime >= endtime' + self.assertTrue( + len([ca for ca in mocked_logger.error.call_args_list if 'claim starttime >= endtime' in ca[0][0]]) > 0) + + def test_are_claims_in_conflict_released_by_removing_conflict_causing_claims(self): + """test whether a claim which is in conflict is automatically put back to tentative when the conflict-causing claim is released. + """ + # start with clean database + for spec in self.radb.getSpecifications(): + self.radb.deleteSpecification(spec['id']) # cascades into tasks and claims + + base_time = datetime.utcnow() + # round to current full hour (for readability in logging) + base_time = base_time - timedelta(minutes=base_time.minute, seconds=base_time.second, + microseconds=base_time.microsecond) + + RESOURCE_ID = 252 + resource_max_cap = self.radb.get_resource_claimable_capacity(RESOURCE_ID, base_time, base_time) + + # insert a first task and full claim on a resource... + spec_task1 = self.radb.insertSpecificationAndTask(0, 0, 'approved', 'observation', + base_time + timedelta(minutes=+0), + base_time + timedelta(minutes=+10), 'foo', 'CEP4') + self.assertTrue(spec_task1['inserted']) + task1_id = spec_task1['task_id'] + task1 = self.radb.getTask(task1_id) + self.assertEqual('approved', task1['status']) + + claim1_id = self.radb.insertResourceClaim(RESOURCE_ID, task1_id, + task1['starttime'], task1['endtime'], + resource_max_cap, 'foo', 1) + # claim it, and check it. Should succeed. + self.radb.updateResourceClaim(claim1_id, status='claimed') + self.assertEqual('claimed', self.radb.getResourceClaim(claim1_id)['status']) + + # insert second (partially overlapping) task and claim on same resource, which we expect to get a conflict status + # because the first claim already claims the resource fully. 
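+ # (note: task2 runs from +5 to +15 minutes, so it overlaps task1 during minutes 5-10, where the two full-size claims together exceed the resource's claimable capacity)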
+ spec_task2 = self.radb.insertSpecificationAndTask(1, 1, 'approved', 'observation', + base_time + timedelta(minutes=+5), + base_time + timedelta(minutes=+15), 'foo', 'CEP4') + self.assertTrue(spec_task2['inserted']) + task2_id = spec_task2['task_id'] + task2 = self.radb.getTask(task2_id) + self.assertEqual('approved', task2['status']) + + claim2_id = self.radb.insertResourceClaim(RESOURCE_ID, task2_id, + task2['starttime'], task2['endtime'], + resource_max_cap, 'foo', 1) + self.assertEqual('conflict', self.radb.getResourceClaim(claim2_id)['status']) + self.assertEqual('conflict', self.radb.getTask(task2_id)['status']) + + # now let's see if releasing claim1 results in claim2 not having conflict state anymore + self.radb.updateResourceClaim(claim1_id, status='tentative') + self.assertEqual('tentative', self.radb.getResourceClaim(claim1_id)['status']) + self.assertEqual('tentative', self.radb.getResourceClaim(claim2_id)['status']) + self.assertEqual('approved', self.radb.getTask(task1_id)['status']) + self.assertEqual('approved', self.radb.getTask(task2_id)['status']) + + # claim claim1 again, and check it. Should succeed. + # and claim2 should go to conflict again. + self.radb.updateResourceClaim(claim1_id, status='claimed') + self.assertEqual('claimed', self.radb.getResourceClaim(claim1_id)['status']) + self.assertEqual('conflict', self.radb.getResourceClaim(claim2_id)['status']) + self.assertEqual('approved', self.radb.getTask(task1_id)['status']) + self.assertEqual('conflict', self.radb.getTask(task2_id)['status']) + + # this time, resolve the conflict by shifting the endtime of claim1 + self.radb.updateResourceClaim(claim1_id, endtime=task2['starttime']) + self.assertEqual('claimed', self.radb.getResourceClaim(claim1_id)['status']) + self.assertEqual('tentative', self.radb.getResourceClaim(claim2_id)['status']) + self.assertEqual('approved', self.radb.getTask(task1_id)['status']) + self.assertEqual('approved', self.radb.getTask(task2_id)['status']) + + # and finally, we should be able to claim claim2 as well + self.radb.updateResourceClaim(claim2_id, status='claimed') + self.assertEqual('claimed', self.radb.getResourceClaim(claim1_id)['status']) + self.assertEqual('claimed', self.radb.getResourceClaim(claim2_id)['status']) + self.assertEqual('approved', self.radb.getTask(task1_id)['status']) + self.assertEqual('approved', self.radb.getTask(task2_id)['status']) + if __name__ == "__main__": os.environ['TZ'] = 'UTC' diff --git a/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/static/app/controllers/cleanupcontroller.js b/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/static/app/controllers/cleanupcontroller.js index e973db2bd649e778de1b06d5b526ebe0ad28e213..d75abb67db0961c3af0ac15fa9cb8ca3f4795341 100644 --- a/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/static/app/controllers/cleanupcontroller.js +++ b/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/static/app/controllers/cleanupcontroller.js @@ -274,6 +274,7 @@ cleanupControllerMod.controller('CleanupController', ['$scope', '$uibModal', '$m </p>\ </div>\ <div class="modal-footer">\ + <span style="margin-right:8px">1KB=1000bytes</span>\ <button class="btn btn-primary glyphicon glyphicon-level-up" type="button" ng-click="up()" title="Up one level" ng-if="watchedObjectType!=\'projects\'"></button>\ <button class="btn btn-primary" type="button" autofocus ng-click="ok()">OK</button>\ </div>', diff --git a/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/webservice.py 
b/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/webservice.py index 461f8860c5130c9854abff54e09ac904a6dd4bf0..7be3f99d59fda6da50d64ab33142d99c35197270 100755 --- a/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/webservice.py +++ b/SAS/ResourceAssignment/ResourceAssignmentEditor/lib/webservice.py @@ -1051,7 +1051,7 @@ def main(): global curpc curpc = CleanupRPC(busname=options.cleanup_busname, servicename=options.cleanup_servicename, broker=options.broker) global sqrpc - sqrpc = StorageQueryRPC(busname=options.storagequery_busname, servicename=options.storagequery_servicename, broker=options.broker) + sqrpc = StorageQueryRPC(busname=options.storagequery_busname, servicename=options.storagequery_servicename, timeout=10, broker=options.broker) global momqueryrpc momqueryrpc = MoMQueryRPC(busname=options.mom_query_busname, servicename=options.mom_query_servicename, timeout=10, broker=options.broker) global changeshandler diff --git a/SAS/Scheduler/src/Controller.cpp b/SAS/Scheduler/src/Controller.cpp index 2342513510d8c2cece1b72ef98bf05334b534abf..d3471434a6a213253c43e82a38fb3574db952965 100644 --- a/SAS/Scheduler/src/Controller.cpp +++ b/SAS/Scheduler/src/Controller.cpp @@ -3961,8 +3961,7 @@ std::pair<unscheduled_reasons, QString> Controller::setInputFilesForPipeline(Tas for (dataProductTypes dpType = _BEGIN_DATA_PRODUCTS_ENUM_; dpType < _END_DATA_PRODUCTS_ENUM_-1; dpType = dataProductTypes(dpType + 1)) { if (pPipe->storage()->isInputDataProduktEnabled(dpType)) { // is this input data product type enabled? TaskStorage::inputDataProduct &dp = inputDataProducts[dpType]; // also creates the record in the inputDataProducts map if it doesn't exist yet - resetSkipVector = (dp.skip.empty() /*|| (dp.skip.size() != (unsigned)dp.filenames.size())*/); // the skip vector should only be synchronized with the predecessor skip vector the first time (i.e. when it is not yet set) - storageVector storageVec; + resetSkipVector = (dp.skip.empty() ); // the skip vector should only be synchronized with the predecessor skip vector the first time (i.e. when it is not yet set) for (QStringList::const_iterator identit = dp.identifications.begin(); identit != dp.identifications.end(); ++identit) { foundit = false; @@ -3974,9 +3973,7 @@ std::pair<unscheduled_reasons, QString> Controller::setInputFilesForPipeline(Tas if (pit != pred_output.end()) { idxIt = pit->second.identifications.indexOf(*identit); if (idxIt != -1) { // found? 
- storageVector predecessorStorageVec(predStorage->getStorageLocations(dpType)); - unsigned psz(predecessorStorageVec.size()); - if (psz != 0 || (*predit)->getOutputDataproductCluster() == "CEP4") { + if (true /*Used to contain code for CEP2/CEP4 checks*/) { // copy the filenames and locations pointed to by this identification to the input data product list of this task if (pit->second.filenames.size() == pit->second.locations.size()) { if ((dpType == DP_CORRELATED_UV) || (dpType == DP_COHERENT_STOKES) || (dpType == DP_INCOHERENT_STOKES)) { // for these data product types copy only the files that have the corresponding SAP @@ -3990,9 +3987,6 @@ std::pair<unscheduled_reasons, QString> Controller::setInputFilesForPipeline(Tas if (filename.contains(sapstr)) { filenames.push_back(filename); locations.push_back(pit->second.locations.at(i)); - if ((*predit)->getOutputDataproductCluster() != "CEP4") { - storageVec.push_back(predecessorStorageVec.at(i % psz)); - } if (resetSkipVector) { if (SyncSkipWithPredecessor) { skipVec.push_back(pit->second.skip.at(i)); @@ -4007,9 +4001,6 @@ std::pair<unscheduled_reasons, QString> Controller::setInputFilesForPipeline(Tas else { // no specific SAP specified in identification, just copy all files filenames += pit->second.filenames; locations += pit->second.locations; - if ((*predit)->getOutputDataproductCluster() != "CEP4") { - storageVec.insert(storageVec.end(), predecessorStorageVec.begin(), predecessorStorageVec.end()); - } if (resetSkipVector) { if (pit->second.skip.size() == (unsigned)pit->second.filenames.size()) { skipVec.insert(skipVec.end(), pit->second.skip.begin(), pit->second.skip.end()); @@ -4023,9 +4014,6 @@ std::pair<unscheduled_reasons, QString> Controller::setInputFilesForPipeline(Tas else { // for all other data product types copy all files filenames += pit->second.filenames; locations += pit->second.locations; - if ((*predit)->getOutputDataproductCluster() != "CEP4") { - storageVec.insert(storageVec.end(), predecessorStorageVec.begin(), predecessorStorageVec.end()); - } if (resetSkipVector) { if (pit->second.skip.size() == (unsigned)pit->second.filenames.size()) { skipVec.insert(skipVec.end(), pit->second.skip.begin(), pit->second.skip.end()); @@ -4066,8 +4054,6 @@ std::pair<unscheduled_reasons, QString> Controller::setInputFilesForPipeline(Tas return error; // identification not found } } - // set storage location IDs equal to the accumulation of the predecessor output storage vec's - pPipe->storage()->addInputStorageLocations(dpType, storageVec); dp.filenames = filenames; dp.locations = locations; if (!resetSkipVector) { @@ -4544,7 +4530,7 @@ bool Controller::checkEarlyTasksStatus(void) { int treeID; for (std::vector<Task *>::const_iterator it = tasks.begin(); it != tasks.end(); ++it) { if ((*it)->getScheduledStart() <= now()) { - if (((*it)->getOutputDataproductCluster() == "CEP4") && (*it)->isPipeline()) { + if ( (*it)->isPipeline()) { continue; //Pipelines on CEP4: we don't care as SLURM sorts it out. 
} treeID = (*it)->getSASTreeID(); @@ -4587,14 +4573,14 @@ int Controller::assignResources(bool showResult) { } if (retVal == 0) { - int ret = refreshStorageNodesInfo(); + int ret = 0; //Data Monitor does not exist any more refreshStorageNodesInfo(); // ret: // 0: refresh ok // 1: no connection to data monitor don't continue // 2: user clicked cancel when asked to connect to the data monitor if (ret == 0) { // refresh ok - if (!assignStorageResources()) { + if (false /*Used to contain code for CEP2/CEP4 checks*/) { retVal = 3; // storage resource assignment conflicts detected } } @@ -4615,8 +4601,7 @@ int Controller::assignResources(bool showResult) { } break; case 1: -// QMessageBox::critical(0, tr("No connection to Data Monitor"), -// tr("Could not connect to the Data Monitor.\nPlease check Data Monitor connection settings")); + //Used to contain DataMonitor code break; case 2: QMessageBox::warning(gui,tr("Resource assignment conflicts detected"),tr("Some task(s) are scheduled in the past!\nStart time needs to be at least 3 minutes after now")); @@ -4767,7 +4752,7 @@ bool Controller::calculateDataSlots(void) { } -bool Controller::assignManualStorageToTask(Task *pTask) { +/*bool Controller::assignManualStorageToTask(Task *pTask) { if (pTask->hasStorage()) { TaskStorage *taskStorage(pTask->storage()); // check and when possible assign the task's manually requested resources @@ -4807,10 +4792,10 @@ bool Controller::assignManualStorageToTask(Task *pTask) { } } else return true; -} +}*/ -bool Controller::assignStorageToTask(Task *pTask) { +/*bool Controller::assignStorageToTask(Task *pTask) { bool bResult(true); if (pTask->hasStorage()) { TaskStorage *taskStorage(pTask->storage()); @@ -5132,9 +5117,9 @@ bool Controller::assignStorageToTask(Task *pTask) { } } return bResult; -} +}*/ -bool Controller::assignGroupedStorage(void) { // not for manual assignment of storage +/*bool Controller::assignGroupedStorage(void) { // not for manual assignment of storage bool bResult(true); std::map<unsigned, std::vector<Task *> > groupedTasks = data.getGroupedTasks(Task::PRESCHEDULED); @@ -5613,10 +5598,10 @@ bool Controller::assignGroupedStorage(void) { // not for manual assignment of st } return bResult; -} +}*/ -bool Controller::assignStorageResources(Task *task) { +/*bool Controller::assignStorageResources(Task *task) { bool bResult(true); // if (refreshStorageNodesInfo()) { if (task) { @@ -5688,5 +5673,5 @@ bool Controller::assignStorageResources(Task *task) { gui->updateTaskDialog(); // update task dialog (needed for the storage tree to show the conflict info) return bResult; -} +}*/ diff --git a/SAS/Scheduler/src/Controller.h b/SAS/Scheduler/src/Controller.h index 52185f026fe1da177a551353aeb271fc3821c1cf..b33812173b73bf5849a4dfe66356712998b4ed4b 100644 --- a/SAS/Scheduler/src/Controller.h +++ b/SAS/Scheduler/src/Controller.h @@ -190,7 +190,7 @@ public: #endif // resource assignment - bool assignStorageResources(Task *task = 0); +// bool assignStorageResources(Task *task = 0); bool calculateDataSlots(void); @@ -223,10 +223,10 @@ private: void updateDeletedTasksUndo(void); bool askOverWriteExistingTask(bool &overwrite, bool &forAll, unsigned taskID, const QString &taskName); // bool dataMonitorInitRequired(void); // checks to see if data monitor init is required - bool assignManualStorageToTask(Task *pTask); - bool assignGroupedStorage(void); +// bool assignManualStorageToTask(Task *pTask); +// bool assignGroupedStorage(void); // bool assignMinimumStorageToTask(Task *pTask); - bool 
assignStorageToTask(Task *pTask); +// bool assignStorageToTask(Task *pTask); void rescheduleTask(unsigned task_id, AstroDateTime new_start); // checkEarlyTasksStatus: checks the current status of too early tasks in the SAS database and updates the tasks in the scheduler if the status was changed in SAS // returns false if any too early task was found which is still (PRE)SCHEDULED diff --git a/SAS/Scheduler/src/SASConnection.cpp b/SAS/Scheduler/src/SASConnection.cpp index 66edc07fc541db8edb5932adbd0f5a806c52c7f9..395f032285d9bce5a3ccdb90982fb15f1c8b50b4 100644 --- a/SAS/Scheduler/src/SASConnection.cpp +++ b/SAS/Scheduler/src/SASConnection.cpp @@ -2576,131 +2576,13 @@ bool SASConnection::saveStationSettings(int treeID, const StationTask &task, con bool SASConnection::saveInputStorageSettings(int treeID, const Task &task) { bool bResult(true); - if (task.getOutputDataproductCluster() == "CEP4") { //For CEP4 we're skipping this. /AR - return bResult; - } - const TaskStorage *task_storage(task.storage()); - if (task_storage) { - const std::map<dataProductTypes, TaskStorage::inputDataProduct> &inputDataProducts(task_storage->getInputDataProducts()); - std::map<dataProductTypes, TaskStorage::inputDataProduct>::const_iterator flit; - QString locationsStr, filenamesStr, skipVectorStr, enabledStr; - - for (dataProductTypes dp = _BEGIN_DATA_PRODUCTS_ENUM_; dp < _END_DATA_PRODUCTS_ENUM_-1; dp = dataProductTypes(dp + 1)) { - flit = inputDataProducts.find(dp); - enabledStr = task_storage->isInputDataProduktEnabled(dp) ? "true" : "false"; - if (flit != inputDataProducts.end()) { - locationsStr = "[" + flit->second.locations.join(",") + "]"; - filenamesStr = "[" + flit->second.filenames.join(",") + "]"; - skipVectorStr = boolVector2StringVector(flit->second.skip); - switch (dp) { - case DP_COHERENT_STOKES: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.locations", locationsStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.filenames", filenamesStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.skip", skipVectorStr)) bResult = false; - break; - case DP_INCOHERENT_STOKES: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.locations", locationsStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.filenames", filenamesStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.skip", skipVectorStr)) bResult = false; - break; - case DP_CORRELATED_UV: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.locations", locationsStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.filenames", filenamesStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.skip", skipVectorStr)) bResult = false; - break; - case DP_INSTRUMENT_MODEL: - if (!setNodeValue(treeID, 
"LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.locations", locationsStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.filenames", filenamesStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.skip", skipVectorStr)) bResult = false; - break; - case DP_SKY_IMAGE: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.locations", locationsStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.filenames", filenamesStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.skip", skipVectorStr)) bResult = false; - break; - default: - break; - } - } - else { - switch (dp) { - case DP_COHERENT_STOKES: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.locations", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.filenames", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_CoherentStokes.skip", "[]")) bResult = false; - break; - case DP_INCOHERENT_STOKES: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.locations", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.filenames", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_IncoherentStokes.skip", "[]")) bResult = false; - break; - case DP_CORRELATED_UV: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.locations", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.filenames", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_Correlated.skip", "[]")) bResult = false; - break; - case DP_INSTRUMENT_MODEL: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.locations", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.filenames", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_InstrumentModel.skip", "[]")) bResult = false; - break; - case DP_SKY_IMAGE: - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.enabled", enabledStr)) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.locations", "[]")) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.filenames", "[]")) bResult = 
false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Input_SkyImage.skip", "[]")) bResult = false; - break; - default: - break; - } - } - } - } - return bResult; + //For CEP4 we're skipping this. /AR + return bResult; } bool SASConnection::saveOutputStorageSettings(int treeID, const Task &task, const task_diff *diff) { bool bResult(true); - if (task.getOutputDataproductCluster() == "CEP4") { //For CEP4 we're skipping this. /AR - return bResult; - } - const TaskStorage *task_storage(task.storage()); - if (task_storage) { - QString trueStr("true"), falseStr("false"); - // which output data to generate - const TaskStorage::enableDataProdukts &odp(task_storage->getOutputDataProductsEnabled()); - if (diff) { - if (diff->output_data_types) { - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_Correlated.enabled", (odp.correlated ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_CoherentStokes.enabled", (odp.coherentStokes ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_IncoherentStokes.enabled", (odp.incoherentStokes ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_InstrumentModel.enabled", (odp.instrumentModel ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_Pulsar.enabled", (odp.pulsar ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_SkyImage.enabled", (odp.skyImage ? trueStr : falseStr))) bResult = false; - } - if (diff->output_data_products) { - bResult &= saveOutputDataProducts(treeID, task); - } - } - else { - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_Correlated.enabled", (odp.correlated ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_CoherentStokes.enabled", (odp.coherentStokes ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_IncoherentStokes.enabled", (odp.incoherentStokes ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_InstrumentModel.enabled", (odp.instrumentModel ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_Pulsar.enabled", (odp.pulsar ? trueStr : falseStr))) bResult = false; - if (!setNodeValue(treeID, "LOFAR.ObsSW.Observation.DataProducts.Output_SkyImage.enabled", (odp.skyImage ? trueStr : falseStr))) bResult = false; - bResult &= saveOutputDataProducts(treeID, task); - } - } + //For CEP4 we're skipping this. 
/AR return bResult; } diff --git a/SAS/Scheduler/src/Storage.cpp b/SAS/Scheduler/src/Storage.cpp index b4e3c887a85fb8d8a2d6715861c7f4c00f2cca37..3409b95f0afb2bf3e622b29734c9db9121dfb160 100644 --- a/SAS/Scheduler/src/Storage.cpp +++ b/SAS/Scheduler/src/Storage.cpp @@ -87,186 +87,6 @@ void Storage::initStorage(void) { } } -std::vector<storageResult> Storage::addStorageToTask(Task *pTask, const storageMap &storageLocations) { - if (pTask->hasStorage()) { - TaskStorage *task_storage = pTask->storage(); - storageNodesMap::iterator sit; - const AstroDateTime &start = pTask->getScheduledStart(); - const AstroDateTime &end = pTask->getScheduledEnd(); - unsigned durationSec = pTask->getDuration().totalSeconds(); - const dataFileMap &dataFiles = task_storage->getOutputFileSizes(); // contains the number of files and the size of an individual file for each output data product of the task - double claimSize, bandWidth; - // check node bandwidth requirements (CAUTION: multiple data product types could use the same storage node - // search for dataproducts that use the same storage node - std::map<int, double> totalBWPerNodeMap; - double dpBWPerLocation; - task_conflict res; - for (storageMap::const_iterator it = storageLocations.begin(); it != storageLocations.end(); ++it) { - dataFileMap::const_iterator dit = dataFiles.find(it->first); - if (dit != dataFiles.end()) { - // total bandwidth (kbit/sec) required from a storage node to receive the required number of files from the data product - // ceil(total_number_files / number_of_raids_used) * filesize [kbyte] / duration [seconds] * 8 - // std::cout << "task:" << pTask->getID() << ", dataproduct:" << DATA_PRODUCTS[dit->first] << std::endl - // << "nr of files:" << dit->second.second << ", size per file:" << dit->second.first << "number of locations:" << it->second.size() << std::endl - // << "BW per location:" << ceil((double)dit->second.second / it->second.size()) * dit->second.first / durationSec * 8 << std::endl; - dpBWPerLocation = ceil((double)dit->second.second / it->second.size()) * dit->second.first / durationSec * 8; - for (storageVector::const_iterator sit = it->second.begin(); sit != it->second.end(); ++sit) { - if (totalBWPerNodeMap.find(sit->first) == totalBWPerNodeMap.end()) { // node not already in totalSizePerNodeMap? - // for each time this node is used by this data product increase its totalSizePerNodeMap value accordingly - for (storageVector::const_iterator countit = sit; countit != it->second.end(); ++countit) { - if (countit->first == sit->first) { // for each time this storage node is used in storageVector - totalBWPerNodeMap[sit->first] += dpBWPerLocation; // add the amount of bandwidth used by the set of files - } - } - } - } - } - } - itsLastStorageCheckResult.clear(); - if (pTask->getOutputDataproductCluster() == "CEP4") { //Can we just skip this for CEP4 ? 
/AR - debugWarn("sis","Storage::addStorageToTask: Did not check storage for task:", pTask->getID(), " (CEP4 detected)"); - } - else { - // check if the total bandwidths for the nodes used do not exceed the nodes their available bandwidths - for (std::map<int, double>::const_iterator nit = totalBWPerNodeMap.begin(); nit != totalBWPerNodeMap.end(); ++nit) { - storageNodesMap::const_iterator nodeit = itsStorageNodes.find(nit->first); - if (nodeit != itsStorageNodes.end()) { - // std::cout << "Total bandwidth required for node:" << nodeit->second.name() << " = " << nit->second << " kb/s" << std::endl; - res = nodeit->second.checkBandWidth(start, end, nit->second); - if (res != CONFLICT_NO_CONFLICT) { - itsLastStorageCheckResult.push_back(storageResult(_END_DATA_PRODUCTS_ENUM_, nit->first, -1, res)); - } - } - } - if (itsLastStorageCheckResult.empty()) { // if no total bandwidth error for any node then start the rest of the checks - for (dataFileMap::const_iterator dfit = dataFiles.begin(); dfit != dataFiles.end(); ++dfit) { - storageMap::const_iterator stit = storageLocations.find(dfit->first); - if (stit != storageLocations.end()) { - if (!stit->second.empty()) { - claimSize = (double) dfit->second.first * dfit->second.second / stit->second.size(); // size per file * nrFiles / nr of raid arrays assigned - bandWidth = (double) claimSize / 1000 / durationSec; // MByte/sec, the required remaining disk write speed (or bandwidth) for this array - - // check requested resources - for (storageVector::const_iterator it = stit->second.begin(); it != stit->second.end(); ++it) { - sit = itsStorageNodes.find(it->first); - if (sit != itsStorageNodes.end()) { - // check size requirements - res = sit->second.checkSpaceAndWriteSpeed(start, end, claimSize, bandWidth, it->second); // check space and write speed for every raid array - if (res != CONFLICT_NO_CONFLICT) { - itsLastStorageCheckResult.push_back(storageResult(dfit->first, it->first, it->second, res)); - // itsLastStorageCheckResult[it->first].push_back(std::pair<int, task_conflict>(it->second, res)); // store the error result - } - else { // add the claim - sit->second.addClaim(pTask->getID(), start, end, dfit->first, claimSize, bandWidth, it->second); - } - } - } - // if there were conflicts then remove the claim again from the storage nodes - if (!itsLastStorageCheckResult.empty()) { - std::vector<int> snd; - for (storageVector::const_iterator it = stit->second.begin(); it != stit->second.end(); ++it) { - sit = itsStorageNodes.find(it->first); - if (sit != itsStorageNodes.end()) { - if (std::find(snd.begin(), snd.end(), stit->first) == snd.end()) { - sit->second.removeClaim(pTask->getID()); // only call removeClaim one time for every storage node (it removes all claims found for the task ID) - snd.push_back(stit->first); - } - } - } - } - } - else { // no storage has been assigned to this data product type - itsLastStorageCheckResult.push_back(storageResult(dfit->first, -1, -1, CONFLICT_NO_STORAGE_ASSIGNED)); - } - } - else { // no storage has been assigned to this data product type - itsLastStorageCheckResult.push_back(storageResult(dfit->first, -1, -1, CONFLICT_NO_STORAGE_ASSIGNED)); - } - } - } - } - if (itsLastStorageCheckResult.empty()) { - task_storage->unAssignStorage(); - task_storage->setStorage(storageLocations); // sets the new locations in the task - for (storageMap::const_iterator tsit = storageLocations.begin(); tsit != storageLocations.end(); ++tsit) { - task_storage->setOutputDataProductAssigned(tsit->first, true); - } - } - } - 
else { - debugWarn("sis","Storage::addStorageToTask: Cannot add storage to task:", pTask->getID(), " (hint:not an observation or pipeline?)"); - } - - return itsLastStorageCheckResult; -} - -std::vector<storageResult> Storage::addStorageToTask(Task *pTask, dataProductTypes dataProduct, const storageVector &storageLocations, bool noCheck) { - if (pTask->hasStorage()) { - TaskStorage *task_storage = pTask->storage(); - storageNodesMap::iterator sit; - const AstroDateTime &start = pTask->getScheduledStart(); - const AstroDateTime &end = pTask->getScheduledEnd(); - unsigned durationSec = pTask->getDuration().totalSeconds(); - unsigned taskID = pTask->getID(); - const dataFileMap &dataFiles = task_storage->getOutputFileSizes(); // contains the number of files and the size of an individual file for each output data product of the task - double claimSize, bandWidth; - // iterate over all required data products for the task - // for (dataFileMap::const_iterator dpit = dataFiles.begin(); dpit != dataFiles.end(); ++dpit) { - dataFileMap::const_iterator dfit = dataFiles.find(dataProduct); - itsLastStorageCheckResult.clear(); - if (dfit != dataFiles.end()) { - // claimsize = size of the claim for this raid array - claimSize = (double) dfit->second.first * dfit->second.second / storageLocations.size(); // size per file * nrFiles / nr of raid arrays assigned - bandWidth = (double) claimSize / 1000 / durationSec; // MByte/sec, the required remaining disk write speed (or bandwidth) for this array - // std::cout << "total size: " << totalStorageSize << std::endl << "nr of storage locations:" << storageLocations.size() << std::endl << "size per node: " << sizePerNode << std::endl - // << "total bandwidth: " << totalBandWidth << std::endl << "per node: " << bandWidthPerNode << std::endl; - task_conflict res(CONFLICT_NO_CONFLICT); - for (storageVector::const_iterator it = storageLocations.begin(); it != storageLocations.end(); ++it) { - res = CONFLICT_NO_CONFLICT; - sit = itsStorageNodes.find(it->first); - if (sit != itsStorageNodes.end()) { - // check size requirements - if (!noCheck && pTask->getOutputDataproductCluster() != "CEP4") { - res = sit->second.checkSpaceAndWriteSpeed(start, end, claimSize, bandWidth, it->second); // check space and bandwidth for every raid array - } - if (res == CONFLICT_NO_CONFLICT) { - sit->second.addClaim(taskID, start, end, dataProduct, claimSize, bandWidth, it->second); - if (std::find(itsTaskStorageNodes[taskID].begin(), itsTaskStorageNodes[taskID].end(), it->first) == itsTaskStorageNodes[taskID].end()) { - itsTaskStorageNodes[taskID].push_back(it->first); - } - } - else { - itsLastStorageCheckResult.push_back(storageResult(dataProduct, it->first, it->second, res)); - } - } - } - if (!storageLocations.empty() && res == CONFLICT_NO_CONFLICT) { - task_storage->addStorage(dataProduct, storageLocations); // adds the storage to the task - task_storage->setOutputDataProductAssigned(dataProduct, true); - } - } - else { - // error: dataProduct not found in dataFiles map of the task! 
- } - } - else { - debugWarn("sis","Storage::addStorageToTask: Cannot add storage to task:", pTask->getID(), " (hint:not an observation or pipeline?)"); - } - return itsLastStorageCheckResult; -} - -void Storage::removeTaskStorage(unsigned taskID) { - std::map<unsigned, std::vector<int> >::iterator it = itsTaskStorageNodes.find(taskID); - if (it != itsTaskStorageNodes.end()) { - for (std::vector<int>::iterator sit = it->second.begin(); sit != it->second.end(); ++sit) { - storageNodesMap::iterator snit = itsStorageNodes.find(*sit); - if (snit != itsStorageNodes.end()) { - snit->second.removeClaim(taskID); - } - } - itsTaskStorageNodes.erase(it); - } -} // function checkAssignedTaskStorage is used for checking if the given task it's claims are registered at the storage nodes assigned to the task // assuming it is not possible to assign storage to a task if a conflict arises from it, the function doesn't check if the size and bandwidth requirements are fulfilled. diff --git a/SAS/Scheduler/src/Storage.h b/SAS/Scheduler/src/Storage.h index acef733e60f5e7c82d01cf59864b2c6db1c08b9b..5f1fa89b38181227eb5ed85021b5c862978ff55f 100644 --- a/SAS/Scheduler/src/Storage.h +++ b/SAS/Scheduler/src/Storage.h @@ -32,9 +32,6 @@ public: bool addStorageNode(const std::string &nodeName, int nodeID);// {itsStorageNodes.insert(storageNodesMap::value_type(nodeID, nodeName));} void addStoragePartition(int nodeID, unsigned short partitionID, const std::string &path, const double &capacity, const double &free_space); void clearStorageClaims(void); // removes all claims from all storage nodes - std::vector<storageResult> addStorageToTask(Task *pTask, const storageMap &storageLocations); - std::vector<storageResult> addStorageToTask(Task *pTask, dataProductTypes dataProduct, const storageVector &storageLocations, bool noCheck); // bool addStorageTask(unsigned taskID, const AstroDateTime &startTime, const AstroDateTime &endTime, const double &claimSize, const double &bandWidth, int storageNodeID, int raidID); - void removeTaskStorage(unsigned taskID); std::vector<storageResult> checkAssignedTaskStorage(Task *pTask, dataProductTypes dataProduct); // returns the possible storage locations for the claim.key = node ID, value vector of raidID,free space pairs storageLocationOptions getStorageLocationOptions(dataProductTypes dataProduct, const AstroDateTime &startTime, const AstroDateTime &endTime, diff --git a/SAS/Scheduler/src/schedulerdata.h b/SAS/Scheduler/src/schedulerdata.h index 3762bfd42c862d718f3e5a9db7d2ad1c45643f39..8557890ab97a801af7a1f4380fa052a60e2108fd 100644 --- a/SAS/Scheduler/src/schedulerdata.h +++ b/SAS/Scheduler/src/schedulerdata.h @@ -164,11 +164,6 @@ public: void updateStations(void) {itsData.updateStations();} void initStorage(void) {itsData.initStorage();} void clearStorageClaims(void) {itsData.clearStorageClaims();} - std::vector<storageResult> addStorageToTask(Task *pTask, const storageMap &storageLocations) {return itsData.addStorageToTask(pTask, storageLocations);} - std::vector<storageResult> addStorageToTask(Task *pTask, dataProductTypes dataProduct, const storageVector &storageLocations, bool noCheck = false) { - return itsData.addStorageToTask(pTask, dataProduct, storageLocations, noCheck); - } - void removeStorageForTask(unsigned taskID) {itsData.removeStorageForTask(taskID);} std::vector<storageResult> checkAssignedTaskStorage(Task *pTask, dataProductTypes dataProduct) {return itsData.checkAssignedTaskStorage(pTask, dataProduct);} storageLocationOptions 
getStorageLocationOptions(dataProductTypes dataProduct, const AstroDateTime &startTime, const AstroDateTime &endTime, const double &fileSize, const double &bandWidth, unsigned minNrFiles, sortMode sort_mode = SORT_NONE, const std::vector<int> &nodes = std::vector<int>()) { return itsData.getStorageLocationOptions(dataProduct, startTime, endTime, fileSize, bandWidth, minNrFiles, sort_mode, nodes); diff --git a/SAS/Scheduler/src/schedulerdatablock.h b/SAS/Scheduler/src/schedulerdatablock.h index 84e7ca02816f8001110a2a4d25c91a0ba22032f7..bad28149bd55f38f1aebf4a6e813531ee347d831 100644 --- a/SAS/Scheduler/src/schedulerdatablock.h +++ b/SAS/Scheduler/src/schedulerdatablock.h @@ -211,11 +211,6 @@ public: void initStorage(void) {itsStorage.initStorage();} void clearStorageClaims(void) {itsStorage.clearStorageClaims();} - std::vector<storageResult> addStorageToTask(Task *pTask, const storageMap &storageLocations) {return itsStorage.addStorageToTask(pTask, storageLocations);} - std::vector<storageResult> addStorageToTask(Task *pTask, dataProductTypes dataProduct, const storageVector &storageLocations, bool noCheck = false) { - return itsStorage.addStorageToTask(pTask, dataProduct, storageLocations, noCheck); - } - void removeStorageForTask(unsigned taskID) {itsStorage.removeTaskStorage(taskID);} std::vector<storageResult> checkAssignedTaskStorage(Task *pTask, dataProductTypes dataProduct) {return itsStorage.checkAssignedTaskStorage(pTask, dataProduct);} storageLocationOptions getStorageLocationOptions(dataProductTypes dataProduct, const AstroDateTime &startTime, const AstroDateTime &endTime, const double &fileSize, const double &bandWidth, unsigned minNrFiles, sortMode sort_mode = SORT_NONE, const std::vector<int> &nodes = std::vector<int>()) { return itsStorage.getStorageLocationOptions(dataProduct, startTime, endTime, fileSize, bandWidth, minNrFiles, sort_mode, nodes); diff --git a/SAS/XML_generator/src/xmlgen.py b/SAS/XML_generator/src/xmlgen.py index 612de8c9d706e8f49d6942c766ce7964398adbcf..c9329e6be5816d800d052dc4210ed7c6bb40e07b 100755 --- a/SAS/XML_generator/src/xmlgen.py +++ b/SAS/XML_generator/src/xmlgen.py @@ -654,7 +654,7 @@ def writeXMLPulsarPipe(ofile, topo, pred_topo, name, descr, defaulttemplate, dur storageCluster, status, nr_tasks, nr_cores_per_task, _2bf2fitsExtraOpts, _8bitConversionSigma, decodeNblocks, decodeSigma, digifilExtraOpts, dspsrExtraOpts, dynamicSpectrumTimeAverage, nofold, nopdmp, norfi, prepdataExtraOpts, prepfoldExtraOpts, prepsubbandExtraOpts, pulsar, rawTo8bit, - rfifindExtraOpts, rrats, singlePulse, skipDsps, skipDynamicSpectrum, skipPrepfold, tsubint, + rfifindExtraOpts, rrats, singlePulse, skipDspsr, skipDynamicSpectrum, skipPrepfold, tsubint, miscParameters): stor_cluster = dataProductCluster(storageCluster) proc_cluster = processingCluster(storageCluster, nr_tasks, nr_cores_per_task) @@ -690,7 +690,7 @@ def writeXMLPulsarPipe(ofile, topo, pred_topo, name, descr, defaulttemplate, dur <rfifindExtraOpts>%s</rfifindExtraOpts> <rrats>%s</rrats> <singlePulse>%s</singlePulse> - <skipDsps>%s</skipDsps> + <skipDspsr>%s</skipDspsr> <skipDynamicSpectrum>%s</skipDynamicSpectrum> <skipPrepfold>%s</skipPrepfold> <tsubint>%s</tsubint>""" % (defaulttemplate, duration, _2bf2fitsExtraOpts, _8bitConversionSigma, @@ -698,7 +698,7 @@ def writeXMLPulsarPipe(ofile, topo, pred_topo, name, descr, defaulttemplate, dur dynamicSpectrumTimeAverage, writeBoolean(nofold), writeBoolean(nopdmp), writeBoolean(norfi), prepdataExtraOpts, prepfoldExtraOpts, prepsubbandExtraOpts, pulsar, 
writeBoolean(rawTo8bit), rfifindExtraOpts, - writeBoolean(rrats), writeBoolean(singlePulse), writeBoolean(skipDsps), + writeBoolean(rrats), writeBoolean(singlePulse), writeBoolean(skipDspsr), writeBoolean(skipDynamicSpectrum), writeBoolean(skipPrepfold), tsubint) writeMiscParameters(ofile, miscParameters) print >> ofile, r""" @@ -1266,7 +1266,7 @@ def readGlobalPulsar(value): globalPulsar[7] = toBool(globalPulsar[7]) # norfi globalPulsar[8] = toBool(globalPulsar[8]) # nofold globalPulsar[9] = toBool(globalPulsar[9]) # nopdmp - globalPulsar[10] = toBool(globalPulsar[10]) # skipDsps + globalPulsar[10] = toBool(globalPulsar[10]) # skipDspsr globalPulsar[11] = toBool(globalPulsar[11]) # rrats globalPulsar[19] = toBool(globalPulsar[19]) # skipDynamicSpectrum globalPulsar[20] = toBool(globalPulsar[20]) # skipPrepfold @@ -1554,7 +1554,7 @@ def readTargetBeams(startLine, lines, globalSubbands, globalBBS, globalDemix, gl targetPulsar[nr_beams][-1][7] = toBool(targetPulsar[nr_beams][-1][7]) # norfi targetPulsar[nr_beams][-1][8] = toBool(targetPulsar[nr_beams][-1][8]) # nofold targetPulsar[nr_beams][-1][9] = toBool(targetPulsar[nr_beams][-1][9]) # nopdmp - targetPulsar[nr_beams][-1][10] = toBool(targetPulsar[nr_beams][-1][10]) # skipDsps + targetPulsar[nr_beams][-1][10] = toBool(targetPulsar[nr_beams][-1][10]) # skipDspsr targetPulsar[nr_beams][-1][11] = toBool(targetPulsar[nr_beams][-1][11]) # rrats targetPulsar[nr_beams][-1][19] = toBool( targetPulsar[nr_beams][-1][19]) # skipDynamicSpectrum @@ -2685,7 +2685,7 @@ def writeRepeat(ofile, projectName, blockTopo, repeatNr, settings, imaging_pipe_ norfi=targetPulsar[beamNr][0][7], nofold=targetPulsar[beamNr][0][8], nopdmp=targetPulsar[beamNr][0][9], - skipDsps=targetPulsar[beamNr][0][10], + skipDspsr=targetPulsar[beamNr][0][10], rrats=targetPulsar[beamNr][0][11], _2bf2fitsExtraOpts=targetPulsar[beamNr][0][12], decodeSigma=targetPulsar[beamNr][0][13], diff --git a/SAS/XML_generator/test/test_regression.in_data/xml/lc4_019_1.xml b/SAS/XML_generator/test/test_regression.in_data/xml/lc4_019_1.xml index 82add8e0e4be30ce36713191b39bae20c54d4d18..f5163c09d2932171d15715203c9166e0398917ee 100644 --- a/SAS/XML_generator/test/test_regression.in_data/xml/lc4_019_1.xml +++ b/SAS/XML_generator/test/test_regression.in_data/xml/lc4_019_1.xml @@ -162,7 +162,7 @@ <rfifindExtraOpts></rfifindExtraOpts> <rrats></rrats> <singlePulse></singlePulse> - <skipDsps></skipDsps> + <skipDspsr></skipDspsr> <skipDynamicSpectrum></skipDynamicSpectrum> <skipPrepfold></skipPrepfold> <tsubint></tsubint> diff --git a/SAS/XML_generator/test/test_regression.in_data/xml/test_input.xml b/SAS/XML_generator/test/test_regression.in_data/xml/test_input.xml index ed802d6a4c8d6cbfbef8b1dfcc791a10cdeb0e8c..653cb363b4fe194ea0fdb257a2439c05701c6b3c 100644 --- a/SAS/XML_generator/test/test_regression.in_data/xml/test_input.xml +++ b/SAS/XML_generator/test/test_regression.in_data/xml/test_input.xml @@ -164,7 +164,7 @@ <rfifindExtraOpts></rfifindExtraOpts> <rrats></rrats> <singlePulse></singlePulse> - <skipDsps></skipDsps> + <skipDspsr></skipDspsr> <skipDynamicSpectrum></skipDynamicSpectrum> <skipPrepfold></skipPrepfold> <tsubint></tsubint> @@ -344,7 +344,7 @@ <rfifindExtraOpts></rfifindExtraOpts> <rrats></rrats> <singlePulse></singlePulse> - <skipDsps></skipDsps> + <skipDspsr></skipDspsr> <skipDynamicSpectrum></skipDynamicSpectrum> <skipPrepfold></skipPrepfold> <tsubint></tsubint> diff --git a/SAS/XML_generator/test/test_regression.in_data/xml/test_input_cep4.xml 
b/SAS/XML_generator/test/test_regression.in_data/xml/test_input_cep4.xml index ed802d6a4c8d6cbfbef8b1dfcc791a10cdeb0e8c..653cb363b4fe194ea0fdb257a2439c05701c6b3c 100644 --- a/SAS/XML_generator/test/test_regression.in_data/xml/test_input_cep4.xml +++ b/SAS/XML_generator/test/test_regression.in_data/xml/test_input_cep4.xml @@ -164,7 +164,7 @@ <rfifindExtraOpts></rfifindExtraOpts> <rrats></rrats> <singlePulse></singlePulse> - <skipDsps></skipDsps> + <skipDspsr></skipDspsr> <skipDynamicSpectrum></skipDynamicSpectrum> <skipPrepfold></skipPrepfold> <tsubint></tsubint> @@ -344,7 +344,7 @@ <rfifindExtraOpts></rfifindExtraOpts> <rrats></rrats> <singlePulse></singlePulse> - <skipDsps></skipDsps> + <skipDspsr></skipDspsr> <skipDynamicSpectrum></skipDynamicSpectrum> <skipPrepfold></skipPrepfold> <tsubint></tsubint>