diff --git a/.gitattributes b/.gitattributes index 1f555631211ad339d475a27dc5c34c3e78cb1743..7a6b617b2670bc153caffeed371482555659c4f3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2389,10 +2389,22 @@ MAC/APL/CASATools/include/CASATools/CasaConverter.h -text MAC/APL/CASATools/src/CasaConverter.cc -text MAC/APL/CASATools/test/tCasaConverter.cc -text MAC/APL/CASATools/test/tCasaConverter.log_prop -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.cc -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.h -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHWMonitor.dpl -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitor.conf -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitorMain.cc -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/CMakeLists.txt -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.cc -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.h -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/PVSSDatapointDefs.h -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/README -text +MAC/APL/CEPCU/src/CEPHardwareMonitor/zabbix_get -text MAC/APL/CEPCU/src/CEPlogProcessor/CEPDatapoints.dpl -text MAC/APL/CEPCU/src/CEPlogProcessor/CEPDatapointtypes.dpl -text MAC/APL/CEPCU/src/CEPlogProcessor/CircularBuffer.h -text MAC/APL/CEPCU/src/CEPlogProcessor/rtlogsender.py -text +MAC/APL/CEPCU/src/OnlineControl/tPVSSMapping.cc -text MAC/APL/CEPCU/src/PythonControl/PythonControl.conf -text MAC/APL/CEPCU/src/PythonControl/tMDparser.cc -text MAC/APL/CR_Protocol/CMakeLists.txt -text diff --git a/MAC/APL/APLCommon/src/ControllerDefines.cc b/MAC/APL/APLCommon/src/ControllerDefines.cc index 5f06e931287e84cf290d2357b91535455ab942ad..8a321d4f28d9db07d0cafdfbff19a53442df63e2 100644 --- a/MAC/APL/APLCommon/src/ControllerDefines.cc +++ b/MAC/APL/APLCommon/src/ControllerDefines.cc @@ -29,7 +29,6 @@ #include <Common/SystemUtil.h> #include <Common/ParameterSet.h> // indexValue #include <APL/APLCommon/ControllerDefines.h> -#include <ApplCommon/LofarDirs.h> 
#include <ApplCommon/StationInfo.h> #include "Controller_Protocol.ph" @@ -312,7 +311,10 @@ string createPropertySetName(const string& propSetMask, psName.replace(pos, 10, string("Midplane%d")); } if ((pos = psName.find("@ionode@")) != string::npos) { - psName.replace(pos, 8, string("IONode%d")); + psName.replace(pos, 8, string("IONode%02d")); + } + if ((pos = psName.find("@locusnode@")) != string::npos) { + psName.replace(pos, 11, string("LocusNode%03d")); } if ((pos = psName.find("@osrack@")) != string::npos) { psName.replace(pos, 8, string("OSRack%d")); diff --git a/MAC/APL/CEPCU/CMakeLists.txt b/MAC/APL/CEPCU/CMakeLists.txt index bbdafe85ae51df4f47ec77e5d9f733318a298bb9..01fca5d2dcc6e3833b1a88f8119f361a52de7dfc 100644 --- a/MAC/APL/CEPCU/CMakeLists.txt +++ b/MAC/APL/CEPCU/CMakeLists.txt @@ -1,7 +1,7 @@ # $Id$ # Do not split the following line, otherwise makeversion will fail! -lofar_package(CEPCU 1.0 DEPENDS Common ALC PLC ApplCommon MACIO GCFTM GCFRTDB APLCommon OTDB) +lofar_package(CEPCU 1.0 DEPENDS Common ALC PLC ApplCommon MACIO GCFTM GCFRTDB APLCommon RTDBCommon OTDB) include(LofarFindPackage) lofar_find_package(Boost REQUIRED COMPONENTS date_time) diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.cc b/MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.cc new file mode 100644 index 0000000000000000000000000000000000000000..350d5f831a19f8beededb99096d1d427095e02b1 --- /dev/null +++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.cc @@ -0,0 +1,330 @@ +//# BlueGeneMonitor.cc: Monitors if the BGP hardware is available +//# +//# Copyright (C) 2011 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. 
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: BlueGeneMonitor.cc 10505 2007-09-07 17:14:57Z overeem $
+#include <lofar_config.h>
+#include <Common/LofarLogger.h>
+#include <Common/LofarConstants.h>
+#include <Common/LofarLocators.h>
+#include <Common/StringUtil.h>
+#include <Common/ParameterSet.h>
+
+#include <GCF/PVSS/GCF_PVTypes.h>
+#include <GCF/PVSS/PVSSinfo.h>
+#include <MACIO/MACServiceInfo.h>
+#include <APL/APLCommon/ControllerDefines.h>
+#include <APL/RTDBCommon/RTDButilities.h>
+#include <GCF/RTDB/DP_Protocol.ph>
+//#include <APL/APLCommon/StationInfo.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h> // popen, pclose, fgets
+#include <string.h> // strcmp, strcspn, strerror
+#include <unistd.h> // usleep
+#include <netdb.h> // gethostbyname
+
+#include "BlueGeneMonitor.h"
+#include "PVSSDatapointDefs.h"
+
+#define IONODES_PER_BGP_PARTITION 64
+// Fully parenthesised so the macro can be used inside a larger expression.
+#define MAX2(a,b) (((a) > (b)) ? (a) : (b))
+
+namespace LOFAR {
+	using namespace APLCommon;
+	using namespace APL::RTDBCommon;
+	using namespace GCF::TM;
+	using namespace GCF::PVSS;
+	using namespace GCF::RTDB;
+	namespace CEPCU {
+
+//
+// BlueGeneMonitor()
+//
+// Creates the timer port and the DPservice, reads the mandatory key
+// 'BlueGeneFrontEnd' from the global parameterset and registers the
+// DP protocol. The poll interval is fixed at 60 seconds.
+//
+BlueGeneMonitor::BlueGeneMonitor(const string& cntlrName) :
+	GCFTask ((State)&BlueGeneMonitor::initial_state,cntlrName),
+	itsOwnPropertySet (0),
+	itsTimerPort (0),
+	itsDPservice (0),
+	itsPollInterval (60),
+	itsLastBGPState (-1)
+{
+	LOG_TRACE_OBJ_STR (cntlrName << " construction");
+
+	// need port for timers.
+	itsTimerPort = new GCFTimerPort(*this, "BGPTimerPort");
+
+	itsDPservice = new DPservice(this);
+	ASSERTSTR(itsDPservice, "Can't allocate DPservice");
+
+	itsBlueGeneFrontEnd = globalParameterSet()->getString("BlueGeneFrontEnd", "");
+	ASSERTSTR(!itsBlueGeneFrontEnd.empty(), "Name of BlueGene FrontEnd node not specified");
+
+	registerProtocol(DP_PROTOCOL, DP_PROTOCOL_STRINGS);
+}
+
+
+//
+// ~BlueGeneMonitor()
+//
+// Releases the DPservice and the timer port.
+//
+BlueGeneMonitor::~BlueGeneMonitor()
+{
+	LOG_TRACE_OBJ_STR (getName() << " destruction");
+
+	if (itsDPservice) delete itsDPservice;
+
+	if (itsTimerPort) delete itsTimerPort;
+}
+
+
+//
+// initial_state(event, port)
+//
+// Setup connection with PVSS
+//
+// F_ENTRY creates the propertyset (guarded by a 2 second timer), DP_CREATED
+// replaces that timer by an immediate one and F_TIMER checks that the name of
+// the front-end node can be resolved before the first poll is started.
+//
+GCFEvent::TResult BlueGeneMonitor::initial_state(GCFEvent& event,
+													GCFPortInterface& port)
+{
+	LOG_DEBUG_STR ("initial:" << eventName(event) << "@" << port.getName());
+
+	switch (event.signal) {
+	case F_INIT:
+		break;
+
+	case F_ENTRY: {
+		// Get access to my own propertyset.
+		LOG_DEBUG_STR ("Activating PropertySet " << PSN_CEP_HARDWARE_MONITOR);
+		itsTimerPort->setTimer(2.0);
+		itsOwnPropertySet = new RTDBPropertySet(PSN_CEP_HARDWARE_MONITOR,
+												PST_CEP_HARDWARE_MONITOR,
+												PSAT_WO,
+												this);
+
+		}
+		break;
+
+	case DP_CREATED: {
+		// NOTE: this function may be called DURING the construction of the PropertySet.
+		// Always exit this event in a way that GCF can end the construction.
+		DPCreatedEvent dpEvent(event);
+		LOG_DEBUG_STR("Result of creating " << dpEvent.DPname << " = " << dpEvent.result);
+		itsTimerPort->cancelAllTimers();
+		itsTimerPort->setTimer(0.0);
+		}
+		break;
+
+	case F_TIMER: {
+		// PropertySet must exist by now
+		ASSERTSTR(itsOwnPropertySet, "Could not create the PVSS datapoint " << PSN_CEP_HARDWARE_MONITOR);
+
+		// update PVSS.
+		LOG_TRACE_FLOW ("Updateing state to PVSS");
+		itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION, GCFPVString("BlueGene:initialising"));
+		itsOwnPropertySet->setValue(PN_CHM_BGP_CONNECTED, GCFPVBool(false));
+
+		// is name resolvable?
+		struct hostent* hostinfo = gethostbyname(itsBlueGeneFrontEnd.c_str());
+		if (!hostinfo) {
+			itsOwnPropertySet->setValue(PN_FSM_ERROR, GCFPVString("Name of BG frontend node is unresolvable"));
+			TRAN(BlueGeneMonitor::finish_state); // go to final state.
+			// Stop here: without this break the error was cleared again and
+			// the task still went on to getBlueGeneState.
+			break;
+		}
+		itsOwnPropertySet->setValue(PN_FSM_ERROR, GCFPVString(""));
+		TRAN(BlueGeneMonitor::getBlueGeneState); // do inital check
+	}
+	break;
+
+	case DP_SET:
+		break;
+
+	case F_QUIT:
+		TRAN (BlueGeneMonitor::finish_state);
+		break;
+
+	default:
+		LOG_DEBUG_STR ("initial, DEFAULT: " << eventName(event));
+		break;
+	}
+
+	return (GCFEvent::HANDLED);
+}
+
+
+
+//
+// getBlueGeneState(event, port)
+//
+// Ask the information of the BlueGene
+//
+// On F_ENTRY 'bgpartstatus R00' is executed on the front-end node via ssh and
+// the first line of its output is interpreted. When the resulting object state
+// differs from the previous poll it is written for all IO nodes of the
+// partition. The state always continues with waitForNextCycle.
+//
+GCFEvent::TResult BlueGeneMonitor::getBlueGeneState(GCFEvent& event,
+													GCFPortInterface& port)
+{
+	if (eventName(event) != "DP_SET") {
+		LOG_DEBUG_STR ("getBlueGeneState:" << eventName(event) << "@" << port.getName());
+	}
+
+	switch (event.signal) {
+	case F_ENTRY: {
+		itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION,GCFPVString("BlueGene:requesting BlueGene info"));
+		itsTimerPort->setTimer(15.0); // in case the answer never comes
+
+		string command(formatString("ssh %s 'bgpartstatus R00' 2>&1", itsBlueGeneFrontEnd.c_str()));
+		FILE* pipe(popen(command.c_str(), "r"));
+		char line[1024];
+		line[0] = '\0';
+		if (!pipe || !fgets (line, sizeof (line), pipe)) {
+			int pipeErrno(errno); // save it before the calls below can overwrite it
+			LOG_ERROR_STR ("BlueGene:Unable to read pipe: " << command);
+			itsOwnPropertySet->setValue(PN_FSM_ERROR,GCFPVString("BlueGene:pipe failure"));
+			if (pipe) {
+				LOG_ERROR_STR("Pipe error: " << strerror(pipeErrno));
+				pclose(pipe); // streams from popen() must be closed with pclose()
+			}
+			TRAN(BlueGeneMonitor::waitForNextCycle); // go to next state.
+			break;
+		}
+		pclose(pipe); // also reaps the ssh child process
+
+		// fgets() keeps the line terminator; strip it, otherwise the comparison
+		// with the bare status word below can never succeed.
+		line[strcspn(line, "\r\n")] = '\0';
+
+		// possible answers:
+		// free - partition is available.
+		// initializing - partition is booting.
+		// rebooting - partition is rebooting.
+		// busy - partition is running a job.
+		// unavailable - partition is partly used by other processes
+		// deallocating - partition is cleaning up.
+		// error - partition is in error state
+		bool inError(false);
+		if (!strcmp(line, "error")) {
+			LOG_ERROR_STR ("BlueGene:Partition R00 in error state: " << line);
+			itsOwnPropertySet->setValue(PN_FSM_ERROR,GCFPVString("BlueGene:unknown partitionstate"));
+			inError = true;
+		}
+
+		// Only touch the datapoints of the IO nodes when the state changed.
+		int newState(inError ? RTDB_OBJ_STATE_BROKEN : RTDB_OBJ_STATE_OPERATIONAL);
+		if (newState != itsLastBGPState) {
+			string pvssDBname(PVSSinfo::getLocalSystemName());
+			for (int i = 0; i < IONODES_PER_BGP_PARTITION; i++) {
+				string nodeDP(formatString("%s:%s", pvssDBname.c_str(), _IOnodeName(i).c_str()));
+				LOG_INFO_STR("setObjectState(" << getName() << "," << nodeDP << "," << newState << ")");
+				setObjectState(getName(), nodeDP, newState);
+			}
+		}
+		itsLastBGPState = newState;
+		TRAN(BlueGeneMonitor::waitForNextCycle); // go to next state.
+		break;
+	}
+
+	case DP_SET:
+	case F_EXIT:
+		break;
+
+	case F_QUIT:
+		TRAN (BlueGeneMonitor::finish_state);
+		break;
+
+	default:
+		LOG_DEBUG_STR ("getBlueGeneState, DEFAULT: " << eventName(event));
+		break;
+	}
+
+	return (GCFEvent::HANDLED);
+}
+
+//
+// IOnodeName(nodeNr)
+//
+// Returns the datapoint name of the given IO node by filling the PSN_IO_NODE
+// mask with (nodeNr/32, nodeNr).
+//
+string BlueGeneMonitor::_IOnodeName(int nodeNr)
+{
+	string IONodeMask(createPropertySetName(PSN_IO_NODE,""));
+	return (formatString(IONodeMask.c_str(), nodeNr/32, nodeNr));
+}
+
+//
+// waitForNextCycle(event, port)
+//
+// Wait for our next cycle.
+//
+// On F_ENTRY a timer is set that expires on the next multiple of
+// itsPollInterval seconds; when it fires the error field is cleared and
+// the BlueGene is polled again.
+//
+GCFEvent::TResult BlueGeneMonitor::waitForNextCycle(GCFEvent& event,
+													GCFPortInterface& port)
+{
+	if (eventName(event) != "DP_SET") {
+		LOG_DEBUG_STR ("waitForNextCycle:" << eventName(event) << "@" << port.getName());
+	}
+
+	switch (event.signal) {
+	case F_ENTRY: {
+		itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION,GCFPVString("BlueGene:wait for next cycle"));
+		// The remainder lies in [0, itsPollInterval-1], so waitTime is always in
+		// [1, itsPollInterval]; a separate check for 0 is not needed.
+		int waitTime = itsPollInterval - (time(0) % itsPollInterval);
+		itsTimerPort->cancelAllTimers();
+		itsTimerPort->setTimer(double(waitTime));
+		LOG_INFO_STR("BlueGene:Waiting " << waitTime << " seconds for next cycle");
+	}
+	break;
+
+	case F_TIMER: {
+		itsOwnPropertySet->setValue(string(PN_FSM_ERROR),GCFPVString(""));
+		TRAN(BlueGeneMonitor::getBlueGeneState);
+	}
+	break;
+
+	case DP_SET:
+	case F_EXIT:
+		break;
+
+	case F_QUIT:
+		TRAN (BlueGeneMonitor::finish_state);
+		break;
+
+	default:
+		LOG_DEBUG_STR ("waitForNextCycle, DEFAULT: " << eventName(event));
+		break;
+	}
+
+	return (GCFEvent::HANDLED);
+}
+
+
+//
+// finish_state(event, port)
+//
+// Write controller state to PVSS
+//
+// Final state: no transitions leave this state.
+//
+GCFEvent::TResult BlueGeneMonitor::finish_state(GCFEvent& event, GCFPortInterface& port)
+{
+	LOG_DEBUG_STR ("finish_state:" << eventName(event) << "@" << port.getName());
+
+	switch (event.signal) {
+	case F_ENTRY: {
+		// update PVSS
+		itsOwnPropertySet->setValue(string(PN_FSM_CURRENT_ACTION),GCFPVString("BlueGene:finished"));
+		break;
+	}
+
+	case DP_SET:
+	case F_EXIT:
+		break;
+
+	default:
+		LOG_DEBUG("finishing_state, DEFAULT");
+		break;
+	}
+	return (GCFEvent::HANDLED);
+}
+
+
+}; // CEPCU
+}; // LOFAR
diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.h b/MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.h
new file mode 100644
index 0000000000000000000000000000000000000000..5567484edeab186fe95ea7c72cda70fc26944219
--- /dev/null
+++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/BlueGeneMonitor.h
@@ -0,0 +1,85 @@
+//# BlueGeneMonitor.h: 
Monitors the BlueGene hardware.
+//#
+//# Copyright (C) 2006
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: BlueGeneMonitor.h 10461 2007-08-23 22:44:03Z overeem $
+
+#ifndef CEPCU_BLUEGENE_MONITOR_H
+#define CEPCU_BLUEGENE_MONITOR_H
+
+//# Common Includes
+#include <blitz/array.h>
+#include <Common/lofar_string.h>
+#include <Common/lofar_vector.h>
+#include <Common/LofarConstants.h>
+
+//# GCF Includes
+#include <GCF/TM/GCF_Control.h>
+#include <GCF/RTDB/RTDB_PropertySet.h>
+#include <GCF/RTDB/DPservice.h>
+
+// forward declaration
+
+namespace LOFAR {
+	namespace CEPCU {
+
+using MACIO::GCFEvent;
+using GCF::TM::GCFPortInterface;
+using GCF::TM::GCFTimerPort;
+using GCF::TM::GCFTCPPort;
+using GCF::TM::GCFTask;
+using GCF::RTDB::RTDBPropertySet;
+using GCF::RTDB::DPservice;
+
+
+// GCF task that polls the state of the BlueGene partition through the
+// BlueGene front-end node and writes the resulting object state for the
+// IO nodes of the partition.
+class BlueGeneMonitor : public GCFTask
+{
+public:
+	// cntlrName is passed to GCFTask as the name of the task.
+	explicit BlueGeneMonitor(const string& cntlrName);
+	~BlueGeneMonitor();
+
+private:
+	// During the initial state all connections with the other programs are made.
+	GCFEvent::TResult initial_state (GCFEvent& e, GCFPortInterface& p);
+	// Asks the front-end node for the partition state and publishes changes.
+	GCFEvent::TResult getBlueGeneState (GCFEvent& e, GCFPortInterface& p);
+	// Waits until the next poll moment and then returns to getBlueGeneState.
+	GCFEvent::TResult waitForNextCycle (GCFEvent& e, GCFPortInterface& p);
+	// Final state, entered on F_QUIT or when the front-end name is unresolvable.
+	GCFEvent::TResult finish_state (GCFEvent& e, GCFPortInterface& p);
+
+	// Constructs the datapoint name of IO node 'nodeNr'.
+	string _IOnodeName(int nodeNr);
+
+	// avoid defaultconstruction and copying
+	BlueGeneMonitor();
+	BlueGeneMonitor(const BlueGeneMonitor&);
+	BlueGeneMonitor& operator=(const BlueGeneMonitor&);
+
+	// Data members
+	// Propertyset of the monitor itself, created in initial_state.
+	RTDBPropertySet* itsOwnPropertySet;
+
+	// Both are allocated in the constructor and released in the destructor.
+	GCFTimerPort* itsTimerPort;
+	DPservice* itsDPservice;
+
+	// Hostname of the front-end node (parameterset key 'BlueGeneFrontEnd').
+	string itsBlueGeneFrontEnd;
+	// Seconds between two polls; initialised to 60 by the constructor.
+	uint32 itsPollInterval;
+
+	// Object state determined by the previous poll, -1 before the first poll.
+	int itsLastBGPState;
+};
+
+	};//CEPCU
+};//LOFAR
+#endif
diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHWMonitor.dpl b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHWMonitor.dpl
new file mode 100644
index 0000000000000000000000000000000000000000..02ffad928cff6819c183caa676432c194dfc83de
--- /dev/null
+++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHWMonitor.dpl
@@ -0,0 +1,29 @@
+# DpType
+TypeName
+CEPHardwareMonitor.CEPHardwareMonitor 1#
+ status 41#:ObjectStatus
+ process 41#:ProcessStatus
+ BGP 1#
+ connected 23#
+ Cluster 1#
+ connected 23#
+
+DpName TypeName ID
+_mp_CEPHardwareMonitor CEPHardwareMonitor 0
+_dt_CEPHardwareMonitor _DynamicDatapoints 0
+
+ElementName TypeName _original.._value _original.._status
+_dt_CEPHardwareMonitor.Leaf _DynamicDatapoints "_mp_CEPHardwareMonitor.logMsg:_archive" 0x101
+_dt_CEPHardwareMonitor.DynamicAttribute _DynamicDatapoints "_da_none" 0x101
+
+StampSec StampMSec ElementName TypeName DetailNr _archive.._type _archive.._archive _archive.._class
+0 0 _mp_CEPHardwareMonitor.process.logMsg CEPHardwareMonitor 45 1
+0 0 _mp_CEPHardwareMonitor.process.logMsg CEPHardwareMonitor 1 15 _ValueArchive_2
+
+# Modify the default value for the leaf-field
+ElementName TypeName _original.._value _original.._status
+_mp_CEPHardwareMonitor.status.leaf CEPHardwareMonitor 1 0x101
+
+DpName 
TypeName ID +LOFAR_PermSW_HardwareMonitor CEPHardwareMonitor 2213 + diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitor.conf b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitor.conf new file mode 100644 index 0000000000000000000000000000000000000000..0eeac979916e798b8f281edafc82bd07ce7c9c19 --- /dev/null +++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitor.conf @@ -0,0 +1,12 @@ +# +# CEPHardwareMonitor.conf +# +WatchBlueGene = 1 +WatchCluster = 1 +closingDelay = 2.0 +BlueGeneFrontEnd = bgfen1 + +ClusterNameMask = locus%03d +ClusterNetwork = cep2.lofar +FirstClusterNode = 1 +LastClusterNode = 100 diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitorMain.cc b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitorMain.cc new file mode 100644 index 0000000000000000000000000000000000000000..64b50993a527ac17702aae0049bab4a7f29b0241 --- /dev/null +++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CEPHardwareMonitorMain.cc @@ -0,0 +1,84 @@ +//# HardwareMonitor.cc: Main entry for the HardwareMonitor. +//# +//# Copyright (C) 2011 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: HardwareMonitorMain.cc 14858 2010-01-22 09:14:52Z loose $
+//#
+#include <lofar_config.h>
+#include <Common/LofarLogger.h>
+#include <Common/Version.h>
+
+#include <Common/ParameterSet.h>
+#include "BlueGeneMonitor.h"
+#include "ClusterMonitor.h"
+#include <CEPCU/Package__Version.h>
+
+using namespace LOFAR;
+using namespace LOFAR::GCF;
+using namespace LOFAR::GCF::TM;
+using namespace LOFAR::CEPCU;
+
+//
+// main(argc, argv)
+//
+// Starts the monitor tasks that are switched on in the parameterset
+// (WatchBlueGene, WatchCluster) and runs the GCF scheduler until it is
+// stopped. Returns 0 after a normal run and 1 when no monitor task was
+// switched on.
+//
+int main(int argc, char* argv[])
+{
+	// args: cntlrname, parentHost, parentService
+	GCFScheduler::instance()->init(argc, argv, "CEPHardwareMonitor");
+
+	LOG_INFO("MACProcessScope: LOFAR_PermSW_HardwareMonitor");
+	LOG_INFO(Version::getInfo<CEPCUVersion>("CEPHardwareMonitor"));
+
+	// Create tasks and call initial routines
+	BlueGeneMonitor* bgm(0);
+	ClusterMonitor* ctm(0);
+
+	// monitor BLUEGENE?
+	if (globalParameterSet()->getUint32("WatchBlueGene",0)) {
+		bgm = new BlueGeneMonitor("BlueGeneMonitor");
+		bgm->start();
+		LOG_INFO("Monitoring the BlueGene");
+	}
+
+	// monitor CEP2Cluster?
+	if (globalParameterSet()->getUint32("WatchCluster",0)) {
+		ctm = new ClusterMonitor("ClusterMonitor");
+		ctm->start();
+		LOG_INFO("Monitoring the Cluster");
+	}
+
+	// sanity check
+	if (!bgm && !ctm) {
+		LOG_FATAL_STR("Non of the monitortask (WatchBlueGene, WatchCluster) "
+						"was switched on in the configfile, terminating program");
+		// A fatal configuration error must not be reported as success.
+		return (1);
+	}
+
+	// ok, we have something to do, do it.
+	GCFScheduler::instance()->setDelayedQuit(true); // we need a clean shutdown
+	GCFScheduler::instance()->run(); // until stop was called
+
+	if (bgm) {
+		bgm->quit(); // let task quit nicely
+	}
+	if (ctm) {
+		ctm->quit(); // let task quit nicely
+	}
+	// Give the tasks some time (key 'closingDelay', seconds) to finish.
+	double postRunTime = globalParameterSet()->getDouble("closingDelay", 1.5);
+	GCFScheduler::instance()->run(postRunTime); // let processes die.
+
+	return (0);
+}
diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/CMakeLists.txt b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..95605102d92626dd153f587025c910e32a74c9dc
--- /dev/null
+++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/CMakeLists.txt
@@ -0,0 +1,14 @@
+# $Id: CMakeLists.txt 18492 2011-07-14 21:07:51Z loose $
+
+lofar_add_bin_program(CEPHardwareMonitor
+ CEPHardwareMonitorMain.cc
+ BlueGeneMonitor.cc
+ ClusterMonitor.cc)
+
+install(FILES
+ CEPHardwareMonitor.conf
+ DESTINATION etc)
+
+install(PROGRAMS
+ zabbix_get
+ DESTINATION sbin)
diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.cc b/MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.cc
new file mode 100644
index 0000000000000000000000000000000000000000..20fffaa8bea52424587d96af6f05f636fb337737
--- /dev/null
+++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.cc
@@ -0,0 +1,363 @@
+//# ClusterMonitor.cc: Monitors if the nodes of the cluster are available
+//#
+//# Copyright (C) 2011
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: ClusterMonitor.cc 10505 2007-09-07 17:14:57Z overeem $
+#include <lofar_config.h>
+#include <Common/LofarLogger.h>
+#include <Common/LofarConstants.h>
+#include <Common/LofarLocators.h>
+#include <Common/lofar_vector.h>
+#include <Common/StringUtil.h>
+#include <Common/ParameterSet.h>
+
+#include <GCF/PVSS/GCF_PVTypes.h>
+#include <GCF/PVSS/PVSSinfo.h>
+#include <MACIO/MACServiceInfo.h>
+#include <APL/APLCommon/ControllerDefines.h>
+#include <APL/RTDBCommon/RTDButilities.h>
+#include <GCF/RTDB/DP_Protocol.ph>
+//#include <APL/APLCommon/StationInfo.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h> // popen, pclose, fread, sscanf
+#include <string.h> // strchr
+#include <unistd.h> // usleep
+#include <netdb.h> // gethostbyname
+
+#include "ClusterMonitor.h"
+#include "PVSSDatapointDefs.h"
+
+#define MAX_CLUSTER_NODE 100
+// Fully parenthesised so the macro can be used inside a larger expression.
+#define MAX2(a,b) (((a) > (b)) ? (a) : (b))
+
+namespace LOFAR {
+	using namespace APLCommon;
+	using namespace APL::RTDBCommon;
+	using namespace GCF::TM;
+	using namespace GCF::PVSS;
+	using namespace GCF::RTDB;
+	namespace CEPCU {
+
+//
+// ClusterMonitor()
+//
+// Creates the timer port and the DPservice and reads the node-name mask, the
+// network name and the range of node numbers from the global parameterset.
+// The poll interval is fixed at 60 seconds.
+//
+ClusterMonitor::ClusterMonitor(const string& cntlrName) :
+	GCFTask ((State)&ClusterMonitor::initial_state,cntlrName),
+	itsOwnPropertySet (0),
+	itsTimerPort (0),
+	itsDPservice (0),
+	itsPollInterval (60)
+{
+	LOG_TRACE_OBJ_STR (cntlrName << " construction");
+
+	// need port for timers.
+	itsTimerPort = new GCFTimerPort(*this, "ClusterTimerPort");
+
+	itsDPservice = new DPservice(this);
+	ASSERTSTR(itsDPservice, "Can't allocate DPservice");
+
+	itsClusterNameMask = globalParameterSet()->getString("ClusterNameMask", "locus%03d");
+	itsClusterNetwork = globalParameterSet()->getString("ClusterNetwork", "cep2.lofar");
+	itsFirstClusterNode = globalParameterSet()->getUint("FirstClusterNode", 1);
+	itsLastClusterNode = globalParameterSet()->getUint("LastClusterNode", MAX_CLUSTER_NODE);
+	ASSERTSTR(!itsClusterNameMask.empty(), "NameMask of Cluster not specified");
+	ASSERTSTR(!itsClusterNetwork.empty(), "Network name of Cluster not specified");
+	ASSERTSTR(itsLastClusterNode <= MAX_CLUSTER_NODE, "Supporting only " << MAX_CLUSTER_NODE << " nodes");
+	// The sizes of the state vector and of the read buffer depend on a sane range.
+	ASSERTSTR(itsFirstClusterNode >= 0 && itsFirstClusterNode <= itsLastClusterNode,
+				"FirstClusterNode (" << itsFirstClusterNode << ") must be in the range 0.." << itsLastClusterNode);
+
+	itsLastState.resize(itsLastClusterNode+1, -1);
+}
+
+
+//
+// ~ClusterMonitor()
+//
+// Releases the DPservice and the timer port.
+//
+ClusterMonitor::~ClusterMonitor()
+{
+	LOG_TRACE_OBJ_STR (getName() << " destruction");
+
+	if (itsDPservice) delete itsDPservice;
+
+	if (itsTimerPort) delete itsTimerPort;
+}
+
+
+//
+// initial_state(event, port)
+//
+// Setup connection with PVSS
+//
+// F_ENTRY creates the propertyset (guarded by a 2 second timer), DP_CREATED
+// replaces that timer by an immediate one and F_TIMER starts the first poll.
+//
+GCFEvent::TResult ClusterMonitor::initial_state(GCFEvent& event,
+													GCFPortInterface& port)
+{
+	LOG_DEBUG_STR ("initial:" << eventName(event) << "@" << port.getName());
+
+	switch (event.signal) {
+	case F_INIT:
+		break;
+
+	case F_ENTRY: {
+		// Get access to my own propertyset.
+		LOG_DEBUG_STR ("Activating PropertySet " << PSN_CEP_HARDWARE_MONITOR);
+		itsTimerPort->setTimer(2.0);
+		itsOwnPropertySet = new RTDBPropertySet(PSN_CEP_HARDWARE_MONITOR,
+												PST_CEP_HARDWARE_MONITOR,
+												PSAT_WO,
+												this);
+
+		}
+		break;
+
+	case DP_CREATED: {
+		// NOTE: this function may be called DURING the construction of the PropertySet.
+		// Always exit this event in a way that GCF can end the construction.
+		DPCreatedEvent dpEvent(event);
+		LOG_DEBUG_STR("Result of creating " << dpEvent.DPname << " = " << dpEvent.result);
+		itsTimerPort->cancelAllTimers();
+		itsTimerPort->setTimer(0.0);
+		}
+		break;
+
+	case F_TIMER: {
+		// PropertySet must exist by now
+		ASSERTSTR(itsOwnPropertySet, "Could not create the PVSS datapoint " << PSN_CEP_HARDWARE_MONITOR);
+
+		// update PVSS.
+		LOG_TRACE_FLOW ("Updateing state to PVSS");
+		itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION, GCFPVString("Cluster:initialising"));
+		itsOwnPropertySet->setValue(PN_CHM_CLUSTER_CONNECTED, GCFPVBool(false));
+
+		TRAN(ClusterMonitor::getClusterState); // do inital check
+	}
+	break;
+
+	case DP_SET:
+		break;
+
+	case F_QUIT:
+		TRAN (ClusterMonitor::finish_state);
+		break;
+
+	default:
+		LOG_DEBUG_STR ("initial, DEFAULT: " << eventName(event));
+		break;
+	}
+
+	return (GCFEvent::HANDLED);
+}
+
+
+
+//
+// getClusterState(event, port)
+//
+// Ask the information of the Cluster
+//
+// On F_ENTRY zabbix_get is run once for every node in the configured range and
+// the collected output is handed to _analyzeClusterResult. The state always
+// continues with waitForNextCycle.
+//
+GCFEvent::TResult ClusterMonitor::getClusterState(GCFEvent& event,
+													GCFPortInterface& port)
+{
+	if (eventName(event) != "DP_SET") {
+		LOG_DEBUG_STR ("getClusterState:" << eventName(event) << "@" << port.getName());
+	}
+
+	switch (event.signal) {
+	case F_ENTRY: {
+		itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION,GCFPVString("Cluster:requesting Cluster info"));
+		itsTimerPort->setTimer(15.0); // in case the answer never comes
+
+		string command(formatString("for i in `echo \".\" | awk '{ for (i=%d; i<=%d;i++) { printf \"%s.%s\\n\",i } }'`; do ../sbin/zabbix_get -s $i -k system.hostname ; done",
+							itsFirstClusterNode, itsLastClusterNode, itsClusterNameMask.c_str(), itsClusterNetwork.c_str()));
+		FILE* pipe(popen(command.c_str(), "r"));
+		if (!pipe) {
+			LOG_ERROR_STR ("Cluster:Unable to read pipe: " << command);
+			TRAN(ClusterMonitor::waitForNextCycle);
+			break;
+		}
+
+		// Reserve 256 bytes for the answer of each node. The range is inclusive,
+		// hence the +1 (the buffer was empty before when first == last).
+		int lineLength((itsLastClusterNode - itsFirstClusterNode + 1) * 256);
+		vector<char> line(lineLength);
+		line[0] = '\0';
+		size_t btsRead = fread(&line[0], 1, lineLength-1, pipe);
+		if (!btsRead) {
+			itsOwnPropertySet->setValue(PN_FSM_ERROR,GCFPVString("Cluster:pipe failure"));
+			LOG_WARN_STR("Could not determine the state of the clusternodes!");
+		}
+		else {
+			line[btsRead] = '\0';
+			_analyzeClusterResult(line, btsRead);
+		}
+		pclose(pipe); // streams from popen() must be closed with pclose()
+		TRAN(ClusterMonitor::waitForNextCycle); // go to next state.
+		break;
+	}
+
+	case DP_SET:
+	case F_EXIT:
+		break;
+
+	case F_QUIT:
+		TRAN (ClusterMonitor::finish_state);
+		break;
+
+	default:
+		LOG_DEBUG_STR ("getClusterState, DEFAULT: " << eventName(event));
+		break;
+	}
+
+	return (GCFEvent::HANDLED);
+}
+
+
+//
+// _analyzeClusterResult(result, length);
+//
+// Parses the output of the zabbix_get calls ('length' characters in 'result',
+// one answer per line) and updates itsLastState for every monitored node.
+// A node is online when a line matching itsClusterNameMask was received for it.
+//
+void ClusterMonitor::_analyzeClusterResult(vector<char> result, size_t length)
+{
+	vector<bool> online(itsLastClusterNode+1, false);
+
+	if (!result.empty()) {
+		// make sure we scan a properly terminated C-string.
+		if (length >= result.size()) {
+			length = result.size() - 1;
+		}
+		result[length] = '\0';
+
+		char* cursor = &result[0];
+		while (cursor && *cursor) {
+			// isolate the next line
+			char* eol = strchr(cursor, '\n');
+			if (eol) {
+				*eol = '\0';
+			}
+
+			// possible answers:
+			// locus999
+			// zabbix_get [4624]: Get value error: cannot connect to [[locus002.cep2.lofar]:10050]: [113] No route to host
+			if (*cursor) { // skip empty lines
+				int nodeNr = -1;
+				if (sscanf(cursor, itsClusterNameMask.c_str(), &nodeNr) != 1) {
+					LOG_INFO_STR("Received error: " << cursor);
+				}
+				else if (nodeNr < itsFirstClusterNode || nodeNr > itsLastClusterNode) {
+					LOG_WARN_STR("Received info about node " << nodeNr << " which is not in my monitor range!");
+				}
+				else {
+					online[nodeNr] = true;
+				}
+			}
+			cursor = eol ? eol + 1 : 0;
+		}
+	}
+
+	// Finally update the statusfields of all the nodes
+	// NOTE(review): unlike BlueGeneMonitor the new state is only logged and
+	// remembered, it is not written with setObjectState() - confirm this is intended.
+	for (int i = itsFirstClusterNode; i <= itsLastClusterNode; i++) {
+		// a node that answered is operational (this mapping was inverted before).
+		int newState = online[i] ? RTDB_OBJ_STATE_OPERATIONAL : RTDB_OBJ_STATE_BROKEN;
+		if (itsLastState[i] != newState) {
+			LOG_INFO_STR("Node " << _clusterNodeName(i) << ": " << (online[i] ? "ON" : "OFF"));
+			itsLastState[i] = newState;
+		}
+	}
+}
+
+//
+// _clusterNodeName(nodeNr)
+//
+// Returns the datapoint name of the given node by filling the PSN_LOCUS_NODE
+// mask with (rack, nodeNr). Nodes beyond the last table entry are assigned to
+// the last rack.
+//
+string ClusterMonitor::_clusterNodeName(int nodeNr)
+{
+	// NOTE: THIS IS INSIDE INFORMATION ABOUT THE SETUP OF THE CLUSTER!!!
+	static const int rackMax[] = {12, 24, 36, 48, 52, 64, 76, 88, 100 };
+	// sizeof(rackMax) is a byte count; the loop needs the number of elements.
+	const uint nrRacks = sizeof(rackMax) / sizeof(rackMax[0]);
+	uint rack;
+	for (rack = 0; rack < nrRacks - 1; rack++) {
+		if (nodeNr <= rackMax[rack])
+			break;
+	}
+
+	string locusNodeMask (createPropertySetName(PSN_LOCUS_NODE, ""));
+	return (formatString(locusNodeMask.c_str(), rack, nodeNr));
+}
+
+
+//
+// waitForNextCycle(event, port)
+//
+// Wait for our next cycle.
+//
+// On F_ENTRY a timer is set that expires on the next multiple of
+// itsPollInterval seconds; when it fires the error field is cleared and
+// the cluster is polled again.
+//
+GCFEvent::TResult ClusterMonitor::waitForNextCycle(GCFEvent& event,
+													GCFPortInterface& port)
+{
+	if (eventName(event) != "DP_SET") {
+		LOG_DEBUG_STR ("waitForNextCycle:" << eventName(event) << "@" << port.getName());
+	}
+
+	switch (event.signal) {
+	case F_ENTRY: {
+		itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION,GCFPVString("Cluster:wait for next cycle"));
+		// The remainder lies in [0, itsPollInterval-1], so waitTime is always in
+		// [1, itsPollInterval]; a separate check for 0 is not needed.
+		int waitTime = itsPollInterval - (time(0) % itsPollInterval);
+		itsTimerPort->cancelAllTimers();
+		itsTimerPort->setTimer(double(waitTime));
+		LOG_INFO_STR("Cluster:Waiting " << waitTime << " seconds for next cycle");
+	}
+	break;
+
+	case F_TIMER: {
+		itsOwnPropertySet->setValue(string(PN_FSM_ERROR),GCFPVString(""));
+		TRAN(ClusterMonitor::getClusterState);
+	}
+	break;
+
+	case DP_SET:
+	case F_EXIT:
+		break;
+
+	case F_QUIT:
+		TRAN (ClusterMonitor::finish_state);
+		break;
+
+	default:
+		LOG_DEBUG_STR ("waitForNextCycle, DEFAULT: " << eventName(event));
+		break;
+	}
+
+	return (GCFEvent::HANDLED);
+}
+
+
+//
+// finish_state(event, port)
+//
+// Write controller state to PVSS
+//
+// Final state: no transitions leave this state.
+//
+GCFEvent::TResult ClusterMonitor::finish_state(GCFEvent& event, GCFPortInterface& port)
+{
+	LOG_DEBUG_STR ("finish_state:" << eventName(event) << "@" << port.getName());
+
+	switch (event.signal) {
+	case F_ENTRY: {
+		// update PVSS
+		itsOwnPropertySet->setValue(string(PN_FSM_CURRENT_ACTION),GCFPVString("Cluster:finished"));
+		break;
+	}
+
+	case DP_SET:
+	case F_EXIT:
+		break;
+
+	default:
+		LOG_DEBUG("finishing_state, DEFAULT");
+		break;
+	}
+	return (GCFEvent::HANDLED);
+}
+
+
+}; // CEPCU
+}; // LOFAR
diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.h b/MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.h
new file mode 100644
index 0000000000000000000000000000000000000000..f4f91f07af9d165bb686cda293b7f051589bab79
--- /dev/null
+++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/ClusterMonitor.h
@@ -0,0 +1,89 @@
+//# ClusterMonitor.h: Monitors the Cluster hardware.
+//#
+//# Copyright (C) 2006
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: ClusterMonitor.h 10461 2007-08-23 22:44:03Z overeem $
+
+#ifndef CEPCU_CLUSTER_MONITOR_H
+#define CEPCU_CLUSTER_MONITOR_H
+
+//# Common Includes
+#include <blitz/array.h>
+#include <Common/lofar_string.h>
+#include <Common/lofar_vector.h>
+#include <Common/LofarConstants.h>
+
+//# GCF Includes
+#include <GCF/TM/GCF_Control.h>
+#include <GCF/RTDB/RTDB_PropertySet.h>
+#include <GCF/RTDB/DPservice.h>
+
+// forward declaration
+
+namespace LOFAR {
+	namespace CEPCU {
+
+using MACIO::GCFEvent;
+using GCF::TM::GCFPortInterface;
+using GCF::TM::GCFTimerPort;
+using GCF::TM::GCFTCPPort;
+using GCF::TM::GCFTask;
+using GCF::RTDB::RTDBPropertySet;
+using GCF::RTDB::DPservice;
+
+
+// ClusterMonitor: GCF task that monitors the cluster hardware.
+// The task is a state machine; each state is one of the member functions
+// below that take an event and the port it arrived on. In waitForNextCycle
+// the task waits for the next poll moment and then moves to getClusterState;
+// an F_QUIT event moves it to finish_state.
+class ClusterMonitor : public GCFTask
+{
+public:
+	// cntlrName: presumably the name this task registers under - TODO confirm
+	// against the constructor in ClusterMonitor.cc.
+	explicit ClusterMonitor(const string& cntlrName);
+	~ClusterMonitor();
+
+private:
+	// During the initial state all connections with the other programs are made.
+	GCFEvent::TResult initial_state    (GCFEvent& e, GCFPortInterface& p);
+	// State entered from waitForNextCycle each time the poll timer expires.
+	GCFEvent::TResult getClusterState  (GCFEvent& e, GCFPortInterface& p);
+	// Sets a timer to the next multiple of itsPollInterval seconds and waits for it.
+	GCFEvent::TResult waitForNextCycle (GCFEvent& e, GCFPortInterface& p);
+	// Final state: reports "Cluster:finished" as current action in our own property set.
+	GCFEvent::TResult finish_state     (GCFEvent& e, GCFPortInterface& p);
+
+	// Processes the raw poll result and records the new state per node in itsLastState.
+	// NOTE(review): 'result' is passed by value, so the buffer is copied on every call.
+	void	_analyzeClusterResult(vector<char>	result, size_t	length);
+	// Returns the property-set name (rack and node number filled in) of node 'nodeNr'.
+	string	_clusterNodeName(int nodeNr);
+
+	// avoid defaultconstruction and copying
+	ClusterMonitor();
+	ClusterMonitor(const ClusterMonitor&);
+	ClusterMonitor& operator=(const ClusterMonitor&);
+
+	// Data members
+	// Our own property set; the process fields (current action, error) are written to it.
+	RTDBPropertySet*	itsOwnPropertySet;
+
+	// Timer used to schedule the next poll cycle.
+	GCFTimerPort*		itsTimerPort;
+	// NOTE(review): usage not visible in this part of the sources; presumably used
+	// to update the datapoints of the individual nodes - confirm in ClusterMonitor.cc.
+	DPservice*			itsDPservice;
+
+	// NOTE(review): the four settings below are presumably read from the
+	// configuration in the constructor - confirm in ClusterMonitor.cc.
+	string				itsClusterNameMask;
+	string				itsClusterNetwork;
+	int					itsFirstClusterNode;
+	int					itsLastClusterNode;
+
+	// Time between two polls, in seconds.
+	uint32				itsPollInterval;
+	// Last state recorded for each node; used to detect state changes.
+	vector<int>			itsLastState;
+};
+
+	};//CEPCU
+};//LOFAR
+#endif
diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/PVSSDatapointDefs.h b/MAC/APL/CEPCU/src/CEPHardwareMonitor/PVSSDatapointDefs.h
new file mode 100644
index 0000000000000000000000000000000000000000..7eb0c5978bf6301fdb9f1d1ae754ba5843fe9672
--- /dev/null
+++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/PVSSDatapointDefs.h
@@ -0,0 +1,393 @@
+// This file was generated by create_db_files v2.0 on Fri Dec 9 13:04:01 UTC 2011
+
+#ifndef LOFAR_DEPLOYMENT_PVSSDATAPOINTS_H
+#define LOFAR_DEPLOYMENT_PVSSDATAPOINTS_H
+// process
+#define PN_FSM_PROCESSID "process.processID"
+#define PN_FSM_START_TIME "process.startTime"
+#define PN_FSM_STOP_TIME "process.stopTime"
+#define PN_FSM_LOG_MSG "process.logMsg"
+#define PN_FSM_ERROR "process.error"
+#define PN_FSM_CURRENT_ACTION "process.currentAction"
+// object
+#define PN_OBJ_STATE "object.state"
+#define PN_OBJ_CHILD_STATE "object.childState"
+#define PN_OBJ_MESSAGE "object.message"
+#define PN_OBJ_LEAF "object.leaf"
+
+// Station
+#define PSN_STATION "LOFAR_PIC_@ring@_@station@"
+#define PST_STATION "Station"
+#define PN_STS_POWER48_ON "power48On"
+#define PN_STS_POWER220_ON
"power220On" + +// Station +#define PSN_STATION "LOFAR_PermSW_@ring@_@station@" +#define PST_STATION "Station" +#define PN_STS_POWER48_ON "power48On" +#define PN_STS_POWER220_ON "power220On" + +// MACScheduler +#define PSN_MAC_SCHEDULER "LOFAR_PermSW_MACScheduler" +#define PST_MAC_SCHEDULER "MACScheduler" +#define PN_MS_PLANNED_OBSERVATIONS "plannedObservations" +#define PN_MS_ACTIVE_OBSERVATIONS "activeObservations" +#define PN_MS_FINISHED_OBSERVATIONS "finishedObservations" +#define PN_MS_OTDB_CONNECTED "OTDB.connected" +#define PN_MS_OTDB_LAST_POLL "OTDB.lastPoll" +#define PN_MS_OTDB_POLLINTERVAL "OTDB.pollinterval" + +// Observation +#define PSN_OBSERVATION "LOFAR_ObsSW_@observation@" +#define PST_OBSERVATION "Observation" +#define PN_OBS_CLAIM_CLAIM_DATE "claim.claimDate" +#define PN_OBS_CLAIM_NAME "claim.name" +#define PN_OBS_CLAIM_PERIOD "claimPeriod" +#define PN_OBS_PREPARE_PERIOD "preparePeriod" +#define PN_OBS_START_TIME "startTime" +#define PN_OBS_STOP_TIME "stopTime" +#define PN_OBS_BAND_FILTER "bandFilter" +#define PN_OBS_NYQUISTZONE "nyquistzone" +#define PN_OBS_ANTENNA_ARRAY "antennaArray" +#define PN_OBS_RECEIVER_LIST "receiverList" +#define PN_OBS_SAMPLE_CLOCK "sampleClock" +#define PN_OBS_RUN_STATE "runState" +#define PN_OBS_MEASUREMENT_SET "measurementSet" +#define PN_OBS_STATION_LIST "stationList" +#define PN_OBS_INPUT_NODE_LIST "inputNodeList" +#define PN_OBS_BGL_NODE_LIST "BGLNodeList" +#define PN_OBS_STORAGE_NODE_LIST "storageNodeList" +#define PN_OBS_BEAMS_ANGLE1 "Beams.angle1" +#define PN_OBS_BEAMS_ANGLE2 "Beams.angle2" +#define PN_OBS_BEAMS_DIRECTION_TYPE "Beams.directionType" +#define PN_OBS_BEAMS_BEAMLET_LIST "Beams.beamletList" +#define PN_OBS_BEAMS_SUBBAND_LIST "Beams.subbandList" + +// ObservationControl +#define PSN_OBSERVATION_CONTROL "LOFAR_ObsSW_@observation@_ObservationControl" +#define PST_OBSERVATION_CONTROL "ObservationControl" + +// InputBuffer +#define PSN_INPUT_BUFFER "LOFAR_PermSW_@psionode@_InputBuffer" +#define 
PST_INPUT_BUFFER "InputBuffer" +#define PN_IPB_STATION_NAME "stationName" +#define PN_IPB_OBSERVATION_NAME "observationName" +#define PN_IPB_STREAM0_BLOCKS_IN "stream0.blocksIn" +#define PN_IPB_STREAM0_PERC_BAD "stream0.percBad" +#define PN_IPB_STREAM0_REJECTED "stream0.rejected" +#define PN_IPB_STREAM1_BLOCKS_IN "stream1.blocksIn" +#define PN_IPB_STREAM1_PERC_BAD "stream1.percBad" +#define PN_IPB_STREAM1_REJECTED "stream1.rejected" +#define PN_IPB_STREAM2_BLOCKS_IN "stream2.blocksIn" +#define PN_IPB_STREAM2_PERC_BAD "stream2.percBad" +#define PN_IPB_STREAM2_REJECTED "stream2.rejected" +#define PN_IPB_STREAM3_BLOCKS_IN "stream3.blocksIn" +#define PN_IPB_STREAM3_PERC_BAD "stream3.percBad" +#define PN_IPB_STREAM3_REJECTED "stream3.rejected" +#define PN_IPB_LATE "late" +#define PN_IPB_IO_TIME "IOTime" + +// Adder +#define PSN_ADDER "LOFAR_ObsSW_@osionode@_@adder@" +#define PST_ADDER "Adder" +#define PN_ADD_DROPPING "dropping" +#define PN_ADD_DROPPED "dropped" +#define PN_ADD_DATA_PRODUCT_TYPE "dataProductType" +#define PN_ADD_FILE_NAME "fileName" +#define PN_ADD_LOCUS_NODE "locusNode" +#define PN_ADD_DIRECTORY "directory" +#define PN_ADD_OBSERVATION_NAME "observationName" + +// Writer +#define PSN_WRITER "LOFAR_ObsSW_@oslocusnode@_@writer@" +#define PST_WRITER "Writer" +#define PN_WTR_TIME "time" +#define PN_WTR_COUNT "count" +#define PN_WTR_DROPPED "dropped" +#define PN_WTR_FILE_NAME "fileName" +#define PN_WTR_DATA_RATE "dataRate" +#define PN_WTR_DATA_PRODUKT_TYPE "dataProduktType" +#define PN_WTR_OBSERVATION_NAME "observationName" + +// IONode +#define PSN_IO_NODE "LOFAR_PIC_BGP_@midplane@_@ionode@" +#define PST_IO_NODE "IONode" +#define PN_ION_STATION0 "station0" +#define PN_ION_IP0 "IP0" +#define PN_ION_MAC0 "MAC0" +#define PN_ION_STATION1 "station1" +#define PN_ION_IP1 "IP1" +#define PN_ION_MAC1 "MAC1" +#define PN_ION_USE2ND_STATION "use2ndStation" +#define PN_ION_USED_STATION "usedStation" +#define PN_ION_USEDIP "usedIP" +#define PN_ION_USEDMAC "usedMAC" + +// 
LocusNode +#define PSN_LOCUS_NODE "LOFAR_PIC_@osrack@_@locusnode@" +#define PST_LOCUS_NODE "LocusNode" +#define PN_LCN_FREE "free" +#define PN_LCN_TOTAL "total" +#define PN_LCN_CLAIMED "claimed" + +// CEPHardwareMonitor +#define PSN_CEP_HARDWARE_MONITOR "LOFAR_PermSW_CEPHardwareMonitor" +#define PST_CEP_HARDWARE_MONITOR "CEPHardwareMonitor" +#define PN_CHM_BGP_CONNECTED "BGP.connected" +#define PN_CHM_CLUSTER_CONNECTED "Cluster.connected" + +// Cabinet +#define PSN_CABINET "LOFAR_PIC_@cabinet@" +#define PST_CABINET "Cabinet" +#define PN_CAB_FRONT_DOOR_OPEN "frontDoorOpen" +#define PN_CAB_FRONT_FAN_INNER "frontFanInner" +#define PN_CAB_FRONT_FAN_OUTER "frontFanOuter" +#define PN_CAB_FRONT_AIRFLOW "frontAirflow" +#define PN_CAB_BACK_DOOR_OPEN "backDoorOpen" +#define PN_CAB_BACK_FAN_INNER "backFanInner" +#define PN_CAB_BACK_FAN_OUTER "backFanOuter" +#define PN_CAB_BACK_AIRFLOW "backAirflow" +#define PN_CAB_TEMP_ALARM "tempAlarm" +#define PN_CAB_HUMIDITY_ALARM "humidityAlarm" +#define PN_CAB_TEMPERATURE "temperature" +#define PN_CAB_TEMP_MIN "tempMin" +#define PN_CAB_TEMP_MAX "tempMax" +#define PN_CAB_TEMP_MAX_MAX "tempMaxMax" +#define PN_CAB_HUMIDITY "humidity" +#define PN_CAB_HUMIDITY_MAX "humidityMax" +#define PN_CAB_HUMIDITY_MAX_MAX "humidityMaxMax" +#define PN_CAB_CONTROL_MODE "controlMode" +#define PN_CAB_TEMPERATURE_SENSOR "temperatureSensor" +#define PN_CAB_HUMIDITY_CONTROL "humidityControl" +#define PN_CAB_DOOR_CONTROL "doorControl" + +// SubRack +#define PSN_SUB_RACK "LOFAR_PIC_@cabinet@_@subrack@" +#define PST_SUB_RACK "SubRack" +#define PN_SRCK_SPU_STATUS_STATE "SPU.status.state" +#define PN_SRCK_SPU_STATUS_CHILD_STATE "SPU.status.childState" +#define PN_SRCK_SPU_STATUS_MESSAGE "SPU.status.message" +#define PN_SRCK_SPU_STATUS_LEAF "SPU.status.leaf" +#define PN_SRCK_SPU__VHBA "SPU.Vhba" +#define PN_SRCK_SPU__VLBA "SPU.Vlba" +#define PN_SRCK_SPU__VDIG "SPU.Vdig" +#define PN_SRCK_SPU_TEMPERATURE "SPU.temperature" +#define PN_SRCK_CLOCK_BOARD_STATUS_STATE 
"clockBoard.status.state" +#define PN_SRCK_CLOCK_BOARD_STATUS_CHILD_STATE "clockBoard.status.childState" +#define PN_SRCK_CLOCK_BOARD_STATUS_MESSAGE "clockBoard.status.message" +#define PN_SRCK_CLOCK_BOARD_STATUS_LEAF "clockBoard.status.leaf" +#define PN_SRCK_CLOCK_BOARD__VFSP "clockBoard.Vfsp" +#define PN_SRCK_CLOCK_BOARD__VCLOCK "clockBoard.Vclock" +#define PN_SRCK_CLOCK_BOARD_VERSION "clockBoard.version" +#define PN_SRCK_CLOCK_BOARD_FREQ "clockBoard.freq" +#define PN_SRCK_CLOCK_BOARD_LOCK160 "clockBoard.lock160" +#define PN_SRCK_CLOCK_BOARD_LOCK200 "clockBoard.lock200" +#define PN_SRCK_CLOCK_BOARD_TEMPERATURE "clockBoard.temperature" + +// RSPBoard +#define PSN_RSP_BOARD "LOFAR_PIC_@cabinet@_@subrack@_@RSPBoard@" +#define PST_RSP_BOARD "RSPBoard" +#define PN_RSP_VOLTAGE12 "voltage12" +#define PN_RSP_VOLTAGE25 "voltage25" +#define PN_RSP_VOLTAGE33 "voltage33" +#define PN_RSP_VERSION "version" +#define PN_RSP_ALERT "alert" +#define PN_RSP_SPLITTER_ON "splitterOn" +#define PN_RSP__ETHERNET_STATUS_STATE "Ethernet.status.state" +#define PN_RSP__ETHERNET_STATUS_CHILD_STATE "Ethernet.status.childState" +#define PN_RSP__ETHERNET_STATUS_MESSAGE "Ethernet.status.message" +#define PN_RSP__ETHERNET_STATUS_LEAF "Ethernet.status.leaf" +#define PN_RSP_ETHERNET_PACKETS_RECEIVED "Ethernet.packetsReceived" +#define PN_RSP_ETHERNET_PACKETS_ERROR "Ethernet.packetsError" +#define PN_RSP_ETHERNET_LAST_ERROR "Ethernet.lastError" +#define PN_RSP_MEP_SEQNR "MEP.seqnr" +#define PN_RSP_MEP_ERROR "MEP.error" +#define PN_RSP_BP_STATUS_STATE "BP.status.state" +#define PN_RSP_BP_STATUS_CHILD_STATE "BP.status.childState" +#define PN_RSP_BP_STATUS_MESSAGE "BP.status.message" +#define PN_RSP_BP_STATUS_LEAF "BP.status.leaf" +#define PN_RSP_BP_TEMPERATURE "BP.temperature" +#define PN_RSP_BP_VERSION "BP.version" +#define PN_RSP_AP0_STATUS_STATE "AP0.status.state" +#define PN_RSP_AP0_STATUS_CHILD_STATE "AP0.status.childState" +#define PN_RSP_AP0_STATUS_MESSAGE "AP0.status.message" +#define 
PN_RSP_AP0_STATUS_LEAF "AP0.status.leaf" +#define PN_RSP_AP0_TEMPERATURE "AP0.temperature" +#define PN_RSP_AP0_VERSION "AP0.version" +#define PN_RSP_AP0_SYNC_SAMPLE_COUNT "AP0.SYNC.sampleCount" +#define PN_RSP_AP0_SYNC_SYNC_COUNT "AP0.SYNC.syncCount" +#define PN_RSP_AP0_SYNC_ERROR_COUNT "AP0.SYNC.errorCount" +#define PN_RSP_AP1_STATUS_STATE "AP1.status.state" +#define PN_RSP_AP1_STATUS_CHILD_STATE "AP1.status.childState" +#define PN_RSP_AP1_STATUS_MESSAGE "AP1.status.message" +#define PN_RSP_AP1_STATUS_LEAF "AP1.status.leaf" +#define PN_RSP_AP1_TEMPERATURE "AP1.temperature" +#define PN_RSP_AP1_VERSION "AP1.version" +#define PN_RSP_AP1_SYNC_SAMPLE_COUNT "AP1.SYNC.sampleCount" +#define PN_RSP_AP1_SYNC_SYNC_COUNT "AP1.SYNC.syncCount" +#define PN_RSP_AP1_SYNC_ERROR_COUNT "AP1.SYNC.errorCount" +#define PN_RSP_AP2_STATUS_STATE "AP2.status.state" +#define PN_RSP_AP2_STATUS_CHILD_STATE "AP2.status.childState" +#define PN_RSP_AP2_STATUS_MESSAGE "AP2.status.message" +#define PN_RSP_AP2_STATUS_LEAF "AP2.status.leaf" +#define PN_RSP_AP2_TEMPERATURE "AP2.temperature" +#define PN_RSP_AP2_VERSION "AP2.version" +#define PN_RSP_AP2_SYNC_SAMPLE_COUNT "AP2.SYNC.sampleCount" +#define PN_RSP_AP2_SYNC_SYNC_COUNT "AP2.SYNC.syncCount" +#define PN_RSP_AP2_SYNC_ERROR_COUNT "AP2.SYNC.errorCount" +#define PN_RSP_AP3_STATUS_STATE "AP3.status.state" +#define PN_RSP_AP3_STATUS_CHILD_STATE "AP3.status.childState" +#define PN_RSP_AP3_STATUS_MESSAGE "AP3.status.message" +#define PN_RSP_AP3_STATUS_LEAF "AP3.status.leaf" +#define PN_RSP_AP3_TEMPERATURE "AP3.temperature" +#define PN_RSP_AP3_VERSION "AP3.version" +#define PN_RSP_AP3_SYNC_SAMPLE_COUNT "AP3.SYNC.sampleCount" +#define PN_RSP_AP3_SYNC_SYNC_COUNT "AP3.SYNC.syncCount" +#define PN_RSP_AP3_SYNC_ERROR_COUNT "AP3.SYNC.errorCount" + +// RCU +#define PSN_RCU "LOFAR_PIC_@cabinet@_@subrack@_@RSPBoard@_@rcu@" +#define PST_RCU "RCU" +#define PN_RCU_DELAY "Delay" +#define PN_RCU_INPUT_ENABLE "InputEnable" +#define PN_RCU_LBL_ENABLE "LBLEnable" +#define 
PN_RCU_LBH_ENABLE "LBHEnable" +#define PN_RCU_HBA_ENABLE "HBAEnable" +#define PN_RCU_BAND_SEL_LBA_HBA "bandSelLbaHba" +#define PN_RCU_HBA_FILTER_SEL "HBAFilterSel" +#define PN_RCU_VL_ENABLE "VlEnable" +#define PN_RCU_VH_ENABLE "VhEnable" +#define PN_RCU_VDD_VCC_ENABLE "VddVccEnable" +#define PN_RCU_BAND_SEL_LBL_LBH "bandSelLblLbh" +#define PN_RCU_LBA_FILTER_SEL "LBAFilterSel" +#define PN_RCU_ATTENUATION "Attenuation" +#define PN_RCU_NOF_OVERFLOW "nofOverflow" +#define PN_RCU_ADC_STATISTICS_OVERFLOW "ADCStatistics.overflow" +#define PN_RCU_TBB_ERROR "TBB.error" +#define PN_RCU_TBB_MODE "TBB.mode" +#define PN_RCU_TBB_START_ADDR "TBB.startAddr" +#define PN_RCU_TBB_BUF_SIZE "TBB.bufSize" +#define PN_RCU_TRIGGER_STARTLEVEL "Trigger.startlevel" +#define PN_RCU_TRIGGER_BASELEVEL "Trigger.baselevel" +#define PN_RCU_TRIGGER_STOPLEVEL "Trigger.stoplevel" +#define PN_RCU_TRIGGER_FILTER "Trigger.filter" +#define PN_RCU_TRIGGER_WINDOW "Trigger.window" +#define PN_RCU_TRIGGER_OPERATING_MODE "Trigger.operatingMode" +#define PN_RCU_TRIGGER_TRIGGER_MODE "Trigger.triggerMode" +#define PN_RCU_TRIGGER_FILTER0_COEFF0 "Trigger.filter0.coeff0" +#define PN_RCU_TRIGGER_FILTER0_COEFF1 "Trigger.filter0.coeff1" +#define PN_RCU_TRIGGER_FILTER0_COEFF2 "Trigger.filter0.coeff2" +#define PN_RCU_TRIGGER_FILTER0_COEFF3 "Trigger.filter0.coeff3" +#define PN_RCU_TRIGGER_FILTER1_COEFF0 "Trigger.filter1.coeff0" +#define PN_RCU_TRIGGER_FILTER1_COEFF1 "Trigger.filter1.coeff1" +#define PN_RCU_TRIGGER_FILTER1_COEFF2 "Trigger.filter1.coeff2" +#define PN_RCU_TRIGGER_FILTER1_COEFF3 "Trigger.filter1.coeff3" + +// TBBoard +#define PSN_TB_BOARD "LOFAR_PIC_@cabinet@_@subrack@_@TBBoard@" +#define PST_TB_BOARD "TBBoard" +#define PN_TBB_BOARDID "boardID" +#define PN_TBB_RAM_SIZE "RAMSize" +#define PN_TBB_SW_VERSION "SWVersion" +#define PN_TBB_BOARD_VERSION "boardVersion" +#define PN_TBB_TP_VERSION "TPVersion" +#define PN_TBB_MP0_VERSION "MP0Version" +#define PN_TBB_MP1_VERSION "MP1Version" +#define PN_TBB_MP2_VERSION 
"MP2Version" +#define PN_TBB_MP3_VERSION "MP3Version" +#define PN_TBB_VOLTAGE12 "voltage12" +#define PN_TBB_VOLTAGE25 "voltage25" +#define PN_TBB_VOLTAGE33 "voltage33" +#define PN_TBB_TEMPPCB "tempPCB" +#define PN_TBB_TEMPTP "tempTP" +#define PN_TBB_TEMPMP0 "tempMP0" +#define PN_TBB_TEMPMP1 "tempMP1" +#define PN_TBB_TEMPMP2 "tempMP2" +#define PN_TBB_TEMPMP3 "tempMP3" +#define PN_TBB_IMAGE_INFO_VERSION "imageInfo.version" +#define PN_TBB_IMAGE_INFO_WRITE_DATE "imageInfo.writeDate" +#define PN_TBB_IMAGE_INFO_TP_FILE "imageInfo.TPFile" +#define PN_TBB_IMAGE_INFO_MP_FILE "imageInfo.MPFile" + +// LBAAntenna +#define PSN_LBA_ANTENNA "LOFAR_PIC_@lbaantenna@" +#define PST_LBA_ANTENNA "LBAAntenna" + +// HBAAntenna +#define PSN_HBA_ANTENNA "LOFAR_PIC_@hbaantenna@" +#define PST_HBA_ANTENNA "HBAAntenna" + +// LogProcessor +#define PSN_LOG_PROCESSOR "LOFAR_PermSW_Daemons_LogProcessor" +#define PST_LOG_PROCESSOR "LogProcessor" + +// SASGateway +#define PSN_SAS_GATEWAY "LOFAR_PermSW_Daemons_SASGateway" +#define PST_SAS_GATEWAY "SASGateway" + +// SoftwareMonitor +#define PSN_SOFTWARE_MONITOR "LOFAR_PermSW_SoftwareMonitor" +#define PST_SOFTWARE_MONITOR "SoftwareMonitor" + +// SHMInfoServer +#define PSN_SHM_INFO_SERVER "LOFAR_PermSW_SHMInfoServer" +#define PST_SHM_INFO_SERVER "SHMInfoServer" + +// StationControl +#define PSN_STATION_CONTROL "LOFAR_PermSW_StationControl" +#define PST_STATION_CONTROL "StationControl" +#define PN_SC_ACTIVE_OBSERVATIONS "activeObservations" + +// ClockControl +#define PSN_CLOCK_CONTROL "LOFAR_PermSW_ClockControl" +#define PST_CLOCK_CONTROL "ClockControl" +#define PN_CLC_CONNECTED "connected" +#define PN_CLC_REQUESTED_CLOCK "requestedClock" +#define PN_CLC_ACTUAL_CLOCK "actualClock" + +// StnObservation +#define PSN_STN_OBSERVATION "LOFAR_ObsSW_@observation@" +#define PST_STN_OBSERVATION "StnObservation" +#define PN_OBS_NAME "name" +#define PN_OBS_CLAIM_CLAIM_DATE "claim.claimDate" +#define PN_OBS_CLAIM_NAME "claim.name" +#define PN_OBS_RECEIVER_BITMAP 
"receiverBitmap" + +// BeamControl +#define PSN_BEAM_CONTROL "LOFAR_ObsSW_@observation@_BeamControl" +#define PST_BEAM_CONTROL "BeamControl" +#define PN_BC_CONNECTED "connected" +#define PN_BC_SUB_ARRAY "subArray" +#define PN_BC_SUBBAND_LIST "subbandList" +#define PN_BC_BEAMLET_LIST "beamletList" +#define PN_BC_ANGLE1 "angle1" +#define PN_BC_ANGLE2 "angle2" +#define PN_BC_DIRECTION_TYPE "directionType" +#define PN_BC_BEAM_NAME "beamName" + +// CalibrationControl +#define PSN_CALIBRATION_CONTROL "LOFAR_ObsSW_@observation@_CalibrationControl" +#define PST_CALIBRATION_CONTROL "CalibrationControl" +#define PN_CC_CONNECTED "connected" +#define PN_CC_BEAM_NAMES "beamNames" +#define PN_CC_ANTENNA_ARRAY "antennaArray" +#define PN_CC_FILTER "filter" +#define PN_CC_NYQUISTZONE "nyquistzone" +#define PN_CC_RCUS "rcus" + +// TBBControl +#define PSN_TBB_CONTROL "LOFAR_ObsSW_@observation@_TBBControl" +#define PST_TBB_CONTROL "TBBControl" +#define PN_TBC_CONNECTED "connected" +#define PN_TBC_TRIGGER_RCU_NR "trigger.rcuNr" +#define PN_TBC_TRIGGER_SEQUENCE_NR "trigger.sequenceNr" +#define PN_TBC_TRIGGER_TIME "trigger.time" +#define PN_TBC_TRIGGER_SAMPLE_NR "trigger.sampleNr" +#define PN_TBC_TRIGGER_SUM "trigger.sum" +#define PN_TBC_TRIGGER_NR_SAMPLES "trigger.nrSamples" +#define PN_TBC_TRIGGER_PEAK_VALUE "trigger.peakValue" +#define PN_TBC_TRIGGER_FLAGS "trigger.flags" +#define PN_TBC_TRIGGER_TABLE "trigger.table" +#define PN_TBC_TRIGGER_MISSED "trigger.missed" + +#endif diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/README b/MAC/APL/CEPCU/src/CEPHardwareMonitor/README new file mode 100644 index 0000000000000000000000000000000000000000..546cc88ca7d39be2d4a3c36ed499bb1d9ca31ec3 --- /dev/null +++ b/MAC/APL/CEPCU/src/CEPHardwareMonitor/README @@ -0,0 +1,10 @@ +HWMonitor via IPMI en bgpartstatus + swlevel.conf + +IPMI -> clustermnachine on/off + +bgpartstatus + swlevel.conf geeft status R00 (busy). 
+Als busy -> OPER + else als swlevel OFF -> OFF + else ERROR + + diff --git a/MAC/APL/CEPCU/src/CEPHardwareMonitor/zabbix_get b/MAC/APL/CEPCU/src/CEPHardwareMonitor/zabbix_get new file mode 100755 index 0000000000000000000000000000000000000000..d2f8ef579f41c827c9eefc9aaac67f8a703b8a38 Binary files /dev/null and b/MAC/APL/CEPCU/src/CEPHardwareMonitor/zabbix_get differ diff --git a/MAC/APL/CEPCU/src/CMakeLists.txt b/MAC/APL/CEPCU/src/CMakeLists.txt index 7eb79319c10f8afaede83d28120153973a0d69b6..fcc62f24f9181edf89b8d22fa259eb0505e771a8 100644 --- a/MAC/APL/CEPCU/src/CMakeLists.txt +++ b/MAC/APL/CEPCU/src/CMakeLists.txt @@ -13,4 +13,5 @@ lofar_add_bin_program(versioncepcu versioncepcu.cc) add_subdirectory(OnlineControl) add_subdirectory(PythonControl) add_subdirectory(CEPlogProcessor) +add_subdirectory(CEPHardwareMonitor) # add_subdirectory(OfflineControl) diff --git a/MAC/APL/CEPCU/src/OnlineControl/tPVSSMapping.cc b/MAC/APL/CEPCU/src/OnlineControl/tPVSSMapping.cc new file mode 100644 index 0000000000000000000000000000000000000000..a1d6039e88591175777534f90f2888719b1eb1c0 --- /dev/null +++ b/MAC/APL/CEPCU/src/OnlineControl/tPVSSMapping.cc @@ -0,0 +1,87 @@ +//# tPVSSmapping.cc: test StreamToStrorage conversion to PVSS dps. +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: OnlineControl.cc 18153 2011-05-31 23:03:25Z schoenmakers $ +#include <lofar_config.h> +#include <Common/LofarLogger.h> + +#include <signal.h> +#include <Common/StreamUtil.h> +//#include <Common/lofar_vector.h> +//#include <Common/lofar_string.h> +#include <Common/ParameterSet.h> +#include <Common/Exceptions.h> +#include <Common/SystemUtil.h> +#include <ApplCommon/StationInfo.h> +#include <ApplCommon/Observation.h> + +using namespace std; +using namespace LOFAR; + +int main(int argc, char* argv[]) +{ + if (argc < 2) { + cout << "Syntax: " << argv[0] << " parameterSet" << endl; + return (-1); + } + + ParameterSet thePS(argv[1]); + Observation theObs(&thePS, false); + int nrStreams = theObs.streamsToStorage.size(); + cout << "_setupBGPmapping: " << nrStreams << " streams found." << endl; + cout << "ioNode , locusNodes , adders , writers , dataProducts, dataProductTypes" << endl; + + uint prevPset = (nrStreams ? theObs.streamsToStorage[0].sourcePset : -1); + vector<string> locusVector; + vector<int> adderVector; + vector<int> writerVector; + vector<string> DPVector; + vector<string> DPtypeVector; + for (int i = 0; i < nrStreams; i++) { + if (theObs.streamsToStorage[i].sourcePset != prevPset) { // other Pset? write current vector to the database. 
+ stringstream os; + writeVector(os, locusVector); + os << ","; + writeVector(os, adderVector); + os << ","; + writeVector(os, writerVector); + os << ","; + writeVector(os, DPVector); + os << ","; + writeVector(os, DPtypeVector); + cout << prevPset << "," << os.str() << endl; + // clear the collecting vectors + locusVector.clear(); + adderVector.clear(); + writerVector.clear(); + DPVector.clear(); + DPtypeVector.clear(); + prevPset = theObs.streamsToStorage[i].sourcePset; + } + // extend vector with info + locusVector.push_back (theObs.streamsToStorage[i].destStorageNode); + adderVector.push_back (theObs.streamsToStorage[i].adderNr); + writerVector.push_back(theObs.streamsToStorage[i].writerNr); + DPVector.push_back (theObs.streamsToStorage[i].filename); + DPtypeVector.push_back(theObs.streamsToStorage[i].dataProduct); + } + return (0); +} +