-
Marcel Loose authored
Task #3942: Fixed warning: ignoring return value of ‘foo()’, declared with attribute warn_unused_result [-Wunused-result]
Marcel Loose authoredTask #3942: Fixed warning: ignoring return value of ‘foo()’, declared with attribute warn_unused_result [-Wunused-result]
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
SoftwareMonitor.cc 22.58 KiB
//# SoftwareMonitor.cc: Implementation of the SoftwareMonitor task
//#
//# Copyright (C) 2008
//# ASTRON (Netherlands Foundation for Research in Astronomy)
//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
//#
//# This program is free software; you can redistribute it and/or modify
//# it under the terms of the GNU General Public License as published by
//# the Free Software Foundation; either version 2 of the License, or
//# (at your option) any later version.
//#
//# This program is distributed in the hope that it will be useful,
//# but WITHOUT ANY WARRANTY; without even the implied warranty of
//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//# GNU General Public License for more details.
//#
//# You should have received a copy of the GNU General Public License
//# along with this program; if not, write to the Free Software
//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//#
//# $Id$
#include <lofar_config.h>
#include <Common/lofar_fstream.h>
#include <Common/LofarLogger.h>
#include <Common/LofarConstants.h>
#include <Common/LofarLocators.h>
#include <Common/StringUtil.h>
#include <Common/StreamUtil.h>
#include <Common/ParameterSet.h>
#include <ApplCommon/LofarDirs.h>
#include <ApplCommon/StationInfo.h>
#include <MACIO/MACServiceInfo.h>
#include <GCF/PVSS/GCF_PVTypes.h>
#include <GCF/PVSS/PVSSinfo.h>
#include <GCF/RTDB/DP_Protocol.ph>
#include <APL/APLCommon/ControllerDefines.h>
#include <APL/RTDBCommon/RTDButilities.h>
#include <APL/RTDBCommon/CM_Protocol.ph>
#include <stdlib.h>
#include <dirent.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "SoftwareMonitor.h"
#include "PVSSDatapointDefs.h"
#include <boost/date_time/posix_time/posix_time.hpp>
// Positions of the fields in one ':'-separated line of swlevel.conf
// (line syntax: level : up : down : root : mpi : program).
enum {
    SW_FLD_LEVEL        = 0,
    SW_FLD_UP           = 1,
    SW_FLD_DOWN         = 2,
    SW_FLD_ROOT         = 3,
    SW_FLD_MPI          = 4,
    SW_FLD_NAME         = 5,
    SW_FLD_NR_OF_FIELDS = 6     // number of fields on a line, not a field itself
};
// An empty process list is only logged this many consecutive times; after that it is
// reported to PVSS as a suspicious state of the monitor itself.
static const int MAX_PROCMAP_ERRORS = 3;
// Largest of two values. The complete expansion is parenthesised so the macro can be used
// safely inside a larger expression (e.g. 2 * MAX2(x,y)).
// Note: the selected argument is evaluated twice, do not pass expressions with side effects.
#define MAX2(a,b) (((a) > (b)) ? (a) : (b))
using namespace boost::posix_time;
namespace LOFAR {
using namespace APLCommon;
using namespace APL::RTDBCommon;
using namespace GCF::TM;
using namespace GCF::PVSS;
using namespace GCF::RTDB;
namespace RTDBDaemons {
//
// SoftwareMonitor(cntlrName)
//
// Allocates the timer port, the DP service and the ITC port towards the ClaimManager task,
// reads the poll and threshold settings from the global ParameterSet and registers the
// CM protocol. The task starts in initial_state.
//
SoftwareMonitor::SoftwareMonitor(const string& cntlrName) :
    GCFTask             ((State)&SoftwareMonitor::initial_state,cntlrName),
    itsOwnPropertySet   (0),
    itsTimerPort        (0),
    itsDPservice        (0),
    itsClaimMgrTask     (0),
    itsITCPort          (0),
    itsProcMapErrors    (0)
{
    LOG_TRACE_OBJ_STR (cntlrName << " construction");

    // need port for timers.
    itsTimerPort = new GCFTimerPort(*this, "TimerPort");

    itsDPservice = new DPservice(this);     // don't report back
    ASSERTSTR(itsDPservice, "Can't allocate DPservice");

    itsClaimMgrTask = ClaimMgrTask::instance();
    ASSERTSTR(itsClaimMgrTask, "Can't construct a claimMgrTask");

    itsITCPort = new GCFITCPort(*this, *this, "ClaimMgrPort", GCFPortInterface::SAP, CM_PROTOCOL);
    ASSERTSTR(itsITCPort, "Can't construct an ITC port");

    // Read the settings. Note that the key of the suspicious threshold is looked up with the
    // spelling 'suspisciousThreshold'; it is left as is so that the lookup keeps matching
    // whatever key the parameter files use today.
    itsPollInterval    = globalParameterSet()->getInt("pollInterval", 15);
    itsSuspThreshold   = globalParameterSet()->getInt("suspisciousThreshold", 2);
    itsBrokenThreshold = globalParameterSet()->getInt("brokenThreshold", 4);
    itsRestartInterval = globalParameterSet()->getInt("restartIntervalInPolls", 4);
    itsRestartList     = globalParameterSet()->getStringVector("restartablePrograms");

    // Both intervals are used as divisor in a modulo operation (in waitForNextCycle and in
    // _updateProcess): refuse values that would result in a division by zero later on.
    ASSERTSTR(itsPollInterval > 0, "pollInterval must be larger than 0, not " << itsPollInterval);
    ASSERTSTR(itsRestartInterval > 0,
              "restartIntervalInPolls must be larger than 0, not " << itsRestartInterval);

    LOG_INFO_STR("pollInterval          : " << itsPollInterval);
    LOG_INFO_STR("suspiciousThreshold   : " << itsSuspThreshold);
    LOG_INFO_STR("brokenThreshold       : " << itsBrokenThreshold);
    LOG_INFO_STR("restartIntervalInPolls: " << itsRestartInterval);
    ostringstream oss;
    writeVector(oss, itsRestartList);
    LOG_INFO_STR("restartablePrograms   : " << oss.str());

    registerProtocol(CM_PROTOCOL, CM_PROTOCOL_STRINGS);
}
//
// ~SoftwareMonitor()
//
// Releases the DP service and the two ports that were allocated in the constructor.
//
SoftwareMonitor::~SoftwareMonitor()
{
    LOG_TRACE_OBJ_STR (getName() << " destruction");

    // deleting a null pointer is a no-op, so the pointers need no guard.
    delete itsDPservice;
    delete itsTimerPort;
    delete itsITCPort;
}
//
// initial_state(event, port)
//
// Creates the own PropertySet in PVSS. As soon as the DP_CREATED event arrives (or the
// 2 second timer that is started on entry expires) the initial FSM values are written and
// the task moves on to readSWlevels.
//
GCFEvent::TResult SoftwareMonitor::initial_state(GCFEvent& event,
                                                 GCFPortInterface& port)
{
    LOG_DEBUG_STR ("initial:" << eventName(event) << "@" << port.getName());

    GCFEvent::TResult status = GCFEvent::HANDLED;

    switch (event.signal) {
    case F_INIT:
        break;

    case F_ENTRY: {
        // Get access to my own propertyset.
        LOG_DEBUG_STR ("Activating PropertySet " << PSN_SOFTWARE_MONITOR);
        // the timer is started before the PropertySet is constructed; it makes the state
        // continue even when no DP_CREATED event is received.
        itsTimerPort->setTimer(2.0);
        itsOwnPropertySet = new RTDBPropertySet(PSN_SOFTWARE_MONITOR,
                                                PST_SOFTWARE_MONITOR,
                                                PSAT_WO,
                                                this);
    }
    break;

    case DP_CREATED: {
        // NOTE: this function may be called DURING the construction of the PropertySet.
        // Always exit this event in a way that GCF can end the construction.
        DPCreatedEvent dpEvent(event);
        LOG_DEBUG_STR("Result of creating " << dpEvent.DPname << " = " << dpEvent.result);
        // replace the running timer by one that expires immediately: the real work is
        // done in the F_TIMER handler.
        itsTimerPort->cancelAllTimers();
        itsTimerPort->setTimer(0.0);
    }
    break;

    case F_TIMER: {
        // update PVSS.
        LOG_TRACE_FLOW ("Updateing state to PVSS");
        itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION, GCFPVString("Initial"));
        itsOwnPropertySet->setValue(PN_FSM_ERROR, GCFPVString(""));

        LOG_DEBUG_STR("Going to read the software levels");
        TRAN (SoftwareMonitor::readSWlevels);
    }
    break;      // explicit break: control used to fall through into the DP_SET case

    case DP_SET:
        break;

    case F_QUIT:
        TRAN (SoftwareMonitor::finish_state);
        break;

    default:
        LOG_DEBUG_STR ("initial, DEFAULT");
        break;
    }

    return (status);
}
//
// readSWlevels(event, port)
//
// Reads the file swlevel.conf and stores every program whose executable exists in
// itsLevelList. After that the list of permanent processes is constructed and the task
// moves on to checkPrograms.
// Empty lines, lines starting with '#' and lines starting with a blank are skipped; every
// other line must contain at least SW_FLD_NR_OF_FIELDS ':'-separated fields.
//
GCFEvent::TResult SoftwareMonitor::readSWlevels(GCFEvent& event,
                                                GCFPortInterface& port)
{
    LOG_DEBUG_STR ("readSWlevels:" << eventName(event) << "@" << port.getName());

    GCFEvent::TResult status = GCFEvent::HANDLED;

    switch (event.signal) {
    case F_ENTRY: {
        // update PVSS
        itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION, GCFPVString("Reading swlevel file"));

        // Try to find and open the swlevel.conf file.
        ConfigLocator cl;
        string swFile (cl.locate("swlevel.conf"));
        ifstream swfStream;
        swfStream.open(swFile.c_str(), ifstream::in);
        ASSERTSTR(swfStream, "Unable to open the swlevelfile '" << swFile << "'");

        // parse the file
        string line;
        while (getline(swfStream, line)) {
            // An empty line carries no definition. It used to slip through the comment
            // test and abort the program on the field-count assert below.
            if (line.empty() || line[0] == '#' || line[0] == ' ') {
                continue;
            }

            // line syntax: level : up : down : root : mpi : program
            vector<string> field = StringUtil::split(line, ':');
            ASSERTSTR(field.size() >= SW_FLD_NR_OF_FIELDS, "Strange formatted line in swlevel.conf: " << line);

            // check if executable exists (this is what swlevel does also)
            struct stat statBuf;
            // note: stat return 0 on success.
            if (stat(formatString("%s/%s", LOFAR_BIN_LOCATION, field[SW_FLD_NAME].c_str()).c_str(), &statBuf) != 0) {
                continue;
            }

            // add line to our admin
            ProcessDef proc;
            proc.name         = field[SW_FLD_NAME];
            proc.level        = atoi(field[SW_FLD_LEVEL].c_str());
            proc.mustBroot    = atoi(field[SW_FLD_ROOT].c_str());
            proc.runsUnderMPI = atoi(field[SW_FLD_MPI].c_str());
            proc.permSW       = !field[SW_FLD_UP].empty();   // filled 'up' field: permanent program
            itsLevelList.push_back(proc);
        }
        swfStream.close();
        ASSERTSTR(itsLevelList.size(), "File swlevel does not contain legal lines.");
        LOG_INFO_STR("Found " << itsLevelList.size() << " programs I should watch.");

        // copy permSW entries to itsPermProcs vector
        _constructPermProcsList();

        TRAN(SoftwareMonitor::checkPrograms);
    }
    break;

    case DP_SET:
        break;

    case F_QUIT:
        TRAN (SoftwareMonitor::finish_state);
        break;

    default:
        LOG_DEBUG_STR ("readSWlevels: DEFAULT");
        break;
    }

    return (status);
}
//
// checkPrograms(event, port)
//
// State that executes one poll cycle. On entry the map with running processes is rebuilt,
// the current software level is obtained from the program 'swlevel' and the state of every
// permanent process and every known observation-bound process is passed to _updateProcess.
// For processes of a not yet known observation a claim request is sent to the ClaimManager;
// the answer (CM_CLAIM_RESULT) is handled in this state as well. At the end of the entry
// handler the task moves on to waitForNextCycle.
//
GCFEvent::TResult SoftwareMonitor::checkPrograms(GCFEvent& event, GCFPortInterface& port)
{
LOG_DEBUG_STR ("checkPrograms:" << eventName(event) << "@" << port.getName());
GCFEvent::TResult status = GCFEvent::HANDLED;
switch (event.signal) {
case F_ENTRY: {
itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION,GCFPVString("Checking programs"));
_buildProcessMap(); // reconstruct map for current processlist.(name,pid)
// Note: swlevel v1.6 20081229 (svn 12378) returns current level as return value.
// The value returned by system() is shifted 8 bits to get the exit code of the command.
// NOTE(review): when the shell cannot find 'swlevel' the result is presumably a positive
// exit code, in which case the assert below does not fire - verify.
int curLevel = system("swlevel >>/dev/null") >> 8;
ASSERTSTR(curLevel >= 0, "Program 'swlevel' not in my execution path");
LOG_DEBUG_STR("Current level is " << curLevel);
itsOwnPropertySet->setValue(PN_SWM_SW_LEVEL, GCFPVInteger(curLevel));
// loop over the permanent processes and update their status in PVSS
vector<Process>::iterator iter = itsPermProcs.begin();
vector<Process>::iterator end = itsPermProcs.end();
while (iter != end) {
processMap_t::iterator procPtr = itsProcessMap.find(iter->name); // search in ps-ef list
iter->pid = (procPtr != itsProcessMap.end()) ? procPtr->second : 0;
_updateProcess(iter, iter->pid, curLevel); // pid=0: not running, pid!=0: running
if (iter->pid) { // did we found the process?
// procPtr is only valid (not end()) when a pid was found, see the assignment above.
itsProcessMap.erase(procPtr); // remove it from the list
}
iter++;
} // while iter
// What is left in itsProcessMap now are the processes that are not permanent processes.
// --- pseudo code for next loop ---
// for all current processes (processMap)
// process == obs process(levelList) ?
// No: try next
// is process already in ObsProcslist ?
// Yes: update PVSS, try next
// is obsID already in the ObsMap ?
// No: send a req. to the claimMgr for DPname, try next (*1)
// Yes: add process to ObsProcsList, update PVSS, try next
//
// (*1) When answer arrives some time later, the obsID is added to the ObsMap and
// all Obs-bound processes are added to the ObsProcsList.
// loop over the rest of the current processes and see if they match one of ours.
processMap_t::iterator cpIter = itsProcessMap.begin(); // Current Process iter
processMap_t::iterator cpEnd = itsProcessMap.end();
while (cpIter != cpEnd) {
// Is this process an observation bound process?
vector<ProcessDef_t>::iterator llIter = itsLevelList.begin();
vector<ProcessDef_t>::iterator llEnd = itsLevelList.end();
while (llIter != llEnd) {
if (!llIter->permSW && llIter->name == cpIter->first) { // proc is an obs-bound proc?
// is proc already in our obsProc list?
vector<Process>::iterator opIter = _searchObsProcess(cpIter->second);
if (opIter != itsObsProcs.end()) {
_updateProcess(opIter, opIter->pid, curLevel);
}
else {
// proc is not in our obsProcList, do we know this observation??
// (an obsID of 0 means the ID could not be determined: the process is ignored this cycle)
int obsID = _solveObservationID(cpIter->second);
if (obsID) {
obsMap_t::iterator obsIter = itsObsMap.find(obsID);
if (obsIter == itsObsMap.end()) { // new observationID?
// Note: Since the claimObject call is in the loop this might result in multiple
// requests for the same observation to the claimManager (no problem)
itsClaimMgrTask->claimObject("Observation", "LOFAR_ObsSW_" + observationName(obsID), *itsITCPort); // ask claim manager
// leaves the loop over the levelList only, the loop over the processes continues.
break; // process this later.
}
else { // obsID is known but proces not (strange), just add it.
Process newProc(llIter->name, obsIter->second.DPname+"_"+llIter->name, obsID, llIter->level);
newProc.pid = cpIter->second;
// push_back may invalidate iterators into itsObsProcs; opIter is not used after this
// point and the new element is looked up again for the call to _updateProcess.
itsObsProcs.push_back(newProc);
LOG_DEBUG_STR("new process for obs " << obsID << " : " << llIter->name);
_updateProcess(_searchObsProcess(newProc.pid), newProc.pid, curLevel);
}
} // obsID != 0
} // process (not) in ObsProcList
} // process is obs-bound
llIter++;
} // loop over levelList
cpIter++;
} // loop over current process list.
TRAN(SoftwareMonitor::waitForNextCycle);
}
break;
case DP_SET:
break;
case CM_CLAIM_RESULT: {
// answer of the ClaimManager on a claimObject request: register the observation.
CMClaimResultEvent cmEvent(event);
LOG_INFO_STR(cmEvent.nameInAppl << " is mapped to " << cmEvent.DPname);
_updateObservationMap(cmEvent.nameInAppl, cmEvent.DPname);
}
break;
case F_QUIT:
TRAN (SoftwareMonitor::finish_state);
break;
default:
LOG_DEBUG_STR ("checkPrograms: DEFAULT");
break;
}
return (status);
}
//
// waitForNextCycle(event, port)
//
// Idle state between two poll cycles. On entry a timer is set that expires on the next
// multiple of the poll interval; when it fires the task returns to checkPrograms.
// Answers of the ClaimManager that arrive in the meantime are processed here also.
//
GCFEvent::TResult SoftwareMonitor::waitForNextCycle(GCFEvent& event,
                                                    GCFPortInterface& port)
{
    // DP_SET events are not traced in this state.
    if (event.signal != DP_SET) {
        LOG_DEBUG_STR ("waitForNextCycle:" << eventName(event) << "@" << port.getName());
    }

    switch (event.signal) {
    case DP_SET:
        break;

    case F_ENTRY: {
        itsOwnPropertySet->setValue(PN_FSM_CURRENT_ACTION, GCFPVString("Wait for next cycle"));

        // number of seconds till the clock reaches the next multiple of the poll interval
        int secondsToGo = itsPollInterval - (time(0) % itsPollInterval);
        if (!secondsToGo) {
            secondsToGo = itsPollInterval;
        }

        itsTimerPort->cancelAllTimers();
        itsTimerPort->setTimer(double(secondsToGo));
        LOG_INFO_STR("Waiting " << secondsToGo << " seconds for next cycle");
        break;
    }

    case F_TIMER:
        TRAN(SoftwareMonitor::checkPrograms);
        break;

    case CM_CLAIM_RESULT: {
        CMClaimResultEvent claimAnswer(event);
        LOG_INFO_STR(claimAnswer.nameInAppl << " is mapped to " << claimAnswer.DPname);
        _updateObservationMap(claimAnswer.nameInAppl, claimAnswer.DPname);
        break;
    }

    case F_QUIT:
        TRAN (SoftwareMonitor::finish_state);
        break;

    default:
        LOG_DEBUG_STR ("waitForNextCycle, DEFAULT");
        break;
    }

    // every event counts as handled in this state.
    return (GCFEvent::HANDLED);
}
//
// finish_state(event, port)
//
// Final state: reports "Software:finished" as current action to PVSS on entry.
// Events other than F_INIT, F_ENTRY and DP_SET are returned as NOT_HANDLED.
//
GCFEvent::TResult SoftwareMonitor::finish_state(GCFEvent& event, GCFPortInterface& port)
{
    LOG_DEBUG_STR ("finish_state:" << eventName(event) << "@" << port.getName());

    GCFEvent::TResult result = GCFEvent::NOT_HANDLED;

    switch (event.signal) {
    case F_ENTRY:
        // update PVSS
        itsOwnPropertySet->setValue(string(PN_FSM_CURRENT_ACTION),GCFPVString("Software:finished"));
//      itsOwnPropertySet->setValue(string(PN_HWM_Software_ERROR),GCFPVString(""));
        result = GCFEvent::HANDLED;
        break;

    case F_INIT:
    case DP_SET:
        // nothing to do, but the event is accepted.
        result = GCFEvent::HANDLED;
        break;

    default:
        LOG_DEBUG("finishing_state, DEFAULT");
        break;
    }

    return (result);
}
//
// _updateProcess(iter, pid, curlevel)
//
// Update the information of the given process in PVSS
//
// iter     : element of a Process vector that describes the process.
// pid      : 0 when the process is not running, otherwise it is treated as running.
// curLevel : the current software level; a process with a higher level is expected to be down.
//
// Running process    : state becomes OPERATIONAL, the error counter is reset and - only when
//                      no startTime is known yet - startTime and processID are written.
// Not running process: state becomes OFF, SUSPICIOUS or BROKEN depending on the level of the
//                      process and on the number of polls it is missing (errorCnt). For
//                      SUSPICIOUS and BROKEN a restart may be attempted. The stopTime is
//                      written once.
//
void SoftwareMonitor::_updateProcess(vector<Process>::iterator iter, int pid, int curLevel)
{
LOG_DEBUG_STR("_updateProcess(" << iter->DPname << "," << pid << "," << curLevel << ")");
if (pid) { // process is running?
// mark it operational whether or not it should be running
setObjectState(getName(), iter->DPname, RTDB_OBJ_STATE_OPERATIONAL, true); // force
iter->errorCnt = 0;
// update startTime when not done before
if (iter->startTime) {
return;
}
// NOTE(review): 'fd' receives the result of stat(), it is not a file descriptor.
int fd;
char statFile [256];
struct stat statStruct;
// note: the filename is constructed with iter->pid, not with the argument pid.
snprintf(statFile, sizeof statFile, "/proc/%d/cmdline", iter->pid);
// the status-change time of the cmdline file is used as start time of the process.
if ((fd = stat(statFile, &statStruct)) != -1) {
iter->startTime = statStruct.st_ctime;
}
else { // retrieval of time failed assume 'now'
iter->startTime = time(0);
}
LOG_DEBUG_STR("starttime of " << iter->name << " = " << to_simple_string(from_time_t(iter->startTime)));
itsDPservice->setValue(iter->DPname+".process.startTime",
GCFPVString(to_simple_string(from_time_t(iter->startTime))));
itsDPservice->setValue(iter->DPname+".process.processID", GCFPVInteger(iter->pid));
return;
}
// pid = 0 ==> process is not running
itsDPservice->setValue(iter->DPname+".process.processID", GCFPVInteger(iter->pid));
if (iter->level > curLevel) { // should it be down?
setObjectState(getName(), iter->DPname, RTDB_OBJ_STATE_OFF, true); // yes
iter->errorCnt = 0;
}
else {
// When switching from swlevel 1 to eg. swlevel 5 may take some time when the RSPboards
// are running in low-power mode. During this time you don't want the processes that
// are not yet running being reported are broken. With the conf file of the SoftwareMonitor
// you can set the number of cycles a process is not reported as suspicious or broken.
if (iter->errorCnt >= itsBrokenThreshold) { // serious problem
setObjectState(formatString("%s: %s not running", getName().c_str(), iter->name.c_str()),
iter->DPname, RTDB_OBJ_STATE_BROKEN);
// in the broken state a restart is only tried once every itsRestartInterval polls.
// NOTE(review): an itsRestartInterval of 0 would make this a modulo by zero.
if (iter->errorCnt % itsRestartInterval == 0) {
_restartProgram(iter->name);
}
}
else if (iter->errorCnt >= itsSuspThreshold) { // allow start/stop times
// in the suspicious state a restart is tried every poll.
setObjectState(formatString("%s: %s not running", getName().c_str(), iter->name.c_str()),
iter->DPname, RTDB_OBJ_STATE_SUSPICIOUS);
_restartProgram(iter->name);
}
else {
// below both thresholds: not reported as a problem yet.
setObjectState(getName(), iter->DPname, RTDB_OBJ_STATE_OFF, true); // force
}
if (iter->errorCnt == 0) { // first error? set stoptime
iter->stopTime = 0; // force update of stoptime
}
// errorCnt is the number of polls in a row the process was missing while it should run.
iter->errorCnt++;
} // proces not running but it should have been running
// update stopTime is not done already.
// (startTime is cleared below, so this block is executed once per stop of the process)
if (iter->startTime > iter->stopTime) {
iter->stopTime = time(0);
LOG_DEBUG_STR("stoptime of " << iter->name << " = " << to_simple_string(from_time_t(iter->stopTime)));
itsDPservice->setValue(iter->DPname+".process.stopTime",
GCFPVString(to_simple_string(from_time_t(iter->stopTime))));
itsDPservice->setValue(iter->DPname+".process.processID", GCFPVInteger(0));
iter->startTime = 0;
}
}
//
// _buildProcessMap()
//
// Reconstruct a multimap with all the processes currently running.
//
// Every numeric entry of /proc is taken as a process; the basename of the first string in
// its cmdline file is stored with the pid in itsProcessMap. Entries whose cmdline cannot be
// opened or read, or is empty, are skipped.
// When the resulting map is empty more than MAX_PROCMAP_ERRORS times in a row the monitor
// reports itself as suspicious in PVSS.
//
void SoftwareMonitor::_buildProcessMap()
{
    const int STAT_BUFFER_SIZE = 1024;

    itsProcessMap.clear();

    DIR* procDir = opendir("/proc");
    ASSERTSTR(procDir, "Cannot open directory /proc to check programlist");

    struct dirent* dirPtr;
    while ((dirPtr = readdir(procDir))) {
        if (!isdigit(dirPtr->d_name[0])) {
            continue;
        }

        char statFile  [256];
        char statBuffer[STAT_BUFFER_SIZE];
        snprintf(statFile, sizeof statFile, "/proc/%s/cmdline", dirPtr->d_name);
        int fd = open(statFile, O_RDONLY);
        if (fd == -1) {
            continue;       // cmdline cannot be opened: skip this entry
        }

        // read() returns -1 on an error and 0 when the file is empty, in both cases there
        // is no name to register. (A plain truth test accepted -1 as a valid result.)
        ssize_t nrBytes = read(fd, statBuffer, STAT_BUFFER_SIZE-1);
        close(fd);
        if (nrBytes <= 0) {
            continue;
        }

        // the buffer is used as a C-string below, make sure it is terminated.
        statBuffer[nrBytes] = '\0';
        itsProcessMap.insert(pair<string,int>(basename(statBuffer), atoi(dirPtr->d_name)));
    }
    closedir(procDir);

    // Sometimes the list appears to be empty. Allow this several time before warning.
    if (itsProcessMap.empty()) {
        if (++itsProcMapErrors > MAX_PROCMAP_ERRORS) {
            setObjectState(formatString("%s: UNIX returned empty processlist!", getName().c_str()),
                           PSN_SOFTWARE_MONITOR, RTDB_OBJ_STATE_SUSPICIOUS);
            itsProcMapErrors = 0;
        }
        else {
            LOG_WARN_STR("Unix returned empty processlist for the " << itsProcMapErrors << " time");
        }
    }
    else {
        itsProcMapErrors = 0;
    }
}
//
// _updateObservationMap(obsName, DPname)
//
// Add the given observation to the activeObsMap and add process-entries to the ObsProcList.
//
void SoftwareMonitor::_updateObservationMap(const string& orgName, const string& DPname)
{
// note: orgName: LOFAR_ObsSW_Observation9999
// DPname : LOFAR_ObsSW_TempObs9999
string obsName(orgName); // modifyable copy
ltrim(obsName, string("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_:")); // strip off text
int obsID = atoi(obsName.c_str());
obsMap_t::iterator obsIter = itsObsMap.find(obsID);
if (obsIter != itsObsMap.end()) {
LOG_DEBUG_STR("Obs " << obsID << " already exists in the ObservationMap");
return;
}
// Add a processObject for each observation-bound process to the ObservationProcs.
// We expect for each of these entries a running process.
LOG_INFO_STR("Adding observation " << obsID << " to my administration");
// itsObsMap[obsID] = ObsInfo(DPname, true);
string fullDPname(PVSSDatabaseName()+":"+DPname);
itsObsMap.insert(make_pair(obsID, ObsInfo(fullDPname, true)));
vector<ProcessDef_t>::iterator procDefIter = itsLevelList.begin();
vector<ProcessDef_t>::iterator procDefEnd = itsLevelList.end();
while (procDefIter != procDefEnd) {
if (!procDefIter->permSW) {
itsObsProcs.push_back(Process(procDefIter->name, fullDPname+"_"+procDefIter->name, obsID, procDefIter->level));
LOG_DEBUG_STR("new obs entry:" << procDefIter->name << ", " << fullDPname+"_"+procDefIter->name << ", " << obsID);
}
procDefIter++;
}
}
//
// _constructPermProcsList()
//
// Construct the PermProcs list from the swLevel list.
//
void SoftwareMonitor::_constructPermProcsList()
{
vector<ProcessDef_t>::iterator procDefIter = itsLevelList.begin();
vector<ProcessDef_t>::iterator procDefEnd = itsLevelList.end();
while (procDefIter != procDefEnd) {
if (procDefIter->permSW) {
string DPname(formatString((procDefIter->level == 1) ? "%s:LOFAR_PermSW_Daemons_%s" : "%s:LOFAR_PermSW_%s",
PVSSDatabaseName().c_str(), procDefIter->name.c_str()));
itsPermProcs.push_back(Process(procDefIter->name, DPname, -1, procDefIter->level));
LOG_DEBUG_STR("new perm entry:" << procDefIter->name << ", " << DPname);
}
procDefIter++;
}
}
//
// _isRestartable(procName)
//
bool SoftwareMonitor::_isRestartable(const string& procName)
{
vector<string>::const_iterator iter = itsRestartList.begin();
vector<string>::const_iterator end = itsRestartList.end();
while (iter != end) {
if (*iter == procName) {
return (true);
}
++iter;
}
return (false);
}
//
// _restartProgram(procName)
//
void SoftwareMonitor::_restartProgram(const string& procName)
{
if (!_isRestartable(procName)) {
return;
}
LOG_WARN_STR("Trying to restart program " << procName);
if (system (formatString("swlevel -r %s", procName.c_str()).c_str()) != 0) {
LOG_ERROR_STR("Failed to restart program" << procName);
}
}
//
// _searchObsProcess(pid)
//
// Returns an iterator to the first element of itsObsProcs that has the given pid, or
// itsObsProcs.end() when there is no such element.
//
vector<SoftwareMonitor::Process>::iterator SoftwareMonitor::_searchObsProcess(int pid)
{
    vector<Process>::iterator cursor = itsObsProcs.begin();
    for ( ; cursor != itsObsProcs.end(); ++cursor) {
        if (cursor->pid == pid) {       // known process?
            break;
        }
    }

    return (cursor);
}
//
// _solveObservationID(pid)
//
// Tries to determine the observation number of the process with the given pid by looking
// for the pattern '{<number>}' in the file /proc/<pid>/cmdline.
// Returns the number found, or 0 when the file could not be opened or read or when it
// contains no '{'.
//
int SoftwareMonitor::_solveObservationID(int pid)
{
    char    cmdlineFile[256];
    snprintf(cmdlineFile, sizeof cmdlineFile, "/proc/%d/cmdline", pid);

    int     fd = open(cmdlineFile, O_RDONLY);
    if (fd == -1) {
        LOG_WARN_STR("No observationID found for process " << pid);
        return (0);
    }

    // Everything that is needed is in the buffer after the read, so the file can be
    // closed right away.
    char    cmdline[1024];
    int     nrRead = read(fd, cmdline, sizeof cmdline -1);
    close(fd);
    if (nrRead <= 0) {
        return (0);
    }

    // The arguments in the file are separated by zero's: terminate the buffer and turn the
    // separators into spaces to get one searchable (and printable) string.
    cmdline[nrRead] = '\0';
    for (int pos = 0; pos < nrRead; ++pos) {
        if (cmdline[pos] == '\0') {
            cmdline[pos] = ' ';
        }
    }

    const char* brace = strchr(cmdline, '{');
    if (!brace) {
        LOG_WARN_STR("No observationID found for pid " << pid << " in:" << cmdline);
        return (0);
    }

    int     obsID = 0;
    sscanf (brace, "{%d}%*s", &obsID);
    if (!obsID) {
        LOG_WARN_STR("ObservationNr=0 in cmdline: " << cmdline);
    }
    return (obsID);
}
}; // StationCU
}; // LOFAR