Skip to content
Snippets Groups Projects
Commit 9e49fce6 authored by Jorrit Schaap's avatar Jorrit Schaap
Browse files

Task #9607: added logging in MACScheduler::_updatePlannedList() to identify if...

Task #9607: added logging in MACScheduler::_updatePlannedList() to identify if the missed observations are due to a potential race-condiction in itsOTDBconnection->getTreeGroup and the timeBeforeStart check
parent 7636f3f3
No related branches found
No related tags found
No related merge requests found
......@@ -65,7 +65,7 @@ namespace LOFAR {
// static (this) pointer used for signal handling
static MACScheduler* pMacScheduler = 0;
//
// MACScheduler()
//
......@@ -108,7 +108,7 @@ MACScheduler::MACScheduler() :
}
}
ASSERTSTR(itsMaxPlanned + itsMaxFinished < MAX_CONCURRENT_OBSERVATIONS,
ASSERTSTR(itsMaxPlanned + itsMaxFinished < MAX_CONCURRENT_OBSERVATIONS,
"maxPlannedList + maxFinishedList should be less than " << MAX_CONCURRENT_OBSERVATIONS);
// Read the schedule periods for starting observations.
......@@ -192,12 +192,12 @@ void MACScheduler::_databaseEventHandler(GCFEvent& event)
itsQueuePeriod = newVal;
}
#endif
}
}
break;
default:
break;
}
}
}
......@@ -211,7 +211,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
LOG_DEBUG_STR ("initial_state:" << eventName(event));
GCFEvent::TResult status = GCFEvent::HANDLED;
switch (event.signal) {
case F_INIT:
break;
......@@ -235,7 +235,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
itsTimerPort->setTimer(0.0);
}
break;
case F_TIMER: { // must be timer that PropSet is enabled.
// update PVSS.
LOG_TRACE_FLOW ("Updateing state to PVSS");
......@@ -249,7 +249,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
itsPropertySet->setValue(PN_MS_PLANNED_OBSERVATIONS, GCFPVDynArr(LPT_STRING, emptyArr));
itsPropertySet->setValue(PN_MS_FINISHED_OBSERVATIONS, GCFPVDynArr(LPT_STRING, emptyArr));
// Try to connect to the SAS database.
ParameterSet* pParamSet = globalParameterSet();
string username = pParamSet->getString("OTDBusername");
......@@ -261,7 +261,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
itsOTDBconnection= new OTDBconnection(username, password, DBname, hostname);
ASSERTSTR (itsOTDBconnection, "Memory allocation error (OTDB)");
ASSERTSTR (itsOTDBconnection->connect(),
"Unable to connect to database " << DBname << " on " << hostname <<
"Unable to connect to database " << DBname << " on " << hostname <<
" using " << username << "," << password);
LOG_INFO ("Connected to the OTDB");
itsPropertySet->setValue(PN_MS_OTDB_CONNECTED, GCFPVBool(true));
......@@ -285,12 +285,12 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
case F_DISCONNECTED:
break;
default:
LOG_DEBUG ("MACScheduler::initial, default");
status = GCFEvent::NOT_HANDLED;
break;
}
}
return (status);
}
......@@ -320,15 +320,15 @@ GCFEvent::TResult MACScheduler::recover_state(GCFEvent& event, GCFPortInterface&
// TODO: do recovery
TRAN(MACScheduler::active_state);
break;
}
default:
LOG_DEBUG("recover_state, default");
status = GCFEvent::NOT_HANDLED;
break;
}
}
return (status);
}
......@@ -349,7 +349,7 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
break;
case F_ENTRY: {
// install my own signal handler. GCFTask also installs a handler so we have
// install my own signal handler. GCFTask also installs a handler so we have
// to install our handler later than the GCFTask handler.
pMacScheduler = this;
signal (SIGINT, MACScheduler::sigintHandler); // ctrl-c
......@@ -367,12 +367,12 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
case F_ACCEPT_REQ:
break;
case F_CONNECTED:
case F_CONNECTED:
// Should be from the (lost) connection with the SD
_connectedHandler(port);
break;
case F_DISCONNECTED:
case F_DISCONNECTED:
// Can be from StartDaemon or ObsController.
// StartDaemon: trouble! Try to reconnect asap.
// ObsController: ok when obs is finished, BIG TROUBLE when not!
......@@ -432,7 +432,7 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
// -------------------- EVENTS FROM CHILDCONTROL --------------------
//
// That must be events from the ObservationControllers that are currently
// That must be events from the ObservationControllers that are currently
// started or running.
//
case CONTROL_STARTED: {
......@@ -562,16 +562,16 @@ GCFEvent::TResult MACScheduler::finishing_state(GCFEvent& event, GCFPortInterfac
itsTimerPort->setTimer(1L);
break;
}
case F_TIMER:
GCFScheduler::instance()->stop();
break;
default:
LOG_DEBUG("finishing_state, default");
status = GCFEvent::NOT_HANDLED;
break;
}
}
return (status);
}
......@@ -624,25 +624,27 @@ void MACScheduler::_doOTDBcheck()
//
void MACScheduler::_updatePlannedList()
{
LOG_DEBUG("_updatePlannedList()");
LOG_INFO("_updatePlannedList()");
// get time info
time_t now = time(0);
ptime currentTime = from_time_t(now);
ASSERTSTR (currentTime != not_a_date_time, "Can't determine systemtime, bailing out");
LOG_INFO_STR("calling getTreeGroup");
// get new list (list is ordered on starttime) of planned observations
vector<OTDBtree> plannedDBlist = itsOTDBconnection->getTreeGroup(1, itsPlannedPeriod, itsExclPLcluster);
vector<OTDBtree> plannedDBlist = itsOTDBconnection->getTreeGroup(1, itsPlannedPeriod, itsExclPLcluster);
LOG_INFO_STR("called getTreeGroup, nr of items in plannedDBlist: " << plannedDBlist.size());
if (!plannedDBlist.empty()) {
LOG_DEBUG(formatString("OTDBCheck:First planned observation (%d) is at %s (active over %d seconds)",
plannedDBlist[0].treeID(), to_simple_string(plannedDBlist[0].starttime).c_str(),
LOG_DEBUG(formatString("OTDBCheck:First planned observation (%d) is at %s (active over %d seconds)",
plannedDBlist[0].treeID(), to_simple_string(plannedDBlist[0].starttime).c_str(),
time_duration(plannedDBlist[0].starttime - currentTime).total_seconds()));
}
// NOTE: do not exit routine on emptylist: we need to write an empty list to clear the DB
// make a copy of the current prepared observations (= observations shown in the navigator in the 'future'
// list). By eliminating the observations that are in the current SAS list we end up (at the end of this function)
// list). By eliminating the observations that are in the current SAS list we end up (at the end of this function)
// with a list of observations that were in the SASlist the last time but now not anymore. Normally those observations
// will appear in the active-list and will be removed there from the prepared list but WHEN AN OPERATOR CHANGES
// THE STATUS MANUALLY into something different (e.g. ON-HOLD) the observation stays in the preparedlist.
......@@ -672,14 +674,14 @@ void MACScheduler::_updatePlannedList()
// must we claim this observation at the claimMgr?
OLiter prepIter = itsPreparedObs.find(obsID);
if ((prepIter == itsPreparedObs.end()) || (prepIter->second.prepReady == false) ||
if ((prepIter == itsPreparedObs.end()) || (prepIter->second.prepReady == false) ||
(prepIter->second.modTime != modTime)) {
// create a ParameterFile for this Observation
TreeMaintenance tm(itsOTDBconnection);
OTDBnode topNode = tm.getTopNode(obsID);
string filename(observationParset(obsID));
if (!tm.exportTree(obsID, topNode.nodeID(), filename)) {
LOG_ERROR_STR ("Cannot create ParameterSet '" << filename <<
LOG_ERROR_STR ("Cannot create ParameterSet '" << filename <<
"' for new observation. Observation CANNOT BE STARTED!");
}
else {
......@@ -694,10 +696,14 @@ void MACScheduler::_updatePlannedList()
// otherwise thing will go wrong in the Navigator
plannedArr.push_back(new GCFPVString(obsName));
}
// should this observation (have) be(en) started?
int timeBeforeStart = time_duration(plannedDBlist[idx].starttime - currentTime).total_seconds();
// LOG_DEBUG_STR(obsName << " starts over " << timeBeforeStart << " seconds");
LOG_INFO(formatString("%s starts at %s which is in %d seconds",
obsName.c_str(),
to_simple_string(plannedDBlist[idx].starttime).c_str(),
timeBeforeStart));
if (timeBeforeStart > 0 && timeBeforeStart <= (int)itsQueuePeriod) {
if (itsPreparedObs[obsID].prepReady == false) {
LOG_INFO_STR("Observation " << obsID << " must be started but is not claimed yet.");
......@@ -710,22 +716,22 @@ void MACScheduler::_updatePlannedList()
string cntlrName(controllerName(CNTLRTYPE_OBSERVATIONCTRL, 0, obsID));
if (itsControllerMap.find(cntlrName) == itsControllerMap.end()) {
LOG_INFO_STR("Requesting start of " << cntlrName);
itsChildControl->startChild(CNTLRTYPE_OBSERVATIONCTRL,
obsID,
itsChildControl->startChild(CNTLRTYPE_OBSERVATIONCTRL,
obsID,
0, // instanceNr
myHostname(true));
// Note: controller is now in state NO_STATE/CONNECTED (C/R)
// add controller to our 'monitor' administration
itsControllerMap[cntlrName] = obsID;
LOG_DEBUG_STR("itsControllerMap[" << cntlrName << "]=" << obsID);
LOG_INFO_STR("itsControllerMap[" << cntlrName << "]=" << obsID);
if (!itsPreparedObs[obsID].parsetDistributed) {
_setParsetOnMsgBus(observationParset(obsID));
itsPreparedObs[obsID].parsetDistributed = true;
}
}
else {
LOG_DEBUG_STR("Observation " << obsID << " is already (being) started");
LOG_INFO_STR("Observation " << obsID << " is already (being) started");
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment