From 9e49fce628d49673b94fe7e7eb8d22aa4b1e0e8c Mon Sep 17 00:00:00 2001
From: Jorrit Schaap <schaap@astron.nl>
Date: Wed, 6 Jul 2016 14:49:12 +0000
Subject: [PATCH] Task #9607: added logging in
 MACScheduler::_updatePlannedList() to identify if the missed observations are
 due to a potential race-condiction in itsOTDBconnection->getTreeGroup and the
 timeBeforeStart check

---
 .../MainCU/src/MACScheduler/MACScheduler.cc   | 72 ++++++++++---------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc b/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc
index 268a664f815..2b1be5279aa 100644
--- a/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc
+++ b/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc
@@ -65,7 +65,7 @@ namespace LOFAR {
 
 // static (this) pointer used for signal handling
 static MACScheduler* pMacScheduler = 0;
-	
+
 //
 // MACScheduler()
 //
@@ -108,7 +108,7 @@ MACScheduler::MACScheduler() :
 		}
 	}
 
-	ASSERTSTR(itsMaxPlanned + itsMaxFinished < MAX_CONCURRENT_OBSERVATIONS, 
+	ASSERTSTR(itsMaxPlanned + itsMaxFinished < MAX_CONCURRENT_OBSERVATIONS,
 				"maxPlannedList + maxFinishedList should be less than " << MAX_CONCURRENT_OBSERVATIONS);
 
 	// Read the schedule periods for starting observations.
@@ -192,12 +192,12 @@ void MACScheduler::_databaseEventHandler(GCFEvent& event)
 			itsQueuePeriod = newVal;
 		}
 #endif
-	}  
+	}
 	break;
 
 	default:
 		break;
-	}  
+	}
 }
 
 
@@ -211,7 +211,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
 	LOG_DEBUG_STR ("initial_state:" << eventName(event));
 
 	GCFEvent::TResult status = GCFEvent::HANDLED;
-  
+
 	switch (event.signal) {
     case F_INIT:
    		break;
@@ -235,7 +235,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
 			itsTimerPort->setTimer(0.0);
         }
 		break;
-	  
+
 	case F_TIMER: {		// must be timer that PropSet is enabled.
 		// update PVSS.
 		LOG_TRACE_FLOW ("Updateing state to PVSS");
@@ -249,7 +249,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
 		itsPropertySet->setValue(PN_MS_PLANNED_OBSERVATIONS,  GCFPVDynArr(LPT_STRING, emptyArr));
 		itsPropertySet->setValue(PN_MS_FINISHED_OBSERVATIONS, GCFPVDynArr(LPT_STRING, emptyArr));
 
-      
+
 		// Try to connect to the SAS database.
 		ParameterSet* pParamSet = globalParameterSet();
 		string username	= pParamSet->getString("OTDBusername");
@@ -261,7 +261,7 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
 		itsOTDBconnection= new OTDBconnection(username, password, DBname, hostname);
 		ASSERTSTR (itsOTDBconnection, "Memory allocation error (OTDB)");
 		ASSERTSTR (itsOTDBconnection->connect(),
-					"Unable to connect to database " << DBname << " on " << hostname << 
+					"Unable to connect to database " << DBname << " on " << hostname <<
 					" using " << username << "," << password);
 		LOG_INFO ("Connected to the OTDB");
 		itsPropertySet->setValue(PN_MS_OTDB_CONNECTED, GCFPVBool(true));
@@ -285,12 +285,12 @@ GCFEvent::TResult MACScheduler::initial_state(GCFEvent& event, GCFPortInterface&
 
 	case F_DISCONNECTED:
 		break;
-	
+
 	default:
 		LOG_DEBUG ("MACScheduler::initial, default");
 		status = GCFEvent::NOT_HANDLED;
 		break;
-	}    
+	}
 	return (status);
 }
 
@@ -320,15 +320,15 @@ GCFEvent::TResult MACScheduler::recover_state(GCFEvent& event, GCFPortInterface&
 		// TODO: do recovery
 
 		TRAN(MACScheduler::active_state);
-		
+
 		break;
 	}
-  
+
 	default:
 		LOG_DEBUG("recover_state, default");
 		status = GCFEvent::NOT_HANDLED;
 		break;
-	}    
+	}
 	return (status);
 }
 
@@ -349,7 +349,7 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
 		break;
 
 	case F_ENTRY: {
-  	    // install my own signal handler. GCFTask also installs a handler so we have 
+  	    // install my own signal handler. GCFTask also installs a handler so we have
 		// to install our handler later than the GCFTask handler.
 	    pMacScheduler = this;
 		signal (SIGINT, MACScheduler::sigintHandler);	// ctrl-c
@@ -367,12 +367,12 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
 	case F_ACCEPT_REQ:
 		break;
 
-	case F_CONNECTED:	
+	case F_CONNECTED:
 		// Should be from the (lost) connection with the SD
 		_connectedHandler(port);
 		break;
 
-	case F_DISCONNECTED:	
+	case F_DISCONNECTED:
 		// Can be from StartDaemon or ObsController.
 		// StartDaemon: trouble! Try to reconnect asap.
 		// ObsController: ok when obs is finished, BIG TROUBLE when not!
@@ -432,7 +432,7 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
 
 	// -------------------- EVENTS FROM CHILDCONTROL --------------------
 	//
-	// That must be events from the ObservationControllers that are currently 
+	// That must be events from the ObservationControllers that are currently
 	// started or running.
 	//
 	case CONTROL_STARTED: {
@@ -562,16 +562,16 @@ GCFEvent::TResult MACScheduler::finishing_state(GCFEvent& event, GCFPortInterfac
 		itsTimerPort->setTimer(1L);
 		break;
 	}
-  
+
     case F_TIMER:
       GCFScheduler::instance()->stop();
       break;
-    
+
 	default:
 		LOG_DEBUG("finishing_state, default");
 		status = GCFEvent::NOT_HANDLED;
 		break;
-	}    
+	}
 	return (status);
 }
 
@@ -624,25 +624,27 @@ void MACScheduler::_doOTDBcheck()
 //
 void MACScheduler::_updatePlannedList()
 {
-	LOG_DEBUG("_updatePlannedList()");
+	LOG_INFO("_updatePlannedList()");
 
 	// get time info
 	time_t	now = time(0);
 	ptime	currentTime = from_time_t(now);
 	ASSERTSTR (currentTime != not_a_date_time, "Can't determine systemtime, bailing out");
 
+        LOG_INFO_STR("calling getTreeGroup");
 	// get new list (list is ordered on starttime) of planned observations
-	vector<OTDBtree> plannedDBlist = itsOTDBconnection->getTreeGroup(1, itsPlannedPeriod, itsExclPLcluster);	
+	vector<OTDBtree> plannedDBlist = itsOTDBconnection->getTreeGroup(1, itsPlannedPeriod, itsExclPLcluster);
+        LOG_INFO_STR("called getTreeGroup, nr of items in plannedDBlist: " << plannedDBlist.size());
 
 	if (!plannedDBlist.empty()) {
-		LOG_DEBUG(formatString("OTDBCheck:First planned observation (%d) is at %s (active over %d seconds)", 
-				plannedDBlist[0].treeID(), to_simple_string(plannedDBlist[0].starttime).c_str(), 
+		LOG_DEBUG(formatString("OTDBCheck:First planned observation (%d) is at %s (active over %d seconds)",
+				plannedDBlist[0].treeID(), to_simple_string(plannedDBlist[0].starttime).c_str(),
 				time_duration(plannedDBlist[0].starttime - currentTime).total_seconds()));
 	}
 	// NOTE: do not exit routine on emptylist: we need to write an empty list to clear the DB
 
 	// make a copy of the current prepared observations (= observations shown in the navigator in the 'future'
-	// list). By eliminating the observations that are in the current SAS list we end up (at the end of this function) 
+	// list). By eliminating the observations that are in the current SAS list we end up (at the end of this function)
 	// with a list of observations that were in the SASlist the last time but now not anymore. Normally those observations
 	// will appear in the active-list and will be removed there from the prepared list but WHEN AN OPERATOR CHANGES
 	// THE STATUS MANUALLY into something different (e.g. ON-HOLD) the observation stays in the preparedlist.
@@ -672,14 +674,14 @@ void MACScheduler::_updatePlannedList()
 
 		// must we claim this observation at the claimMgr?
 		OLiter	prepIter = itsPreparedObs.find(obsID);
-		if ((prepIter == itsPreparedObs.end()) || (prepIter->second.prepReady == false) || 
+		if ((prepIter == itsPreparedObs.end()) || (prepIter->second.prepReady == false) ||
 												  (prepIter->second.modTime != modTime)) {
 			// create a ParameterFile for this Observation
 			TreeMaintenance		tm(itsOTDBconnection);
 			OTDBnode			topNode = tm.getTopNode(obsID);
 			string				filename(observationParset(obsID));
 			if (!tm.exportTree(obsID, topNode.nodeID(), filename)) {
-				LOG_ERROR_STR ("Cannot create ParameterSet '" << filename << 
+				LOG_ERROR_STR ("Cannot create ParameterSet '" << filename <<
 								"' for new observation. Observation CANNOT BE STARTED!");
 			}
 			else {
@@ -694,10 +696,14 @@ void MACScheduler::_updatePlannedList()
 			// otherwise thing will go wrong in the Navigator
 			plannedArr.push_back(new GCFPVString(obsName));
 		}
-	
+
 		// should this observation (have) be(en) started?
 		int		timeBeforeStart = time_duration(plannedDBlist[idx].starttime - currentTime).total_seconds();
-//		LOG_DEBUG_STR(obsName << " starts over " << timeBeforeStart << " seconds");
+                LOG_INFO(formatString("%s starts at %s which is in %d seconds",
+                                obsName.c_str(),
+                                to_simple_string(plannedDBlist[idx].starttime).c_str(),
+                                timeBeforeStart));
+
 		if (timeBeforeStart > 0 && timeBeforeStart <= (int)itsQueuePeriod) {
 			if (itsPreparedObs[obsID].prepReady == false) {
 				LOG_INFO_STR("Observation " << obsID << " must be started but is not claimed yet.");
@@ -710,22 +716,22 @@ void MACScheduler::_updatePlannedList()
 				string	cntlrName(controllerName(CNTLRTYPE_OBSERVATIONCTRL, 0, obsID));
 				if (itsControllerMap.find(cntlrName) == itsControllerMap.end()) {
 					LOG_INFO_STR("Requesting start of " << cntlrName);
-					itsChildControl->startChild(CNTLRTYPE_OBSERVATIONCTRL, 
-												obsID, 
+					itsChildControl->startChild(CNTLRTYPE_OBSERVATIONCTRL,
+												obsID,
 												0,		// instanceNr
 												myHostname(true));
 					// Note: controller is now in state NO_STATE/CONNECTED (C/R)
 
 					// add controller to our 'monitor' administration
 					itsControllerMap[cntlrName] =  obsID;
-					LOG_DEBUG_STR("itsControllerMap[" << cntlrName << "]=" <<  obsID);
+					LOG_INFO_STR("itsControllerMap[" << cntlrName << "]=" <<  obsID);
 					if (!itsPreparedObs[obsID].parsetDistributed) {
 						_setParsetOnMsgBus(observationParset(obsID));
 						itsPreparedObs[obsID].parsetDistributed = true;
 					}
 				}
 				else {
-					LOG_DEBUG_STR("Observation " << obsID << " is already (being) started");
+					LOG_INFO_STR("Observation " << obsID << " is already (being) started");
 				}
 			}
 		}
-- 
GitLab