From 6fdcff71bc3e5935e567b265390e9e4eb2ccb057 Mon Sep 17 00:00:00 2001
From: Ruud Overeem <overeem@astron.nl>
Date: Thu, 28 Apr 2011 12:21:33 +0000
Subject: [PATCH] Bug 1000: + Update to PythonControl to allow clients that can
 not communicate. + Added 'result' field to CONTROL_CONNECT message to pass
 failures. + Fixed stopping garbagecontroller timer in ChildControl + Fixed
 emergency stop timer in ParentControl + ObservationControl reports more
 detailed how an observation ends.

---
 .../include/APL/APLCommon/ControllerDefines.h |  1 +
 MAC/APL/APLCommon/src/ChildControl.cc         | 23 +++---
 .../APLCommon/src/Controller_Protocol.prot    |  4 +
 MAC/APL/APLCommon/src/ParentControl.cc        | 17 ++--
 .../CEPCU/src/PythonControl/PythonControl.cc  | 77 +++++++++++++++----
 .../CEPCU/src/PythonControl/PythonControl.h   |  2 +
 .../MainCU/src/MACScheduler/MACScheduler.cc   |  4 +-
 .../ObservationControl/ObservationControl.cc  | 45 ++++++++---
 .../src/ClockControl/ClockControl.cc          |  2 +-
 MAC/Deployment/data/OTDB/PythonControl.comp   |  1 +
 10 files changed, 133 insertions(+), 43 deletions(-)

diff --git a/MAC/APL/APLCommon/include/APL/APLCommon/ControllerDefines.h b/MAC/APL/APLCommon/include/APL/APLCommon/ControllerDefines.h
index 0ee00ff74ef..1641cb3a2f1 100644
--- a/MAC/APL/APLCommon/include/APL/APLCommon/ControllerDefines.h
+++ b/MAC/APL/APLCommon/include/APL/APLCommon/ControllerDefines.h
@@ -51,6 +51,7 @@ enum
 	CT_RESULT_OUT_OF_SYNC,
 	CT_RESULT_OBS_CONFLICT,
 	CT_RESULT_MANUAL_ABORT,
+	CT_RESULT_MANUAL_REMOVED,
 	CT_RESULT_NO_PARSET,
 	CT_RESULT_UNKNOWN_OBSERVATION,
   CT_RESULT_UNSPECIFIED
diff --git a/MAC/APL/APLCommon/src/ChildControl.cc b/MAC/APL/APLCommon/src/ChildControl.cc
index 3ace53bfd35..23dcdbd904b 100644
--- a/MAC/APL/APLCommon/src/ChildControl.cc
+++ b/MAC/APL/APLCommon/src/ChildControl.cc
@@ -966,23 +966,19 @@ void ChildControl::_doGarbageCollection()
 
 	CIiter			iter  = itsCntlrList->begin();
 	const_CIiter	end   = itsCntlrList->end();
+	bool			restartTimer(false);
 	while (iter != end) {
 		// Note: Removing a controller is done in two stages.
 		// 1: port == 0: inform main task about removal after retry interval expired
 		// 2: port == -1: remove from list
 		// This is necc. because main task may poll childcontrol for results.
 		if (!iter->port) {
+			restartTimer = true;
 			LOG_DEBUG_STR(time(0)<<"-"<<iter->requestTime<<">="<<itsStartupRetryInterval<<"*"<<itsMaxStartupRetries<<"?");
 			if ((uint32(time(0)-iter->requestTime)) >= itsStartupRetryInterval*itsMaxStartupRetries) {
 				LOG_DEBUG_STR ("Controller " << iter->cntlrName << " is still unreachable, informing main task");
 				_setEstablishedState(iter->cntlrName, CTState::QUITED, time(0), CT_RESULT_LOST_CONNECTION);
 				iter->port = (GCFPortInterface*) -1;
-				// start timer for second stage.
-				if (itsGarbageTimer) {
-					itsTimerPort.cancelTimer(itsGarbageTimer);
-				}
-				itsGarbageTimer = itsTimerPort.setTimer(1.0 * itsGarbageInterval);
-				LOG_DEBUG_STR("GarbageTimer = " << itsGarbageTimer);
 			}
 			iter++;
 		} else if (iter->port == (GCFPortInterface*)-1) {
@@ -995,6 +991,15 @@ void ChildControl::_doGarbageCollection()
 			iter++;
 		}
 	}
+
+	// restart the garbage timer when more work needs to be done in the future.
+	if (restartTimer) {
+		if (itsGarbageTimer) {
+			itsTimerPort.cancelTimer(itsGarbageTimer);
+		}
+		itsGarbageTimer = itsTimerPort.setTimer(1.0 * itsGarbageInterval);
+		LOG_DEBUG_STR("GarbageTimer = " << itsGarbageTimer);
+	}
 }
 
 // -------------------- STATE MACHINES FOR GCFTASK --------------------
@@ -1206,14 +1211,12 @@ GCFEvent::TResult	ChildControl::operational(GCFEvent&			event,
 
 			CIiter	controller = findController(msg.cntlrName);
 			if (controller == itsCntlrList->end()) {		// not found?
-				LOG_WARN_STR ("CONNECT event received from unknown controller: " <<
-							  msg.cntlrName);
+				LOG_WARN_STR ("CONNECT event received from unknown controller: " << msg.cntlrName);
 				answer.result = CT_RESULT_UNSPECIFIED;
 			}
 			else {
 				LOG_DEBUG_STR("CONNECT event received from " << msg.cntlrName);
-				// we don't have a result field in the Connect message, return NO_ERROR
-				_setEstablishedState(msg.cntlrName, CTState::CONNECTED, time(0), CT_RESULT_NO_ERROR);
+				_setEstablishedState(msg.cntlrName, CTState::CONNECTED, time(0), msg.result);
 				// first direct contact with controller, remember port
 				controller->port = &port;
 				answer.result = CT_RESULT_NO_ERROR;
diff --git a/MAC/APL/APLCommon/src/Controller_Protocol.prot b/MAC/APL/APLCommon/src/Controller_Protocol.prot
index f0cfc25e471..293a1ee0105 100644
--- a/MAC/APL/APLCommon/src/Controller_Protocol.prot
+++ b/MAC/APL/APLCommon/src/Controller_Protocol.prot
@@ -104,6 +104,10 @@ event = {
 		name = "cntlrName";
 		type = "string";
 	};
+	param = {
+		name = "result";
+		type = "uint16";
+	};
 };
 
 //
diff --git a/MAC/APL/APLCommon/src/ParentControl.cc b/MAC/APL/APLCommon/src/ParentControl.cc
index f5a5d89b3d1..73a444a9be8 100644
--- a/MAC/APL/APLCommon/src/ParentControl.cc
+++ b/MAC/APL/APLCommon/src/ParentControl.cc
@@ -429,6 +429,7 @@ void ParentControl::_doRequestedAction(PIiter	parent)
 			// construct and send message
 			CONTROLConnectEvent		hello;
 			hello.cntlrName = parent->name;
+			hello.result    = CT_RESULT_NO_ERROR;
 			parent->port->send(hello);
 
 			// (re)set the parameters of this connection
@@ -693,7 +694,7 @@ GCFEvent::TResult	ParentControl::operational(GCFEvent&			event,
 			LOG_TRACE_VAR_STR("timerID:" << timerEvent.id);
 			PIiter				parent = findParentOnTimerID(timerEvent.id, &timerType);
 			if (!isParent(parent)) {
-				LOG_DEBUG ("timerevent is not of a known parent, ignore");
+				LOG_WARN ("Timerevent is not of a known parent, ignore");
 				break;
 			}
 
@@ -730,10 +731,14 @@ GCFEvent::TResult	ParentControl::operational(GCFEvent&			event,
 				}
 			}
 			else {
-				LOG_WARN_STR ("Could not reconnect to parent " << 
-							parent->name << ", deleting entry");
-//					concrete_release(parent);
-				itsParentList.erase(parent);
+				LOG_WARN_STR ("Could not reconnect to parent " << parent->name << ", deleting entry");
+				if (parent->stopTimer) {
+					itsTimerPort.cancelTimer(parent->stopTimer);
+					parent->stopTimer = itsTimerPort.setTimer(0.0);
+				}
+				else {
+					itsParentList.erase(parent);
+				}
 			}
 		}
 		break;
@@ -784,6 +789,7 @@ GCFEvent::TResult	ParentControl::operational(GCFEvent&			event,
 		LOG_DEBUG_STR("Sending CONNECT(" << parent.name << ") event to maintask");
 		CONTROLConnectEvent		request;
 		request.cntlrName = parent.name;
+		request.result	  = CT_RESULT_NO_ERROR;
 		itsMainTaskPort->sendBack(request);
 		}
 		break;
@@ -800,6 +806,7 @@ GCFEvent::TResult	ParentControl::operational(GCFEvent&			event,
 			}
 			CONTROLConnectEvent	outMsg;
 			outMsg.cntlrName = inMsg.cntlrName;
+			outMsg.result    = inMsg.result;
 			parent->port->send(outMsg);
 			parent->currentState = CTState::CONNECT;
 			LOG_DEBUG_STR("Forwarding CONTROL_CONNECT to parent " << inMsg.cntlrName);
diff --git a/MAC/APL/CEPCU/src/PythonControl/PythonControl.cc b/MAC/APL/CEPCU/src/PythonControl/PythonControl.cc
index de981ae3191..2d9ceec5dbc 100644
--- a/MAC/APL/CEPCU/src/PythonControl/PythonControl.cc
+++ b/MAC/APL/CEPCU/src/PythonControl/PythonControl.cc
@@ -29,17 +29,16 @@
 //#include <Common/lofar_string.h>
 #include <Common/ParameterSet.h>
 #include <Common/Exceptions.h>
-#include <ApplCommon/StationInfo.h>
-#include <GCF/PVSS/GCF_PVTypes.h>
 #include <Common/SystemUtil.h>
+#include <ApplCommon/StationInfo.h>
 #include <MACIO/MACServiceInfo.h>
 #include <GCF/TM/GCF_Protocols.h>
+#include <GCF/PVSS/GCF_PVTypes.h>
+#include <GCF/RTDB/DP_Protocol.ph>
 #include <APL/APLCommon/APL_Defines.h>
 #include <APL/APLCommon/APLUtilities.h>
 #include <APL/APLCommon/Controller_Protocol.ph>
-#include <APL/APLCommon/APLUtilities.h>
 #include <APL/APLCommon/CTState.h>
-#include <GCF/RTDB/DP_Protocol.ph>
 
 #include "PythonControl.h"
 #include "PVSSDatapointDefs.h"
@@ -264,9 +263,10 @@ GCFEvent::TResult PythonControl::initial_state(GCFEvent& event,
 
 		// request from parent task to start up the child side.
 		ParameterSet*   thePS  = globalParameterSet();      // shortcut to global PS.
-		string  myPrefix(thePS->locateModule("PythonControl")+"PythonControl.");
-		string	pythonProg(thePS->getString(myPrefix+"pythonProgram", "@pythonProgram@"));
-		string	pythonHost(thePS->getString(myPrefix+"pythonHost", "@pythonHost@"));
+		string  myPrefix        (thePS->locateModule("PythonControl")+"PythonControl.");
+		string	pythonProg      (thePS->getString(myPrefix+"pythonProgram",  "@pythonProgram@"));
+		string	pythonHost      (thePS->getString(myPrefix+"pythonHost",     "@pythonHost@"));
+		itsChildCanCommunicate = thePS->getBool  (myPrefix+"canCommunicate", true);
 		// START PYTHON
 		bool	startOK = _startPython(pythonProg, getObservationNr(getName()), realHostname(pythonHost), itsListener->makeServiceName());
 		if (!startOK) {
@@ -278,8 +278,18 @@ GCFEvent::TResult PythonControl::initial_state(GCFEvent& event,
 			TRAN(PythonControl::finishing_state);
 		}
 		else {
-			LOG_DEBUG ("Started Python environment, going to waitForConnection state");
-			TRAN(PythonControl::waitForConnection_state);
+			if (itsChildCanCommunicate) {
+				LOG_DEBUG ("Started Python environment, going to waitForConnection state");
+				TRAN(PythonControl::waitForConnection_state);
+			}
+			else {
+				LOG_WARN ("Started Python environment, CHILD CANNOT COMMUNICATE, FAKING RESPONSES!!!");
+				CONTROLConnectedEvent	answer;
+				answer.cntlrName = itsMyName;
+				answer.result = CT_RESULT_NO_ERROR;
+				itsParentPort->send(answer);
+				TRAN(PythonControl::operational_state);
+			}
 		}
 	}
 	break;
@@ -426,42 +436,79 @@ GCFEvent::TResult PythonControl::operational_state(GCFEvent& event, GCFPortInter
 	case CONTROL_CLAIM: {
 		CONTROLClaimEvent		msg(event);
 		LOG_DEBUG_STR("Received CLAIM(" << msg.cntlrName << ")");
-		itsPythonPort->send(msg);
+		if (itsChildCanCommunicate) {
+			itsPythonPort->send(msg);
+		}
+		else {
+			LOG_WARN("Sending FAKE Claim response");
+			sendControlResult(*itsParentPort, event.signal, itsMyName, CT_RESULT_NO_ERROR);
+		}
 		break;
 	}
 
 	case CONTROL_PREPARE: {
 		CONTROLPrepareEvent		msg(event);
 		LOG_DEBUG_STR("Received PREPARE(" << msg.cntlrName << ")");
-		itsPythonPort->send(msg);
+		if (itsChildCanCommunicate) {
+			itsPythonPort->send(msg);
+		}
+		else {
+			LOG_WARN("Sending FAKE Prepare response");
+			sendControlResult(*itsParentPort, event.signal, itsMyName, CT_RESULT_NO_ERROR);
+		}
 		break;
 	}
 
 	case CONTROL_RESUME: {
 		CONTROLResumeEvent		msg(event);
 		LOG_DEBUG_STR("Received RESUME(" << msg.cntlrName << ")");
-		itsPythonPort->send(msg);
+		if (itsChildCanCommunicate) {
+			itsPythonPort->send(msg);
+		}
+		else {
+			LOG_WARN("Sending FAKE Resume response");
+			sendControlResult(*itsParentPort, event.signal, itsMyName, CT_RESULT_NO_ERROR);
+		}
 		break;
 	}
 
 	case CONTROL_SUSPEND: {
 		CONTROLSuspendEvent		msg(event);
 		LOG_DEBUG_STR("Received SUSPEND(" << msg.cntlrName << ")");
-		itsPythonPort->send(msg);
+		if (itsChildCanCommunicate) {
+			itsPythonPort->send(msg);
+		}
+		else {
+			LOG_WARN("Sending FAKE Suspend response");
+			sendControlResult(*itsParentPort, event.signal, itsMyName, CT_RESULT_NO_ERROR);
+		}
 		break;
 	}
 
 	case CONTROL_RELEASE: {
 		CONTROLReleaseEvent		msg(event);
 		LOG_DEBUG_STR("Received RELEASE(" << msg.cntlrName << ")");
-		itsPythonPort->send(msg);
+		if (itsChildCanCommunicate) {
+			itsPythonPort->send(msg);
+		}
+		else {
+			LOG_WARN("Sending FAKE Release response");
+			sendControlResult(*itsParentPort, event.signal, itsMyName, CT_RESULT_NO_ERROR);
+		}
 		break;
 	}
 
 	case CONTROL_QUIT: {
 		CONTROLQuitEvent		msg(event);
 		LOG_DEBUG_STR("Received QUIT(" << msg.cntlrName << ")");
-		itsPythonPort->send(msg);
+		if (itsChildCanCommunicate) {
+			itsPythonPort->send(msg);
+		}
+		else {
+			LOG_WARN("Sending FAKE Quit response and quiting application");
+			sendControlResult(*itsParentPort, event.signal, itsMyName, CT_RESULT_NO_ERROR);
+			TRAN(PythonControl::finishing_state);
+		}
 		break;
 	}
 
diff --git a/MAC/APL/CEPCU/src/PythonControl/PythonControl.h b/MAC/APL/CEPCU/src/PythonControl/PythonControl.h
index a2848dfa15f..4ce3fff2988 100644
--- a/MAC/APL/CEPCU/src/PythonControl/PythonControl.h
+++ b/MAC/APL/CEPCU/src/PythonControl/PythonControl.h
@@ -103,6 +103,8 @@ private:
 
 	GCFTCPPort*				itsPythonPort;
 	string					itsPythonName;
+	// Until the python pipeline can communicate we can 'fake' the communication by setting the variable to FALSE
+	bool					itsChildCanCommunicate;
 
 	CTState::CTstateNr		itsState;
 
diff --git a/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc b/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc
index 108e4513ca5..5a3cac41cc4 100644
--- a/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc
+++ b/MAC/APL/MainCU/src/MACScheduler/MACScheduler.cc
@@ -459,6 +459,7 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
 		}
 		OTDB::TreeMaintenance	tm(itsOTDBconnection);
 		TreeStateConv			tsc(itsOTDBconnection);
+		// CT_RESULT_: MANUAL_REMOVED, MANUAL_ABORT, LOST_CONNECTION, NO_ERROR
 		if (quitedEvent.result == CT_RESULT_NO_ERROR) {
 			tm.setTreeState(theObs->second, tsc.get("finished"));
 		}
@@ -467,8 +468,7 @@ GCFEvent::TResult MACScheduler::active_state(GCFEvent& event, GCFPortInterface&
 		}
 
 		// update our administration
-		LOG_DEBUG_STR("Removing observation " << quitedEvent.cntlrName << 
-						" from activeList");
+		LOG_DEBUG_STR("Removing observation " << quitedEvent.cntlrName << " from activeList");
 //		_removeActiveObservation(quitedEvent.cntlrName);
 		break;
 	}
diff --git a/MAC/APL/MainCU/src/ObservationControl/ObservationControl.cc b/MAC/APL/MainCU/src/ObservationControl/ObservationControl.cc
index 7f3808f4431..89baaf53549 100644
--- a/MAC/APL/MainCU/src/ObservationControl/ObservationControl.cc
+++ b/MAC/APL/MainCU/src/ObservationControl/ObservationControl.cc
@@ -182,9 +182,8 @@ void ObservationControl::finish()
 void ObservationControl::abortObservation()
 {
 	LOG_WARN("Received manual interrupt to ABORT the observation");
-	if (itsState < CTState::RESUME) {
-		itsQuitReason = CT_RESULT_MANUAL_ABORT;
-	}
+	itsQuitReason = (itsState < CTState::RESUME) ? CT_RESULT_MANUAL_REMOVED : CT_RESULT_MANUAL_ABORT;
+
 	itsTimerPort->cancelTimer(itsStopTimer);	// cancel old timer
 	itsStopTimer = itsTimerPort->setTimer(0.0);	// expire immediately
 	// will result in F_TIMER in ::active_state
@@ -212,8 +211,32 @@ void	ObservationControl::setState(CTState::CTstateNr		newState)
 				LOG_WARN_STR("Could not update runstate in PVSS of observation " << itsTreeID);
 			}
 		}
-		setObjectState(formatString("ObservationControl: %s: %s", getName().c_str(), cts.name(newState).c_str()), 
-							itsObsDPname, ((newState>CTState::CONNECT) ? RTDB_OBJ_STATE_OPERATIONAL : RTDB_OBJ_STATE_OFF));
+
+		string message(cts.name(newState));
+		int    reportState(RTDB_OBJ_STATE_OFF);
+		if (newState == CTState::QUITED && itsQuitReason != CT_RESULT_NO_ERROR) {
+			switch (itsQuitReason) {
+			case CT_RESULT_MANUAL_REMOVED: 	
+				message = "Aborted by operator before observation started";
+				break;
+			case CT_RESULT_MANUAL_ABORT: 	
+				message = "Aborted by operator during the observation";
+				reportState = RTDB_OBJ_STATE_SUSPICIOUS;
+				break;
+			case CT_RESULT_LOST_CONNECTION:	
+				message = "Lost connection(s)";
+				reportState = RTDB_OBJ_STATE_BROKEN;
+				break;
+			default:
+				message = "Unknown reason";
+				reportState = RTDB_OBJ_STATE_BROKEN;
+			}
+		}
+		else if (newState > CTState::CONNECT) {
+			reportState = RTDB_OBJ_STATE_OPERATIONAL;
+		}
+		string reporterID (formatString("ObservationControl: %s: %s", getName().c_str(), message.c_str()));
+		setObjectState(reporterID, itsObsDPname, reportState);
 	}
 
 	if (itsParentControl) {		// allow calling this function before parentControl is online
@@ -240,9 +263,9 @@ void ObservationControl::registerResultMessage(const string& cntlrName, int	resu
 					<< cts.name(cts.stateAck(itsState)) << " message.");
 			itsBusyControllers--;	// [15122010] see note in doHeartBeatTask!
 		}
-//		if (state == CTState::QUITED) {
-//			...
-//		}
+		if (state == CTState::QUITED && result != CT_RESULT_NO_ERROR) {
+			itsQuitReason = result;
+		}
 		return;
 	}
 
@@ -426,7 +449,7 @@ GCFEvent::TResult ObservationControl::active_state(GCFEvent& event, GCFPortInter
 		}
 		else if (timerEvent.id == itsStopTimer) {
 			setState(CTState::QUIT);
-			itsChildResult   = CT_RESULT_NO_ERROR;
+			itsChildResult   = itsQuitReason;
 			itsChildsInError = 0;
 			itsStopTimer     = 0;
 			LOG_DEBUG("Requesting all childs to quit");
@@ -552,6 +575,7 @@ GCFEvent::TResult ObservationControl::finishing_state(GCFEvent& 		event,
 
 		// tell Parent task we like to go down.
 		itsParentControl->nowInState(getName(), CTState::QUIT);
+		setState(CTState::QUITED);
 
 		// inform MACScheduler we are going down
 		CONTROLQuitedEvent	msg;
@@ -560,7 +584,8 @@ GCFEvent::TResult ObservationControl::finishing_state(GCFEvent& 		event,
 		itsParentPort->send(msg);
 
 		// update PVSS
-		itsPropertySet->setValue(string(PN_FSM_CURRENT_ACTION),GCFPVString("Finished"));
+		itsPropertySet->setValue(string(PN_FSM_CURRENT_ACTION),
+						GCFPVString((itsQuitReason == CT_RESULT_NO_ERROR) ? "Finished" : "Aborted"));
 		itsPropertySet->setValue(string(PN_FSM_ERROR),GCFPVString(""));
 
 		itsTimerPort->setTimer(1L);	// give PVSS task some time to update the DB.
diff --git a/MAC/APL/StationCU/src/ClockControl/ClockControl.cc b/MAC/APL/StationCU/src/ClockControl/ClockControl.cc
index 577ecaf4225..248b860a697 100644
--- a/MAC/APL/StationCU/src/ClockControl/ClockControl.cc
+++ b/MAC/APL/StationCU/src/ClockControl/ClockControl.cc
@@ -985,7 +985,7 @@ GCFEvent::TResult ClockControl::defaultMessageHandling(GCFEvent& 		event,
 			CONTROLConnectEvent		msg(event);
 			CONTROLConnectedEvent	answer;
 			answer.cntlrName = msg.cntlrName;
-			answer.result = true;
+			answer.result = true;	// !!!! ??? TODO
 			itsParentPort->send(answer);
 		}
 		break;
diff --git a/MAC/Deployment/data/OTDB/PythonControl.comp b/MAC/Deployment/data/OTDB/PythonControl.comp
index 87291744f6d..d26614e4e89 100644
--- a/MAC/Deployment/data/OTDB/PythonControl.comp
+++ b/MAC/Deployment/data/OTDB/PythonControl.comp
@@ -14,6 +14,7 @@ par  pythonHost		 I    text   -	100	0	'CCU001'					-		"Machine the Pythonscript
 par  runtimeDirectory	 I    text   -	100	0	'/home/pipeline/runtime/${MSNUMBER}/run'	-	"Where to start the programs"
 par  resultDirectory	 I    text   -	100	0	'lexar001:/data/${MSNUMBER}/output'		-	"Where to store the results"
 par  workingDirectory	 I    text   -	100	0	'/data/scratch/${MSNUMBER}/work'			-	"Where to store temporarely files"
+par  canCommunicate	 I    bool   -	10	0	'true'			-	"Temp flag to tell MAC if the current PythonController can respond to CONTROL_xxx messages"
 
 #uses  ApplCtrl    4.0.0  development 	1	"The ACC controller"
 #uses  Flagger	   4.0.0  development 	1	"Flagger"
-- 
GitLab