diff --git a/RTCP/FCNP/src/fcnp_ion.cc b/RTCP/FCNP/src/fcnp_ion.cc index 63f7fd5ba2ee832099d869f44781d8118731fbe1..fc26ef65efd6c90972f3873e0b30acbd39856335 100644 --- a/RTCP/FCNP/src/fcnp_ion.cc +++ b/RTCP/FCNP/src/fcnp_ion.cc @@ -471,7 +471,7 @@ static void *pollThread(void *) } if (useInterrupts) { - LOG_INFO_STR( "FCNP: Received " << nrInterrupts << " vc0 interrupts" ); + LOG_DEBUG_STR( "FCNP: Received " << nrInterrupts << " vc0 interrupts" ); stopped = true; } @@ -602,7 +602,7 @@ static void drainFIFO() } if (quadsToRead > 0) - LOG_INFO_STR( "FCNP: Dropped " << quadsToRead << " lingering quadwords from packets of a previous job" ); + LOG_DEBUG_STR( "FCNP: Dropped " << quadsToRead << " lingering quadwords from packets of a previous job" ); while (-- quadsToRead >= 0) _bgp_QuadLoad(vc0 + _BGP_TRx_Sx, 0); @@ -629,7 +629,7 @@ static void drainFIFO() } if (dropped > 0) - LOG_INFO_STR( "FCNP: Dropped " << dropped << " lingering packets from previous job" ); + LOG_DEBUG_STR( "FCNP: Dropped " << dropped << " lingering packets from previous job" ); } diff --git a/RTCP/IONProc/src/CommandServer.cc b/RTCP/IONProc/src/CommandServer.cc index ab034a4caa6998317d614250aa9c7994324c4b9d..d394e1f6c97dc6f86ffc3b089893993072ce2fbb 100644 --- a/RTCP/IONProc/src/CommandServer.cc +++ b/RTCP/IONProc/src/CommandServer.cc @@ -65,6 +65,12 @@ static void handleCommand(const std::string &command) } } else if (command == "quit") { quit = true; +#if defined HAVE_BGP + } else if (command == "debug") { + LOGCOUT_SETLEVEL(8); + } else if (command == "nodebug") { + LOGCOUT_SETLEVEL(4); +#endif } else if (command == "") { // quietly drop empty commands } else if (myPsetNumber == 0) { @@ -82,12 +88,14 @@ static void commandMaster() SocketStream sk("0.0.0.0", 4000, SocketStream::TCP, SocketStream::Server); + LOG_INFO( "Command server ready" ); + while (!quit) { std::string command; try { command = sk.readLine(); - LOG_DEBUG_STR("read command: " << command); + LOG_INFO_STR("read command: " << command); } catch (Stream::EndOfStreamException &) { sk.reaccept(); continue; diff --git a/RTCP/IONProc/src/ION_main.cc b/RTCP/IONProc/src/ION_main.cc index 3e31fc0060276a185f6b1c62d2c7ff76be72a385..5eb1048a56b91fc21d5379811784f0ae5fbca28b 100644 --- a/RTCP/IONProc/src/ION_main.cc +++ b/RTCP/IONProc/src/ION_main.cc @@ -439,6 +439,13 @@ int main(int argc, char **argv) #if defined HAVE_BGP INIT_LOGGER_WITH_SYSINFO(str(format("IONProc@%02d") % myPsetNumber)); + bool isProduction = argc > 1 && argv[1][0] == '1'; + + if (isProduction) { + LOGCOUT_SETLEVEL(4); // do not show debug info + } else { + LOGCOUT_SETLEVEL(8); // show debug info + } #elif defined HAVE_LOG4CPLUS // do nothing #elif defined HAVE_LOG4CXX diff --git a/RTCP/IONProc/src/Job.cc b/RTCP/IONProc/src/Job.cc index 7eaa72bdc8f358caa963f3c449e6985976bada07..c2890e78153e2aca8602471c0060bbfed1704eef 100644 --- a/RTCP/IONProc/src/Job.cc +++ b/RTCP/IONProc/src/Job.cc @@ -42,6 +42,8 @@ #include <boost/format.hpp> using boost::format; +#define LOG_CONDITION (myPsetNumber == 0) + namespace LOFAR { namespace RTCP { @@ -63,11 +65,13 @@ Job::Job(const char *parsetName) checkParset(); - LOG_INFO_STR(itsLogPrefix << "----- Creating new observation"); - LOG_DEBUG_STR(itsLogPrefix << "usedCoresInPset = " << itsParset.usedCoresInPset()); - itsNrRuns = static_cast<unsigned>(ceil((itsParset.stopTime() - itsParset.startTime()) / itsParset.CNintegrationTime())); - LOG_DEBUG_STR(itsLogPrefix << "itsNrRuns = " << itsNrRuns); + + if (LOG_CONDITION) { + LOG_INFO_STR(itsLogPrefix << "----- Creating new job"); + LOG_DEBUG_STR(itsLogPrefix << "usedCoresInPset = " << itsParset.usedCoresInPset()); + LOG_DEBUG_STR(itsLogPrefix << "itsNrRuns = " << itsNrRuns); + } // synchronize roughly every 5 seconds to see if the job is cancelled itsNrRunTokensPerBroadcast = static_cast<unsigned>(ceil(5.0 / itsParset.CNintegrationTime())); @@ -86,7 +90,8 @@ Job::~Job() delete itsJobThread; jobQueue.remove(this); - LOG_INFO_STR(itsLogPrefix << "----- Observation " << (itsIsRunning ? "ended" : "cancelled") << " successfully"); + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Job " << (itsIsRunning ? "finished" : "cancelled") << " successfully"); } @@ -498,7 +503,7 @@ bool Job::configureCNs() void Job::unconfigureCNs() { CN_Command command(CN_Command::POSTPROCESS); - + LOG_DEBUG_STR(itsLogPrefix << "Unconfiguring cores " << itsParset.usedCoresInPset() << " ..."); for (unsigned core = 0; core < itsCNstreams.size(); core ++) @@ -530,14 +535,15 @@ template <typename SAMPLE_TYPE> void Job::doObservation() unsigned nrparts = itsParset.nrPartsPerStokes(); unsigned nrbeams = itsParset.flysEye() ? itsParset.nrMergedStations() : itsParset.nrPencilBeams(); - - LOG_INFO_STR(itsLogPrefix << "----- Observation start"); + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Observation start"); // first: send configuration to compute nodes so they know what to expect if (!configureCNs()) { unconfigureCNs(); - LOG_INFO_STR(itsLogPrefix << "----- Observation finished"); + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Observation finished"); return; } @@ -626,8 +632,6 @@ template <typename SAMPLE_TYPE> void Job::doObservation() continue; } - LOG_DEBUG_STR(itsLogPrefix << "Setting up output " << p.outputNr << " (" << p.name << ")"); - outputSections[output] = new OutputSection(itsParset, cores, list, maxlistsize, p, &createCNstream); } @@ -666,8 +670,9 @@ template <typename SAMPLE_TYPE> void Job::doObservation() detachFromInputSection<SAMPLE_TYPE>(); unconfigureCNs(); - - LOG_INFO_STR(itsLogPrefix << "----- Observation finished"); + + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Observation finished"); } diff --git a/RTCP/IONProc/src/LogThread.cc b/RTCP/IONProc/src/LogThread.cc index 5c1de2a01302aea16ae0c0af9ad2444d1660a6da..b5d3bc583c4039fb9290bd667cb3e3df14c902e3 100644 --- a/RTCP/IONProc/src/LogThread.cc +++ b/RTCP/IONProc/src/LogThread.cc @@ -54,6 +54,8 @@ LogThread::LogThread(unsigned nrRspBoards, std::string stationName) LogThread::~LogThread() { itsShouldStop = true; + + itsThread.abort(); // mainly to shorten the sleep() call } @@ -123,7 +125,7 @@ void LogThread::mainLoop() readCPUstats(previousLoad); #endif - LOG_DEBUG("LogThread running"); + //LOG_DEBUG("LogThread running"); // non-atomic updates from other threads cause race conditions, but who cares @@ -160,11 +162,11 @@ void LogThread::mainLoop() writeCPUstats(logStr); #endif - LOG_INFO(logStr.str()); + LOG_INFO_STR(logStr.str()); sleep(1); } - LOG_DEBUG("LogThread stopped"); + //LOG_DEBUG("LogThread stopped"); } } // namespace RTCP diff --git a/RTCP/IONProc/src/LogThread.h b/RTCP/IONProc/src/LogThread.h index 8537de731dd7552d6e6958f6c7b411d238b7b446..f27a1228420924e20e3623ac43757de7e4cbfbec 100644 --- a/RTCP/IONProc/src/LogThread.h +++ b/RTCP/IONProc/src/LogThread.h @@ -54,7 +54,7 @@ class LogThread std::string itsStationName; volatile bool itsShouldStop; - Thread itsThread; + InterruptibleThread itsThread; #if defined HAVE_BGP_ION struct CPUload { diff --git a/RTCP/IONProc/src/OutputSection.cc b/RTCP/IONProc/src/OutputSection.cc index beb7f73dff4e9511997ccb8fc857bfd6a0203da6..f1c1562405e235829dc2e3e7ad4f620ac14899b0 100644 --- a/RTCP/IONProc/src/OutputSection.cc +++ b/RTCP/IONProc/src/OutputSection.cc @@ -171,7 +171,7 @@ void OutputSection::droppingData(unsigned subband) void OutputSection::notDroppingData(unsigned subband) { if (itsDroppedCount[subband] > 0) { - LOG_WARN_STR(itsLogPrefix << " index " << setw(3) << itsItemList[subband].second << "] Dropped " << itsDroppedCount[subband] << " integration time(s)" ); + LOG_WARN_STR(itsLogPrefix << " index " << setw(3) << itsItemList[subband].second << "] Dropped " << itsDroppedCount[subband] << " blocks" ); itsDroppedCount[subband] = 0; } } diff --git a/RTCP/IONProc/src/OutputThread.cc b/RTCP/IONProc/src/OutputThread.cc index b7ef57acf13fadfd0c30dbd4db80273914a84605..8f7c8e7d6811a649f31ebf7a50223944910c8828 100644 --- a/RTCP/IONProc/src/OutputThread.cc +++ b/RTCP/IONProc/src/OutputThread.cc @@ -144,12 +144,12 @@ void OutputThread::mainLoop() std::auto_ptr<Stream> streamToStorage; std::string outputDescriptor = getStreamDescriptorBetweenIONandStorage(itsParset, itsServer, itsFilename); - LOG_INFO_STR(itsLogPrefix << "Creating connection to " << outputDescriptor << "..."); + LOG_DEBUG_STR(itsLogPrefix << "Creating connection to " << outputDescriptor << "..."); try { streamToStorage.reset(createStream(outputDescriptor, false)); - LOG_INFO_STR(itsLogPrefix << "Creating connection to " << outputDescriptor << ": done"); + LOG_DEBUG_STR(itsLogPrefix << "Creating connection to " << outputDescriptor << ": done"); } catch (SystemCallException &ex) { if (ex.error == EINTR) { LOG_WARN_STR(itsLogPrefix << "Connection to " << outputDescriptor << " aborted"); @@ -194,7 +194,7 @@ void OutputThread::mainLoop() delete streamToStorage.release(); // close socket - LOG_INFO_STR(itsLogPrefix << "Connection to " << outputDescriptor << " closed"); + LOG_DEBUG_STR(itsLogPrefix << "Connection to " << outputDescriptor << " closed"); } } // namespace RTCP diff --git a/RTCP/IONProc/src/OutputThread.h b/RTCP/IONProc/src/OutputThread.h index dd8960ca2e3e482875e9d91c0f541df8dc1df9e0..d71ad356928af4020cd09a3844651b23993ea458 100644 --- a/RTCP/IONProc/src/OutputThread.h +++ b/RTCP/IONProc/src/OutputThread.h @@ -51,7 +51,7 @@ class OutputThread bool waitForDone(const struct timespec ×pec); void abort(); - static const unsigned maxSendQueueSize = 2; // use 2 if you run out of memory, but test carefully to avoid data loss + static const unsigned maxSendQueueSize = 3; // use 2 if you run out of memory, but test carefully to avoid data loss Queue<StreamableData *> itsFreeQueue, itsSendQueue; diff --git a/RTCP/Run/src/LOFAR/Core.py b/RTCP/Run/src/LOFAR/Core.py index 93ca782eee4744dc87d12ff0f9eb2dfdf202a666..882bd34235a028c64543062cbcfd3274246e530b 100644 --- a/RTCP/Run/src/LOFAR/Core.py +++ b/RTCP/Run/src/LOFAR/Core.py @@ -9,7 +9,6 @@ from CommandClient import sendCommand from ObservationID import ObservationID from Parset import Parset from Stations import Stations,overrideRack -from util.Hosts import ropen,rmkdir,rexists,runlink,rsymlink from util.dateutil import format import sys import signal diff --git a/RTCP/Run/src/LOFAR/Parset.py b/RTCP/Run/src/LOFAR/Parset.py index a07e994c5b973f21c221c9cc0f171b9a27114b28..8217a88016af302636b265a943c6f6ca76eaf33d 100644 --- a/RTCP/Run/src/LOFAR/Parset.py +++ b/RTCP/Run/src/LOFAR/Parset.py @@ -129,7 +129,7 @@ class Parset(util.Parset.Parset): self.setdefault("OLAP.Correlator.integrationTime",1); if "OLAP.Stokes.channelsPerSubband" not in self or int(self["OLAP.Stokes.channelsPerSubband"]) == 0: - self["OLAP.Stokes.channelsPerSubband"] = self["Observation.nrChannelsPerSubband"] + self["OLAP.Stokes.channelsPerSubband"] = self["Observation.channelsPerSubband"] self.setdefault('OLAP.Storage.Filtered.namemask','L${OBSID}_SB${SUBBAND}.filtered') diff --git a/RTCP/Run/src/LOFAR/Sections.py b/RTCP/Run/src/LOFAR/Sections.py index f1a880947f7a166029a73d7c58db78aa1a1f85d1..d52971dd2f07d21ae2b114edcdc2ca3be77b9cdd 100644 --- a/RTCP/Run/src/LOFAR/Sections.py +++ b/RTCP/Run/src/LOFAR/Sections.py @@ -2,7 +2,7 @@ from util.Commands import SyncCommand,AsyncCommand,mpikill,backquote,PIPE from util.Aborter import runUntilSuccess,runFunc -from Locations import Locations,Hosts +from Locations import Locations,Hosts,isProduction import Logger import os import Partitions @@ -182,6 +182,7 @@ class IONProcSection(Section): "%s" % (Locations.files["ionproc"],), # arguments + "%d" % (int(isProduction()),), ]