From 7692b5f37ffaf1e7789ff2a519547d1d796827f9 Mon Sep 17 00:00:00 2001
From: Jan David Mol <mol@astron.nl>
Date: Fri, 22 Mar 2019 14:45:13 +0000
Subject: [PATCH] COB-4: Report statistics about GPU usage with respect to
 real-time behaviour.

---
 RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc | 12 ++++++++++++
 RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h  |  2 ++
 RTCP/Cobalt/GPUProc/src/gpu_load.cc                 | 13 +++++--------
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
index d2b77a2ef23..4a35c04a6db 100644
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
@@ -140,6 +140,18 @@ namespace LOFAR
       }
     }
 
+    SubbandProc::~SubbandProc()
+    {
+      // Log GPU load statistics at teardown. Skip them when no positive timings exist: that would divide by zero and cast inf/NaN to int (undefined behaviour).
+      const double averageGPURunTime = totalCounter.getStats().mean() / 1000.0; /* counters are in ms */
+      const double blockDuration = ps.settings.blockDuration();
+      if (!(averageGPURunTime > 0.0) || !(blockDuration > 0.0)) return; // negated form also rejects NaN
+      // Report how our processing relates to real time
+      LOG_INFO_STR("[GPU] Processing ran at " << (100.0 * (averageGPURunTime * nrSubbandsPerSubbandProc) / blockDuration) << "% of real time (GPU required " << averageGPURunTime << "s to process " << blockDuration << "s of data for one subband, and needs to process " << nrSubbandsPerSubbandProc << " subbands per GPU).");
+      // Report how many subbands would yield up to 99% load
+      LOG_INFO_STR("[GPU] I can process at most  " << static_cast<int>(floor(0.99 * blockDuration / averageGPURunTime)) << " subbands per GPU at real time.");
+    }
+
 
     size_t SubbandProc::nrOutputElements() const
     {
diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
index 608606c2c1d..f1b70855fc7 100644
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
@@ -102,6 +102,8 @@ namespace LOFAR
                   KernelFactories &factories,
                   size_t nrSubbandsPerSubbandProc = 1);
 
+      ~SubbandProc(); // Logs how GPU processing time compared to real time.
+
       // A pool of input data, to allow items to be filled and
       // computed on in parallel.
       Pool<SubbandProcInputData> inputPool;
diff --git a/RTCP/Cobalt/GPUProc/src/gpu_load.cc b/RTCP/Cobalt/GPUProc/src/gpu_load.cc
index bd77798a6e9..10f7b5a9046 100644
--- a/RTCP/Cobalt/GPUProc/src/gpu_load.cc
+++ b/RTCP/Cobalt/GPUProc/src/gpu_load.cc
@@ -73,14 +73,11 @@ int main(int argc, char **argv) {
   const size_t nrChannelsPerSubband = ps.settings.correlator.nrChannels;
   const size_t integrationSteps = ps.settings.correlator.nrSamplesPerIntegration();
 
-  // Create very simple kernel programs, with predictable output. Skip as much
-  // as possible. Nr of channels/sb from the parset is 1, so the PPF will not
-  // even run.  Parset also has turned of delay compensation and bandpass
-  // correction (but that kernel will run to convert int to float and to
-  // transform the data order).
-
-  KernelFactories factories(ps, 1);
-  SubbandProc cwq(ps, ctx, factories);
+  // Assume every node has as many GPUs as this one, and spread the subbands evenly over nodes and GPUs (rounding up).
+  const size_t nrSubbandsPerSubbandProc = ceilDiv(ceilDiv(ps.settings.subbands.size(), ps.settings.nodes.size()), devices.size());
+
+  KernelFactories factories(ps, nrSubbandsPerSubbandProc);
+  SubbandProc cwq(ps, ctx, factories, nrSubbandsPerSubbandProc);
 
   SubbandProcInputData in(
     nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP,
-- 
GitLab