COB-4: Report statistics about GPU usage with respect to real-time behaviour.

7692b5f3 · Jan David Mol · a6dcafde · 7692b5f3 · 7692b5f3 · 7692b5f3
Commit 7692b5f3 authored 6 years ago by Jan David Mol
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
@@ -140,6 +140,18 @@ namespace LOFAR
      }
    }
+    // On destruction, log how the average GPU processing time per block
+    // relates to real time, and how many subbands per GPU that would allow.
+    SubbandProc::~SubbandProc()
+    {
+      const double averageGPURunTime = totalCounter.getStats().mean() / 1000.0; /* counters are in ms */
+      const double blockDuration =  ps.settings.blockDuration();
+      // Without a positive average run time and block duration the ratios below
+      // are meaningless, and converting floor(inf) or floor(NaN) to int is
+      // undefined behaviour. The negated comparisons also catch NaN.
+      if (!(averageGPURunTime > 0.0) || !(blockDuration > 0.0)) {
+        LOG_INFO_STR("[GPU] No real-time statistics available (average GPU run time " << averageGPURunTime << "s, block duration " << blockDuration << "s).");
+        return;
+      }
+      // Report how our processing relates to real time
+      LOG_INFO_STR("[GPU] Processing ran at " << (100.0 * (averageGPURunTime * nrSubbandsPerSubbandProc) / blockDuration) << "% of real time (GPU required " << averageGPURunTime << "s to process " << blockDuration << "s of data for one subband, and needs to process " << nrSubbandsPerSubbandProc << " subbands per GPU).");
+      // Report how many subbands would yield up to 99% load
+      LOG_INFO_STR("[GPU] I can process at most  " << static_cast<int>(floor(0.99 * blockDuration / averageGPURunTime)) << " subbands per GPU at real time.");
+    }
    size_t SubbandProc::nrOutputElements() const
    {

--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
@@ -102,6 +102,8 @@ namespace LOFAR
                  KernelFactories &factories,
                  size_t nrSubbandsPerSubbandProc = 1);
+      // Logs statistics on how GPU processing time related to real time.
+      ~SubbandProc();
      // A pool of input data, to allow items to be filled and
      // computed on in parallel.
      Pool<SubbandProcInputData> inputPool;

--- a/RTCP/Cobalt/GPUProc/src/gpu_load.cc
+++ b/RTCP/Cobalt/GPUProc/src/gpu_load.cc
@@ -73,14 +73,11 @@ int main(int argc, char **argv) {
  const size_t nrChannelsPerSubband = ps.settings.correlator.nrChannels;
  const size_t integrationSteps = ps.settings.correlator.nrSamplesPerIntegration();
-  // Create very simple kernel programs, with predictable output. Skip as much
+  // Assume every node has as many GPUs as this one.
-  // as possible. Nr of channels/sb from the parset is 1, so the PPF will not
+  const size_t nrSubbandsPerSubbandProc = ceilDiv(ceilDiv(ps.settings.subbands.size(), ps.settings.nodes.size()), devices.size());
-  // even run.  Parset also has turned of delay compensation and bandpass
-  // correction (but that kernel will run to convert int to float and to
+  KernelFactories factories(ps, nrSubbandsPerSubbandProc);
-  // transform the data order).
+  SubbandProc cwq(ps, ctx, factories, nrSubbandsPerSubbandProc);
-  KernelFactories factories(ps, 1);
-  SubbandProc cwq(ps, ctx, factories);
  SubbandProcInputData in(
    nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP,