diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
index d2b77a2ef2384943ab2e3e8cb695697302ec60b5..4a35c04a6db39bd0b6c48c897810acd1bf7dabbe 100644
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
@@ -140,6 +140,18 @@ namespace LOFAR
       }
     }
 
+    // On destruction, report how the measured average GPU run time relates to real time.
+    SubbandProc::~SubbandProc()
+    {
+      const double averageGPURunTime = totalCounter.getStats().mean() / 1000.0; /* counters are in ms */
+      const double blockDuration = ps.settings.blockDuration();
+      if (!(averageGPURunTime > 0.0) || !(blockDuration > 0.0)) return; // no usable measurement (zero/NaN): avoid x/0 and the undefined int cast of inf/NaN
+      // Report how our processing relates to real time
+      LOG_INFO_STR("[GPU] Processing ran at " << (100.0 * (averageGPURunTime * nrSubbandsPerSubbandProc) / blockDuration) << "% of real time (GPU required " << averageGPURunTime << "s to process " << blockDuration << "s of data for one subband, and needs to process " << nrSubbandsPerSubbandProc << " subbands per GPU).");
+      // Report how many subbands would yield up to 99% load
+      LOG_INFO_STR("[GPU] I can process at most  " << static_cast<int>(floor(0.99 * blockDuration / averageGPURunTime)) << " subbands per GPU at real time.");
+    }
+
 
     size_t SubbandProc::nrOutputElements() const
     {
diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
index 608606c2c1d15661473441b58641fabfd50fba0f..f1b70855fc72abadc3cc927506437838ff63e7f0 100644
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
@@ -102,6 +102,8 @@ namespace LOFAR
                   KernelFactories &factories,
                   size_t nrSubbandsPerSubbandProc = 1);
 
+      ~SubbandProc(); // logs how the GPU processing time relates to real time
+
       // A pool of input data, to allow items to be filled and
       // computed on in parallel.
       Pool<SubbandProcInputData> inputPool;
diff --git a/RTCP/Cobalt/GPUProc/src/gpu_load.cc b/RTCP/Cobalt/GPUProc/src/gpu_load.cc
index bd77798a6e9d50bae733cf4675f578c73c785ebe..10f7b5a9046ed6322a71ab70754a5b190a83bc9e 100644
--- a/RTCP/Cobalt/GPUProc/src/gpu_load.cc
+++ b/RTCP/Cobalt/GPUProc/src/gpu_load.cc
@@ -73,14 +73,11 @@ int main(int argc, char **argv) {
   const size_t nrChannelsPerSubband = ps.settings.correlator.nrChannels;
   const size_t integrationSteps = ps.settings.correlator.nrSamplesPerIntegration();
 
-  // Create very simple kernel programs, with predictable output. Skip as much
-  // as possible. Nr of channels/sb from the parset is 1, so the PPF will not
-  // even run.  Parset also has turned of delay compensation and bandpass
-  // correction (but that kernel will run to convert int to float and to
-  // transform the data order).
-
-  KernelFactories factories(ps, 1);
-  SubbandProc cwq(ps, ctx, factories);
+  // Assume every node has as many GPUs as this one: divide the subbands over the nodes, then over the GPUs, rounding up.
+  const size_t nrSubbandsPerSubbandProc = ceilDiv(ceilDiv(ps.settings.subbands.size(), ps.settings.nodes.size()), devices.size());
+
+  KernelFactories factories(ps, nrSubbandsPerSubbandProc);
+  SubbandProc cwq(ps, ctx, factories, nrSubbandsPerSubbandProc);
 
   SubbandProcInputData in(
     nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP,