From 7692b5f37ffaf1e7789ff2a519547d1d796827f9 Mon Sep 17 00:00:00 2001
From: Jan David Mol <mol@astron.nl>
Date: Fri, 22 Mar 2019 14:45:13 +0000
Subject: [PATCH] COB-4: Report statistics about GPU usage with respect to real-time behaviour.

---
Reviewer notes (not part of the commit message):
 * Amended: the destructor now skips the report when the mean GPU run time
   or the block duration is not positive.
 * Line breaks and indentation of this patch were reconstructed; if context
   lines do not match, apply with `git apply --ignore-whitespace`.
 * The post-image blob id on the SubbandProc.cc index line predates the
   amendment.

 RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc | 20 ++++++++++++++++++++
 RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h  |  2 ++
 RTCP/Cobalt/GPUProc/src/gpu_load.cc                 | 13 +++++--------
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
index d2b77a2ef23..4a35c04a6db 100644
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.cc
@@ -140,6 +140,26 @@ namespace LOFAR
       }
     }
 
+    // Logs, at teardown, how the mean GPU run time per subband relates to
+    // real time. Nothing is logged when no usable timing data is available.
+    SubbandProc::~SubbandProc()
+    {
+      const double averageGPURunTime = totalCounter.getStats().mean() / 1000.0; /* counters are in ms */
+      const double blockDuration = ps.settings.blockDuration();
+
+      // Without a positive run time and block duration the ratios below would
+      // divide by zero, and converting the resulting inf/NaN to int is
+      // undefined behaviour. Written as !(x > 0) so that NaN is rejected too.
+      if (!(averageGPURunTime > 0.0) || !(blockDuration > 0.0))
+        return;
+
+      // Report how our processing relates to real time
+      LOG_INFO_STR("[GPU] Processing ran at " << (100.0 * (averageGPURunTime * nrSubbandsPerSubbandProc) / blockDuration) << "% of real time (GPU required " << averageGPURunTime << "s to process " << blockDuration << "s of data for one subband, and needs to process " << nrSubbandsPerSubbandProc << " subbands per GPU).");
+
+      // Report how many subbands would yield up to 99% load
+      LOG_INFO_STR("[GPU] I can process at most " << static_cast<int>(floor(0.99 * blockDuration / averageGPURunTime)) << " subbands per GPU at real time.");
+    }
+
 
     size_t SubbandProc::nrOutputElements() const
     {
diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
index 608606c2c1d..f1b70855fc7 100644
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
+++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProc.h
@@ -102,6 +102,8 @@ namespace LOFAR
                   KernelFactories &factories,
                   size_t nrSubbandsPerSubbandProc = 1);
 
+      ~SubbandProc();
+
       // A pool of input data, to allow items to be filled and
       // computed on in parallel.
       Pool<SubbandProcInputData> inputPool;
diff --git a/RTCP/Cobalt/GPUProc/src/gpu_load.cc b/RTCP/Cobalt/GPUProc/src/gpu_load.cc
index bd77798a6e9..10f7b5a9046 100644
--- a/RTCP/Cobalt/GPUProc/src/gpu_load.cc
+++ b/RTCP/Cobalt/GPUProc/src/gpu_load.cc
@@ -73,14 +73,11 @@ int main(int argc, char **argv) {
   const size_t nrChannelsPerSubband = ps.settings.correlator.nrChannels;
   const size_t integrationSteps = ps.settings.correlator.nrSamplesPerIntegration();
 
-  // Create very simple kernel programs, with predictable output. Skip as much
-  // as possible. Nr of channels/sb from the parset is 1, so the PPF will not
-  // even run. Parset also has turned of delay compensation and bandpass
-  // correction (but that kernel will run to convert int to float and to
-  // transform the data order).
-
-  KernelFactories factories(ps, 1);
-  SubbandProc cwq(ps, ctx, factories);
+  // Assume each node has as many GPUs as us.
+  const size_t nrSubbandsPerSubbandProc = ceilDiv(ceilDiv(ps.settings.subbands.size(), ps.settings.nodes.size()), devices.size());
+
+  KernelFactories factories(ps, nrSubbandsPerSubbandProc);
+  SubbandProc cwq(ps, ctx, factories, nrSubbandsPerSubbandProc);
 
   SubbandProcInputData in(
     nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP,
-- 
GitLab