Skip to content
Snippets Groups Projects
Commit 7692b5f3 authored by Jan David Mol's avatar Jan David Mol
Browse files

COB-4: Report statistics about GPU usage with respect to real-time behaviour.

parent a6dcafde
No related branches found
No related tags found
1 merge request: !6 "Import cobalt2 into lofar4"
...@@ -140,6 +140,18 @@ namespace LOFAR ...@@ -140,6 +140,18 @@ namespace LOFAR
} }
} }
// Destructor: reports how the measured GPU processing time relates to real time.
//
// When usable statistics are available, two INFO lines are logged:
//   1. the real-time load percentage for the configured number of subbands
//      per GPU (nrSubbandsPerSubbandProc);
//   2. the largest number of subbands per GPU that stays within 99% load.
// When the mean run time or the block duration is not positive, a single
// line stating that no usable statistics exist is logged instead.
SubbandProc::~SubbandProc()
{
  const double averageGPURunTime = totalCounter.getStats().mean() / 1000.0; /* counters are in ms */
  const double blockDuration = ps.settings.blockDuration();

  // Both reports divide by one of these values. A zero (or NaN) divisor would
  // yield inf/NaN, and converting such a value to int below is undefined
  // behaviour. The negated '>' comparisons also reject NaN.
  // NOTE(review): presumably the mean is zero/NaN when no block was ever
  // processed -- confirm against the counter implementation.
  if (!(averageGPURunTime > 0.0) || !(blockDuration > 0.0)) {
    LOG_INFO_STR("[GPU] No usable timing statistics; cannot relate processing to real time (average GPU run time " << averageGPURunTime << "s, block duration " << blockDuration << "s).");
    return;
  }

  // Report how our processing relates to real time
  const double realTimePercentage = 100.0 * (averageGPURunTime * nrSubbandsPerSubbandProc) / blockDuration;
  LOG_INFO_STR("[GPU] Processing ran at " << realTimePercentage << "% of real time (GPU required " << averageGPURunTime << "s to process " << blockDuration << "s of data for one subband, and needs to process " << nrSubbandsPerSubbandProc << " subbands per GPU).");

  // Report how many subbands would yield up to 99% load
  const int maxSubbandsPerGPU = static_cast<int>(floor(0.99 * blockDuration / averageGPURunTime));
  LOG_INFO_STR("[GPU] I can process at most " << maxSubbandsPerGPU << " subbands per GPU at real time.");
}
size_t SubbandProc::nrOutputElements() const size_t SubbandProc::nrOutputElements() const
{ {
......
...@@ -102,6 +102,8 @@ namespace LOFAR ...@@ -102,6 +102,8 @@ namespace LOFAR
KernelFactories &factories, KernelFactories &factories,
size_t nrSubbandsPerSubbandProc = 1); size_t nrSubbandsPerSubbandProc = 1);
~SubbandProc();
// A pool of input data, to allow items to be filled and // A pool of input data, to allow items to be filled and
// computed on in parallel. // computed on in parallel.
Pool<SubbandProcInputData> inputPool; Pool<SubbandProcInputData> inputPool;
......
...@@ -73,14 +73,11 @@ int main(int argc, char **argv) { ...@@ -73,14 +73,11 @@ int main(int argc, char **argv) {
const size_t nrChannelsPerSubband = ps.settings.correlator.nrChannels; const size_t nrChannelsPerSubband = ps.settings.correlator.nrChannels;
const size_t integrationSteps = ps.settings.correlator.nrSamplesPerIntegration(); const size_t integrationSteps = ps.settings.correlator.nrSamplesPerIntegration();
// Create very simple kernel programs, with predictable output. Skip as much // Assume each node has as many GPUs as us.
// as possible. Nr of channels/sb from the parset is 1, so the PPF will not const size_t nrSubbandsPerSubbandProc = ceilDiv(ceilDiv(ps.settings.subbands.size(), ps.settings.nodes.size()), devices.size());
// even run. Parset also has turned of delay compensation and bandpass
// correction (but that kernel will run to convert int to float and to KernelFactories factories(ps, nrSubbandsPerSubbandProc);
// transform the data order). SubbandProc cwq(ps, ctx, factories, nrSubbandsPerSubbandProc);
KernelFactories factories(ps, 1);
SubbandProc cwq(ps, ctx, factories);
SubbandProcInputData in( SubbandProcInputData in(
nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP, nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment