diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CorrelatorKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CorrelatorKernel.cc index b4bb8ccf782c3f87418b3abdf563cb82d24ee0e8..0679e7adc4ee22113f84470495022f5039cd1603 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CorrelatorKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CorrelatorKernel.cc @@ -32,6 +32,10 @@ #include <GPUProc/global_defines.h> +// For Cobalt (= up to 80 antenna fields), the 2x2 kernel gives the best +// performance. +#define USE_2X2 + namespace LOFAR { namespace Cobalt @@ -56,8 +60,7 @@ namespace LOFAR setArg(0, buffers.output); setArg(1, buffers.input); - size_t maxNrThreads, preferredMultiple; - maxNrThreads = getAttribute(CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK); + size_t preferredMultiple; gpu::Platform pf; if (pf.getName() == "AMD Accelerated Parallel Processing") { @@ -80,7 +83,7 @@ namespace LOFAR # else unsigned nrBlocks = nrBaselines; # endif - unsigned nrPasses = (nrBlocks + maxNrThreads - 1) / maxNrThreads; + unsigned nrPasses = (nrBlocks + maxThreadsPerBlock - 1) / maxThreadsPerBlock; unsigned nrThreads = (nrBlocks + nrPasses - 1) / nrPasses; nrThreads = (nrThreads + preferredMultiple - 1) / preferredMultiple * preferredMultiple; diff --git a/RTCP/Cobalt/GPUProc/src/global_defines.cc b/RTCP/Cobalt/GPUProc/src/global_defines.cc index c26aab7e4959e7ed7ed09e1eac4d957be1019b54..922ddf5348d993f60d619c8d8ff91581abe5052e 100644 --- a/RTCP/Cobalt/GPUProc/src/global_defines.cc +++ b/RTCP/Cobalt/GPUProc/src/global_defines.cc @@ -34,9 +34,6 @@ namespace LOFAR { bool profiling = false; bool gpuProfiling = true; - const char *str = getenv("NR_GPUS"); - unsigned nrGPUs = str ? atoi(str) : 1; - inline void set_affinity(unsigned device) { diff --git a/RTCP/Cobalt/GPUProc/src/global_defines.h b/RTCP/Cobalt/GPUProc/src/global_defines.h index 94f61113bae40ac9a173ba42feb415ff8edd89aa..ff2d2f87e7934deabf1c246dd7d5a3265a42d970 100644 --- a/RTCP/Cobalt/GPUProc/src/global_defines.h +++ b/RTCP/Cobalt/GPUProc/src/global_defines.h @@ -22,11 +22,8 @@ #define LOFAR_GPUPROC_GLOBAL_DEFINES_H #define NR_STATION_FILTER_TAPS 16 -#undef USE_NEW_CORRELATOR #define NR_POLARIZATIONS 2 // TODO: get the nr of pol symbol from an LCS/Common header and/or from CoInterface/Config.h (if that isn't a dup too) #define NR_TAPS 16 -#undef USE_2X2 -#undef USE_TEST_DATA #undef USE_B7015 namespace LOFAR @@ -35,7 +32,6 @@ namespace LOFAR { extern bool profiling; extern bool gpuProfiling; - extern unsigned nrGPUs; void set_affinity(unsigned device); }