Skip to content
Snippets Groups Projects
Commit cc8174b5 authored by Jan David Mol
Browse files

Task #2669: Use the 2x2 correlator kernel for better performance, and clean up global_defines a bit

parent d55efcee
No related branches found
No related tags found
No related merge requests found
...@@ -32,6 +32,10 @@ ...@@ -32,6 +32,10 @@
#include <GPUProc/global_defines.h> #include <GPUProc/global_defines.h>
// For Cobalt (= up to 80 antenna fields), the 2x2 kernel gives the best
// performance.
#define USE_2X2
namespace LOFAR namespace LOFAR
{ {
namespace Cobalt namespace Cobalt
...@@ -56,8 +60,7 @@ namespace LOFAR ...@@ -56,8 +60,7 @@ namespace LOFAR
setArg(0, buffers.output); setArg(0, buffers.output);
setArg(1, buffers.input); setArg(1, buffers.input);
size_t maxNrThreads, preferredMultiple; size_t preferredMultiple;
maxNrThreads = getAttribute(CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK);
gpu::Platform pf; gpu::Platform pf;
if (pf.getName() == "AMD Accelerated Parallel Processing") { if (pf.getName() == "AMD Accelerated Parallel Processing") {
...@@ -80,7 +83,7 @@ namespace LOFAR ...@@ -80,7 +83,7 @@ namespace LOFAR
# else # else
unsigned nrBlocks = nrBaselines; unsigned nrBlocks = nrBaselines;
# endif # endif
unsigned nrPasses = (nrBlocks + maxNrThreads - 1) / maxNrThreads; unsigned nrPasses = (nrBlocks + maxThreadsPerBlock - 1) / maxThreadsPerBlock;
unsigned nrThreads = (nrBlocks + nrPasses - 1) / nrPasses; unsigned nrThreads = (nrBlocks + nrPasses - 1) / nrPasses;
nrThreads = (nrThreads + preferredMultiple - 1) / preferredMultiple * preferredMultiple; nrThreads = (nrThreads + preferredMultiple - 1) / preferredMultiple * preferredMultiple;
......
...@@ -34,9 +34,6 @@ namespace LOFAR ...@@ -34,9 +34,6 @@ namespace LOFAR
{ {
bool profiling = false; bool profiling = false;
bool gpuProfiling = true; bool gpuProfiling = true;
const char *str = getenv("NR_GPUS");
unsigned nrGPUs = str ? atoi(str) : 1;
inline void set_affinity(unsigned device) inline void set_affinity(unsigned device)
{ {
......
...@@ -22,11 +22,8 @@ ...@@ -22,11 +22,8 @@
#define LOFAR_GPUPROC_GLOBAL_DEFINES_H #define LOFAR_GPUPROC_GLOBAL_DEFINES_H
#define NR_STATION_FILTER_TAPS 16 #define NR_STATION_FILTER_TAPS 16
#undef USE_NEW_CORRELATOR
#define NR_POLARIZATIONS 2 // TODO: get the nr of pol symbol from an LCS/Common header and/or from CoInterface/Config.h (if that isn't a dup too) #define NR_POLARIZATIONS 2 // TODO: get the nr of pol symbol from an LCS/Common header and/or from CoInterface/Config.h (if that isn't a dup too)
#define NR_TAPS 16 #define NR_TAPS 16
#undef USE_2X2
#undef USE_TEST_DATA
#undef USE_B7015 #undef USE_B7015
namespace LOFAR namespace LOFAR
...@@ -35,7 +32,6 @@ namespace LOFAR ...@@ -35,7 +32,6 @@ namespace LOFAR
{ {
extern bool profiling; extern bool profiling;
extern bool gpuProfiling; extern bool gpuProfiling;
extern unsigned nrGPUs;
void set_affinity(unsigned device); void set_affinity(unsigned device);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment