Skip to content
Snippets Groups Projects
Commit f99f17fb authored by Marcel Loose's avatar Marcel Loose :sunglasses:
Browse files

Task #4520: Moved defaultDefinitions() to Kernel::compileDefinitions(). All...

Task #4520: Moved defaultDefinitions() to Kernel::compileDefinitions(). All kernels still get the whole shebang of compile options; this can later be made more kernel-specific.
parent b093f661
No related branches found
No related tags found
No related merge requests found
Showing
with 82 additions and 71 deletions
......@@ -68,54 +68,14 @@ namespace LOFAR
return flags;
};
// Produce the definition set for an nvcc compilation of a cuda kernel when no
// Parset is supplied: the returned set contains no entries.
definitions_type defaultDefinitions()
{
  return definitions_type();
}
// Return the set of default definitions for the nvcc compilation of a cuda kernel in Cobalt
definitions_type defaultDefinitions(const Parset &ps)
{
definitions_type defs;
// // Return empty set of definitions for the nvcc compilation of a cuda kernel
// definitions_type defaultDefinitions()
// {
// definitions_type defs;
using boost::format;
// return defs;
// }
defs["NVIDIA_CUDA"] = ""; // left-over from OpenCL for Correlator.cl/.cu
// TODO: support device specific defs somehow (createPTX() knows about targets, but may be kernel and target specific)
//if (devices[0].getInfo<CL_DEVICE_NAME>() == "GeForce GTX 680")
// defs["USE_FLOAT4_IN_CORRELATOR"] = "";
// TODO: kernel-specific defs should be specified in the XXXKernel class
defs["COMPLEX"] = "2";
defs["NR_BITS_PER_SAMPLE"] = str(format("%u") % ps.nrBitsPerSample());
defs["SUBBAND_BANDWIDTH"] = str(format("%.7ff") % ps.subbandBandwidth()); // returns double, so rounding issue?
defs["NR_SUBBANDS"] = str(format("%u") % ps.nrSubbands()); // size_t, but %zu not supp
defs["NR_CHANNELS"] = str(format("%u") % ps.nrChannelsPerSubband());
defs["NR_STATIONS"] = str(format("%u") % ps.nrStations());
defs["NR_SAMPLES_PER_CHANNEL"] = str(format("%u") % ps.nrSamplesPerChannel());
defs["NR_SAMPLES_PER_SUBBAND"] = str(format("%u") % ps.nrSamplesPerSubband());
defs["NR_BEAMS"] = str(format("%u") % ps.nrBeams());
defs["NR_TABS"] = str(format("%u") % ps.nrTABs(0)); // TODO: 0 should be dep on #beams
defs["NR_COHERENT_STOKES"] = str(format("%u") % ps.nrCoherentStokes()); // size_t
defs["NR_INCOHERENT_STOKES"] = str(format("%u") % ps.nrIncoherentStokes()); // size_t
defs["COHERENT_STOKES_TIME_INTEGRATION_FACTOR"] = str(format("%u") % ps.coherentStokesTimeIntegrationFactor());
defs["INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR"] = str(format("%u") % ps.incoherentStokesTimeIntegrationFactor());
defs["NR_POLARIZATIONS"] = str(format("%u") % NR_POLARIZATIONS);
defs["NR_TAPS"] = str(format("%u") % NR_TAPS);
defs["NR_STATION_FILTER_TAPS"] = str(format("%u") % NR_STATION_FILTER_TAPS);
if (ps.delayCompensation())
defs["DELAY_COMPENSATION"] = "";
if (ps.correctBandPass())
defs["BANDPASS_CORRECTION"] = "";
defs["DEDISPERSION_FFT_SIZE"] = str(format("%u") % ps.dedispersionFFTsize()); // size_t
return defs;
}
// Performs a 'system' call of nvcc. Return the stdout of the command
// on error no stdout is created and an exception is thrown
......@@ -162,7 +122,9 @@ namespace LOFAR
// Create a nvcc command line string based on the input path, a set of flags and a map
// of definitions. Use this command to call nvcc and compile the file at input path to a ptx file
// which content is returned as a string
std::string compileToPtx(const std::string& pathToCuFile, const flags_type& flags, const definitions_type& definitions)
std::string compileToPtx(const std::string& pathToCuFile,
const flags_type& flags,
const definitions_type& definitions)
{
const string cudaCompiler = "nvcc";
stringstream cmd("");
......@@ -189,13 +151,6 @@ namespace LOFAR
return runNVCC(cmd.str());
};
// overloaded function. Use the path and default flags and definitions to call nvcc
std::string compileToPtx(const std::string& pathToCuFile)
{
// compile with the default flags and definitions
return compileToPtx(pathToCuFile, defaultFlags(), defaultDefinitions());
};
}
}
......@@ -48,10 +48,9 @@ namespace LOFAR
// Return the set of default flags for the nvcc compilation of a cuda kernel in Cobalt
flags_type defaultFlags();
// Return the set of default definitions for the nvcc compilation of a cuda kernel in Cobalt
// DEFINITION=0 results in a definition and cannot be used for undef. Do not insert parameters in this case
definitions_type defaultDefinitions();
definitions_type defaultDefinitions(const Parset &ps);
// // Return the set of default definitions for the nvcc compilation of a cuda kernel in Cobalt
// // DEFINITION=0 results in a definition and cannot be used for undef. Do not insert parameters in this case
// definitions_type defaultDefinitions();
// Performs a 'system' call of nvcc. Return the stdout of the command
// on error no stdout is created and an exception is thrown
......@@ -60,11 +59,12 @@ namespace LOFAR
// Create a nvcc command line string based on the input path, a set of flags and a map
// of definitions. Use this command to call nvcc and compile the file at input path to a ptx file
// which content is returned as a string
std::string compileToPtx(const std::string& pathToCuFile, const flags_type& flags, const definitions_type& definitions);
std::string
compileToPtx(const std::string& pathToCuFile,
const flags_type& flags = defaultFlags(),
const definitions_type& definitions = definitions_type());
// overloaded function. Use the path and default flags and definitions to call nvcc
std::string compileToPtx(const std::string& pathToCuFile);
}
}
......
......@@ -20,6 +20,9 @@
#include <lofar_config.h>
#include <boost/format.hpp>
#include <GPUProc/global_defines.h>
#include "Kernel.h"
namespace LOFAR
......@@ -53,6 +56,46 @@ namespace LOFAR
// counter.doOperation(event, nrOperations, nrBytesRead, nrBytesWritten);
}
// Assemble the compile definitions (key/value string pairs) for the kernels,
// derived from the Parset \a ps and a few compile-time constants.
// Every kernel currently receives this same full set. Some keys are stored
// with an empty string as their value.
Kernel::definitions_type Kernel::compileDefinitions(const Parset& ps)
{
  definitions_type definitions;

  // Left-over from OpenCL for Correlator.cl/.cu.
  definitions["NVIDIA_CUDA"] = "";

  // TODO: support device specific defs somehow (createPTX() knows about
  // targets, but may be kernel and target specific)
  //if (devices[0].getInfo<CL_DEVICE_NAME>() == "GeForce GTX 680")
  //  definitions["USE_FLOAT4_IN_CORRELATOR"] = "";

  // TODO: kernel-specific defs should be specified in the XXXKernel class
  definitions["COMPLEX"] = "2";

  definitions["NR_BITS_PER_SAMPLE"] =
    (boost::format("%u") % ps.nrBitsPerSample()).str();

  // subbandBandwidth() returns a double, so there may be a rounding issue.
  // The format string appends a literal 'f' after the 7-decimal value.
  definitions["SUBBAND_BANDWIDTH"] =
    (boost::format("%.7ff") % ps.subbandBandwidth()).str();

  // size_t, but %zu is not supported, hence %u.
  definitions["NR_SUBBANDS"] =
    (boost::format("%u") % ps.nrSubbands()).str();
  definitions["NR_CHANNELS"] =
    (boost::format("%u") % ps.nrChannelsPerSubband()).str();
  definitions["NR_STATIONS"] =
    (boost::format("%u") % ps.nrStations()).str();
  definitions["NR_SAMPLES_PER_CHANNEL"] =
    (boost::format("%u") % ps.nrSamplesPerChannel()).str();
  definitions["NR_SAMPLES_PER_SUBBAND"] =
    (boost::format("%u") % ps.nrSamplesPerSubband()).str();
  definitions["NR_BEAMS"] =
    (boost::format("%u") % ps.nrBeams()).str();

  // TODO: the argument 0 should depend on the number of beams.
  definitions["NR_TABS"] =
    (boost::format("%u") % ps.nrTABs(0)).str();

  // The Stokes counts are size_t values.
  definitions["NR_COHERENT_STOKES"] =
    (boost::format("%u") % ps.nrCoherentStokes()).str();
  definitions["NR_INCOHERENT_STOKES"] =
    (boost::format("%u") % ps.nrIncoherentStokes()).str();
  definitions["COHERENT_STOKES_TIME_INTEGRATION_FACTOR"] =
    (boost::format("%u") % ps.coherentStokesTimeIntegrationFactor()).str();
  definitions["INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR"] =
    (boost::format("%u") % ps.incoherentStokesTimeIntegrationFactor()).str();

  // Compile-time constants rather than Parset values.
  definitions["NR_POLARIZATIONS"] =
    (boost::format("%u") % NR_POLARIZATIONS).str();
  definitions["NR_TAPS"] =
    (boost::format("%u") % NR_TAPS).str();
  definitions["NR_STATION_FILTER_TAPS"] =
    (boost::format("%u") % NR_STATION_FILTER_TAPS).str();

  // These two keys are only added when the Parset enables the feature.
  if (ps.delayCompensation()) {
    definitions["DELAY_COMPENSATION"] = "";
  }
  if (ps.correctBandPass()) {
    definitions["BANDPASS_CORRECTION"] = "";
  }

  // size_t
  definitions["DEDISPERSION_FFT_SIZE"] =
    (boost::format("%u") % ps.dedispersionFFTsize()).str();

  return definitions;
}
}
}
......@@ -40,6 +40,12 @@ namespace LOFAR
void enqueue(gpu::Stream &queue/*, PerformanceCounter &counter*/);
// Map used for storing compile definitions as key/value pairs.
typedef std::map<std::string, std::string> definitions_type;
// Return required compile definitions given the Parset \a ps.
static definitions_type compileDefinitions(const Parset& ps);
protected:
gpu::Event event;
const Parset &ps;
......
......@@ -26,6 +26,7 @@
#include <Common/lofar_iomanip.h>
#include <GPUProc/gpu_utils.h>
#include <GPUProc/Kernels/Kernel.h>
namespace LOFAR
{
......@@ -46,7 +47,7 @@ namespace LOFAR
std::string Pipeline::createPTX(const string &srcFilename)
{
flags_type flags(defaultFlags());
definitions_type definitions(defaultDefinitions(ps));
Kernel::definitions_type definitions(Kernel::compileDefinitions(ps));
return LOFAR::Cobalt::createPTX(devices, srcFilename, flags, definitions);
}
......
......@@ -31,6 +31,7 @@
#include <GPUProc/gpu_wrapper.h>
#include <GPUProc/gpu_utils.h>
#include <GPUProc/Kernels/Kernel.h>
#include <GPUProc/cuda/CudaRuntimeCompiler.h>
#include "TestUtil.h"
......@@ -59,7 +60,7 @@ HostMemory runTest(gpu::Context ctx,
cout << "\n==== runTest: function = " << function << " ====\n" << endl;
// Get an instantiation of the default parameters
definitions_type definitions = defaultDefinitions();
definitions_type definitions = Kernel::definitions_type();
flags_type flags = defaultFlags();
// ****************************************
......
......@@ -25,6 +25,7 @@
#include <Common/LofarLogger.h>
#include <CoInterface/Parset.h>
#include <GPUProc/gpu_utils.h>
#include <GPUProc/Kernels/Kernel.h>
#include <GPUProc/WorkQueues/CorrelatorWorkQueue.h>
using namespace std;
......@@ -61,7 +62,7 @@ int main() {
map<string, string> ptx;
flags_type flags(defaultFlags());
definitions_type definitions(defaultDefinitions(ps));
Kernel::definitions_type definitions(Kernel::compileDefinitions(ps));
ptx[kfilenameFIR] = createPTX(devices, kfilenameFIR, flags, definitions);
ptx[kfilenameDBP] = createPTX(devices, kfilenameDBP, flags, definitions);
ptx[kfilenameCor] = createPTX(devices, kfilenameCor, flags, definitions);
......
......@@ -21,6 +21,7 @@
#include <lofar_config.h>
#include <string>
#include <GPUProc/Kernels/Kernel.h>
#include <GPUProc/cuda/CudaRuntimeCompiler.h>
#include <Common/LofarLogger.h>
......@@ -35,7 +36,7 @@ int main()
string kernelPath = "tCudaRuntimeCompiler.in_.cu";
// Get an instantiation of the default parameters
definitions_type definitions = defaultDefinitions();
definitions_type definitions = Kernel::definitions_type();
// override the default with a magic number
definitions["NVIDIA_CUDA"] = "123456";
......
......@@ -32,6 +32,7 @@
#include <GPUProc/gpu_wrapper.h>
#include <GPUProc/gpu_utils.h>
#include <GPUProc/Kernels/Kernel.h>
#include <GPUProc/cuda/CudaRuntimeCompiler.h>
#include <UnitTest++.h>
......@@ -67,7 +68,7 @@ float * runTest(float bandPassFactor,
string kernelPath = "DelayAndBandPass.cu"; //The test copies the kernel to the current dir (also the complex header, needed for compilation)
// Get an instantiation of the default parameters
definitions_type definitions = defaultDefinitions();
definitions_type definitions = Kernel::definitions_type();
flags_type flags = defaultFlags();
// ****************************************
......
......@@ -52,7 +52,7 @@ int main() {
// Get default parameters for the compiler
flags_type flags = defaultFlags();
definitions_type definitions = defaultDefinitions(ps);
Kernel::definitions_type definitions(Kernel::compileDefinitions(ps));
string ptx = createPTX(devices, srcFilename, flags, definitions);
gpu::Module module(createModule(ctx, srcFilename, ptx));
......
......@@ -32,6 +32,7 @@
#include <GPUProc/gpu_wrapper.h>
#include <GPUProc/gpu_utils.h>
#include <GPUProc/Kernels/Kernel.h>
#include <GPUProc/cuda/CudaRuntimeCompiler.h>
#include <UnitTest++.h>
......@@ -65,7 +66,7 @@ float * runTest(unsigned NR_BITS_PER_SAMPLE = 16,
// ****************************************
// Compile to ptx
// Get an instantiation of the default parameters
definitions_type definitions = defaultDefinitions();
definitions_type definitions = Kernel::definitions_type();
flags_type flags = defaultFlags();
// Set up string/string pairs to be provided to the compiler as defines
......
......@@ -53,7 +53,7 @@ int main() {
// Get default parameters for the compiler
flags_type flags = defaultFlags();
definitions_type definitions = defaultDefinitions(ps);
Kernel::definitions_type definitions(Kernel::compileDefinitions(ps));
string ptx = createPTX(devices, srcFilename, flags, definitions);
gpu::Module module(createModule(ctx, srcFilename, ptx));
......
......@@ -54,7 +54,7 @@ int main() {
// Get default parameters for the compiler
flags_type flags = defaultFlags();
definitions_type definitions = defaultDefinitions(ps);
Kernel::definitions_type definitions(Kernel::compileDefinitions(ps));
string ptx = createPTX(devices, srcFilename, flags, definitions);
gpu::Module module(createModule(ctx, srcFilename, ptx));
......
......@@ -27,6 +27,7 @@
#include <Common/LofarLogger.h>
#include <CoInterface/Parset.h>
#include <GPUProc/gpu_utils.h>
#include <GPUProc/Kernels/Kernel.h>
#include <GPUProc/cuda/CudaRuntimeCompiler.h>
#include <GPUProc/global_defines.h>
......@@ -59,7 +60,7 @@ int main(int argc, char *argv[]) {
// Collect inputs from the parset and assign them to CudaRuntimeCompiler
// input_types.
flags_type flags = defaultFlags();
definitions_type definitions = defaultDefinitions(ps);
Kernel::definitions_type definitions(Kernel::compileDefinitions(ps));
string ptx = createPTX(devices, srcFilename, flags, definitions);
gpu::Module module(createModule(ctx, srcFilename, ptx));
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment