diff --git a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt index 8bd1194b58b177ef45e8127e37a127dd075288d1..36a70dd791a5b48cb5ab48c44d161b56bfb6c876 100644 --- a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt @@ -13,7 +13,7 @@ set(_gpuproc_sources BlockID.cc FilterBank.cc global_defines.cc - Kernels/CompileDefinitions.cc + KernelCompiler.cc Storage/SSH.cc Storage/StorageProcess.cc Storage/StorageProcesses.cc @@ -24,6 +24,7 @@ if(USE_CUDA) cuda/gpu_wrapper.cc cuda/gpu_utils.cc cuda/CudaRuntimeCompiler.cc + cuda/KernelCompiler.cc cuda/PerformanceCounter.cc cuda/Kernels/Kernel.cc cuda/Kernels/BeamFormerKernel.cc diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/Kernel.h b/RTCP/Cobalt/GPUProc/src/Kernels/Kernel.h index f4cd48822e86c8a5aef2177614fe2408ba8f60a7..1ba3d8edd7405dc20829ed541d638f5cb1702ff2 100644 --- a/RTCP/Cobalt/GPUProc/src/Kernels/Kernel.h +++ b/RTCP/Cobalt/GPUProc/src/Kernels/Kernel.h @@ -28,7 +28,7 @@ #include <CoInterface/Parset.h> #include <GPUProc/gpu_wrapper.h> -#include <GPUProc/Kernels/CompileDefinitions.h> +#include <GPUProc/KernelCompiler.h> //#include <GPUProc/PerformanceCounter.h> namespace LOFAR @@ -43,7 +43,7 @@ namespace LOFAR void enqueue(gpu::Stream &queue/*, PerformanceCounter &counter*/); // Return required compile definitions given the Parset \a ps. - static CompileDefinitions compileDefinitions(const Parset& ps); + static const CompileDefinitions& compileDefinitions(const Parset& ps); protected: gpu::Event event; diff --git a/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.cc b/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.cc index eb534673c1e3ba24714bd441625354c1e1709ca2..d07c01283168e8158f8df10a1c49725e06997f87 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.cc @@ -93,7 +93,7 @@ namespace LOFAR // of definitions. Use this command to call nvcc and compile the file at input path to a ptx file // which content is returned as a string std::string compileToPtx(const std::string& pathToCuFile, - const flags_type& flags, + const CompileFlags& flags, const CompileDefinitions& definitions) { const string cudaCompiler = "nvcc"; @@ -102,10 +102,7 @@ namespace LOFAR cmd << " " << pathToCuFile ; cmd << " --ptx"; cmd << definitions; - - // add the set of flags - for (flags_type::const_iterator it=flags.begin(); it!=flags.end(); ++it) - cmd << " --" << *it; // flags should be prepended with a space and a minus + cmd << flags; // output to stdout cmd << " -o -"; diff --git a/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.h b/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.h index 41741f617e23e58293c6462c12a485377dd84c6f..e673eef4d43c28e2660761c815ffc83ec1ec9f5a 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.h +++ b/RTCP/Cobalt/GPUProc/src/cuda/CudaRuntimeCompiler.h @@ -30,7 +30,7 @@ #include <set> #include <CoInterface/Parset.h> -#include <GPUProc/Kernels/CompileDefinitions.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/gpu_utils.h> #include "gpu_incl.h" @@ -41,29 +41,18 @@ namespace LOFAR namespace Cobalt { - // // flags - // typedef std::set<std::string> flags_type; - - // Return the set of default flags for the nvcc compilation of a cuda kernel in Cobalt - flags_type defaultFlags(); - - // // Return the set of default definitions for the nvcc compilation of a cuda kernel in Cobalt - // // DEFINITION=0 results in a definition and cannot for used for undef. Do not insert parameters in this case - // definitions_type defaultDefinitions(); - // Performs a 'system' call of nvcc. Return the stdout of the command // on error no stdout is created and an exception is thrown std::string runNVCC(const std::string &cmd); - // Create a nvcc command line string based on the input path, a set of flags and a map - // of definitions. Use this command to call nvcc and compile the file at input path to a ptx file - // which content is returned as a string + // Create a nvcc command line string based on the input path, a set of flags + // and a map of definitions. Use this command to call nvcc and compile the + // file at input path to a ptx file which content is returned as a string std::string compileToPtx(const std::string& pathToCuFile, - const flags_type& flags = defaultFlags(), + const CompileFlags& flags = CompileFlags(), const CompileDefinitions& definitions = CompileDefinitions()); - // overloaded function. Use the path and default flags and definitions to call nvcc } } diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/Kernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/Kernel.cc index 74404c1f9f9eb25ec68d29e16078da3ef442df9f..a8f2da23826f116f039a6e29ac2835897d9d3974 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/Kernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/Kernel.cc @@ -59,42 +59,43 @@ namespace LOFAR // counter.doOperation(event, nrOperations, nrBytesRead, nrBytesWritten); } - CompileDefinitions Kernel::compileDefinitions(const Parset& ps) + const CompileDefinitions& Kernel::compileDefinitions(const Parset& ps) { - CompileDefinitions defs; + static CompileDefinitions defs; using boost::format; - defs["NVIDIA_CUDA"] = ""; // left-over from OpenCL for Correlator.cl/.cu - - // TODO: support device specific defs somehow (createPTX() knows about targets, but may be kernel and target specific) - //if (devices[0].getInfo<CL_DEVICE_NAME>() == "GeForce GTX 680") - // defs["USE_FLOAT4_IN_CORRELATOR"] = ""; - - // TODO: kernel-specific defs should be specified in the XXXKernel class - defs["COMPLEX"] = "2"; - - defs["NR_BITS_PER_SAMPLE"] = str(format("%u") % ps.nrBitsPerSample()); - defs["SUBBAND_BANDWIDTH"] = str(format("%.7ff") % ps.subbandBandwidth()); // returns double, so rounding issue? - defs["NR_SUBBANDS"] = str(format("%u") % ps.nrSubbands()); // size_t, but %zu not supp - defs["NR_CHANNELS"] = str(format("%u") % ps.nrChannelsPerSubband()); - defs["NR_STATIONS"] = str(format("%u") % ps.nrStations()); - defs["NR_SAMPLES_PER_CHANNEL"] = str(format("%u") % ps.nrSamplesPerChannel()); - defs["NR_SAMPLES_PER_SUBBAND"] = str(format("%u") % ps.nrSamplesPerSubband()); - defs["NR_BEAMS"] = str(format("%u") % ps.nrBeams()); - defs["NR_TABS"] = str(format("%u") % ps.nrTABs(0)); // TODO: 0 should be dep on #beams - defs["NR_COHERENT_STOKES"] = str(format("%u") % ps.nrCoherentStokes()); // size_t - defs["NR_INCOHERENT_STOKES"] = str(format("%u") % ps.nrIncoherentStokes()); // size_t - defs["COHERENT_STOKES_TIME_INTEGRATION_FACTOR"] = str(format("%u") % ps.coherentStokesTimeIntegrationFactor()); - defs["INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR"] = str(format("%u") % ps.incoherentStokesTimeIntegrationFactor()); - defs["NR_POLARIZATIONS"] = str(format("%u") % NR_POLARIZATIONS); - defs["NR_TAPS"] = str(format("%u") % NR_TAPS); - defs["NR_STATION_FILTER_TAPS"] = str(format("%u") % NR_STATION_FILTER_TAPS); - if (ps.delayCompensation()) - defs["DELAY_COMPENSATION"] = ""; - if (ps.correctBandPass()) - defs["BANDPASS_CORRECTION"] = ""; - defs["DEDISPERSION_FFT_SIZE"] = str(format("%u") % ps.dedispersionFFTsize()); // size_t + if (defs.empty()) { + defs["NVIDIA_CUDA"] = ""; // left-over from OpenCL for Correlator.cl/.cu + // TODO: support device specific defs somehow (createPTX() knows about targets, but may be kernel and target specific) + //if (devices[0].getInfo<CL_DEVICE_NAME>() == "GeForce GTX 680") + // defs["USE_FLOAT4_IN_CORRELATOR"] = ""; + + // TODO: kernel-specific defs should be specified in the XXXKernel class + defs["COMPLEX"] = "2"; + + defs["NR_BITS_PER_SAMPLE"] = str(format("%u") % ps.nrBitsPerSample()); + defs["SUBBAND_BANDWIDTH"] = str(format("%.7ff") % ps.subbandBandwidth()); // returns double, so rounding issue? + defs["NR_SUBBANDS"] = str(format("%u") % ps.nrSubbands()); // size_t, but %zu not supp + defs["NR_CHANNELS"] = str(format("%u") % ps.nrChannelsPerSubband()); + defs["NR_STATIONS"] = str(format("%u") % ps.nrStations()); + defs["NR_SAMPLES_PER_CHANNEL"] = str(format("%u") % ps.nrSamplesPerChannel()); + defs["NR_SAMPLES_PER_SUBBAND"] = str(format("%u") % ps.nrSamplesPerSubband()); + defs["NR_BEAMS"] = str(format("%u") % ps.nrBeams()); + defs["NR_TABS"] = str(format("%u") % ps.nrTABs(0)); // TODO: 0 should be dep on #beams + defs["NR_COHERENT_STOKES"] = str(format("%u") % ps.nrCoherentStokes()); // size_t + defs["NR_INCOHERENT_STOKES"] = str(format("%u") % ps.nrIncoherentStokes()); // size_t + defs["COHERENT_STOKES_TIME_INTEGRATION_FACTOR"] = str(format("%u") % ps.coherentStokesTimeIntegrationFactor()); + defs["INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR"] = str(format("%u") % ps.incoherentStokesTimeIntegrationFactor()); + defs["NR_POLARIZATIONS"] = str(format("%u") % NR_POLARIZATIONS); + defs["NR_TAPS"] = str(format("%u") % NR_TAPS); + defs["NR_STATION_FILTER_TAPS"] = str(format("%u") % NR_STATION_FILTER_TAPS); + if (ps.delayCompensation()) + defs["DELAY_COMPENSATION"] = ""; + if (ps.correctBandPass()) + defs["BANDPASS_CORRECTION"] = ""; + defs["DEDISPERSION_FFT_SIZE"] = str(format("%u") % ps.dedispersionFFTsize()); // size_t + } return defs; } diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc index 3e5f4207b0c5dee89b377e3855147130bfb8e1dd..8401f81b02e556cff484f0e3d20c78dbaba8e8c3 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc @@ -46,7 +46,7 @@ namespace LOFAR std::string Pipeline::createPTX(const string &srcFilename) { - flags_type flags(defaultFlags()); + CompileFlags flags(defaultCompileFlags()); CompileDefinitions definitions(Kernel::compileDefinitions(ps)); return LOFAR::Cobalt::createPTX(devices, srcFilename, flags, definitions); diff --git a/RTCP/Cobalt/GPUProc/src/cuda/gpu_utils.cc b/RTCP/Cobalt/GPUProc/src/cuda/gpu_utils.cc index 086e4dbee4f63ae8abcc4637c55aed4397f2f925..944ead2b63318ada8ca87aabf620ec7d94c99490 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/gpu_utils.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/gpu_utils.cc @@ -39,7 +39,6 @@ #include <Stream/FileStream.h> #include <GPUProc/global_defines.h> -#include <GPUProc/Kernels/CompileDefinitions.h> #include "CudaRuntimeCompiler.h" #define BUILD_MAX_LOG_SIZE 4095 @@ -51,22 +50,6 @@ namespace LOFAR using namespace std; using boost::format; - flags_type defaultFlags() - { - flags_type flags; - - using boost::format; - - //flags.insert("device-debug"); - - // TODO: If this simplifies trig funcs, verify effect on the - // BPDelayKernel. Ideally, a Kernel specifies when to enable build flags. - flags.insert("use_fast_math"); - - return flags; - }; - - namespace { // Return the highest compute target supported by the given device @@ -214,14 +197,14 @@ namespace LOFAR std::string createPTX(const vector<gpu::Device> &devices, const std::string &srcFilename, - flags_type &flags, + CompileFlags &flags, const CompileDefinitions &definitions ) { // The CUDA code is assumed to be written for the architecture of the // oldest device. #if CUDA_VERSION >= 5000 CUjit_target commonTarget = computeTarget(devices); - flags.insert(str(format("gpu-architecture %s") % get_virtarch(commonTarget))); + flags.add(str(format("--gpu-architecture %s") % get_virtarch(commonTarget))); #endif #if 0 @@ -230,18 +213,18 @@ namespace LOFAR set<CUjit_target> allTargets; for (vector<gpu::Device>::const_iterator i = devices.begin(); i != devices.end(); ++i) { - allTargets.insert(computeTarget(*i)); + allTargets.add(computeTarget(*i)); } for (set<CUjit_target>::const_iterator i = allTargets.begin(); i != allTargets.end(); ++i) { - flags.insert(str(format("gpu-code %s") % get_gpuarch(*i))); + flags.add(str(format("--gpu-code %s") % get_gpuarch(*i))); } #endif // Add $LOFARROOT/include to include path, if $LOFARROOT is set. const char* lofarroot = getenv("LOFARROOT"); if (lofarroot) { - flags.insert(str(format("include-path %s/include") % lofarroot)); + flags.add(str(format("--include-path %s/include") % lofarroot)); } // Prefix the CUDA kernel filename with $LOFARROOT/share/gpu/kernels diff --git a/RTCP/Cobalt/GPUProc/src/gpu_utils.h b/RTCP/Cobalt/GPUProc/src/gpu_utils.h index 45bd1fae4c258a0cb46fc583a2f27fedc70b1225..d7901eba4df93420dcd49f28f2937cfb6e03c79d 100644 --- a/RTCP/Cobalt/GPUProc/src/gpu_utils.h +++ b/RTCP/Cobalt/GPUProc/src/gpu_utils.h @@ -25,6 +25,7 @@ #include <vector> #include <CoInterface/Parset.h> +#include <GPUProc/KernelCompiler.h> #include "gpu_wrapper.h" // #include "CudaRuntimeCompiler.h" @@ -32,14 +33,6 @@ namespace LOFAR { namespace Cobalt { - // flags - typedef std::set<std::string> flags_type; - - // Return default flags to be used for kernel compilation. The - // implementation of this method is CUDA/OpenCL specific. - flags_type defaultFlags(); - - class CompileDefinitions; /* * If no devices are given, the program is compiled for the latest * architecture. @@ -48,7 +41,7 @@ namespace LOFAR */ std::string createPTX( const std::vector<gpu::Device> &devices, const std::string &srcFilename, - flags_type &flags, + CompileFlags &flags, const CompileDefinitions &definitions ); /* * Create a Module from a PTX (string). diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tCorrelator.cc b/RTCP/Cobalt/GPUProc/test/cuda/tCorrelator.cc index 7ada5b32d1880fca82d540f6d73f1494a1673ecf..286fc6ce49c25decf9986859b5036fa73fbe2316 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tCorrelator.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tCorrelator.cc @@ -31,7 +31,7 @@ #include <GPUProc/gpu_wrapper.h> #include <GPUProc/gpu_utils.h> -#include <GPUProc/Kernels/CompileDefinitions.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/cuda/CudaRuntimeCompiler.h> #include "TestUtil.h" @@ -61,7 +61,7 @@ HostMemory runTest(gpu::Context ctx, // Get an instantiation of the default parameters CompileDefinitions definitions = CompileDefinitions(); - flags_type flags = defaultFlags(); + CompileFlags flags = defaultCompileFlags(); // **************************************** // Compile to ptx diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tCorrelatorWorkQueueProcessSb.cc b/RTCP/Cobalt/GPUProc/test/cuda/tCorrelatorWorkQueueProcessSb.cc index a575a464340f842fd68978d5e9230d56eb6cf418..fb8f5328d8b3f802c556ddef3e0896f92d6a124f 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tCorrelatorWorkQueueProcessSb.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tCorrelatorWorkQueueProcessSb.cc @@ -25,7 +25,7 @@ #include <Common/LofarLogger.h> #include <CoInterface/Parset.h> #include <GPUProc/gpu_utils.h> -#include <GPUProc/Kernels/Kernel.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/WorkQueues/CorrelatorWorkQueue.h> using namespace std; @@ -61,7 +61,7 @@ int main() { kernels.push_back(kfilenameCor); map<string, string> ptx; - flags_type flags(defaultFlags()); + CompileFlags flags(defaultCompileFlags()); CompileDefinitions definitions(Kernel::compileDefinitions(ps)); ptx[kfilenameFIR] = createPTX(devices, kfilenameFIR, flags, definitions); ptx[kfilenameDBP] = createPTX(devices, kfilenameDBP, flags, definitions); diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tCudaRuntimeCompiler.cc b/RTCP/Cobalt/GPUProc/test/cuda/tCudaRuntimeCompiler.cc index d041e3a27f9b95b129edad04798d81cbfecc930e..92e04e5296f1fdface1b5d59ad4298e138129e91 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tCudaRuntimeCompiler.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tCudaRuntimeCompiler.cc @@ -21,6 +21,7 @@ #include <lofar_config.h> #include <string> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/cuda/CudaRuntimeCompiler.h> #include <Common/LofarLogger.h> @@ -40,10 +41,10 @@ int main() // override the default with a magic number definitions["NVIDIA_CUDA"] = "123456"; - string ptx1 = compileToPtx(kernelPath, defaultFlags(), definitions); + string ptx1 = compileToPtx(kernelPath, defaultCompileFlags(), definitions); definitions["NVIDIA_CUDA"] = "654321"; - string ptx2 = compileToPtx(kernelPath, defaultFlags(), definitions); + string ptx2 = compileToPtx(kernelPath, defaultCompileFlags(), definitions); // tests if the magic numbers are inserted into the ptx files if ((std::string::npos != ptx1.find("123456")) || diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc index d7b101cb0f7e1a9f502524e7d66bc65f46dce1eb..1319e625b097a59df4a51311466cc676ebcfb9d2 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc @@ -32,6 +32,7 @@ #include <GPUProc/gpu_wrapper.h> #include <GPUProc/gpu_utils.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/cuda/CudaRuntimeCompiler.h> #include <UnitTest++.h> @@ -68,7 +69,7 @@ float * runTest(float bandPassFactor, // Get an instantiation of the default parameters CompileDefinitions definitions; - flags_type flags = defaultFlags(); + CompileFlags flags = defaultCompileFlags(); // **************************************** // Compile to ptx diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPassKernel.cc b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPassKernel.cc index f887237a9f53fd34faf7c5df1f935922b2a35e50..3e4dcb386d8da1eaf87ebf33aab3a33d98a79c9d 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPassKernel.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPassKernel.cc @@ -25,6 +25,7 @@ #include <GPUProc/gpu_wrapper.h> #include <GPUProc/gpu_utils.h> #include <GPUProc/BandPass.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/Kernels/DelayAndBandPassKernel.h> #include <GPUProc/WorkQueues/CorrelatorWorkQueue.h> @@ -51,7 +52,7 @@ int main() { string srcFilename("DelayAndBandPass.cu"); // Get default parameters for the compiler - flags_type flags = defaultFlags(); + CompileFlags flags = defaultCompileFlags(); CompileDefinitions definitions(Kernel::compileDefinitions(ps)); string ptx = createPTX(devices, srcFilename, flags, definitions); diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloat.cc b/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloat.cc index 5513c09032bfbd82237bf31fd3c600b081802cf2..0f0a59dd9ce9798e9207bef5d40118e4f5f4913f 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloat.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloat.cc @@ -32,6 +32,7 @@ #include <GPUProc/gpu_wrapper.h> #include <GPUProc/gpu_utils.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/cuda/CudaRuntimeCompiler.h> #include <UnitTest++.h> @@ -66,7 +67,7 @@ float * runTest(unsigned NR_BITS_PER_SAMPLE = 16, // Compile to ptx // Get an instantiation of the default parameters CompileDefinitions definitions; - flags_type flags = defaultFlags(); + CompileFlags flags = defaultCompileFlags(); // Set op string string pairs to be provided to the compiler as defines definitions["NR_STATIONS"] = "2"; diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloatKernel.cc b/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloatKernel.cc index 584de7f4f347d4d7c53ee5a5e4e6ee0343e91258..1ac55859717cff57992295f9257b31364b197546 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloatKernel.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tIntToFloatKernel.cc @@ -25,6 +25,7 @@ #include <GPUProc/gpu_wrapper.h> #include <GPUProc/gpu_utils.h> #include <GPUProc/BandPass.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/Kernels/IntToFloatKernel.h> #include <GPUProc/WorkQueues/CorrelatorWorkQueue.h> @@ -52,7 +53,7 @@ int main() { string srcFilename("IntToFloat.cu"); // Get default parameters for the compiler - flags_type flags = defaultFlags(); + CompileFlags flags = defaultCompileFlags(); CompileDefinitions definitions(Kernel::compileDefinitions(ps)); string ptx = createPTX(devices, srcFilename, flags, definitions); diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tKernel.cc b/RTCP/Cobalt/GPUProc/test/cuda/tKernel.cc index 11f19cccfa1d671b56637b6472667fb16bccb572..d86bc73f79fc652475b42e93cdc047941f439e5b 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tKernel.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tKernel.cc @@ -28,6 +28,7 @@ #include <CoInterface/Parset.h> #include <GPUProc/Kernels/Kernel.h> #include <GPUProc/gpu_utils.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/cuda/CudaRuntimeCompiler.h> #include <GPUProc/global_defines.h> #include <Common/LofarLogger.h> @@ -53,7 +54,7 @@ int main() { Parset ps("tKernel.parset.in"); // Get default parameters for the compiler - flags_type flags = defaultFlags(); + CompileFlags flags = defaultCompileFlags(); CompileDefinitions definitions(Kernel::compileDefinitions(ps)); string ptx = createPTX(devices, srcFilename, flags, definitions); diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tcreateProgram.cc b/RTCP/Cobalt/GPUProc/test/cuda/tcreateProgram.cc index e25be71c55e7877dba839bd53e89aee45eaab0cd..8f696325e0374873f703ae6319a0c6b6e03972dd 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tcreateProgram.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tcreateProgram.cc @@ -28,6 +28,7 @@ #include <CoInterface/Parset.h> #include <GPUProc/gpu_utils.h> #include <GPUProc/Kernels/Kernel.h> +#include <GPUProc/KernelCompiler.h> #include <GPUProc/cuda/CudaRuntimeCompiler.h> #include <GPUProc/global_defines.h> @@ -59,7 +60,7 @@ int main(int argc, char *argv[]) { // Collect inputs from the parste and assign them to CudaRuntimeCompiler // input_types. - flags_type flags = defaultFlags(); + CompileFlags flags = defaultCompileFlags(); CompileDefinitions definitions(Kernel::compileDefinitions(ps)); string ptx = createPTX(devices, srcFilename, flags, definitions);