diff --git a/libtcc/Correlator.cc b/libtcc/Correlator.cc index 2ea9ce3e976a43c8699300a07f92da3a27457216..b95fe813132dc0c55ddf3e09e7888eb1da0df1f6 100644 --- a/libtcc/Correlator.cc +++ b/libtcc/Correlator.cc @@ -32,7 +32,8 @@ std::string Correlator::findNVRTCincludePath() const } -Correlator::Correlator(unsigned nrBits, +Correlator::Correlator(const cu::Device &device, + unsigned nrBits, unsigned nrReceivers, unsigned nrChannels, unsigned nrSamplesPerChannel, @@ -41,8 +42,7 @@ Correlator::Correlator(unsigned nrBits, const std::string &customStoreVisibility ) : - capability([] { - cu::Device device(0); + capability([&] { return 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>(); } ()), nrReceiversPerBlock(nrReceiversPerBlock != 0 ? nrReceiversPerBlock : defaultNrReceiversPerBlock(nrReceivers)), diff --git a/libtcc/Correlator.h b/libtcc/Correlator.h index bfbde78cd785d03262c640768d07603c4c0e432d..b618baf73ec7ff3e2f0a8dc381b3177d57ccea65 100644 --- a/libtcc/Correlator.h +++ b/libtcc/Correlator.h @@ -11,7 +11,8 @@ namespace tcc { class Correlator { public: - Correlator(unsigned nrBits, + Correlator(const cu::Device &, + unsigned nrBits, unsigned nrReceivers, unsigned nrChannels, unsigned nrSamplesPerChannel, diff --git a/test/Benchmark/Benchmark.cc b/test/Benchmark/Benchmark.cc index b122667a096d6c95067f3f7d55595d3324900fdd..eac765062df8edb3d9441a81258bca19752f9546 100644 --- a/test/Benchmark/Benchmark.cc +++ b/test/Benchmark/Benchmark.cc @@ -59,7 +59,7 @@ template <typename SampleType, typename VisibilityType> void Benchmark::doTest(u unsigned nrTimesPerBlock = 128 / nrBits; unsigned nrBaselines = nrReceivers * (nrReceivers + 1) / 2; - tcc::Correlator correlator(nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, nrReceiversPerBlock); + tcc::Correlator correlator(device, nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, nrReceiversPerBlock); unsigned repeatCount; multi_array::extent<5> samplesExtent(multi_array::extents[nrChannels][nrSamplesPerChannel / nrTimesPerBlock][nrReceivers][nrPolarizations][nrTimesPerBlock]); diff --git a/test/CorrelatorTest/CorrelatorTest.cc b/test/CorrelatorTest/CorrelatorTest.cc index 1baa8fb66696c84f8db90babb97b683c5f6655cf..09c52dcac3b6fbbef6fd18801ed79e5586311358 100644 --- a/test/CorrelatorTest/CorrelatorTest.cc +++ b/test/CorrelatorTest/CorrelatorTest.cc @@ -20,7 +20,7 @@ CorrelatorTest::CorrelatorTest(const Options &options) UnitTest(options.deviceNumber), options(options), hasIntegratedMemory(device.getAttribute(CU_DEVICE_ATTRIBUTE_INTEGRATED)), - correlator(options.nrBits, options.nrReceivers, options.nrChannels, options.nrSamplesPerChannel, options.nrPolarizations, options.nrReceiversPerBlock) + correlator(device, options.nrBits, options.nrReceivers, options.nrChannels, options.nrSamplesPerChannel, options.nrPolarizations, options.nrReceiversPerBlock) { #if defined MEASURE_POWER Record start(*powerSensor), stop(*powerSensor); diff --git a/test/SimpleExample/SimpleExample.cu b/test/SimpleExample/SimpleExample.cu index 25846a09bc822e7eefc254815bf0083a35596cac..6ad4ed52b7d9016677c9c42b6a9414b60d4a65ac 100644 --- a/test/SimpleExample/SimpleExample.cu +++ b/test/SimpleExample/SimpleExample.cu @@ -51,7 +51,7 @@ int main() checkCudaCall(cudaSetDevice(0)); // combine the CUDA runtime API and CUDA driver API checkCudaCall(cudaFree(0)); - tcc::Correlator correlator(NR_BITS, NR_RECEIVERS, NR_CHANNELS, NR_SAMPLES_PER_CHANNEL, NR_POLARIZATIONS, NR_RECEIVERS_PER_BLOCK); + tcc::Correlator correlator(cu::Device(0), NR_BITS, NR_RECEIVERS, NR_CHANNELS, NR_SAMPLES_PER_CHANNEL, NR_POLARIZATIONS, NR_RECEIVERS_PER_BLOCK); cudaStream_t stream; Samples *samples;