Skip to content
Snippets Groups Projects
Commit 5d873e92 authored by John Romein
Browse files

Merge branch 'multi-gpu-types' into 'master'

Support for multiple GPU types per system. Breaks API!

See merge request !12
parents fcec87e1 9ab18bf0
No related branches found
No related tags found
1 merge request: !12 Support for multiple GPU types per system. Breaks API!
Pipeline #86573 passed
...@@ -32,7 +32,8 @@ std::string Correlator::findNVRTCincludePath() const ...@@ -32,7 +32,8 @@ std::string Correlator::findNVRTCincludePath() const
} }
Correlator::Correlator(unsigned nrBits, Correlator::Correlator(const cu::Device &device,
unsigned nrBits,
unsigned nrReceivers, unsigned nrReceivers,
unsigned nrChannels, unsigned nrChannels,
unsigned nrSamplesPerChannel, unsigned nrSamplesPerChannel,
...@@ -41,8 +42,7 @@ Correlator::Correlator(unsigned nrBits, ...@@ -41,8 +42,7 @@ Correlator::Correlator(unsigned nrBits,
const std::string &customStoreVisibility const std::string &customStoreVisibility
) )
: :
capability([] { capability([&] {
cu::Device device(0);
return 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>(); return 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>();
} ()), } ()),
nrReceiversPerBlock(nrReceiversPerBlock != 0 ? nrReceiversPerBlock : defaultNrReceiversPerBlock(nrReceivers)), nrReceiversPerBlock(nrReceiversPerBlock != 0 ? nrReceiversPerBlock : defaultNrReceiversPerBlock(nrReceivers)),
......
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
namespace tcc { namespace tcc {
class Correlator { class Correlator {
public: public:
Correlator(unsigned nrBits, Correlator(const cu::Device &,
unsigned nrBits,
unsigned nrReceivers, unsigned nrReceivers,
unsigned nrChannels, unsigned nrChannels,
unsigned nrSamplesPerChannel, unsigned nrSamplesPerChannel,
......
...@@ -59,7 +59,7 @@ template <typename SampleType, typename VisibilityType> void Benchmark::doTest(u ...@@ -59,7 +59,7 @@ template <typename SampleType, typename VisibilityType> void Benchmark::doTest(u
unsigned nrTimesPerBlock = 128 / nrBits; unsigned nrTimesPerBlock = 128 / nrBits;
unsigned nrBaselines = nrReceivers * (nrReceivers + 1) / 2; unsigned nrBaselines = nrReceivers * (nrReceivers + 1) / 2;
tcc::Correlator correlator(nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, nrReceiversPerBlock); tcc::Correlator correlator(device, nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, nrReceiversPerBlock);
unsigned repeatCount; unsigned repeatCount;
multi_array::extent<5> samplesExtent(multi_array::extents[nrChannels][nrSamplesPerChannel / nrTimesPerBlock][nrReceivers][nrPolarizations][nrTimesPerBlock]); multi_array::extent<5> samplesExtent(multi_array::extents[nrChannels][nrSamplesPerChannel / nrTimesPerBlock][nrReceivers][nrPolarizations][nrTimesPerBlock]);
......
...@@ -20,7 +20,7 @@ CorrelatorTest::CorrelatorTest(const Options &options) ...@@ -20,7 +20,7 @@ CorrelatorTest::CorrelatorTest(const Options &options)
UnitTest(options.deviceNumber), UnitTest(options.deviceNumber),
options(options), options(options),
hasIntegratedMemory(device.getAttribute(CU_DEVICE_ATTRIBUTE_INTEGRATED)), hasIntegratedMemory(device.getAttribute(CU_DEVICE_ATTRIBUTE_INTEGRATED)),
correlator(options.nrBits, options.nrReceivers, options.nrChannels, options.nrSamplesPerChannel, options.nrPolarizations, options.nrReceiversPerBlock) correlator(device, options.nrBits, options.nrReceivers, options.nrChannels, options.nrSamplesPerChannel, options.nrPolarizations, options.nrReceiversPerBlock)
{ {
#if defined MEASURE_POWER #if defined MEASURE_POWER
Record start(*powerSensor), stop(*powerSensor); Record start(*powerSensor), stop(*powerSensor);
......
...@@ -51,7 +51,7 @@ int main() ...@@ -51,7 +51,7 @@ int main()
checkCudaCall(cudaSetDevice(0)); // combine the CUDA runtime API and CUDA driver API checkCudaCall(cudaSetDevice(0)); // combine the CUDA runtime API and CUDA driver API
checkCudaCall(cudaFree(0)); checkCudaCall(cudaFree(0));
tcc::Correlator correlator(NR_BITS, NR_RECEIVERS, NR_CHANNELS, NR_SAMPLES_PER_CHANNEL, NR_POLARIZATIONS, NR_RECEIVERS_PER_BLOCK); tcc::Correlator correlator(cu::Device(0), NR_BITS, NR_RECEIVERS, NR_CHANNELS, NR_SAMPLES_PER_CHANNEL, NR_POLARIZATIONS, NR_RECEIVERS_PER_BLOCK);
cudaStream_t stream; cudaStream_t stream;
Samples *samples; Samples *samples;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment