Skip to content
Snippets Groups Projects
Commit fcec87e1 authored by John Romein's avatar John Romein
Browse files

Merge branch 'default-n' into 'master'

Use a heuristic for the best number of receivers per block.

See merge request !11
parents 11d8a4a5 29828cd8
No related branches found
No related tags found
1 merge request: !11 "Use a heuristic for the best number of receivers per block."
Pipeline #86354 passed
...@@ -41,12 +41,29 @@ Correlator::Correlator(unsigned nrBits, ...@@ -41,12 +41,29 @@ Correlator::Correlator(unsigned nrBits,
const std::string &customStoreVisibility const std::string &customStoreVisibility
) )
: :
correlatorModule(compileModule(nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, nrReceiversPerBlock, customStoreVisibility)), capability([] {
correlatorKernel(correlatorModule, nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, nrReceiversPerBlock) cu::Device device(0);
return 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>();
} ()),
nrReceiversPerBlock(nrReceiversPerBlock != 0 ? nrReceiversPerBlock : defaultNrReceiversPerBlock(nrReceivers)),
correlatorModule(compileModule(nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, this->nrReceiversPerBlock, customStoreVisibility)),
correlatorKernel(correlatorModule, nrBits, nrReceivers, nrChannels, nrSamplesPerChannel, nrPolarizations, this->nrReceiversPerBlock)
{ {
} }
// Heuristic choice of the number of receivers per block, used by the
// constructor when the caller passes nrReceiversPerBlock == 0.
//
// The result depends on the total number of receivers and, for some ranges,
// on the compute capability of the device.
//
// @param nrReceivers  total number of receivers to be correlated
// @return             32, 48, or 64
unsigned Correlator::defaultNrReceiversPerBlock(unsigned nrReceivers) const
{
  // `capability` is initialised in the constructor as 10 * major + minor, so
  // compute capability 7.5 is encoded as 75, 8.7 as 87 and 9.0 as 90.  The
  // device checks must use that same encoding: comparing against 750/870/900
  // can never match, which would silently disable every device-specific case.
  const bool isCapability75or87 = capability == 75 || capability == 87;
  const bool isCapability90     = capability == 90;

  // The order of the tests matters: each case only applies when all earlier
  // ones were rejected.
  return nrReceivers <= 32 ? 32 :
         nrReceivers <= 48 ? 48 :
         nrReceivers <= 64 || isCapability75or87 ? 64 :
         nrReceivers <= 96 ? 32 :
         nrReceivers <= 128 ? 64 :
         nrReceivers <= 144 && isCapability90 ? 48 :
         nrReceivers <= 160 && isCapability90 ? 32 : 64;
}
cu::Module Correlator::compileModule(unsigned nrBits, cu::Module Correlator::compileModule(unsigned nrBits,
unsigned nrReceivers, unsigned nrReceivers,
unsigned nrChannels, unsigned nrChannels,
...@@ -56,9 +73,6 @@ cu::Module Correlator::compileModule(unsigned nrBits, ...@@ -56,9 +73,6 @@ cu::Module Correlator::compileModule(unsigned nrBits,
const std::string &customStoreVisibility const std::string &customStoreVisibility
) )
{ {
cu::Device device(0);
int capability = 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>();
std::vector<std::string> options = std::vector<std::string> options =
{ {
"-I" + findNVRTCincludePath(), "-I" + findNVRTCincludePath(),
......
...@@ -16,7 +16,7 @@ namespace tcc { ...@@ -16,7 +16,7 @@ namespace tcc {
unsigned nrChannels, unsigned nrChannels,
unsigned nrSamplesPerChannel, unsigned nrSamplesPerChannel,
unsigned nrPolarizations = 2, unsigned nrPolarizations = 2,
unsigned nrReceiversPerBlock = 64, unsigned nrReceiversPerBlock = 0, // 0: use a heuristic value that should work well
const std::string &customStoreVisibility = "" const std::string &customStoreVisibility = ""
); // throw (cu::Error, nvrtc::Error) ); // throw (cu::Error, nvrtc::Error)
...@@ -27,6 +27,7 @@ namespace tcc { ...@@ -27,6 +27,7 @@ namespace tcc {
private: private:
std::string findNVRTCincludePath() const; std::string findNVRTCincludePath() const;
unsigned defaultNrReceiversPerBlock(unsigned nrReceivers) const;
cu::Module compileModule(unsigned nrBits, cu::Module compileModule(unsigned nrBits,
unsigned nrReceivers, unsigned nrReceivers,
unsigned nrChannels, unsigned nrChannels,
...@@ -36,6 +37,8 @@ namespace tcc { ...@@ -36,6 +37,8 @@ namespace tcc {
const std::string &customStoreVisibility const std::string &customStoreVisibility
); );
unsigned capability;
unsigned nrReceiversPerBlock;
cu::Module correlatorModule; cu::Module correlatorModule;
CorrelatorKernel correlatorKernel; CorrelatorKernel correlatorKernel;
}; };
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment