Skip to content
Snippets Groups Projects
Commit 85a0bb73 authored by Marcel Loose :sunglasses:
Browse files

Task #4589: Reformatted some code to width of 80 columns.

parent 9ae4c978
Branches
Tags
No related merge requests found
......@@ -58,7 +58,8 @@ namespace LOFAR
const Parameters &params)
:
gpu::Function(function),
maxThreadsPerBlock(stream.getContext().getDevice().getMaxThreadsPerBlock()),
maxThreadsPerBlock(
stream.getContext().getDevice().getMaxThreadsPerBlock()),
itsStream(stream),
itsBuffers(buffers),
itsParameters(params)
......@@ -71,27 +72,37 @@ namespace LOFAR
function.getAttribute(CU_FUNC_ATTRIBUTE_NUM_REGS));
}
void Kernel::setEnqueueWorkSizes(gpu::Grid globalWorkSize, gpu::Block localWorkSize)
void Kernel::setEnqueueWorkSizes(gpu::Grid globalWorkSize,
gpu::Block localWorkSize)
{
gpu::Grid grid;
ostringstream errMsgs;
// Enforce by the hardware supported work sizes to see errors clearly and early.
// Enforce by the hardware supported work sizes to see errors clearly and
// early.
gpu::Block maxLocalWorkSize = itsStream.getContext().getDevice().getMaxBlockDims();
gpu::Block maxLocalWorkSize =
itsStream.getContext().getDevice().getMaxBlockDims();
if (localWorkSize.x > maxLocalWorkSize.x ||
localWorkSize.y > maxLocalWorkSize.y ||
localWorkSize.z > maxLocalWorkSize.z)
errMsgs << " - localWorkSize must be at most " << maxLocalWorkSize << endl;
if (localWorkSize.x * localWorkSize.y * localWorkSize.z > maxThreadsPerBlock)
errMsgs << " - localWorkSize total must be at most " << maxThreadsPerBlock << " threads/block" << endl;
// globalWorkSize may (in theory) be all zero (no work). Reject such localWorkSize.
if (localWorkSize.x == 0 || localWorkSize.y == 0 || localWorkSize.z == 0) {
errMsgs << " - localWorkSize must be at most " << maxLocalWorkSize
<< endl;
if (localWorkSize.x * localWorkSize.y * localWorkSize.z >
maxThreadsPerBlock)
errMsgs << " - localWorkSize total must be at most "
<< maxThreadsPerBlock << " threads/block" << endl;
// globalWorkSize may (in theory) be all zero (no work). Reject such
// localWorkSize.
if (localWorkSize.x == 0 ||
localWorkSize.y == 0 ||
localWorkSize.z == 0) {
errMsgs << " - localWorkSize components must be non-zero" << endl;
} else {
// TODO: to globalWorkSize in terms of localWorkSize (CUDA) ('gridWorkSize').
// TODO: to globalWorkSize in terms of localWorkSize (CUDA)
// ('gridWorkSize').
if (globalWorkSize.x % localWorkSize.x != 0 ||
globalWorkSize.y % localWorkSize.y != 0 ||
globalWorkSize.z % localWorkSize.z != 0)
......@@ -100,19 +111,21 @@ namespace LOFAR
globalWorkSize.y / localWorkSize.y,
globalWorkSize.z / localWorkSize.z);
gpu::Grid maxGridWorkSize = itsStream.getContext().getDevice().getMaxGridDims();
gpu::Grid maxGridWorkSize =
itsStream.getContext().getDevice().getMaxGridDims();
if (grid.x > maxGridWorkSize.x ||
grid.y > maxGridWorkSize.y ||
grid.z > maxGridWorkSize.z)
errMsgs << " - globalWorkSize / localWorkSize must be at most " << maxGridWorkSize << endl;
errMsgs << " - globalWorkSize / localWorkSize must be at most "
<< maxGridWorkSize << endl;
}
string errStr(errMsgs.str());
if (!errStr.empty())
THROW(gpu::GPUException, "setEnqueueWorkSizes(): unsupported globalWorkSize " <<
globalWorkSize << " and/or localWorkSize " << localWorkSize << " selected:" <<
endl << errStr);
THROW(gpu::GPUException,
"setEnqueueWorkSizes(): unsupported globalWorkSize " <<
globalWorkSize << " and/or localWorkSize " << localWorkSize <<
" selected:" << endl << errStr);
LOG_DEBUG_STR("CUDA Grid size: " << grid);
LOG_DEBUG_STR("CUDA Block size: " << localWorkSize);
......
......@@ -41,7 +41,9 @@ namespace LOFAR
{
public:
// Parameters that must be passed to the constructor of this Kernel class.
// TODO: more at constructor passed immediates can be turned into defines (blockDim/gridDim too if enforced fixed (consider conditional define) or drop opt)
// TODO: more at constructor passed immediates can be turned into defines
// (blockDim/gridDim too if enforced fixed (consider conditional define)
// or drop opt)
struct Parameters
{
Parameters(const Parset& ps);
......@@ -85,7 +87,8 @@ namespace LOFAR
// Explicit destructor, because the implicitly generated one is public.
~Kernel();
void setEnqueueWorkSizes(gpu::Grid globalWorkSize, gpu::Block localWorkSize);
void setEnqueueWorkSizes(gpu::Grid globalWorkSize,
gpu::Block localWorkSize);
const unsigned maxThreadsPerBlock;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment