Skip to content
Snippets Groups Projects
Commit 85a0bb73 authored by Marcel Loose :sunglasses:
Browse files

Task #4589: Reformatted some code to width of 80 columns.

parent 9ae4c978
Branches
Tags
No related merge requests found
...@@ -58,7 +58,8 @@ namespace LOFAR ...@@ -58,7 +58,8 @@ namespace LOFAR
const Parameters &params) const Parameters &params)
: :
gpu::Function(function), gpu::Function(function),
maxThreadsPerBlock(stream.getContext().getDevice().getMaxThreadsPerBlock()), maxThreadsPerBlock(
stream.getContext().getDevice().getMaxThreadsPerBlock()),
itsStream(stream), itsStream(stream),
itsBuffers(buffers), itsBuffers(buffers),
itsParameters(params) itsParameters(params)
...@@ -71,27 +72,37 @@ namespace LOFAR ...@@ -71,27 +72,37 @@ namespace LOFAR
function.getAttribute(CU_FUNC_ATTRIBUTE_NUM_REGS)); function.getAttribute(CU_FUNC_ATTRIBUTE_NUM_REGS));
} }
void Kernel::setEnqueueWorkSizes(gpu::Grid globalWorkSize, gpu::Block localWorkSize) void Kernel::setEnqueueWorkSizes(gpu::Grid globalWorkSize,
gpu::Block localWorkSize)
{ {
gpu::Grid grid; gpu::Grid grid;
ostringstream errMsgs; ostringstream errMsgs;
// Enforce by the hardware supported work sizes to see errors clearly and early. // Enforce by the hardware supported work sizes to see errors clearly and
// early.
gpu::Block maxLocalWorkSize = itsStream.getContext().getDevice().getMaxBlockDims(); gpu::Block maxLocalWorkSize =
itsStream.getContext().getDevice().getMaxBlockDims();
if (localWorkSize.x > maxLocalWorkSize.x || if (localWorkSize.x > maxLocalWorkSize.x ||
localWorkSize.y > maxLocalWorkSize.y || localWorkSize.y > maxLocalWorkSize.y ||
localWorkSize.z > maxLocalWorkSize.z) localWorkSize.z > maxLocalWorkSize.z)
errMsgs << " - localWorkSize must be at most " << maxLocalWorkSize << endl; errMsgs << " - localWorkSize must be at most " << maxLocalWorkSize
<< endl;
if (localWorkSize.x * localWorkSize.y * localWorkSize.z > maxThreadsPerBlock)
errMsgs << " - localWorkSize total must be at most " << maxThreadsPerBlock << " threads/block" << endl; if (localWorkSize.x * localWorkSize.y * localWorkSize.z >
maxThreadsPerBlock)
// globalWorkSize may (in theory) be all zero (no work). Reject such localWorkSize. errMsgs << " - localWorkSize total must be at most "
if (localWorkSize.x == 0 || localWorkSize.y == 0 || localWorkSize.z == 0) { << maxThreadsPerBlock << " threads/block" << endl;
// globalWorkSize may (in theory) be all zero (no work). Reject such
// localWorkSize.
if (localWorkSize.x == 0 ||
localWorkSize.y == 0 ||
localWorkSize.z == 0) {
errMsgs << " - localWorkSize components must be non-zero" << endl; errMsgs << " - localWorkSize components must be non-zero" << endl;
} else { } else {
// TODO: to globalWorkSize in terms of localWorkSize (CUDA) ('gridWorkSize'). // TODO: to globalWorkSize in terms of localWorkSize (CUDA)
// ('gridWorkSize').
if (globalWorkSize.x % localWorkSize.x != 0 || if (globalWorkSize.x % localWorkSize.x != 0 ||
globalWorkSize.y % localWorkSize.y != 0 || globalWorkSize.y % localWorkSize.y != 0 ||
globalWorkSize.z % localWorkSize.z != 0) globalWorkSize.z % localWorkSize.z != 0)
...@@ -100,19 +111,21 @@ namespace LOFAR ...@@ -100,19 +111,21 @@ namespace LOFAR
globalWorkSize.y / localWorkSize.y, globalWorkSize.y / localWorkSize.y,
globalWorkSize.z / localWorkSize.z); globalWorkSize.z / localWorkSize.z);
gpu::Grid maxGridWorkSize = itsStream.getContext().getDevice().getMaxGridDims(); gpu::Grid maxGridWorkSize =
itsStream.getContext().getDevice().getMaxGridDims();
if (grid.x > maxGridWorkSize.x || if (grid.x > maxGridWorkSize.x ||
grid.y > maxGridWorkSize.y || grid.y > maxGridWorkSize.y ||
grid.z > maxGridWorkSize.z) grid.z > maxGridWorkSize.z)
errMsgs << " - globalWorkSize / localWorkSize must be at most " << maxGridWorkSize << endl; errMsgs << " - globalWorkSize / localWorkSize must be at most "
<< maxGridWorkSize << endl;
} }
string errStr(errMsgs.str()); string errStr(errMsgs.str());
if (!errStr.empty()) if (!errStr.empty())
THROW(gpu::GPUException, "setEnqueueWorkSizes(): unsupported globalWorkSize " << THROW(gpu::GPUException,
globalWorkSize << " and/or localWorkSize " << localWorkSize << " selected:" << "setEnqueueWorkSizes(): unsupported globalWorkSize " <<
endl << errStr); globalWorkSize << " and/or localWorkSize " << localWorkSize <<
" selected:" << endl << errStr);
LOG_DEBUG_STR("CUDA Grid size: " << grid); LOG_DEBUG_STR("CUDA Grid size: " << grid);
LOG_DEBUG_STR("CUDA Block size: " << localWorkSize); LOG_DEBUG_STR("CUDA Block size: " << localWorkSize);
......
...@@ -41,7 +41,9 @@ namespace LOFAR ...@@ -41,7 +41,9 @@ namespace LOFAR
{ {
public: public:
// Parameters that must be passed to the constructor of this Kernel class. // Parameters that must be passed to the constructor of this Kernel class.
// TODO: more at constructor passed immediates can be turned into defines (blockDim/gridDim too if enforced fixed (consider conditional define) or drop opt) // TODO: more at constructor passed immediates can be turned into defines
// (blockDim/gridDim too if enforced fixed (consider conditional define)
// or drop opt)
struct Parameters struct Parameters
{ {
Parameters(const Parset& ps); Parameters(const Parset& ps);
...@@ -85,7 +87,8 @@ namespace LOFAR ...@@ -85,7 +87,8 @@ namespace LOFAR
// Explicit destructor, because the implicitly generated one is public. // Explicit destructor, because the implicitly generated one is public.
~Kernel(); ~Kernel();
void setEnqueueWorkSizes(gpu::Grid globalWorkSize, gpu::Block localWorkSize); void setEnqueueWorkSizes(gpu::Grid globalWorkSize,
gpu::Block localWorkSize);
const unsigned maxThreadsPerBlock; const unsigned maxThreadsPerBlock;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment