Move memory pinning, CPU/NUMA binding and MPI NIC selection out of
rtcp.cc into reusable helpers in cpu_utils. rtcp.cc keeps deciding
whether a failure is fatal (real-time mode) or only logged.

diff --git a/RTCP/Cobalt/GPUProc/src/cpu_utils.cc b/RTCP/Cobalt/GPUProc/src/cpu_utils.cc
index c9405fa3542a878177b2d6aaabf2265e3ba711f1..ad9aa34a593ae8046270d6b65a2c717e2796877a 100644
--- a/RTCP/Cobalt/GPUProc/src/cpu_utils.cc
+++ b/RTCP/Cobalt/GPUProc/src/cpu_utils.cc
@@ -21,14 +21,26 @@
 #include <lofar_config.h>
 
 #include <sched.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/mman.h>
+#include <cstdlib>
 #include <fstream>
-#include <boost/format.hpp>
 
+#include <Common/LofarLogger.h>
 #include <Common/SystemCallException.h>
 #include <CoInterface/Parset.h>
 #include <CoInterface/Exceptions.h>
 #include <CoInterface/PrintVector.h>
 
+#ifdef HAVE_LIBNUMA
+#include <numa.h>
+#include <numaif.h>
+#endif
+
+#include <boost/format.hpp>
+#include <boost/lexical_cast.hpp>
+
 namespace LOFAR
 {
   namespace Cobalt
@@ -76,6 +88,78 @@ namespace LOFAR
       if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) != 0)
         THROW_SYSCALL("sched_setaffinity");
     }
+
+    void unlimitedPinnedMemory() {
+      // Remove limits on pinned (locked) memory
+      struct rlimit unlimited = { RLIM_INFINITY, RLIM_INFINITY };
+
+      if (setrlimit(RLIMIT_MEMLOCK, &unlimited) < 0)
+        THROW_SYSCALL("setrlimit(RLIMIT_MEMLOCK, unlimited)");
+    }
+
+    void pinAllMemory() {
+      // Bindings are done -- Lock everything in memory
+      if (mlockall(MCL_CURRENT | MCL_FUTURE) < 0)
+      {
+        THROW_SYSCALL("mlockall(MCL_CURRENT | MCL_FUTURE)");
+      } else {
+        LOG_DEBUG("All memory is now pinned.");
+      }
+    }
+
+    void bindToCPU(int cpu) {
+      setProcessorAffinity(cpu);
+
+      #ifdef HAVE_LIBNUMA
+      if (numa_available() != -1) {
+        // force node + memory binding for future allocations
+        struct bitmask *numa_node = numa_allocate_nodemask();
+        numa_bitmask_clearall(numa_node);
+        numa_bitmask_setbit(numa_node, cpu);
+        numa_bind(numa_node);
+        numa_bitmask_free(numa_node);
+
+        // only allow allocation on this node in case
+        // the numa_alloc_* functions are used
+        numa_set_strict(1);
+
+        // retrieve and report memory binding
+        numa_node = numa_get_membind();
+        vector<string> nodestrs;
+        for (size_t i = 0; i < numa_node->size; i++)
+          if (numa_bitmask_isbitset(numa_node, i))
+            nodestrs.push_back(str(boost::format("%s") % i));
+
+        // migrate currently used memory to our node. This is best effort:
+        // a failure is reported, but does not abort the binding.
+        const int unmoved = numa_migrate_pages(0, numa_all_nodes_ptr, numa_node);
+        if (unmoved < 0)
+          LOG_WARN("Cannot migrate existing memory to the bound NUMA node(s)");
+        else if (unmoved > 0)
+          LOG_DEBUG_STR("Pages not migrated to the bound NUMA node(s): " << unmoved);
+
+        numa_bitmask_free(numa_node);
+
+        LOG_DEBUG_STR("Bound to memory on nodes " << nodestrs);
+      } else {
+        LOG_INFO("Cannot bind memory: libnuma reports NUMA is not available");
+      }
+      #else
+      LOG_WARN("Cannot bind memory (no libnuma support)");
+      #endif
+    }
+
+    void bindMPItoNIC(const string& nic) {
+      LOG_DEBUG_STR("Binding MPI to interface " << nic);
+
+      // configure openib module
+      if (setenv("OMPI_MCA_btl_openib_if_include", nic.c_str(), 1) < 0)
+        THROW_SYSCALL("setenv(OMPI_MCA_btl_openib_if_include)");
+
+      // configure UCX module
+      if (setenv("UCX_NET_DEVICES", str(boost::format("%s:1") % nic).c_str(), 1) < 0)
+        THROW_SYSCALL("setenv(UCX_NET_DEVICES)");
+    }
   }
 }
 
diff --git a/RTCP/Cobalt/GPUProc/src/cpu_utils.h b/RTCP/Cobalt/GPUProc/src/cpu_utils.h
index 5bbc2e4f2b84eeeae7d0c396f7496fc70df6a5bf..39e76cf18b1ac52834c82184e42be156012113e3 100644
--- a/RTCP/Cobalt/GPUProc/src/cpu_utils.h
+++ b/RTCP/Cobalt/GPUProc/src/cpu_utils.h
@@ -31,6 +31,24 @@ namespace LOFAR
     // Set the correct processer affinity for a COBALT system
     // This will result in less then optimal performance on NON COBALT SYSTEMS!
     void setProcessorAffinity(unsigned procId);
+
+    // Request no limit on amount of pinned memory.
+    // Throws a SystemCallException on failure.
+    void unlimitedPinnedMemory();
+
+    // Pins all memory, current and future.
+    // Throws a SystemCallException on failure.
+    void pinAllMemory();
+
+    // Bind the process and memory to the specified socket.
+    // Memory is only bound if libnuma support is compiled in and NUMA is
+    // available; otherwise only the processor affinity is set.
+    // Throws a SystemCallException if the processor affinity cannot be set.
+    void bindToCPU(int cpu);
+
+    // Sets environment variables to tell MPI to use a specific NIC.
+    // Throws a SystemCallException on failure.
+    void bindMPItoNIC(const std::string& nic);
   }
 }
 #endif
diff --git a/RTCP/Cobalt/GPUProc/src/rtcp.cc b/RTCP/Cobalt/GPUProc/src/rtcp.cc
index a54d997db99243812b04883948491b15808d4052..0944f7a0d3b2d7c1aa0f635e5f2f7d275cc33686 100644
--- a/RTCP/Cobalt/GPUProc/src/rtcp.cc
+++ b/RTCP/Cobalt/GPUProc/src/rtcp.cc
@@ -27,8 +27,6 @@
 #include <ctime>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/resource.h>
-#include <sys/mman.h>
 #include <unistd.h>
 #include <omp.h>
 
@@ -36,11 +34,6 @@
 #include <vector>
 #include <iostream>
 
-#ifdef HAVE_LIBNUMA
-#include <numa.h>
-#include <numaif.h>
-#endif
-
 #include <mpi.h>
 #include <InputProc/Transpose/MPIUtil.h>
 
@@ -209,13 +202,11 @@ int main(int argc, char **argv)
   MACIO::RTmetadata mdLogger(ps.settings.observationID, mdRegisterName, ps.settings.realTime ? mdHostName : "");
 
   // Remove limits on pinned (locked) memory
-  struct rlimit unlimited = { RLIM_INFINITY, RLIM_INFINITY };
-  if (setrlimit(RLIMIT_MEMLOCK, &unlimited) < 0)
-  {
-    if (ps.settings.realTime)
-      THROW_SYSCALL("setrlimit(RLIMIT_MEMLOCK, unlimited)");
-    else
-      LOG_WARN("Cannot setrlimit(RLIMIT_MEMLOCK, unlimited)");
+  try {
+    unlimitedPinnedMemory();
+  } catch (const SystemCallException &ex) {
+    if (ps.settings.realTime) throw;
+    LOG_WARN_STR("Cannot increase memlock limit: " << ex.what());
   }
 
   /* Tuning parameters */
@@ -279,7 +270,7 @@ int main(int argc, char **argv)
 
   // The set of GPUs we're allowed to use
   vector<gpu::Device> devices;
-#if 1
+
   // If we are testing we do not want dependency on hardware specific cpu configuration
   // Just use all gpu's
   if(mpi.rank() >= 0 && (size_t)mpi.rank() < ps.settings.nodes.size()) {
@@ -287,41 +278,10 @@ int main(int argc, char **argv)
 
     if (mynode.cpu != -1) {
       // set the processor affinity before any threads are created
-      setProcessorAffinity(mynode.cpu);
-    }
-
-#ifdef HAVE_LIBNUMA
-    if (mynode.cpu != -1 && numa_available() != -1) {
-      // force node + memory binding for future allocations
-      struct bitmask *numa_node = numa_allocate_nodemask();
-      numa_bitmask_clearall(numa_node);
-      numa_bitmask_setbit(numa_node, mynode.cpu);
-      numa_bind(numa_node);
-      numa_bitmask_free(numa_node);
-
-      // only allow allocation on this node in case
-      // the numa_alloc_* functions are used
-      numa_set_strict(1);
-
-      // retrieve and report memory binding
-      numa_node = numa_get_membind();
-      vector<string> nodestrs;
-      for (size_t i = 0; i < numa_node->size; i++)
-        if (numa_bitmask_isbitset(numa_node, i))
-          nodestrs.push_back(str(format("%s") % i));
-
-      // migrate currently used memory to our node
-      numa_migrate_pages(0, numa_all_nodes_ptr, numa_node);
-
-      numa_bitmask_free(numa_node);
-
-      LOG_DEBUG_STR("Bound to memory on nodes " << nodestrs);
+      bindToCPU(mynode.cpu);
     } else {
-      LOG_INFO("Cannot bind memory: cpu nr to bind to is set to -1 or libnuma reports NUMA is not available");
+      LOG_INFO("Cannot bind memory: cpu nr to bind to is set to -1");
     }
-#else
-    LOG_WARN("Cannot bind memory (no libnuma support)");
-#endif
 
     // derive the set of gpus we're allowed to use
     for (size_t i = 0; i < mynode.gpus.size(); ++i) {
@@ -334,20 +294,9 @@ int main(int argc, char **argv)
 
     // Select on the local NUMA InfiniBand interface (OpenMPI only, for now)
     if (mynode.mpi_nic != "") {
-      LOG_DEBUG_STR("Binding MPI to interface " << mynode.mpi_nic);
-
-      // configure openib module
-      if (setenv("OMPI_MCA_btl_openib_if_include", mynode.mpi_nic.c_str(), 1) < 0)
-        THROW_SYSCALL("setenv(OMPI_MCA_btl_openib_if_include)");
-
-      // configure UCX module
-      if (setenv("UCX_NET_DEVICES", str(format("%s:1") % mynode.mpi_nic).c_str(), 1) < 0)
-        THROW_SYSCALL("setenv(UCX_NET_DEVICES)");
+      bindMPItoNIC(mynode.mpi_nic);
     }
   } else {
-#else
-  {
-#endif
     LOG_WARN_STR("Rank " << mpi.rank() << " not present in node list -- using full machine");
     devices = allDevices;
   }
@@ -359,14 +308,11 @@ int main(int argc, char **argv)
                  " global memory: " << (devices[i].getTotalGlobalMem() / 1024 / 1024) << " Mbyte");
 
   // Bindings are done -- Lock everything in memory
-  if (mlockall(MCL_CURRENT | MCL_FUTURE) < 0)
-  {
-    if (ps.settings.realTime)
-      THROW_SYSCALL("mlockall");
-    else
-      LOG_WARN("Cannot mlockall(MCL_CURRENT | MCL_FUTURE)");
-  } else {
-    LOG_DEBUG("All memory is now pinned.");
+  try {
+    pinAllMemory();
+  } catch (const SystemCallException &ex) {
+    if (ps.settings.realTime) throw;
+    LOG_WARN_STR("Cannot pin all memory: " << ex.what());
   }
 
   LOG_INFO("----- Initialising Pipeline");