diff --git a/CMake/FindLibNuma.cmake b/CMake/FindLibNuma.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..b95220247ecb71076c3acf0a2f723dea8f982bf3
--- /dev/null
+++ b/CMake/FindLibNuma.cmake
@@ -0,0 +1,48 @@
+# - Try to find libnuma.
+# Variables used by this module:
+#  LIBNUMA_ROOT_DIR     - LIBNUMA root directory
+# Variables defined by this module:
+#  LIBNUMA_FOUND        - system has LIBNUMA
+#  LIBNUMA_INCLUDE_DIR  - the LIBNUMA include directory (cached)
+#  LIBNUMA_INCLUDE_DIRS - the LIBNUMA include directories
+#                         (identical to LIBNUMA_INCLUDE_DIR)
+#  LIBNUMA_LIBRARY      - the LIBNUMA library (cached)
+#  LIBNUMA_LIBRARIES    - the LIBNUMA libraries
+#                         (identical to LIBNUMA_LIBRARY)
+
+# Copyright (C) 2009
+# ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+#
+# $Id$
+
+if(NOT LIBNUMA_FOUND)
+
+  find_path(LIBNUMA_INCLUDE_DIR numa.h
+    HINTS ${LIBNUMA_ROOT_DIR} PATH_SUFFIXES include)
+  find_library(LIBNUMA_LIBRARY numa
+    HINTS ${LIBNUMA_ROOT_DIR} PATH_SUFFIXES lib lib64)
+  mark_as_advanced(LIBNUMA_INCLUDE_DIR LIBNUMA_LIBRARY)
+
+  include(FindPackageHandleStandardArgs)
+  find_package_handle_standard_args(LIBNUMA DEFAULT_MSG
+    LIBNUMA_LIBRARY LIBNUMA_INCLUDE_DIR)
+
+  set(LIBNUMA_INCLUDE_DIRS ${LIBNUMA_INCLUDE_DIR})
+  set(LIBNUMA_LIBRARIES ${LIBNUMA_LIBRARY})
+
+endif(NOT LIBNUMA_FOUND)
diff --git a/RTCP/Cobalt/GPUProc/CMakeLists.txt b/RTCP/Cobalt/GPUProc/CMakeLists.txt
index 8802599982798dedf72c04c9191959c26728c940..9b1b16fabc01bb4ac0f69747ecdcaaa8d9dc5b77 100644
--- a/RTCP/Cobalt/GPUProc/CMakeLists.txt
+++ b/RTCP/Cobalt/GPUProc/CMakeLists.txt
@@ -50,6 +50,7 @@ lofar_package(GPUProc 1.0 DEPENDS ${_gpuproc_deps})
 lofar_find_package(OpenMP REQUIRED)
 lofar_find_package(Boost REQUIRED)
 lofar_find_package(MPI)
+lofar_find_package(LibNuma)
 lofar_find_package(FFTW3 COMPONENTS single double threads REQUIRED) # 'double threads' for FFT unit test refs
 lofar_find_package(UnitTest++)
 
@@ -81,4 +82,4 @@ endif()
 add_subdirectory(src)
 add_subdirectory(test)
 add_subdirectory(share/gpu/kernels)
-add_subdirectory(etc)
\ No newline at end of file
+add_subdirectory(etc)
diff --git a/RTCP/Cobalt/GPUProc/src/rtcp.cc b/RTCP/Cobalt/GPUProc/src/rtcp.cc
index 7cdabbaa7dd3b6c49124055c877966bb2dc25078..acc4dfd5e15fbb82a5e40654070599a84ad814b4 100644
--- a/RTCP/Cobalt/GPUProc/src/rtcp.cc
+++ b/RTCP/Cobalt/GPUProc/src/rtcp.cc
@@ -34,6 +34,11 @@
 #include <sys/resource.h>
 #include <sys/mman.h>
 
+#ifdef HAVE_LIBNUMA
+#include <numa.h>
+#include <numaif.h>
+#endif
+
 #ifdef HAVE_MPI
 #include <mpi.h>
 #endif
@@ -175,18 +180,6 @@ int main(int argc, char **argv)
   if (setrlimit(RLIMIT_MEMLOCK, &unlimited) < 0)
     THROW_SYSCALL("setrlimit(RLIMIT_MEMLOCK, unlimited)");
 
-  /*
-   * Initialise OpenMP
-   */
-
-  LOG_INFO_STR("----- Initialising OpenMP");
-
-  // Allow usage of nested omp calls
-  omp_set_nested(true);
-
-  // Allow OpenMP thread registration
-  OMPThread::init();
-
   /*
    * INIT stage
    */
@@ -207,11 +200,9 @@ int main(int argc, char **argv)
 
   LOG_INFO_STR("----- Initialising NUMA bindings");
 
-  // TODO: How to migrate the memory that's currently in use
-  // (and mlocked!) to the selected CPU?
-
   // The set of GPUs we're allowed to use
   vector<gpu::Device> devices;
+
   // If we are testing we do not want dependency on hardware specific cpu configuration
   // Just use all gpu's
   if(rank >= 0 && (size_t)rank < ps.settings.nodes.size()) {
@@ -219,11 +210,59 @@ int main(int argc, char **argv)
     int cpuId = ps.settings.nodes[rank].cpu;
     setProcessorAffinity(cpuId);
 
+#ifdef HAVE_LIBNUMA
+    // numa_available() must succeed before any other libnuma call is made;
+    // if it returns -1 the rest of the library's behaviour is undefined.
+    if (numa_available() != -1) {
+      // force node + memory binding for future allocations
+      struct bitmask *numa_node = numa_allocate_nodemask();
+      numa_bitmask_clearall(numa_node);
+      numa_bitmask_setbit(numa_node, cpuId);
+      numa_bind(numa_node);
+      numa_bitmask_free(numa_node);
+
+      // only allow allocation on this node in case
+      // the numa_alloc_* functions are used
+      numa_set_strict(1);
+
+      // retrieve and report memory binding
+      numa_node = numa_get_membind();
+      vector<string> nodestrs;
+      for (size_t i = 0; i < numa_node->size; i++)
+        if (numa_bitmask_isbitset(numa_node, i))
+          nodestrs.push_back(str(format("%s") % i));
+
+      // migrate currently used memory to our node; a non-zero result means
+      // an error occurred or some pages could not be moved
+      if (numa_migrate_pages(0, numa_all_nodes_ptr, numa_node) != 0)
+        LOG_WARN_STR("Could not migrate all memory to nodes " << nodestrs);
+
+      numa_bitmask_free(numa_node);
+
+      LOG_DEBUG_STR("Bound to memory on nodes " << nodestrs);
+    } else {
+      LOG_WARN_STR("Cannot bind memory (NUMA not available on this system)");
+    }
+#else
+    LOG_WARN_STR("Cannot bind memory (no libnuma support)");
+#endif
+
+    // Bindings are done -- Lock everything in memory
+    if (mlockall(MCL_CURRENT | MCL_FUTURE) < 0)
+      THROW_SYSCALL("mlockall");
+
     // derive the set of gpus we're allowed to use
     const vector<unsigned> &gpuIds = ps.settings.nodes[rank].gpus;
-    LOG_DEBUG_STR("Binding to GPUs " << gpuIds);
-    for (size_t i = 0; i < gpuIds.size(); ++i)
-      devices.push_back(allDevices[i]);
+    vector<string> gpuPciIds;
+    for (size_t i = 0; i < gpuIds.size(); ++i) {
+      // NOTE(review): gpuIds come from the configuration and are not
+      // range-checked against allDevices here -- confirm they are validated.
+      gpu::Device &d = allDevices[gpuIds[i]];
+
+      devices.push_back(d);
+      gpuPciIds.push_back(d.pciId());
+    }
+    LOG_DEBUG_STR("Binding to GPUs " << gpuIds << " = " << gpuPciIds);
 
     // Select on the local NUMA InfiniBand interface (OpenMPI only, for now)
     const string nic = ps.settings.nodes[rank].nic;
@@ -239,11 +278,17 @@ int main(int argc, char **argv)
     devices = allDevices;
   }
 
-  // Bindings are done -- Lock everything in memory
-  if (mlockall(MCL_CURRENT | MCL_FUTURE) < 0)
-    THROW_SYSCALL("mlockall");
+  /*
+   * Initialise OpenMP
+   */
+
+  LOG_INFO_STR("----- Initialising OpenMP");
 
-  LOG_DEBUG_STR("All memory is now pinned.");
+  // Allow usage of nested omp calls
+  omp_set_nested(true);
+
+  // Allow OpenMP thread registration
+  OMPThread::init();
 
   // Only ONE host should start the Storage processes
   SmartPtr<StorageProcesses> storageProcesses;
diff --git a/lofar_config.h.cmake b/lofar_config.h.cmake
index 0f4a55aadced24e354157ae8c28c69762d616feb..df20a130399d110d0185c8537725ebfef251ac21 100644
--- a/lofar_config.h.cmake
+++ b/lofar_config.h.cmake
@@ -126,6 +126,9 @@
 /* Define if LAM is installed */
 #cmakedefine HAVE_LAM 1
 
+/* Define if libnuma is installed */
+#cmakedefine HAVE_LIBNUMA 1
+
 /* Define if libssh2 is installed */
 #cmakedefine HAVE_LIBSSH2 1
 