diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index cc1eb71dfcb18268f0636d4b99a2749f813ffc8c..5e5ce4ab55ce84e8bef1219142651bffada6c26d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -45,7 +45,6 @@ test-das6:
     - das6-gpu
   before_script:
     - module load spack/12.2.0
-    - module av
     - module load python/3.11.6
     - module load py-pybind11/2.11.0
     - module load cuda/12.2.1
diff --git a/common/cpu.h b/common/cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..470188d60f87b8c635ae923646ba15078ef524af
--- /dev/null
+++ b/common/cpu.h
@@ -0,0 +1,81 @@
+#ifndef PMT_CPU_H_
+#define PMT_CPU_H_
+
+#include <cerrno>
+#include <fstream>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <system_error>
+#include <vector>
+
+#include <sched.h>
+#include <unistd.h>
+
+namespace pmt::common {
+
+// Returns the ids of the CPUs that the calling process is allowed to run on
+// (its scheduler affinity mask), in ascending order.
+//
+// Defined inline because this header may be included from several
+// translation units.
+//
+// Throws std::system_error when sched_getaffinity() fails.
+inline std::vector<int> get_active_cpus() {
+  cpu_set_t cpu_set;
+  CPU_ZERO(&cpu_set);
+
+  const int result = sched_getaffinity(0, sizeof(cpu_set), &cpu_set);
+  if (result == -1) {
+    throw std::system_error(errno, std::generic_category(),
+                            "sched_getaffinity");
+  }
+
+  // Scan the whole mask rather than the first sysconf(_SC_NPROCESSORS_ONLN)
+  // ids: CPU ids are not necessarily contiguous (e.g. when CPUs are offline),
+  // so a CPU in the mask can have an id >= the number of online CPUs.
+  std::vector<int> active_cpus;
+  for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
+    if (CPU_ISSET(cpu, &cpu_set)) {
+      active_cpus.push_back(cpu);
+    }
+  }
+
+  return active_cpus;
+}
+
+// Returns the set of physical package ids that the given CPUs belong to, as
+// read from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id.
+//
+// Throws std::runtime_error when one of these files cannot be opened or does
+// not contain an integer.
+inline std::set<int> get_active_packages(const std::vector<int>& active_cpus) {
+  std::set<int> active_packages;
+
+  for (int cpu : active_cpus) {
+    const std::string path = "/sys/devices/system/cpu/cpu" +
+                             std::to_string(cpu) +
+                             "/topology/physical_package_id";
+
+    std::ifstream file(path);
+    if (!file.is_open()) {
+      throw std::runtime_error("Failed to open file: " + path);
+    }
+
+    // Never insert an uninitialized id when the file is empty or malformed.
+    int socket_id = -1;
+    if (!(file >> socket_id)) {
+      throw std::runtime_error("Failed to parse package id from: " + path);
+    }
+
+    active_packages.insert(socket_id);
+  }
+
+  return active_packages;
+}
+
+}  // namespace pmt::common
+
+#endif  // PMT_CPU_H_
diff --git a/common/io.h b/common/io.h
deleted file mode 100644
index 2794e2a0195a0053f556f363a522de70b993f957..0000000000000000000000000000000000000000
--- a/common/io.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef PMT_COMMON_IO_H_ -#define PMT_COMMON_IO_H_ - -#include <memory> -#include <span> -#include <sstream> - -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> - -namespace pmt { - -namespace os { - -class file_descriptor { - public: - inline file_descriptor(int fd) : fd_(fd) {} - inline file_descriptor(const file_descriptor&) = delete; - inline file_descriptor(file_descriptor&& other) : fd_(-1) { - std::swap(fd_, other.fd_); - } - inline ~file_descriptor() { - if (fd_ > 0) { - (void)::close(fd_); - } - } - inline int fd() const { return fd_; } - - private: - int fd_; -}; - -inline int take_and_reset_errno() { - const int errcode = errno; - errno = 0; - return errcode; -} - -inline file_descriptor opendir(const std::string& filename) { - while (true) { - const int fd = ::open(filename.c_str(), O_RDONLY | O_DIRECTORY); - if (fd < 0) { - const int errcode = take_and_reset_errno(); - if (errcode == EINTR) { - // interrupted system call - continue; - } - std::stringstream message; - message << "opendir fail for '" << filename << "'"; - throw std::system_error( - std::make_error_code(static_cast<std::errc>(errcode)), message.str()); - } - return file_descriptor(fd); - } -} - -inline file_descriptor openat(int dirfd, const std::string& filename) { - while (true) { - const int fd = ::openat(dirfd, filename.c_str(), O_RDONLY); - if (fd < 0) { - const int errcode = take_and_reset_errno(); - if (errcode == EINTR) { - // interrupted system call - continue; - } - std::stringstream message; - message << "open fail for '" << filename << "'"; - throw std::system_error( - std::make_error_code(static_cast<std::errc>(errcode)), message.str()); - } - return file_descriptor(fd); - } -} - -inline size_t pread(int fd, const std::span<std::byte>& byte, - std::int64_t offset) { - while (true) { - const ::ssize_t data_read = - ::pread(fd, static_cast<void*>(byte.data()), byte.size(), - 
static_cast<::off_t>(offset));
-    if (data_read < 0) {
-      const int errcode = take_and_reset_errno();
-      if (errno == EINTR) {
-        // interrupted system call
-        continue;
-      }
-      throw std::system_error(
-          std::make_error_code(static_cast<std::errc>(errcode)), "<pread>");
-    }
-    return static_cast<std::size_t>(data_read);
-  }
-}
-
-}  // namespace os
-
-}  // end namespace pmt
-
-#endif
diff --git a/rapl/CMakeLists.txt b/rapl/CMakeLists.txt
index 6315b84adc412063c4e0d26e613761491053a820..7780d4b10ee00d350c2cc0909accbc69f9f77223 100644
--- a/rapl/CMakeLists.txt
+++ b/rapl/CMakeLists.txt
@@ -9,6 +9,7 @@ add_sensor(
   Rapl.h
   SRC_FILES
   Rapl.cpp
+  RaplCounter.cpp
   RaplImpl.cpp
   LINK_LIBRARIES
   ${LINK_LIBRARIES})
diff --git a/rapl/RaplCounter.cpp b/rapl/RaplCounter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d1ac22df947b4545ae35f6f4f5b0a66f3e84c220
--- /dev/null
+++ b/rapl/RaplCounter.cpp
@@ -0,0 +1,81 @@
+#include <cassert>
+#include <cerrno>
+#include <fstream>
+#include <string>
+#include <system_error>
+
+#include "RaplCounter.h"
+
+namespace pmt::rapl {
+
+namespace {
+
+// Opens `path` for reading. Throws std::system_error on failure, so that the
+// error is also reported in release builds (assert() is compiled out there).
+std::ifstream OpenForReading(const std::string& path) {
+  errno = 0;
+  std::ifstream stream(path);
+  if (!stream.is_open()) {
+    // errno is normally left behind by the underlying open call; fall back to
+    // EIO in case the library did not set it.
+    throw std::system_error(errno != 0 ? errno : EIO, std::generic_category(),
+                            "Failed to open " + path);
+  }
+  return stream;
+}
+
+// Reports unreadable or unparsable file contents as std::system_error (EIO).
+[[noreturn]] void ThrowReadError(const std::string& what) {
+  throw std::system_error(EIO, std::generic_category(),
+                          "Failed to read " + what);
+}
+
+}  // namespace
+
+RaplCounter::RaplCounter(const std::string& directory) {
+  const std::string path_name = directory + "/name";
+  std::ifstream ifstream_name = OpenForReading(path_name);
+  if (!(ifstream_name >> name_)) {
+    ThrowReadError(path_name);
+  }
+
+  const std::string path_max_energy = directory + "/max_energy_range_uj";
+  std::ifstream ifstream_max_energy = OpenForReading(path_max_energy);
+  if (!(ifstream_max_energy >> max_energy_range_uj_)) {
+    ThrowReadError(path_max_energy);
+  }
+
+  ifstream_energy_uj_ = OpenForReading(directory + "/energy_uj");
+
+  // The first reading is the zero point for all subsequent calls to Read()
+  const std::size_t energy_uj = Read();
+  energy_uj_first_ = energy_uj;
+  energy_uj_previous_ = energy_uj;
+  energy_uj_offset_ = 0;
+}
+
+std::size_t RaplCounter::Read() {
+  assert(ifstream_energy_uj_.is_open());
+
+  // Rewind before every read and drop any error state left by a previous
+  // call, so that one failed read does not make all later reads fail as well.
+  ifstream_energy_uj_.clear();
+  ifstream_energy_uj_.seekg(0);
+
+  std::size_t energy_uj = 0;
+  if (!(ifstream_energy_uj_ >> energy_uj)) {
+    ThrowReadError("energy_uj of " + name_);
+  }
+
+  // A value lower than the previous one means that the counter wrapped around
+  // since the last call. Remember the current value: comparing against a
+  // stale one would add the range again on every call after a wrap.
+  if (energy_uj < energy_uj_previous_) {
+    energy_uj_offset_ += max_energy_range_uj_;
+  }
+  energy_uj_previous_ = energy_uj;
+
+  return energy_uj_offset_ + energy_uj - energy_uj_first_;
+}
+
+}  // end namespace pmt::rapl
diff --git a/rapl/RaplCounter.h b/rapl/RaplCounter.h
new file mode 100644
index 0000000000000000000000000000000000000000..01b01bcad80c0e219bcd2567a5b06aa728764b61
--- /dev/null
+++ b/rapl/RaplCounter.h
@@ -0,0 +1,45 @@
+#ifndef PMT_RAPLCOUNTER_H_
+#define PMT_RAPLCOUNTER_H_
+
+#include <charconv>
+#include <cstddef>
+#include <fstream>
+#include <string>
+
+namespace pmt::rapl {
+
+// Reads one RAPL energy counter from a powercap directory, i.e. a directory
+// that contains the files name, max_energy_range_uj and energy_uj.
+class RaplCounter {
+ public:
+  // Opens the counter files in `directory` and takes the first reading, which
+  // becomes the zero point of Read(). Throws std::system_error when a file
+  // cannot be opened or parsed.
+  explicit RaplCounter(const std::string& directory);
+
+  // The name that was read from the /name file
+  const std::string& GetName() const { return name_; }
+
+  // The numbers in the rapl /energy_uj files range from zero up to a maximum
+  // specified in /max_energy_range_uj. This class reports monotonically
+  // increasing values starting with zero for the first measurement. Therefore,
+  // the result is computed as follows:
+  //   now = <read value>
+  //   offset += now < previous ? max : 0
+  //   previous = now
+  //   result = offset + now - first
+  // Throws std::system_error when the value cannot be read.
+  std::size_t Read();
+
+ private:
+  std::ifstream ifstream_energy_uj_;
+  std::string name_;
+  std::size_t max_energy_range_uj_ = 0;
+  std::size_t energy_uj_first_ = 0;
+  std::size_t energy_uj_previous_ = 0;
+  std::size_t energy_uj_offset_ = 0;
+};
+
+}  // namespace pmt::rapl
+
+#endif  // PMT_RAPLCOUNTER_H_
diff --git a/rapl/RaplImpl.cpp b/rapl/RaplImpl.cpp
index ab7ce89c0ad38ca0a59ed9fe05f59986cfee3d95..58ecba3cbeadcceda5373104a742d1d19dcf4ce3 100644
--- a/rapl/RaplImpl.cpp
+++ b/rapl/RaplImpl.cpp
@@ -1,27 +1,26 @@
+#include <algorithm>
+#include <array>
 #include <cassert>
 #include <cerrno>
-#include <iostream>
-#include <charconv>
+#include <cstring>
 #include <filesystem>
+#include <iostream>
 #include <iterator>
 #include <memory>
-#include <cstring>
 #include <regex>
 #include <sstream>
 #include <stdexcept>
-#include <algorithm>
 #include <string>
+#include <system_error>
 #include <utility>
 #include <vector>
-#include
<filesystem>
-#include <system_error>
 
 namespace fs = std::filesystem;
 
-#include <sched.h>
-#include <unistd.h>
-
 #include "RaplImpl.h"
+#include "RaplCounter.h"
+
+#include "common/cpu.h"
 
 /*
  * RAPL (Running Average Power Limit) is an API provided by Intel for power
@@ -47,54 +46,40 @@ RaplImpl::RaplImpl() {
   previous_measurements_ = GetMeasurements();
 }
 
+// Locks the mutex before the members are destroyed; presumably this lets a
+// GetMeasurements() call that is running on another thread finish first.
+RaplImpl::~RaplImpl() { std::lock_guard<std::mutex> lock(mutex_); }
+
 std::string GetBasename(int package_id) {
   std::stringstream basename;
   basename << "/sys/class/powercap/intel-rapl:" << package_id;
   return basename.str();
 }
 
-std::string read_string(int fd) {
-  std::string result;
-  std::array<std::byte, 256> buffer;
-  const std::size_t read_size = os::pread(fd, buffer, 0);
-  result.reserve(read_size);
-  for (std::size_t i = 0; i < read_size; ++i) {
-    const char c = static_cast<char>(buffer[i]);
-    if (!std::isspace(c) && c != '\n') {
-      result.push_back(c);
-    }
-  }
-  return result;
-}
-
-template <typename Numeric>
-Numeric read_numerical_value(int fd) {
-  Numeric value = 0;
-  std::array<std::byte, 256> buffer;
-  const std::size_t read_size = os::pread(fd, buffer, 0);
-  std::from_chars_result res = std::from_chars(
-      static_cast<const char*>(static_cast<void*>(buffer.data())),
-      static_cast<const char*>(static_cast<void*>(buffer.data() + read_size)),
-      value);
-  if (res.ec == std::errc::invalid_argument &&
-      res.ec == std::errc::result_out_of_range) {
-    throw std::runtime_error("Unable to parse file to numerical value");
-  }
-  return value;
-}
-
 void RaplImpl::Init() {
   const fs::path basename = "/sys/class/powercap";
 
-  const std::regex pattern("intel-rapl(:\\d+)+");
+  // Matches a package directory ("intel-rapl:0") or one of its subzones
+  // ("intel-rapl:0:1"); the first capture group is used as the package id.
+  // NOTE(review): this assumes that the zone index equals the
+  // physical_package_id reported by sysfs -- confirm on multi-socket systems.
+  const std::regex pattern_directory("intel-rapl:(\\d+)(:\\d+)*");
 
   std::vector<std::string> rapl_dirs;
 
+  // Only use the counters of packages that this process is allowed to run on
+  const std::set<int> active_sockets =
+      common::get_active_packages(common::get_active_cpus());
+
   for (const auto& entry : fs::directory_iterator(basename)) {
     const std::string directory = entry.path().filename().string();
-    if (std::regex_match(directory, pattern)) {
+    std::smatch match;
+    if (std::regex_match(directory, match, pattern_directory)) {
       if (fs::exists(entry.path() / "energy_uj")) {
-        rapl_dirs.push_back(entry.path());
+        const int package_id = std::stoi(match[1].str());
+        if (active_sockets.count(package_id) > 0) {
+          rapl_dirs.emplace_back(entry.path());
+        }
       }
     }
   }
@@ -108,23 +93,7 @@ void RaplImpl::Init() {
 
   for (const auto& rapl_dir : rapl_dirs) {
     try {
-      const os::file_descriptor fd_rapl_dir = os::opendir(rapl_dir);
-      const os::file_descriptor name_fd = os::openat(fd_rapl_dir.fd(), "name");
-
-      const std::string package_id = read_string(name_fd.fd());
-
-      const os::file_descriptor max_energy_fd =
-          os::openat(fd_rapl_dir.fd(), "max_energy_range_uj");
-      std::size_t max_energy_range_uj =
-          read_numerical_value<std::size_t>(max_energy_fd.fd());
-
-      os::file_descriptor energy_uj_fd =
-          os::openat(fd_rapl_dir.fd(), "energy_uj");
-      std::ignore = read_numerical_value<std::size_t>(energy_uj_fd.fd());
-
-      packages_names_.push_back(package_id);
-      uj_max_.push_back(max_energy_range_uj);
-      energy_fds_.emplace_back(std::move(energy_uj_fd));
+      rapl_counters_.emplace_back(rapl_dir);
     } catch (std::system_error& e) {
       std::stringstream message;
       message << "OS error: " << e.what();
@@ -135,50 +104,21 @@ void RaplImpl::Init() {
       }
     }
   }
-
-  // Initialize state variables
-  const std::size_t n = uj_max_.size();
-  uj_first_.resize(n);
-  uj_previous_.resize(n);
-  uj_offset_.resize(n);
-  std::vector<RaplMeasurement> measurements = GetMeasurements();
-  for (std::size_t i = 0; i < n; i++) {
-    uj_first_[i] = measurements[i].joules;
-    uj_previous_[i] = uj_first_[i];
-    uj_offset_[i] = 0;
-  }
 }
 
 std::vector<RaplMeasurement> RaplImpl::GetMeasurements() {
   std::lock_guard<std::mutex> lock(mutex_);
-
   std::vector<RaplMeasurement> measurements;
+  measurements.reserve(rapl_counters_.size());
 
-  const std::size_t n = packages_names_.size();
-  assert(n == energy_fds_.size());
-  assert(n == uj_max_.size());
-
-  // Take all measurements
-  auto packages_name_it = packages_names_.begin();
-  for (const auto& energy_fd : energy_fds_) {
-    std::size_t measurement;
-    const std::size_t energy_value =
-        read_numerical_value<std::size_t>(energy_fd.fd());
-    measurements.emplace_back(RaplMeasurement{*packages_name_it, energy_value});
-    packages_name_it++;
-  }
-
-  for (std::size_t i = 0; i < measurements.size(); i++) {
-    const std::size_t uj_now = measurements[i].joules;
-    if (uj_now < uj_previous_[i]) {
-      uj_offset_[i] += uj_max_[i];
-    }
-    uj_previous_[i] = uj_now;
-    measurements[i].joules = uj_offset_[i] + uj_now - uj_first_[i];
+  for (auto& counter : rapl_counters_) {
+    // Build the aggregate explicitly: emplace_back(name, value) would need
+    // C++20 parenthesized aggregate initialization to compile.
+    measurements.push_back(RaplMeasurement{counter.GetName(), counter.Read()});
   }
 
   return measurements;
-}  // end Rapl::GetMeasurement
+}
 
 State RaplImpl::GetState() {
   std::vector<RaplMeasurement> measurements = GetMeasurements();
@@ -190,17 +130,17 @@ State RaplImpl::GetState() {
 
   for (std::size_t i = 0; i < measurements.size(); i++) {
     const std::string name = measurements[i].name;
-    const std::size_t joules_now = measurements[i].joules;
-    const std::size_t joules_previous = previous_measurements_[i].joules;
+    const std::size_t ujoules_now = measurements[i].ujoules;
+    const std::size_t ujoules_previous = previous_measurements_[i].ujoules;
     const double duration = seconds(previous_timestamp_, state.timestamp_);
-    const float joules_diff = (joules_now - joules_previous) * 1e-6;
+    const float joules_diff = (ujoules_now - ujoules_previous) * 1e-6;
     const float watt = joules_diff / duration;
     state.name_[i + 1] = name;
-    state.joules_[i + 1] = joules_now * 1e-6;
+    state.joules_[i + 1] = ujoules_now * 1e-6;
     state.watt_[i + 1] = watt;
 
     if (name.find("package") != std::string::npos) {
-      state.joules_[0] += joules_now * 1e-6;
+      state.joules_[0] += ujoules_now * 1e-6;
       state.watt_[0] += watt;
     }
   }
diff --git a/rapl/RaplImpl.h b/rapl/RaplImpl.h
index 68e3083ca602a04ff48b081bc20aa7019584734d..b5c1d679a2bced0c930e188307945bc626fde9a6 100644
--- a/rapl/RaplImpl.h
+++ b/rapl/RaplImpl.h
@@ -7,21 +7,20 @@
 #include <vector>
 
 #include "Rapl.h"
+#include "RaplCounter.h"
 #include "common/PMT.h"
-#include "common/io.h"
 
 namespace pmt::rapl {
 
-const int kKeepAliveInterval = 10;  // call Measure() roughly every nth update
-
 struct RaplMeasurement {
   std::string name;
-  std::size_t joules;
+  std::size_t ujoules;  // energy in microjoules
 };
 
 class RaplImpl : public Rapl {
  public:
   RaplImpl();
+  ~RaplImpl();
 
   State GetState() override;
 
@@ -35,21 +34,7 @@ class RaplImpl : public Rapl {
 
   Timestamp previous_timestamp_;
   std::vector<RaplMeasurement> previous_measurements_;
-
-  std::vector<std::string> packages_names_;
-  std::vector<os::file_descriptor> energy_fds_;
-
-  // The numbers in the rapl /energy_uj files range from zero up to a maximum
-  // specified in /max_energy_range_uj. This class reports monotonically
-  // increasing values starting with zero for the first measurement. Therefore,
-  // for every counter, the result is computed as follows:
-  // now = <read value>
-  // offset += now < previous ? max : 0
-  // result += offset + now - first
-  std::vector<std::size_t> uj_max_;
-  std::vector<std::size_t> uj_first_;
-  std::vector<std::size_t> uj_previous_;
-  std::vector<std::size_t> uj_offset_;
+  std::vector<RaplCounter> rapl_counters_;
 
   // Mutex used to guard GetMeasurements()
   std::mutex mutex_;