Skip to content
Snippets Groups Projects
Commit b1caa9c5 authored by Bram Veenboer's avatar Bram Veenboer
Browse files

Refactoring of the Rapl backend

parent 54034bf6
Branches
No related tags found
1 merge request!105Refactoring of the Rapl backend
Pipeline #103993 passed
...@@ -45,7 +45,6 @@ test-das6: ...@@ -45,7 +45,6 @@ test-das6:
- das6-gpu - das6-gpu
before_script: before_script:
- module load spack/12.2.0 - module load spack/12.2.0
- module av
- module load python/3.11.6 - module load python/3.11.6
- module load py-pybind11/2.11.0 - module load py-pybind11/2.11.0
- module load cuda/12.2.1 - module load cuda/12.2.1
......
#ifndef PMT_CPU_H_
#define PMT_CPU_H_
#include <fstream>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <unistd.h>
namespace pmt::common {
/// Returns the ids of the CPUs the calling thread is allowed to run on,
/// in ascending order.
///
/// The affinity mask is queried with sched_getaffinity(0, ...), i.e. for the
/// calling thread.
///
/// @throws std::system_error if sched_getaffinity fails.
inline std::vector<int> get_active_cpus() {
  cpu_set_t cpu_set;
  CPU_ZERO(&cpu_set);
  if (sched_getaffinity(0, sizeof(cpu_set), &cpu_set) == -1) {
    throw std::system_error(errno, std::generic_category(),
                            "sched_getaffinity");
  }

  std::vector<int> active_cpus;
  active_cpus.reserve(static_cast<std::size_t>(CPU_COUNT(&cpu_set)));
  // Scan the whole mask instead of stopping at the number of online CPUs:
  // CPU ids are not necessarily contiguous (e.g. when a CPU is offline), so
  // the online *count* is not an upper bound for the highest CPU *id*.
  for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
    if (CPU_ISSET(cpu, &cpu_set)) {
      active_cpus.push_back(cpu);
    }
  }
  return active_cpus;
}
/// Returns the set of physical package ids of the given CPUs.
///
/// For every entry of `active_cpus` the id is read from
/// /sys/devices/system/cpu/cpu<N>/topology/physical_package_id.
///
/// @param active_cpus CPU ids to look up.
/// @return the distinct package ids, ordered ascending.
/// @throws std::runtime_error if a topology file cannot be opened or does
///         not contain an integer.
inline std::set<int> get_active_packages(const std::vector<int>& active_cpus) {
  std::set<int> active_packages;
  for (const int cpu : active_cpus) {
    const std::string path = "/sys/devices/system/cpu/cpu" +
                             std::to_string(cpu) +
                             "/topology/physical_package_id";
    std::ifstream file(path);
    if (!file.is_open()) {
      throw std::runtime_error("Failed to open file: " + path);
    }
    int socket_id = -1;
    // Do not insert a bogus id when the file content cannot be parsed.
    if (!(file >> socket_id)) {
      throw std::runtime_error("Failed to read package id from file: " + path);
    }
    active_packages.insert(socket_id);
  }
  return active_packages;
}
} // namespace pmt::common
#endif // PMT_CPU_H_
\ No newline at end of file
#ifndef PMT_COMMON_IO_H_
#define PMT_COMMON_IO_H_
#include <memory>
#include <span>
#include <sstream>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
namespace pmt {
namespace os {
/// Move-only owner of a POSIX file descriptor.
///
/// The wrapped descriptor is closed when the owner is destroyed. A negative
/// value means "no descriptor" and is what a moved-from object holds.
class file_descriptor {
 public:
  /// Takes ownership of `fd`.
  inline file_descriptor(int fd) : fd_(fd) {}
  inline file_descriptor(const file_descriptor&) = delete;
  inline file_descriptor& operator=(const file_descriptor&) = delete;
  /// Transfers ownership from `other`, which is left holding -1.
  inline file_descriptor(file_descriptor&& other) noexcept : fd_(other.fd_) {
    other.fd_ = -1;
  }
  inline ~file_descriptor() {
    // 0 is a valid descriptor (it is handed out when stdin is closed), so
    // only negative values are treated as "nothing to close".
    if (fd_ >= 0) {
      (void)::close(fd_);
    }
  }
  /// Returns the raw descriptor without giving up ownership.
  inline int fd() const { return fd_; }

 private:
  int fd_;
};
/// Returns the current value of errno and clears errno to 0.
inline int take_and_reset_errno() {
  const int saved_errno = errno;
  errno = 0;
  return saved_errno;
}
/// Opens the directory `filename` read-only and returns an owning descriptor.
///
/// The open is retried for as long as it fails with EINTR.
///
/// @throws std::system_error carrying the errno value for any other failure.
inline file_descriptor opendir(const std::string& filename) {
  for (;;) {
    const int handle = ::open(filename.c_str(), O_RDONLY | O_DIRECTORY);
    if (handle >= 0) {
      return file_descriptor(handle);
    }

    const int saved_errno = take_and_reset_errno();
    if (saved_errno != EINTR) {
      throw std::system_error(
          std::make_error_code(static_cast<std::errc>(saved_errno)),
          "opendir fail for '" + filename + "'");
    }
    // Interrupted system call: try again.
  }
}
/// Opens `filename` read-only, resolved relative to the directory descriptor
/// `dirfd`, and returns an owning descriptor.
///
/// The open is retried for as long as it fails with EINTR.
///
/// @throws std::system_error carrying the errno value for any other failure.
inline file_descriptor openat(int dirfd, const std::string& filename) {
  for (;;) {
    const int handle = ::openat(dirfd, filename.c_str(), O_RDONLY);
    if (handle >= 0) {
      return file_descriptor(handle);
    }

    const int saved_errno = take_and_reset_errno();
    if (saved_errno != EINTR) {
      throw std::system_error(
          std::make_error_code(static_cast<std::errc>(saved_errno)),
          "open fail for '" + filename + "'");
    }
    // Interrupted system call: try again.
  }
}
/// Reads up to `byte.size()` bytes from `fd` at position `offset` into `byte`.
///
/// The read is retried for as long as it fails with EINTR.
///
/// @param fd     descriptor to read from.
/// @param byte   destination buffer; its size is the maximum read length.
/// @param offset file offset to read at.
/// @return the number of bytes actually read (may be less than requested).
/// @throws std::system_error carrying the errno value for any other failure.
inline size_t pread(int fd, const std::span<std::byte>& byte,
                    std::int64_t offset) {
  while (true) {
    const ::ssize_t data_read =
        ::pread(fd, static_cast<void*>(byte.data()), byte.size(),
                static_cast<::off_t>(offset));
    if (data_read >= 0) {
      return static_cast<std::size_t>(data_read);
    }

    // errno has been reset to 0 by take_and_reset_errno(), so the saved
    // copy is the only place the failure reason can be inspected.
    const int errcode = take_and_reset_errno();
    if (errcode == EINTR) {
      // interrupted system call
      continue;
    }
    throw std::system_error(
        std::make_error_code(static_cast<std::errc>(errcode)), "<pread>");
  }
}
} // namespace os
} // end namespace pmt
#endif
...@@ -9,6 +9,7 @@ add_sensor( ...@@ -9,6 +9,7 @@ add_sensor(
Rapl.h Rapl.h
SRC_FILES SRC_FILES
Rapl.cpp Rapl.cpp
RaplCounter.cpp
RaplImpl.cpp RaplImpl.cpp
LINK_LIBRARIES LINK_LIBRARIES
${LINK_LIBRARIES}) ${LINK_LIBRARIES})
#include <cassert>
#include <cerrno>
#include <fstream>
#include <string>
#include <system_error>
#include "RaplCounter.h"
namespace pmt::rapl {
namespace {
// Throws a std::system_error describing a counter file that could not be
// used. Falls back to EIO when the stream failure left errno untouched.
[[noreturn]] void ThrowFileError(const char* action, const std::string& path) {
  const int errcode = errno != 0 ? errno : EIO;
  throw std::system_error(errcode, std::generic_category(),
                          std::string(action) + " '" + path + "'");
}
}  // namespace

// Opens the counter located in `directory`: reads its name from "name", its
// wrap-around limit from "max_energy_range_uj", keeps "energy_uj" open for
// later reads and records the first reading as the zero point.
//
// Throws std::system_error when one of the files cannot be opened or parsed,
// so that failures are reported in release builds as well (an assert would be
// compiled out under NDEBUG).
RaplCounter::RaplCounter(const std::string& directory) {
  const std::string path_name = directory + "/name";
  errno = 0;
  std::ifstream ifstream_name(path_name);
  if (!ifstream_name.is_open()) {
    ThrowFileError("Failed to open", path_name);
  }
  if (!(ifstream_name >> name_)) {
    ThrowFileError("Failed to read", path_name);
  }

  const std::string path_max_energy = directory + "/max_energy_range_uj";
  errno = 0;
  std::ifstream ifstream_max_energy_range_uj(path_max_energy);
  if (!ifstream_max_energy_range_uj.is_open()) {
    ThrowFileError("Failed to open", path_max_energy);
  }
  if (!(ifstream_max_energy_range_uj >> max_energy_range_uj_)) {
    ThrowFileError("Failed to read", path_max_energy);
  }

  const std::string path_energy = directory + "/energy_uj";
  errno = 0;
  ifstream_energy_uj_ = std::ifstream(path_energy);
  if (!ifstream_energy_uj_.is_open()) {
    ThrowFileError("Failed to open", path_energy);
  }

  // With first/previous/offset still zero, Read() returns the raw counter
  // value, which becomes the reference point for all later readings.
  const std::size_t energy_uj = Read();
  energy_uj_first_ = energy_uj;
  energy_uj_previous_ = energy_uj;
  energy_uj_offset_ = 0;
}
// Reads the current counter value and returns it relative to the first
// reading, compensating for wrap-around of the underlying counter:
// every time the raw value drops below the previous raw value, the maximum
// range is added to the accumulated offset.
std::size_t RaplCounter::Read() {
  assert(ifstream_energy_uj_.is_open());

  std::size_t energy_uj = 0;
  if (!(ifstream_energy_uj_ >> energy_uj)) {
    // A failed extraction stores 0, which would be mistaken for a
    // wrap-around; reuse the last good reading instead.
    energy_uj = energy_uj_previous_;
  }
  // Drop any eof/fail state and rewind so that the next call reads the
  // value from the start of the file again.
  ifstream_energy_uj_.clear();
  ifstream_energy_uj_.seekg(0);

  if (energy_uj < energy_uj_previous_) {
    energy_uj_offset_ += max_energy_range_uj_;
  }
  // Remember this reading; without it every read after a wrap-around would
  // be compared against a stale value and add the range again.
  energy_uj_previous_ = energy_uj;

  return energy_uj_offset_ + energy_uj - energy_uj_first_;
}
} // end namespace pmt::rapl
\ No newline at end of file
#ifndef PMT_RAPLCOUNTER_H_
#define PMT_RAPLCOUNTER_H_
#include <charconv>
#include <cstddef>
#include <fstream>
#include <string>
namespace pmt::rapl {
// Reader for a single RAPL energy counter.
class RaplCounter {
 public:
  // `directory` is the counter directory; it is expected to contain the
  // files "name", "max_energy_range_uj" and "energy_uj".
  RaplCounter(const std::string& directory);

  // Returns the counter name as read from the "name" file.
  const std::string& GetName() const { return name_; }

  // The numbers in the rapl /energy_uj files range from zero up to a maximum
  // specified in /max_energy_range_uj. This class reports monotonically
  // increasing values starting with zero for the first measurement. Therefore,
  // the result is computed as follows:
  //   now = <read value>
  //   offset += now < previous ? max : 0
  //   result = offset + now - first
  std::size_t Read();

 private:
  std::ifstream ifstream_energy_uj_;
  std::string name_;
  std::size_t max_energy_range_uj_ = 0;
  std::size_t energy_uj_first_ = 0;
  std::size_t energy_uj_previous_ = 0;
  std::size_t energy_uj_offset_ = 0;
};
} // namespace pmt::rapl
#endif // PMT_RAPLCOUNTER_H_
\ No newline at end of file
#include <algorithm>
#include <array>
#include <cassert> #include <cassert>
#include <cerrno> #include <cerrno>
#include <iostream> #include <cstring>
#include <charconv>
#include <filesystem> #include <filesystem>
#include <iostream>
#include <iterator> #include <iterator>
#include <memory> #include <memory>
#include <cstring>
#include <regex> #include <regex>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <algorithm>
#include <string> #include <string>
#include <system_error>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <filesystem>
#include <system_error>
namespace fs = std::filesystem; namespace fs = std::filesystem;
#include <sched.h>
#include <unistd.h>
#include "RaplImpl.h" #include "RaplImpl.h"
#include "RaplCounter.h"
#include "common/cpu.h"
/* /*
* RAPL (Running Average Power Limit) is an API provided by Intel for power * RAPL (Running Average Power Limit) is an API provided by Intel for power
...@@ -47,54 +46,36 @@ RaplImpl::RaplImpl() { ...@@ -47,54 +46,36 @@ RaplImpl::RaplImpl() {
previous_measurements_ = GetMeasurements(); previous_measurements_ = GetMeasurements();
} }
// Acquires mutex_ for the duration of the (otherwise empty) destructor body.
// NOTE(review): presumably this waits for a GetMeasurements() call that still
// holds the mutex before the members are destroyed — confirm.
RaplImpl::~RaplImpl() { std::lock_guard<std::mutex> lock(mutex_); }
std::string GetBasename(int package_id) { std::string GetBasename(int package_id) {
std::stringstream basename; std::stringstream basename;
basename << "/sys/class/powercap/intel-rapl:" << package_id; basename << "/sys/class/powercap/intel-rapl:" << package_id;
return basename.str(); return basename.str();
} }
// Reads at most 256 bytes from the start of `fd` and returns them with all
// whitespace characters (including newlines) removed.
std::string read_string(int fd) {
  std::array<std::byte, 256> buffer;
  const std::size_t read_size = os::pread(fd, buffer, 0);

  std::string result;
  result.reserve(read_size);
  for (std::size_t i = 0; i < read_size; ++i) {
    // std::isspace requires a value representable as unsigned char; passing
    // a negative char is undefined behaviour.
    const unsigned char c = static_cast<unsigned char>(buffer[i]);
    if (!std::isspace(c)) {
      result.push_back(static_cast<char>(c));
    }
  }
  return result;
}
// Reads at most 256 bytes from the start of `fd` and parses the leading
// characters as a value of type `Numeric`.
//
// Throws std::runtime_error when the content does not start with a number
// or the number does not fit into `Numeric`.
template <typename Numeric>
Numeric read_numerical_value(int fd) {
  std::array<std::byte, 256> buffer;
  const std::size_t read_size = os::pread(fd, buffer, 0);

  const char* const first =
      static_cast<const char*>(static_cast<void*>(buffer.data()));
  const char* const last = first + read_size;

  Numeric value = 0;
  const std::from_chars_result res = std::from_chars(first, last, value);
  // from_chars reports success with a value-initialised errc; anything else
  // (invalid_argument or result_out_of_range) is a parse failure.
  if (res.ec != std::errc()) {
    throw std::runtime_error("Unable to parse file to numerical value");
  }
  return value;
}
void RaplImpl::Init() { void RaplImpl::Init() {
const fs::path basename = "/sys/class/powercap"; const fs::path basename = "/sys/class/powercap";
const std::regex pattern("intel-rapl(:\\d+)+"); const std::regex pattern_directory("intel-rapl(:\\d+)+");
const std::regex pattern_package_id(":([0-9]+)");
std::vector<std::string> rapl_dirs; std::vector<std::string> rapl_dirs;
const std::set active_sockets =
common::get_active_packages(common::get_active_cpus());
for (const auto& entry : fs::directory_iterator(basename)) { for (const auto& entry : fs::directory_iterator(basename)) {
const std::string directory = entry.path().filename().string(); const std::string directory = entry.path().filename().string();
if (std::regex_match(directory, pattern)) { std::smatch match;
if (std::regex_match(directory, pattern_directory)) {
if (fs::exists(entry.path() / "energy_uj")) { if (fs::exists(entry.path() / "energy_uj")) {
rapl_dirs.push_back(entry.path()); if (std::regex_search(directory, match, pattern_package_id)) {
const int package_id = std::stoi(match[1].str());
if (active_sockets.find(package_id) != active_sockets.end()) {
rapl_dirs.emplace_back(entry.path());
}
}
} }
} }
} }
...@@ -108,23 +89,7 @@ void RaplImpl::Init() { ...@@ -108,23 +89,7 @@ void RaplImpl::Init() {
for (const auto& rapl_dir : rapl_dirs) { for (const auto& rapl_dir : rapl_dirs) {
try { try {
const os::file_descriptor fd_rapl_dir = os::opendir(rapl_dir); rapl_counters_.emplace_back(rapl_dir);
const os::file_descriptor name_fd = os::openat(fd_rapl_dir.fd(), "name");
const std::string package_id = read_string(name_fd.fd());
const os::file_descriptor max_energy_fd =
os::openat(fd_rapl_dir.fd(), "max_energy_range_uj");
std::size_t max_energy_range_uj =
read_numerical_value<std::size_t>(max_energy_fd.fd());
os::file_descriptor energy_uj_fd =
os::openat(fd_rapl_dir.fd(), "energy_uj");
std::ignore = read_numerical_value<std::size_t>(energy_uj_fd.fd());
packages_names_.push_back(package_id);
uj_max_.push_back(max_energy_range_uj);
energy_fds_.emplace_back(std::move(energy_uj_fd));
} catch (std::system_error& e) { } catch (std::system_error& e) {
std::stringstream message; std::stringstream message;
message << "OS error: " << e.what(); message << "OS error: " << e.what();
...@@ -135,50 +100,18 @@ void RaplImpl::Init() { ...@@ -135,50 +100,18 @@ void RaplImpl::Init() {
} }
} }
} }
// Initialize state variables
const std::size_t n = uj_max_.size();
uj_first_.resize(n);
uj_previous_.resize(n);
uj_offset_.resize(n);
std::vector<RaplMeasurement> measurements = GetMeasurements();
for (std::size_t i = 0; i < n; i++) {
uj_first_[i] = measurements[i].joules;
uj_previous_[i] = uj_first_[i];
uj_offset_[i] = 0;
}
} }
std::vector<RaplMeasurement> RaplImpl::GetMeasurements() { std::vector<RaplMeasurement> RaplImpl::GetMeasurements() {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
std::vector<RaplMeasurement> measurements; std::vector<RaplMeasurement> measurements;
const std::size_t n = packages_names_.size(); for (auto& counter : rapl_counters_) {
assert(n == energy_fds_.size()); measurements.emplace_back(counter.GetName(), counter.Read());
assert(n == uj_max_.size());
// Take all measurements
auto packages_name_it = packages_names_.begin();
for (const auto& energy_fd : energy_fds_) {
std::size_t measurement;
const std::size_t energy_value =
read_numerical_value<std::size_t>(energy_fd.fd());
measurements.emplace_back(RaplMeasurement{*packages_name_it, energy_value});
packages_name_it++;
}
for (std::size_t i = 0; i < measurements.size(); i++) {
const std::size_t uj_now = measurements[i].joules;
if (uj_now < uj_previous_[i]) {
uj_offset_[i] += uj_max_[i];
}
uj_previous_[i] = uj_now;
measurements[i].joules = uj_offset_[i] + uj_now - uj_first_[i];
} }
return measurements; return measurements;
} // end Rapl::GetMeasurement }
State RaplImpl::GetState() { State RaplImpl::GetState() {
std::vector<RaplMeasurement> measurements = GetMeasurements(); std::vector<RaplMeasurement> measurements = GetMeasurements();
...@@ -190,17 +123,17 @@ State RaplImpl::GetState() { ...@@ -190,17 +123,17 @@ State RaplImpl::GetState() {
for (std::size_t i = 0; i < measurements.size(); i++) { for (std::size_t i = 0; i < measurements.size(); i++) {
const std::string name = measurements[i].name; const std::string name = measurements[i].name;
const std::size_t joules_now = measurements[i].joules; const std::size_t ujoules_now = measurements[i].ujoules;
const std::size_t joules_previous = previous_measurements_[i].joules; const std::size_t ujoules_previous = previous_measurements_[i].ujoules;
const double duration = seconds(previous_timestamp_, state.timestamp_); const double duration = seconds(previous_timestamp_, state.timestamp_);
const float joules_diff = (joules_now - joules_previous) * 1e-6; const float joules_diff = (ujoules_now - ujoules_previous) * 1e-6;
const float watt = joules_diff / duration; const float watt = joules_diff / duration;
state.name_[i + 1] = name; state.name_[i + 1] = name;
state.joules_[i + 1] = joules_now * 1e-6; state.joules_[i + 1] = ujoules_now * 1e-6;
state.watt_[i + 1] = watt; state.watt_[i + 1] = watt;
if (name.find("package") != std::string::npos) { if (name.find("package") != std::string::npos) {
state.joules_[0] += joules_now * 1e-6; state.joules_[0] += ujoules_now * 1e-6;
state.watt_[0] += watt; state.watt_[0] += watt;
} }
} }
......
...@@ -7,21 +7,20 @@ ...@@ -7,21 +7,20 @@
#include <vector> #include <vector>
#include "Rapl.h" #include "Rapl.h"
#include "RaplCounter.h"
#include "common/PMT.h" #include "common/PMT.h"
#include "common/io.h"
namespace pmt::rapl { namespace pmt::rapl {
const int kKeepAliveInterval = 10; // call Measure() roughly every nth update
struct RaplMeasurement { struct RaplMeasurement {
std::string name; std::string name;
std::size_t joules; std::size_t ujoules;
}; };
class RaplImpl : public Rapl { class RaplImpl : public Rapl {
public: public:
RaplImpl(); RaplImpl();
~RaplImpl();
State GetState() override; State GetState() override;
...@@ -35,21 +34,7 @@ class RaplImpl : public Rapl { ...@@ -35,21 +34,7 @@ class RaplImpl : public Rapl {
Timestamp previous_timestamp_; Timestamp previous_timestamp_;
std::vector<RaplMeasurement> previous_measurements_; std::vector<RaplMeasurement> previous_measurements_;
std::vector<RaplCounter> rapl_counters_;
std::vector<std::string> packages_names_;
std::vector<os::file_descriptor> energy_fds_;
// The numbers in the rapl /energy_uj files range from zero up to a maximum
// specified in /max_energy_range_uj. This class reports monotonically
// increasing values starting with zero for the first measurement. Therefore,
// for every counter, the result is computed as follows:
// now = <read value>
// offset += now < previous ? max : 0
// result += offset + now - first
std::vector<std::size_t> uj_max_;
std::vector<std::size_t> uj_first_;
std::vector<std::size_t> uj_previous_;
std::vector<std::size_t> uj_offset_;
// Mutex used to guard GetMeasurements() // Mutex used to guard GetMeasurements()
std::mutex mutex_; std::mutex mutex_;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment