Skip to content
Snippets Groups Projects
Commit b1caa9c5 authored by Bram Veenboer's avatar Bram Veenboer
Browse files

Refactoring of the Rapl backend

parent 54034bf6
No related branches found
No related tags found
1 merge request!105Refactoring of the Rapl backend
Pipeline #103993 passed
...@@ -45,7 +45,6 @@ test-das6: ...@@ -45,7 +45,6 @@ test-das6:
- das6-gpu - das6-gpu
before_script: before_script:
- module load spack/12.2.0 - module load spack/12.2.0
- module av
- module load python/3.11.6 - module load python/3.11.6
- module load py-pybind11/2.11.0 - module load py-pybind11/2.11.0
- module load cuda/12.2.1 - module load cuda/12.2.1
......
#ifndef PMT_CPU_H_
#define PMT_CPU_H_
#include <cerrno>
#include <fstream>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error>
#include <vector>

#include <sched.h>
#include <unistd.h>
namespace pmt::common {
/// Returns the ids of the logical CPUs in the calling thread's affinity mask,
/// in ascending order.
///
/// Declared `inline` because this definition lives in a header; without it,
/// including the header from two translation units violates the ODR.
///
/// @return CPU ids whose bit is set in the mask from sched_getaffinity(0, ...).
/// @throws std::system_error carrying errno when sched_getaffinity fails.
inline std::vector<int> get_active_cpus() {
  cpu_set_t cpu_set;
  CPU_ZERO(&cpu_set);
  if (sched_getaffinity(0, sizeof(cpu_set), &cpu_set) == -1) {
    throw std::system_error(errno, std::generic_category(),
                            "sched_getaffinity");
  }

  std::vector<int> active_cpus;
  active_cpus.reserve(static_cast<std::size_t>(CPU_COUNT(&cpu_set)));
  // Walk every slot of the mask rather than stopping at the number of online
  // CPUs: CPU ids are not guaranteed to be contiguous, so an id can be larger
  // than the online count and would otherwise be silently dropped.
  for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
    if (CPU_ISSET(cpu, &cpu_set)) {
      active_cpus.push_back(cpu);
    }
  }
  return active_cpus;
}
/// Maps a list of logical CPU ids to the set of physical packages (sockets)
/// they belong to.
///
/// For every id the value is read from
/// /sys/devices/system/cpu/cpu<id>/topology/physical_package_id.
/// Declared `inline` because this definition lives in a header.
///
/// @param active_cpus logical CPU ids to look up; may be empty.
/// @return the distinct package ids found (empty for empty input).
/// @throws std::runtime_error when a topology file cannot be opened or does
///         not contain an integer.
inline std::set<int> get_active_packages(const std::vector<int>& active_cpus) {
  std::set<int> active_packages;
  for (const int cpu : active_cpus) {
    const std::string path = "/sys/devices/system/cpu/cpu" +
                             std::to_string(cpu) +
                             "/topology/physical_package_id";
    std::ifstream file(path);
    if (!file.is_open()) {
      throw std::runtime_error("Failed to open file: " + path);
    }
    int socket_id = 0;
    // A failed extraction must not be mistaken for a valid package id.
    if (!(file >> socket_id)) {
      throw std::runtime_error("Failed to parse file: " + path);
    }
    active_packages.insert(socket_id);
  }
  return active_packages;
}
} // namespace pmt::common
#endif // PMT_CPU_H_
\ No newline at end of file
#ifndef PMT_COMMON_IO_H_
#define PMT_COMMON_IO_H_
#include <memory>
#include <span>
#include <sstream>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
namespace pmt {
namespace os {
/// Move-only owner of a POSIX file descriptor.
///
/// The wrapped descriptor is closed when the owner is destroyed or
/// move-assigned over. A negative value means "owns nothing".
class file_descriptor {
 public:
  /// Takes ownership of `fd`; pass a negative value for an empty owner.
  file_descriptor(int fd) : fd_(fd) {}

  file_descriptor(const file_descriptor&) = delete;
  file_descriptor& operator=(const file_descriptor&) = delete;

  /// Transfers ownership; `other` is left empty (fd() == -1).
  file_descriptor(file_descriptor&& other) noexcept : fd_(other.fd_) {
    other.fd_ = -1;
  }

  /// Closes the currently owned descriptor (if any), then takes over the one
  /// held by `other`, leaving `other` empty.
  file_descriptor& operator=(file_descriptor&& other) noexcept {
    if (this != &other) {
      reset();
      fd_ = other.fd_;
      other.fd_ = -1;
    }
    return *this;
  }

  ~file_descriptor() { reset(); }

  /// Returns the raw descriptor without giving up ownership.
  int fd() const { return fd_; }

 private:
  // Descriptor 0 is a valid descriptor, so only negative values are treated
  // as "nothing to close".
  void reset() noexcept {
    if (fd_ >= 0) {
      (void)::close(fd_);
      fd_ = -1;
    }
  }

  int fd_;
};
/// Returns the current value of errno and sets errno back to zero.
inline int take_and_reset_errno() {
  const int saved = errno;
  errno = 0;
  return saved;
}
/// Opens `filename` read-only as a directory (O_RDONLY | O_DIRECTORY).
///
/// The open is retried for as long as it fails with EINTR.
///
/// @param filename path of the directory to open.
/// @return an owning file_descriptor for the opened directory.
/// @throws std::system_error with the failing errno for any other error.
inline file_descriptor opendir(const std::string& filename) {
  for (;;) {
    const int handle = ::open(filename.c_str(), O_RDONLY | O_DIRECTORY);
    if (handle >= 0) {
      return file_descriptor(handle);
    }

    const int failure = take_and_reset_errno();
    if (failure != EINTR) {
      std::stringstream message;
      message << "opendir fail for '" << filename << "'";
      throw std::system_error(
          std::make_error_code(static_cast<std::errc>(failure)),
          message.str());
    }
    // Interrupted system call: go around and try again.
  }
}
/// Opens `filename` read-only, resolved relative to the directory descriptor
/// `dirfd`.
///
/// The open is retried for as long as it fails with EINTR.
///
/// @param dirfd    descriptor of the directory that `filename` is relative to.
/// @param filename name of the file to open.
/// @return an owning file_descriptor for the opened file.
/// @throws std::system_error with the failing errno for any other error.
inline file_descriptor openat(int dirfd, const std::string& filename) {
  for (;;) {
    const int handle = ::openat(dirfd, filename.c_str(), O_RDONLY);
    if (handle >= 0) {
      return file_descriptor(handle);
    }

    const int failure = take_and_reset_errno();
    if (failure != EINTR) {
      std::stringstream message;
      message << "open fail for '" << filename << "'";
      throw std::system_error(
          std::make_error_code(static_cast<std::errc>(failure)),
          message.str());
    }
    // Interrupted system call: go around and try again.
  }
}
/// Reads up to `byte.size()` bytes from `fd` at absolute position `offset`
/// into `byte`, without moving the descriptor's file position.
///
/// The read is retried for as long as it fails with EINTR.
///
/// @param fd     descriptor to read from.
/// @param byte   destination buffer; its size is the maximum read length.
/// @param offset absolute file offset to read at.
/// @return the number of bytes actually read (0 at end of file).
/// @throws std::system_error with the failing errno for any other error.
inline size_t pread(int fd, const std::span<std::byte>& byte,
                    std::int64_t offset) {
  while (true) {
    const ::ssize_t data_read =
        ::pread(fd, static_cast<void*>(byte.data()), byte.size(),
                static_cast<::off_t>(offset));
    if (data_read >= 0) {
      return static_cast<std::size_t>(data_read);
    }

    const int errcode = take_and_reset_errno();
    // Compare the saved code: errno itself has just been reset to zero, so
    // testing errno here would never detect an interrupted call.
    if (errcode == EINTR) {
      continue;
    }
    throw std::system_error(
        std::make_error_code(static_cast<std::errc>(errcode)), "<pread>");
  }
}
} // namespace os
} // end namespace pmt
#endif
...@@ -9,6 +9,7 @@ add_sensor( ...@@ -9,6 +9,7 @@ add_sensor(
Rapl.h Rapl.h
SRC_FILES SRC_FILES
Rapl.cpp Rapl.cpp
RaplCounter.cpp
RaplImpl.cpp RaplImpl.cpp
LINK_LIBRARIES LINK_LIBRARIES
${LINK_LIBRARIES}) ${LINK_LIBRARIES})
#include <cassert>
#include <fstream>
#include "RaplCounter.h"
namespace pmt::rapl {
// Sets up a counter for one powercap directory.
//
// Reads <directory>/name and <directory>/max_energy_range_uj once, keeps
// <directory>/energy_uj open for repeated sampling and takes an initial sample
// that becomes the zero point of all later Read() results.
//
// Throws std::ios_base::failure (derived from std::system_error) when one of
// the files cannot be opened or parsed. Exceptions are used instead of assert
// so that the checks are still present in builds that define NDEBUG.
RaplCounter::RaplCounter(const std::string& directory) {
  const std::string name_path = directory + "/name";
  std::ifstream ifstream_name(name_path);
  // Extraction fails on an unopened stream too, so this covers both cases.
  if (!(ifstream_name >> name_)) {
    throw std::ios_base::failure("Failed to read file: " + name_path);
  }

  const std::string max_energy_path = directory + "/max_energy_range_uj";
  std::ifstream ifstream_max_energy_range_uj(max_energy_path);
  if (!(ifstream_max_energy_range_uj >> max_energy_range_uj_)) {
    throw std::ios_base::failure("Failed to read file: " + max_energy_path);
  }

  const std::string energy_path = directory + "/energy_uj";
  ifstream_energy_uj_.open(energy_path);
  if (!ifstream_energy_uj_.is_open()) {
    throw std::ios_base::failure("Failed to open file: " + energy_path);
  }

  // first/previous/offset are still zero here, so Read() returns the raw
  // counter value, which then becomes the baseline.
  const std::size_t energy_uj = Read();
  energy_uj_first_ = energy_uj;
  energy_uj_previous_ = energy_uj;
  energy_uj_offset_ = 0;
}
// Samples energy_uj and returns the energy in microjoules accumulated since
// the first sample, corrected for counter wrap-around.
//
// The raw counter wraps at max_energy_range_uj_. A raw value lower than the
// one seen by the preceding call is taken as a wrap and adds one full range
// to the running offset.
std::size_t RaplCounter::Read() {
  assert(ifstream_energy_uj_.is_open());

  std::size_t energy_uj = 0;
  if (!(ifstream_energy_uj_ >> energy_uj)) {
    // Best effort: a sample that cannot be parsed is treated as "no change"
    // instead of being mistaken for a wrap-around to zero.
    energy_uj = energy_uj_previous_;
  }
  // Drop any error flags before rewinding; seekg() is a no-op on a failed
  // stream, which would make every later read fail as well.
  ifstream_energy_uj_.clear();
  ifstream_energy_uj_.seekg(0);

  if (energy_uj < energy_uj_previous_) {
    energy_uj_offset_ += max_energy_range_uj_;
  }
  // Remember this sample so the next call compares against it. Without this
  // the comparison is always against the very first sample, and one wrap is
  // counted again on every read until the counter passes that value.
  energy_uj_previous_ = energy_uj;

  return energy_uj_offset_ + energy_uj - energy_uj_first_;
}
} // end namespace pmt::rapl
\ No newline at end of file
#ifndef PMT_RAPLCOUNTER_H_
#define PMT_RAPLCOUNTER_H_
#include <charconv>
#include <cstddef>
#include <fstream>
#include <string>
namespace pmt::rapl {
// One energy counter backed by a powercap directory that contains the files
// name, max_energy_range_uj and energy_uj.
class RaplCounter {
 public:
  // Opens the counter files found in `directory`.
  RaplCounter(const std::string& directory);

  // Name of the counter as read from <directory>/name.
  const std::string& GetName() const { return name_; }

  // The numbers in the rapl /energy_uj files range from zero up to a maximum
  // specified in /max_energy_range_uj. This class reports monotonically
  // increasing values starting with zero for the first measurement. Therefore,
  // the result is computed as follows:
  //   now = <read value>
  //   offset += now < previous ? max : 0
  //   result = offset + now - first
  std::size_t Read();

 private:
  // Kept open between calls so that every Read() only has to rewind.
  std::ifstream ifstream_energy_uj_;
  std::string name_;
  // Zero-initialised so the member never holds an indeterminate value.
  std::size_t max_energy_range_uj_ = 0;
  std::size_t energy_uj_first_ = 0;
  std::size_t energy_uj_previous_ = 0;
  std::size_t energy_uj_offset_ = 0;
};
} // namespace pmt::rapl
#endif // PMT_RAPLCOUNTER_H_
\ No newline at end of file
#include <algorithm>
#include <array>
#include <cassert> #include <cassert>
#include <cerrno> #include <cerrno>
#include <iostream> #include <cstring>
#include <charconv>
#include <filesystem> #include <filesystem>
#include <iostream>
#include <iterator> #include <iterator>
#include <memory> #include <memory>
#include <cstring>
#include <regex> #include <regex>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <algorithm>
#include <string> #include <string>
#include <system_error>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <filesystem>
#include <system_error>
namespace fs = std::filesystem; namespace fs = std::filesystem;
#include <sched.h>
#include <unistd.h>
#include "RaplImpl.h" #include "RaplImpl.h"
#include "RaplCounter.h"
#include "common/cpu.h"
/* /*
* RAPL (Running Average Power Limit) is an API provided by Intel for power * RAPL (Running Average Power Limit) is an API provided by Intel for power
...@@ -47,54 +46,36 @@ RaplImpl::RaplImpl() { ...@@ -47,54 +46,36 @@ RaplImpl::RaplImpl() {
previous_measurements_ = GetMeasurements(); previous_measurements_ = GetMeasurements();
} }
// Acquires mutex_ for the duration of the (otherwise empty) destructor body.
// NOTE(review): presumably this waits for a GetMeasurements() call that still
// holds the lock before the members are destroyed - confirm.
RaplImpl::~RaplImpl() { std::lock_guard<std::mutex> lock(mutex_); }
std::string GetBasename(int package_id) { std::string GetBasename(int package_id) {
std::stringstream basename; std::stringstream basename;
basename << "/sys/class/powercap/intel-rapl:" << package_id; basename << "/sys/class/powercap/intel-rapl:" << package_id;
return basename.str(); return basename.str();
} }
// Reads up to 256 bytes from the start of `fd` and returns them with every
// whitespace character (including the trailing newline) removed.
//
// Errors from os::pread propagate to the caller.
std::string read_string(int fd) {
  std::array<std::byte, 256> buffer;
  const std::size_t read_size = os::pread(fd, buffer, 0);

  std::string result;
  result.reserve(read_size);
  for (std::size_t i = 0; i < read_size; ++i) {
    // std::isspace requires a value representable as unsigned char; passing a
    // negative plain char is undefined behaviour.
    const auto c = static_cast<unsigned char>(buffer[i]);
    if (!std::isspace(c)) {
      result.push_back(static_cast<char>(c));
    }
  }
  return result;
}
// Reads up to 256 bytes from the start of `fd` and parses the leading
// characters as a value of type Numeric using std::from_chars.
//
// Throws std::runtime_error when the content does not start with a number or
// the number does not fit into Numeric. Errors from os::pread propagate.
template <typename Numeric>
Numeric read_numerical_value(int fd) {
  Numeric value = 0;
  std::array<std::byte, 256> buffer;
  const std::size_t read_size = os::pread(fd, buffer, 0);
  const char* const first =
      static_cast<const char*>(static_cast<void*>(buffer.data()));
  const std::from_chars_result res =
      std::from_chars(first, first + read_size, value);
  // from_chars reports success with a value-initialised errc. Any other code
  // (invalid_argument or result_out_of_range) is a failure; the two can never
  // hold at the same time, so they must not be combined with &&.
  if (res.ec != std::errc{}) {
    throw std::runtime_error("Unable to parse file to numerical value");
  }
  return value;
}
void RaplImpl::Init() { void RaplImpl::Init() {
const fs::path basename = "/sys/class/powercap"; const fs::path basename = "/sys/class/powercap";
const std::regex pattern("intel-rapl(:\\d+)+"); const std::regex pattern_directory("intel-rapl(:\\d+)+");
const std::regex pattern_package_id(":([0-9]+)");
std::vector<std::string> rapl_dirs; std::vector<std::string> rapl_dirs;
const std::set active_sockets =
common::get_active_packages(common::get_active_cpus());
for (const auto& entry : fs::directory_iterator(basename)) { for (const auto& entry : fs::directory_iterator(basename)) {
const std::string directory = entry.path().filename().string(); const std::string directory = entry.path().filename().string();
if (std::regex_match(directory, pattern)) { std::smatch match;
if (std::regex_match(directory, pattern_directory)) {
if (fs::exists(entry.path() / "energy_uj")) { if (fs::exists(entry.path() / "energy_uj")) {
rapl_dirs.push_back(entry.path()); if (std::regex_search(directory, match, pattern_package_id)) {
const int package_id = std::stoi(match[1].str());
if (active_sockets.find(package_id) != active_sockets.end()) {
rapl_dirs.emplace_back(entry.path());
}
}
} }
} }
} }
...@@ -108,23 +89,7 @@ void RaplImpl::Init() { ...@@ -108,23 +89,7 @@ void RaplImpl::Init() {
for (const auto& rapl_dir : rapl_dirs) { for (const auto& rapl_dir : rapl_dirs) {
try { try {
const os::file_descriptor fd_rapl_dir = os::opendir(rapl_dir); rapl_counters_.emplace_back(rapl_dir);
const os::file_descriptor name_fd = os::openat(fd_rapl_dir.fd(), "name");
const std::string package_id = read_string(name_fd.fd());
const os::file_descriptor max_energy_fd =
os::openat(fd_rapl_dir.fd(), "max_energy_range_uj");
std::size_t max_energy_range_uj =
read_numerical_value<std::size_t>(max_energy_fd.fd());
os::file_descriptor energy_uj_fd =
os::openat(fd_rapl_dir.fd(), "energy_uj");
std::ignore = read_numerical_value<std::size_t>(energy_uj_fd.fd());
packages_names_.push_back(package_id);
uj_max_.push_back(max_energy_range_uj);
energy_fds_.emplace_back(std::move(energy_uj_fd));
} catch (std::system_error& e) { } catch (std::system_error& e) {
std::stringstream message; std::stringstream message;
message << "OS error: " << e.what(); message << "OS error: " << e.what();
...@@ -135,50 +100,18 @@ void RaplImpl::Init() { ...@@ -135,50 +100,18 @@ void RaplImpl::Init() {
} }
} }
} }
// Initialize state variables
const std::size_t n = uj_max_.size();
uj_first_.resize(n);
uj_previous_.resize(n);
uj_offset_.resize(n);
std::vector<RaplMeasurement> measurements = GetMeasurements();
for (std::size_t i = 0; i < n; i++) {
uj_first_[i] = measurements[i].joules;
uj_previous_[i] = uj_first_[i];
uj_offset_[i] = 0;
}
} }
std::vector<RaplMeasurement> RaplImpl::GetMeasurements() { std::vector<RaplMeasurement> RaplImpl::GetMeasurements() {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
std::vector<RaplMeasurement> measurements; std::vector<RaplMeasurement> measurements;
const std::size_t n = packages_names_.size(); for (auto& counter : rapl_counters_) {
assert(n == energy_fds_.size()); measurements.emplace_back(counter.GetName(), counter.Read());
assert(n == uj_max_.size());
// Take all measurements
auto packages_name_it = packages_names_.begin();
for (const auto& energy_fd : energy_fds_) {
std::size_t measurement;
const std::size_t energy_value =
read_numerical_value<std::size_t>(energy_fd.fd());
measurements.emplace_back(RaplMeasurement{*packages_name_it, energy_value});
packages_name_it++;
}
for (std::size_t i = 0; i < measurements.size(); i++) {
const std::size_t uj_now = measurements[i].joules;
if (uj_now < uj_previous_[i]) {
uj_offset_[i] += uj_max_[i];
}
uj_previous_[i] = uj_now;
measurements[i].joules = uj_offset_[i] + uj_now - uj_first_[i];
} }
return measurements; return measurements;
} // end Rapl::GetMeasurement }
State RaplImpl::GetState() { State RaplImpl::GetState() {
std::vector<RaplMeasurement> measurements = GetMeasurements(); std::vector<RaplMeasurement> measurements = GetMeasurements();
...@@ -190,17 +123,17 @@ State RaplImpl::GetState() { ...@@ -190,17 +123,17 @@ State RaplImpl::GetState() {
for (std::size_t i = 0; i < measurements.size(); i++) { for (std::size_t i = 0; i < measurements.size(); i++) {
const std::string name = measurements[i].name; const std::string name = measurements[i].name;
const std::size_t joules_now = measurements[i].joules; const std::size_t ujoules_now = measurements[i].ujoules;
const std::size_t joules_previous = previous_measurements_[i].joules; const std::size_t ujoules_previous = previous_measurements_[i].ujoules;
const double duration = seconds(previous_timestamp_, state.timestamp_); const double duration = seconds(previous_timestamp_, state.timestamp_);
const float joules_diff = (joules_now - joules_previous) * 1e-6; const float joules_diff = (ujoules_now - ujoules_previous) * 1e-6;
const float watt = joules_diff / duration; const float watt = joules_diff / duration;
state.name_[i + 1] = name; state.name_[i + 1] = name;
state.joules_[i + 1] = joules_now * 1e-6; state.joules_[i + 1] = ujoules_now * 1e-6;
state.watt_[i + 1] = watt; state.watt_[i + 1] = watt;
if (name.find("package") != std::string::npos) { if (name.find("package") != std::string::npos) {
state.joules_[0] += joules_now * 1e-6; state.joules_[0] += ujoules_now * 1e-6;
state.watt_[0] += watt; state.watt_[0] += watt;
} }
} }
......
...@@ -7,21 +7,20 @@ ...@@ -7,21 +7,20 @@
#include <vector> #include <vector>
#include "Rapl.h" #include "Rapl.h"
#include "RaplCounter.h"
#include "common/PMT.h" #include "common/PMT.h"
#include "common/io.h"
namespace pmt::rapl { namespace pmt::rapl {
const int kKeepAliveInterval = 10; // call Measure() roughly every nth update
struct RaplMeasurement { struct RaplMeasurement {
std::string name; std::string name;
std::size_t joules; std::size_t ujoules;
}; };
class RaplImpl : public Rapl { class RaplImpl : public Rapl {
public: public:
RaplImpl(); RaplImpl();
~RaplImpl();
State GetState() override; State GetState() override;
...@@ -35,21 +34,7 @@ class RaplImpl : public Rapl { ...@@ -35,21 +34,7 @@ class RaplImpl : public Rapl {
Timestamp previous_timestamp_; Timestamp previous_timestamp_;
std::vector<RaplMeasurement> previous_measurements_; std::vector<RaplMeasurement> previous_measurements_;
std::vector<RaplCounter> rapl_counters_;
std::vector<std::string> packages_names_;
std::vector<os::file_descriptor> energy_fds_;
// The numbers in the rapl /energy_uj files range from zero up to a maximum
// specified in /max_energy_range_uj. This class reports monotonically
// increasing values starting with zero for the first measurement. Therefore,
// for every counter, the result is computed as follows:
// now = <read value>
// offset += now < previous ? max : 0
// result += offset + now - first
std::vector<std::size_t> uj_max_;
std::vector<std::size_t> uj_first_;
std::vector<std::size_t> uj_previous_;
std::vector<std::size_t> uj_offset_;
// Mutex used to guard GetMeasurements() // Mutex used to guard GetMeasurements()
std::mutex mutex_; std::mutex mutex_;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment