Skip to content
Snippets Groups Projects
Commit 170ea032 authored by Wiebe van Breukelen's avatar Wiebe van Breukelen
Browse files

Merge branch 'integrate-cudawrappers' into 'main'

Add cudawrappers and AMD HIP support

See merge request !17
parents 6583a98e e3a4567e
No related branches found
No related tags found
1 merge request!17Add cudawrappers and AMD HIP support
Checking pipeline status
Showing
with 362 additions and 49 deletions
cmake_minimum_required(VERSION 3.17) cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
project(dedisp) project(dedisp)
...@@ -9,17 +9,60 @@ set(DEDISP_VERSION_PATCH 1) ...@@ -9,17 +9,60 @@ set(DEDISP_VERSION_PATCH 1)
set(DEDISP_VERSION set(DEDISP_VERSION
${DEDISP_VERSION_MAJOR}.${DEDISP_VERSION_MINOR}.${DEDISP_VERSION_PATCH}) ${DEDISP_VERSION_MAJOR}.${DEDISP_VERSION_MINOR}.${DEDISP_VERSION_PATCH})
set(CMAKE_CXX_STANDARD 14)
# dependencies # dependencies
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
# CUDA if(NOT DEFINED DEDISP_BACKEND)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(DEDISP_BACKEND "CUDA")
set(CMAKE_CUDA_ARCHITECTURES 80) endif()
set(DEDISP_BACKEND
${DEDISP_BACKEND}
CACHE STRING "GPU backend API to use")
set_property(CACHE DEDISP_BACKEND PROPERTY STRINGS "CUDA" "HIP")
if(${DEDISP_BACKEND} STREQUAL "CUDA")
set(DEDISP_BACKEND_CUDA True)
elseif(${DEDISP_BACKEND} STREQUAL "HIP")
set(DEDISP_BACKEND_HIP True)
else()
message(FATAL_ERROR "Invalid value for DEDISP_BACKEND: ${DEDISP_BACKEND}")
endif() endif()
find_package(CUDAToolkit REQUIRED) find_package(Threads REQUIRED)
find_package(OpenMP REQUIRED)
set(CUDAWRAPPERS_BACKEND ${DEDISP_BACKEND})
if(${DEDISP_BACKEND_HIP})
# Workaround missing omp.h for compilation of sources with language property
# HIP by adding the OpenMP include directory of the host compiler to the
# OpenMP target.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
get_filename_component(OpenMP_LIB_DIR ${OpenMP_gomp_LIBRARY} DIRECTORY)
message(STATUS "OpenMP library directory: ${OpenMP_LIB_DIR}")
find_path(
OpenMP_INCLUDE_DIR omp.h
PATHS ${OpenMP_LIB_DIR} "${OpenMP_LIB_DIR}/.."
PATH_SUFFIXES
"include"
"lib/gcc/x86_64-pc-linux-gnu/${CMAKE_CXX_COMPILER_VERSION}/include")
target_include_directories(OpenMP::OpenMP_CXX SYSTEM
INTERFACE ${OpenMP_INCLUDE_DIR})
endif()
enable_language(HIP)
set(LINK_gpu_runtime hip::host)
elseif(${DEDISP_BACKEND_CUDA})
enable_language(CUDA) enable_language(CUDA)
set(CUDA_MIN_VERSION 10.0)
find_package(CUDAToolkit REQUIRED)
set(LINK_gpu_runtime CUDA::cudart)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 80)
endif()
endif()
# set Release as default build type # set Release as default build type
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
...@@ -33,23 +76,32 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) ...@@ -33,23 +76,32 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
option(DEDISP_BUILD_BENCHMARKS "Build benchmarks" ON) option(DEDISP_BUILD_BENCHMARKS "Build benchmarks" ON)
if(${DEDISP_BUILD_BENCHMARKS}) if(${DEDISP_BUILD_BENCHMARKS})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEDISP_BENCHMARK") add_compile_options("-DDEDISP_BENCHMARK")
endif() endif()
# Extra verbose output: defines the DEDISP_DEBUG preprocessor macro for all
# targets declared after this point. (The benchmark macro DEDISP_BENCHMARK is
# controlled separately by DEDISP_BUILD_BENCHMARKS.)
option(DEDISP_ENABLE_DEBUG "Enable extra verbose output" OFF)
if(DEDISP_ENABLE_DEBUG)
  add_compile_definitions(DEDISP_DEBUG)
endif()
option(DEDISP_BUILD_TESTS "Build tests" OFF) option(DEDISP_BUILD_TESTING "Build tests" OFF)
option(DEDISP_BENCHMARK_WITH_PMT
"Enable Power Measurement Toolkit support in the benchmark suite" OFF)
if(${DEDISP_BUILD_TESTS}) if(${DEDISP_BUILD_TESTING})
enable_testing() enable_testing()
endif() endif()
# OpenMP configuration # CUDA Wrappers
find_package(OpenMP REQUIRED) include(GNUInstallDirs)
include(FetchContent)
FetchContent_Declare(
cudawrappers
GIT_REPOSITORY https://github.com/nlesc-recruit/cudawrappers
GIT_TAG main)
FetchContent_MakeAvailable(cudawrappers)
# library # library
add_subdirectory(src) add_subdirectory(src)
......
...@@ -10,7 +10,7 @@ Installation Instructions: ...@@ -10,7 +10,7 @@ Installation Instructions:
3. Optionally further configure cmake through interactive build settings 3. Optionally further configure cmake through interactive build settings
* `$ccmake .` * `$ccmake .`
* e.g. set `DEDISP_BUILD_BENCHMARKS` to `ON` to enable build of performance benchmarks [default: ON] * e.g. set `DEDISP_BUILD_BENCHMARKS` to `ON` to enable build of performance benchmarks [default: ON]
* e.g. set `DEDISP_BUILD_TESTS` to `ON` to enable build of tests [default: OFF] * e.g. set `DEDISP_BUILD_TESTING` to `ON` to enable build of tests [default: OFF]
* e.g. set `DEDISP_ENABLE_DEBUG` to `ON` to enable build with more verbose output (for debugging purposes) [default: OFF] * e.g. set `DEDISP_ENABLE_DEBUG` to `ON` to enable build with more verbose output (for debugging purposes) [default: OFF]
4. make and install 4. make and install
* `$ make install` * `$ make install`
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
# add binaries # add binaries
if(${DEDISP_BUILD_TESTS}) if(${DEDISP_BUILD_TESTING})
add_subdirectory(test) add_subdirectory(test)
endif() endif()
if(${DEDISP_BUILD_BENCHMARKS}) if(${DEDISP_BUILD_BENCHMARKS})
......
if(${DEDISP_BACKEND_CUDA})
add_subdirectory(dedisp) add_subdirectory(dedisp)
endif()
add_subdirectory(tdd) add_subdirectory(tdd)
add_subdirectory(fdd) add_subdirectory(fdd)
...@@ -425,6 +425,14 @@ template <typename PlanType> int run(BenchParameters &benchParameter) { ...@@ -425,6 +425,14 @@ template <typename PlanType> int run(BenchParameters &benchParameter) {
// Print timings // Print timings
if (benchParameter.verbose) { if (benchParameter.verbose) {
tbench->Pause(); tbench->Pause();
dedisp_float out_mean, out_sigma;
calc_stats_float(output, nsamps_computed * dm_count, &out_mean,
&out_sigma);
printf("Output RMS : %f\n", out_mean);
printf("Output StdDev : %f\n", out_sigma);
printf("\n"); printf("\n");
printf("------------- BENCHMARK TIMES (accumulated for %d iteration(s)) " printf("------------- BENCHMARK TIMES (accumulated for %d iteration(s)) "
"-------------\n", "-------------\n",
......
add_executable(benchdedisp benchdedisp.cpp) add_executable(benchdedisp benchdedisp.cpp)
target_include_directories(benchdedisp PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(benchdedisp PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(benchdedisp dedisp) target_link_libraries(benchdedisp dedisp)
......
add_executable(benchfdd benchfdd.cpp) add_executable(benchfdd benchfdd.cpp)
target_include_directories(benchfdd PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(benchfdd PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(benchfdd fdd) target_link_libraries(benchfdd fdd)
......
add_executable(benchtdd benchtdd.cpp) add_executable(benchtdd benchtdd.cpp)
target_include_directories(benchtdd PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(benchtdd PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(benchtdd tdd) target_link_libraries(benchtdd tdd)
......
if(${DEDISP_BUILD_TESTS}) if(${DEDISP_BUILD_TESTING})
if(${DEDISP_BACKEND_CUDA})
add_subdirectory(dedisp) add_subdirectory(dedisp)
endif()
add_subdirectory(tdd) add_subdirectory(tdd)
add_subdirectory(fdd) add_subdirectory(fdd)
# Unit tests: fetch Catch2 v3 and register every TEST_CASE of test_unit with
# CTest through catch_discover_tests().
include(FetchContent)
FetchContent_Declare(
  Catch2
  GIT_REPOSITORY https://github.com/catchorg/Catch2.git
  GIT_TAG v3.6.0)
FetchContent_MakeAvailable(Catch2)
# Catch2 v3 ships its CMake helper modules (Catch.cmake) in "extras"; the
# "contrib" directory only exists in the v2 series.
list(APPEND CMAKE_MODULE_PATH "${catch2_SOURCE_DIR}/extras")
include(Catch)

add_executable(test_unit UnitTests.cpp)
target_include_directories(test_unit PRIVATE "${CMAKE_SOURCE_DIR}/src")
# The dedisp library is only built for the CUDA backend, so it is only linked
# in that configuration.
if(DEDISP_BACKEND_CUDA)
  target_link_libraries(test_unit PRIVATE dedisp fdd tdd
                                          Catch2::Catch2WithMain)
else()
  target_link_libraries(test_unit PRIVATE fdd tdd Catch2::Catch2WithMain)
endif()
catch_discover_tests(test_unit)
endif() endif()
#include "fdd/FDDGPUPlan.hpp"
#define CATCH_CONFIG_MAIN
#include <catch2/catch_approx.hpp>
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include <catch2/matchers/catch_matchers_vector.hpp>
#include "common/test.hpp"
#include "fdd/FDDGPUPlan.hpp"
#include "tdd/TDDPlan.hpp"
// Dedisp is not supported on HIP.
#ifndef __HIP__
#include "dedisp/DedispPlan.hpp"
// Runs the Dedisp plan with a fixed random seed and checks the output mean,
// the output standard deviation and the peak values collected by run()
// against recorded reference numbers.
TEST_CASE("Dedisp compare expected DM output") {
  // Absolute tolerance used for every comparison in this test.
  const float tolerance = std::numeric_limits<float>::epsilon();

  unsigned int fixed_seed = 0;
  TestInput input{&fixed_seed};
  TestOutput result;
  run<dedisp::DedispPlan>(input, result); // run() comes from test.hpp

  REQUIRE(result.out_mean == Catch::Approx(0.376468509).margin(tolerance));
  REQUIRE(result.out_sigma == Catch::Approx(0.002306607).margin(tolerance));

  const std::vector<float> reference_dms = {
      0.392292, 0.395637, 0.401067, 0.397621, 0.390937, 0.446373,
      0.401329, 0.405182, 0.391987, 0.391658, 0.391621};
  REQUIRE_THAT(result.calculated_dms,
               Catch::Matchers::Approx(reference_dms).margin(tolerance));
}
#endif
// Runs the TDD plan with a fixed random seed and checks the output mean, the
// output standard deviation and the peak values collected by run() against
// recorded reference numbers.
TEST_CASE("tdd compare expected DM output") {
  // Absolute tolerance used for every comparison in this test.
  const float tolerance = std::numeric_limits<float>::epsilon();

  unsigned int fixed_seed = 0;
  TestInput input{&fixed_seed};
  TestOutput result;
  run<dedisp::TDDPlan>(input, result); // run() comes from test.hpp

  REQUIRE(result.out_mean == Catch::Approx(0.376468509).margin(tolerance));
  REQUIRE(result.out_sigma == Catch::Approx(0.002306607).margin(tolerance));

  const std::vector<float> reference_dms = {
      0.392292, 0.395637, 0.401067, 0.397621, 0.390937, 0.446373,
      0.401329, 0.405182, 0.391987, 0.391658, 0.391621};
  REQUIRE_THAT(result.calculated_dms,
               Catch::Matchers::Approx(reference_dms).margin(tolerance));
}
// Runs the GPU FDD plan with a fixed random seed and checks the output mean,
// the output standard deviation and the peak values collected by run()
// against recorded reference numbers.
TEST_CASE("fdd compare expected DM output") {
  // Absolute tolerance used for every comparison in this test.
  const float tolerance = std::numeric_limits<float>::epsilon();

  unsigned int fixed_seed = 0;
  TestInput input{&fixed_seed};
  TestOutput result;
  run<dedisp::FDDGPUPlan>(input, result); // run() comes from test.hpp

  REQUIRE(result.out_mean == Catch::Approx(-0.000709622).margin(tolerance));
  REQUIRE(result.out_sigma == Catch::Approx(0.747440).margin(tolerance));

  const std::vector<float> reference_dms = {
      4.850260,  6.077153, 8.697535, 7.709451, 4.709877, 7.514279,
      20.041370, 6.095791, 7.270431, 9.196441, 4.596337, 4.517603};
  REQUIRE_THAT(result.calculated_dms,
               Catch::Matchers::Approx(reference_dms).margin(tolerance));
}
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <time.h> #include <time.h>
#include <functional> #include <functional>
#include <optional>
#include <Plan.hpp> #include <Plan.hpp>
...@@ -18,6 +19,16 @@ ...@@ -18,6 +19,16 @@
#define WRITE_INPUT_DATA 0 #define WRITE_INPUT_DATA 0
#define WRITE_OUTPUT_DATA 0 #define WRITE_OUTPUT_DATA 0
// Optional inputs for run(). When `seed` is non-null, run() seeds its random
// number generator with the pointed-to value; otherwise it draws the seed
// from std::random_device.
struct TestInput {
  // Points at caller-provided storage (the unit tests pass the address of a
  // local variable); null means "no fixed seed".
  unsigned int *seed = nullptr;
};
// Results reported back by run().
//
// The statistics default to zero so that a default-constructed TestOutput
// (as created by the unit tests) never exposes indeterminate values, even if
// run() returns before the statistics have been computed.
struct TestOutput {
  float out_mean = 0.0f;  // mean of the output, filled by calc_stats_float()
  float out_sigma = 0.0f; // standard deviation of the output
  // Output values that run() found more than 6 sigma above the mean.
  std::vector<float> calculated_dms;
};
// Assume input is a 0 mean float and quantize to an unsigned 8-bit quantity // Assume input is a 0 mean float and quantize to an unsigned 8-bit quantity
dedisp_byte bytequant(dedisp_float f) { dedisp_byte bytequant(dedisp_float f) {
dedisp_float v = f + 127.5f; dedisp_float v = f + 127.5f;
...@@ -94,7 +105,8 @@ void calc_stats_float(dedisp_float *a, dedisp_size n, dedisp_float *mean, ...@@ -94,7 +105,8 @@ void calc_stats_float(dedisp_float *a, dedisp_size n, dedisp_float *mean,
return; return;
} }
template <typename PlanType> int run() { template <typename PlanType>
int run(const TestInput &test_input, TestOutput &test_output) {
int device_idx = 0; int device_idx = 0;
dedisp_float sampletime_base = dedisp_float sampletime_base =
...@@ -153,7 +165,7 @@ template <typename PlanType> int run() { ...@@ -153,7 +165,7 @@ template <typename PlanType> int run() {
/* Random number generator setup */ /* Random number generator setup */
static std::random_device rd; static std::random_device rd;
static std::mt19937 generator(rd()); static std::mt19937 generator((test_input.seed) ? *(test_input.seed) : rd());
static std::normal_distribution<float> distribution(0.0, 1.0); static std::normal_distribution<float> distribution(0.0, 1.0);
/* First build 2-D array of floats with our signal in it */ /* First build 2-D array of floats with our signal in it */
...@@ -263,22 +275,26 @@ template <typename PlanType> int run() { ...@@ -263,22 +275,26 @@ template <typename PlanType> int run() {
(double)(clock() - startclock) / CLOCKS_PER_SEC); (double)(clock() - startclock) / CLOCKS_PER_SEC);
// Look for significant peaks // Look for significant peaks
dedisp_float out_mean, out_sigma; calc_stats_float(output, nsamps_computed * dm_count, &test_output.out_mean,
calc_stats_float(output, nsamps_computed * dm_count, &out_mean, &out_sigma); &test_output.out_sigma);
printf("Output RMS : %f\n", out_mean); printf("Output RMS : %f\n",
printf("Output StdDev : %f\n", out_sigma); test_output.out_mean);
printf("Output StdDev : %f\n",
test_output.out_sigma);
i = 0; i = 0;
for (nd = 0; nd < dm_count; nd++) { for (nd = 0; nd < dm_count; nd++) {
for (ns = 0; ns < nsamps_computed; ns++) { for (ns = 0; ns < nsamps_computed; ns++) {
dedisp_size idx = nd * nsamps_computed + ns; dedisp_size idx = nd * nsamps_computed + ns;
dedisp_float val = output[idx]; dedisp_float val = output[idx];
if (val - out_mean > 6.0 * out_sigma) { if (val - test_output.out_mean > 6.0 * test_output.out_sigma) {
printf( printf(
"DM trial %u (%.3f pc/cm^3), Samp %u (%.6f s): %f (%.2f sigma)\n", "DM trial %u (%.3f pc/cm^3), Samp %u (%.6f s): %f (%.2f sigma)\n",
nd, dmlist[nd], ns, ns * dt, val, (val - out_mean) / out_sigma); nd, dmlist[nd], ns, ns * dt, val,
(val - test_output.out_mean) / test_output.out_sigma);
i++; i++;
test_output.calculated_dms.push_back(val);
if (i > 100) if (i > 100)
break; break;
} }
......
# test for dedisp C interface # test for dedisp C interface
add_executable(ctestdedisp ctestdedisp.c) add_executable(ctestdedisp ctestdedisp.c)
target_include_directories(ctestdedisp PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(ctestdedisp PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(ctestdedisp dedisp m) target_link_libraries(ctestdedisp dedisp m)
add_test(ctestdedisp ctestdedisp) add_test(ctestdedisp ctestdedisp)
# test for dedisp C++ interface # test for dedisp C++ interface
add_executable(testdedisp testdedisp.cpp) add_executable(testdedisp testdedisp.cpp)
target_include_directories(testdedisp PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(testdedisp PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(testdedisp dedisp) target_link_libraries(testdedisp dedisp)
add_test(testdedisp testdedisp) add_test(testdedisp testdedisp)
......
...@@ -2,8 +2,12 @@ ...@@ -2,8 +2,12 @@
#include "../common/test.hpp" #include "../common/test.hpp"
template <typename PlanType> int run(); template <typename PlanType> int run(const TestInput &, TestOutput &);
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
return run<dedisp::DedispPlan>(); // uses run method from test.hpp TestInput test_input{};
TestOutput test_output{};
return run<dedisp::DedispPlan>(test_input,
test_output); // uses run method from test.hpp
} }
\ No newline at end of file
# test for fdd C interface # test for fdd C interface
add_executable(ctestfdd ctestfdd.c) add_executable(ctestfdd ctestfdd.c)
target_include_directories(ctestfdd PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(ctestfdd PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(ctestfdd fdd m) target_link_libraries(ctestfdd fdd m)
add_test(ctestfdd ctestfdd) add_test(ctestfdd ctestfdd)
# test for fdd C++ interface # test for fdd C++ interface
add_executable(testfdd testfdd.cpp) add_executable(testfdd testfdd.cpp)
target_include_directories(testfdd PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(testfdd PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(testfdd fdd) target_link_libraries(testfdd fdd)
add_test(testfdd testfdd) add_test(testfdd testfdd)
......
...@@ -5,17 +5,20 @@ ...@@ -5,17 +5,20 @@
#include "../common/test.hpp" #include "../common/test.hpp"
template <typename PlanType> int run(); template <typename PlanType> int run(const TestInput &, TestOutput &);
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
TestInput test_input{};
TestOutput test_output{};
// Set environment variable USE_CPU to switch to CPU implementation of FDD // Set environment variable USE_CPU to switch to CPU implementation of FDD
char *use_cpu_str = getenv("USE_CPU"); char *use_cpu_str = getenv("USE_CPU");
bool use_cpu = !use_cpu_str ? false : atoi(use_cpu_str); bool use_cpu = !use_cpu_str ? false : atoi(use_cpu_str);
if (use_cpu) { if (use_cpu) {
std::cout << "Test FDD on CPU" << std::endl; std::cout << "Test FDD on CPU" << std::endl;
return run<dedisp::FDDCPUPlan>(); return run<dedisp::FDDCPUPlan>(test_input, test_output);
} else { } else {
std::cout << "Test FDD on GPU" << std::endl; std::cout << "Test FDD on GPU" << std::endl;
return run<dedisp::FDDGPUPlan>(); return run<dedisp::FDDGPUPlan>(test_input, test_output);
} }
} }
\ No newline at end of file
# test for tdd C interface # test for tdd C interface
add_executable(ctesttdd ctesttdd.c) add_executable(ctesttdd ctesttdd.c)
target_include_directories(ctesttdd PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(ctesttdd PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(ctesttdd tdd m) target_link_libraries(ctesttdd tdd m)
add_test(ctesttdd ctesttdd) add_test(ctesttdd ctesttdd)
# test for tdd C++ interface # test for tdd C++ interface
add_executable(testtdd testtdd.cpp) add_executable(testtdd testtdd.cpp)
target_include_directories(testtdd PRIVATE ${CMAKE_SOURCE_DIR}/src) target_include_directories(testtdd PRIVATE "${CMAKE_SOURCE_DIR}/src")
target_link_libraries(testtdd tdd) target_link_libraries(testtdd tdd)
add_test(testtdd testtdd) add_test(testtdd testtdd)
......
...@@ -2,8 +2,12 @@ ...@@ -2,8 +2,12 @@
#include "../common/test.hpp" #include "../common/test.hpp"
template <typename PlanType> int run(); template <typename PlanType> int run(const TestInput &, TestOutput &);
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
return run<dedisp::TDDPlan>(); // uses run method from test.hpp TestInput test_input{};
TestOutput test_output{};
return run<dedisp::TDDPlan>(test_input,
test_output); // uses run method from test.hpp
} }
\ No newline at end of file
# helper libraries # helper libraries
# include directory for common header files include_directories(common PRIVATE ${cudawrappers_SOURCE_DIR}/include)
include_directories(common)
# add subdirectories for individual libraries # add subdirectories for individual libraries
add_subdirectory(common) add_subdirectory(common)
...@@ -14,11 +13,28 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) ...@@ -14,11 +13,28 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
# plan library # plan library
# Object library with the backend-independent plan and kernel helpers.
add_library(plan OBJECT Plan.cpp GPUPlan.cpp GPUKernel.cpp)
# LINK_gpu_runtime is a variable set by the top-level CMakeLists.txt
# (CUDA::cudart for the CUDA backend, hip::host for the HIP backend); it must
# be expanded, otherwise CMake treats the bare word as a library name.
target_link_libraries(
  plan PRIVATE ${LINK_gpu_runtime} cudawrappers::cu cudawrappers::nvrtc
               cudawrappers::nvtx)
# dedisp library
# Optional Power Measurement Toolkit (PMT) support: fetches PMT, defines
# HAVE_PMT and adds the PMT headers to the include path of the targets
# declared below.
if(DEDISP_BENCHMARK_WITH_PMT)
  # Enable the PMT sensor(s) that match the selected GPU backend.
  if(DEDISP_BACKEND_HIP)
    set(PMT_BUILD_ROCM ON)
  else()
    set(PMT_BUILD_NVML ON)
    set(PMT_BUILD_NVIDIA ON)
  endif()
  FetchContent_Declare(pmt GIT_REPOSITORY https://git.astron.nl/RD/pmt)
  FetchContent_MakeAvailable(pmt)
  add_compile_definitions("HAVE_PMT")
  # include_directories() has no PRIVATE/PUBLIC keyword: every argument is
  # taken as a directory, so the keyword must not be passed here.
  include_directories(common "${pmt_SOURCE_DIR}/include")
endif()
target_include_directories(common PRIVATE "${PROJECT_SOURCE_DIR}/include")
if(${DEDISP_BACKEND_CUDA})
add_subdirectory(dedisp) add_subdirectory(dedisp)
endif()
# TDD library # TDD library
add_subdirectory(tdd) add_subdirectory(tdd)
......
#include "GPUKernel.hpp"
#include <cudawrappers/cu.hpp>
#include <iostream>
#include <stdexcept>
// Verifies that `kernel_info` refers to a compiled kernel function.
// Throws std::runtime_error naming the kernel function and its source file
// when no function is present.
void GPUKernel::assertCompiled(const CompiledKernelInfo &kernel_info) const {
  if (kernel_info.function) {
    return;
  }
  throw std::runtime_error(func_name_ + " in file " + filename_ +
                           " has not yet been compiled");
}
std::pair<const CompiledKernelInfo &, bool>
GPUKernel::compile(const std::vector<std::string> &compile_options) {
// Concat the compile options into a single string to be used as a map key.
std::string options;
if (compile_options.size()) {
// Allocate some space beforehand to avoid repeated expanding allocations.
options.reserve(compile_options.size() * 32);
for (const auto &compile_option : compile_options) {
options += compile_option;
}
}
// Check if the function has already been compiled which the same options.
if (compiled_kernels_.find(options) != compiled_kernels_.end()) {
return {compiled_kernels_[options], false};
}
// Create a new map entry inplace.
compiled_kernels_[options] = CompiledKernelInfo{};
CompiledKernelInfo &info = compiled_kernels_[options];
try {
program_->compile(compile_options);
} catch (nvrtc::Error &error) {
std::cerr << program_->getLog();
throw;
}
// Create a new module from the compiled PTX if needed.
info.module = std::make_unique<cu::Module>(
static_cast<const void *>(program_->getPTX().data()));
if (!info.module) {
throw std::runtime_error("Unable to create kernel module");
}
info.function = std::make_shared<cu::Function>(
*info.module, std::string(func_name_).c_str());
return {info, true};
}
\ No newline at end of file
#ifndef DEDISP_GPU_RUNTIME_KERNEL_HPP_
#define DEDISP_GPU_RUNTIME_KERNEL_HPP_
#include <map>
#include <memory>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <cudawrappers/cu.hpp>
#include <cudawrappers/nvrtc.hpp>
// Module/function pair produced by one GPUKernel::compile() call.
// Both pointers are null in a default-constructed instance;
// GPUKernel::assertCompiled() treats a null `function` as "not compiled".
struct CompiledKernelInfo {
  // Module created from the PTX of the compiled program.
  std::shared_ptr<cu::Module> module;
  // Kernel function looked up by name in `module`.
  std::shared_ptr<cu::Function> function;
};
class GPUKernel {
public:
GPUKernel(const std::string &filename, const std::string &func_name,
const std::string &file_src)
: filename_(filename), func_name_(func_name),
program_(std::make_unique<nvrtc::Program>(file_src, filename)) {}
void setFunctionParams(const std::string &kernel_function_name,
const std::vector<const void *> &params);
void assertCompiled(const CompiledKernelInfo &kernel_info) const;
inline std::string getFilename() const { return filename_; }
protected:
std::pair<const CompiledKernelInfo &, bool>
compile(const std::vector<std::string> &compile_options = {});
private:
const std::string filename_;
const std::string func_name_;
const std::unique_ptr<nvrtc::Program> program_;
std::map<std::string, CompiledKernelInfo> compiled_kernels_;
};
#endif // DEDISP_GPU_RUNTIME_KERNEL_HPP_
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment