From 81b60be587777e03baadb3a586f7f56de7c111d0 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 14:29:58 +0100 Subject: [PATCH 01/18] Add schaapspack and gracehopper runs --- .gitlab-ci.yml | 20 ++++++++++++++++++++ CMakeLists.txt | 9 +++++++++ ci/das6/compile_and_run_native.sh | 20 ++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 ci/das6/compile_and_run_native.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d11a266..d1d46da 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -96,6 +96,26 @@ performance-jetson: # This job runs in the test stage. access: all expire_in: 1 days +performance-gracehopper: # This job runs in the test stage. + allow_failure: true + tags: + - das6-gpu + stage: benchmark # It only starts when the job in the build stage completes successfully. + script: + - sbatch --wait -p ghq -o output.txt -e error.txt ci/das6/compile_and_run_native_sh ghq_arm64 + - cat output.txt >&1 + - cat error.txt >&2 + + artifacts: + paths: + - ./results*.json + - ./output.txt + - ./error.txt + - ./*.tar + when: always + access: all + expire_in: 1 days + performance-generic: stage: benchmark image: "$CI_REGISTRY_IMAGE:latest" diff --git a/CMakeLists.txt b/CMakeLists.txt index 73d5509..153e159 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,15 @@ FetchContent_Declare( FetchContent_Populate(aocommon) +# Make schaapspack available +FetchContent_Declare( + schaapcommon + GIT_REPOSITORY git@git.astron.nl:RD/schaapcommon.git + GIT_TAG master) + +FetchContent_Populate(schaapcommon) + + set(COMPILER_FLAGS "-O3;-march=native;-ggdb;") # List all kernel code diff --git a/ci/das6/compile_and_run_native.sh b/ci/das6/compile_and_run_native.sh new file mode 100644 index 0000000..4125cdc --- /dev/null +++ b/ci/das6/compile_and_run_native.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -e +# compile the code and run it on das6 +# Specify the compiler version and architecture + +ARCHITECTURE=$1 +COMPILER_VERSION=$(gcc --version | head -n 1 | awk '{print $4}') + + +echo RUNNING ON ${COMPILER_VERSION} AND ${ARCHITECTURE} +BUILD_DIR=build-${COMPILER_VERSION}-${ARCHITECTURE} + +cmake -B ${BUILD_DIR} . -DCMAKE_BUILD_TYPE=Release + +make -C ${BUILD_DIR} -j + +tar -cvf asm-${ARCHITECTURE}-${COMPILER_VERSION}.tar ${BUILD_DIR}/*.s + +${BUILD_DIR}/microbenchmarks --benchmark_out=results-${COMPILER_VERSION}-${ARCHITECTURE}.json --benchmark_out_format=json \ No newline at end of file -- GitLab From e038b2a35a4e2465c8be64b9d06b93b2012ab106 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 17:07:03 +0100 Subject: [PATCH 02/18] Fix build and add benchmark test --- CMakeLists.txt | 18 +-- benchmarks/convolution.cpp | 62 ++++++++++ code/convolution.h | 26 +++++ code/convolution_reference.cpp | 8 ++ code/convolution_serial_fftw.cpp | 190 +++++++++++++++++++++++++++++++ 5 files changed, 296 insertions(+), 8 deletions(-) create mode 100644 benchmarks/convolution.cpp create mode 100644 code/convolution.h create mode 100644 code/convolution_reference.cpp create mode 100644 code/convolution_serial_fftw.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 153e159..fdbe25d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,9 +45,10 @@ include(Catch) FetchContent_Declare( aocommon GIT_REPOSITORY https://gitlab.com/aroffringa/aocommon.git - GIT_TAG master) + GIT_TAG master + EXCLUDE_FROM_ALL) -FetchContent_Populate(aocommon) +FetchContent_MakeAvailable(aocommon) # Make schaapspack available FetchContent_Declare( @@ -55,8 +56,8 @@ FetchContent_Declare( GIT_REPOSITORY git@git.astron.nl:RD/schaapcommon.git GIT_TAG master) -FetchContent_Populate(schaapcommon) - +FetchContent_MakeAvailable(schaapcommon) +target_include_directories(schaapcommon PUBLIC ${aocommon_SOURCE_DIR}/include) set(COMPILER_FLAGS "-O3;-march=native;-ggdb;") @@ -73,8 +74,9 @@ find_package(OpenMP) # Link against Google Benchmark target_link_libraries(microbenchmarks PRIVATE benchmark::benchmark) -target_include_directories(microbenchmarks PRIVATE ${aocommon_SOURCE_DIR}/include) +target_include_directories(microbenchmarks PRIVATE ${aocommon_SOURCE_DIR}/include ${schaapcommon_SOURCE_DIR}/include) target_include_directories(microbenchmarks PRIVATE code) +target_link_libraries(microbenchmarks PRIVATE schaapcommon) target_compile_options(microbenchmarks PUBLIC ${COMPILER_FLAGS}) if(OpenMP_CXX_FOUND) target_link_libraries(microbenchmarks PRIVATE OpenMP::OpenMP_CXX) @@ -88,15 +90,15 @@ list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) catch_discover_tests(unittests WORKING_DIRECTORY) target_link_libraries(unittests PRIVATE Catch2::Catch2WithMain) target_include_directories(unittests PRIVATE code) -target_include_directories(unittests PRIVATE ${aocommon_SOURCE_DIR}/include) - +target_include_directories(unittests PRIVATE ${aocommon_SOURCE_DIR}/include ${schaapcommon_SOURCE_DIR}/include) +target_link_libraries(unittests PRIVATE schaapcommon) target_compile_options(unittests PUBLIC ${COMPILER_FLAGS}) foreach(KERNEL_SOURCE ${KERNEL_SOURCES}) get_filename_component(KERNEL_NAME ${KERNEL_SOURCE} NAME_WE) add_precompile_target(TARGET_NAME ${KERNEL_NAME} - INCLUDE_DIRS code ${aocommon_SOURCE_DIR}/include + INCLUDE_DIRS code ${aocommon_SOURCE_DIR}/include ${schaapcommon_SOURCE_DIR}/include SOURCES ${KERNEL_SOURCE} COMPILER_FLAGS ${COMPILER_FLAGS}) endforeach() diff --git a/benchmarks/convolution.cpp b/benchmarks/convolution.cpp new file mode 100644 index 0000000..ae82e9f --- /dev/null +++ b/benchmarks/convolution.cpp @@ -0,0 +1,62 @@ +#include <benchmark/benchmark.h> +#include <convolution.h> +#include <memory> +#include <vector> + +namespace { + +class InitializeInput : public benchmark::Fixture { + public: + void SetUp(::benchmark::State& state) { + size_t width = state.range(0); + size_t height = state.range(1); + image = std::make_unique<std::vector<float>>(width * height); + kernel = std::make_unique<std::vector<float>>(width * height); + + Initialize(image->data(), width, height); + Initialize(kernel->data(), width, height); + } + void TearDown(::benchmark::State& state) { + image.reset(); + kernel.reset(); + } + + std::unique_ptr<std::vector<float>> image; + std::unique_ptr<std::vector<float>> kernel; +}; +} // namespace + +// Reference standard +BENCHMARK_DEFINE_F(InitializeInput, ConvolveReference) +(benchmark::State& state) { + for (auto _ : state) { + ConvolveReference(image->data(), kernel->data(), state.range(0), + state.range(1)); + } +} +BENCHMARK_REGISTER_F(InitializeInput, ConvolveReference) + ->Args({64, 32}) + ->Args({128, 64}) + ->Args({256, 126}) + ->Args({512, 754}) + ->Args({1024, 124}) + ->Args({2048, 1000}) + ->Args({4096, 5000}); + +// FFTW serial standard +BENCHMARK_DEFINE_F(InitializeInput, ConvolveSerial) +(benchmark::State& state) { + for (auto _ : state) { + ConvolveSerial(image->data(), kernel->data(), state.range(0), + state.range(1)); + } +} + +BENCHMARK_REGISTER_F(InitializeInput, ConvolveSerial) + ->Args({64, 32}) + ->Args({128, 64}) + ->Args({256, 126}) + ->Args({512, 754}) + ->Args({1024, 124}) + ->Args({2048, 1000}) + ->Args({4096, 5000}); diff --git a/code/convolution.h b/code/convolution.h new file mode 100644 index 0000000..66e8c8c --- /dev/null +++ b/code/convolution.h @@ -0,0 +1,26 @@ +#ifndef CONVOLUTION_H_ +#define CONVOLUTION_H_ + +#include <complex> +#include <iomanip> +#include <iostream> +#include <random> + +inline void Initialize(float* a, const size_t width, const size_t height) { + // Initialize matrices with random complex values + std::seed_seq seed({42}); + std::mt19937 gen(seed); + std::uniform_real_distribution<float> dis(-1.0, 1.0); + const size_t linear_size = width * height; + for (int i = 0; i < linear_size; i++) { + a[i] = dis(gen); + } +} + +// Function to perform matrix multiplication for 2x2 complex matrices +void ConvolveReference(float* image, const float* kernel, size_t width, + size_t height); + +void ConvolveSerial(float* image, const float* kernel, size_t width, + size_t height); +#endif diff --git a/code/convolution_reference.cpp b/code/convolution_reference.cpp new file mode 100644 index 0000000..e83c8d0 --- /dev/null +++ b/code/convolution_reference.cpp @@ -0,0 +1,8 @@ +#include "convolution.h" + +#include <schaapcommon/math/convolution.h> + +void ConvolveReference(float* image, const float* kernel, size_t width, + size_t height) { + schaapcommon::math::Convolve(image, kernel, width, height); +} \ No newline at end of file diff --git a/code/convolution_serial_fftw.cpp b/code/convolution_serial_fftw.cpp new file mode 100644 index 0000000..e097730 --- /dev/null +++ b/code/convolution_serial_fftw.cpp @@ -0,0 +1,190 @@ +#include "convolution.h" +#include <fftw3.h> +#include <algorithm> +// Partially unroll rows/columns with a factor of kUnroll +constexpr size_t kUnroll = 4; + +// With kUnroll > 1, the temporary buffers need to be aligned +// for FFTW to work correctly. +constexpr size_t kAlignment = 64; + +size_t RoundUp(size_t a, size_t b) { return ((a + b) / b) * b; } + +void FftR2CComposite(fftwf_plan plan_r2c, fftwf_plan plan_c2c, + size_t image_height, size_t image_width, const float* in, + fftwf_complex* out) { + const size_t complex_width = image_width / 2 + 1; + const size_t complex_size = image_height * complex_width; + + fftwf_complex* temp1 = fftwf_alloc_complex(complex_size); + + fftwf_complex* temp2 = fftwf_alloc_complex(complex_width); + float* temp2_ptr = reinterpret_cast<float*>(temp2); + for (size_t y = 0; y < image_height; y++) { + float* temp1_ptr = reinterpret_cast<float*>(&temp1[y * complex_width]); + std::copy_n(&in[y * image_width], image_width, temp2_ptr); + fftwf_execute_dft_r2c(plan_r2c, temp2_ptr, temp2); + std::copy_n(temp2_ptr, 2 * complex_width, temp1_ptr); + } + fftwf_free(temp2); + + // Partially kUnroll over columns + size_t padded_height = RoundUp(image_height, kAlignment); + temp2 = fftwf_alloc_complex(kUnroll * padded_height); + + for (size_t x = 0; x < complex_width; x += kUnroll) { + // Copy input + for (size_t y = 0; y < image_height; y++) { + for (size_t i = 0; i < kUnroll; i++) { + if ((x + i) < complex_width) { + float* temp1_ptr = + reinterpret_cast<float*>(&temp1[y * complex_width + x + i]); + float* temp2_ptr = + reinterpret_cast<float*>(&temp2[i * padded_height + y]); + std::copy_n(temp1_ptr, 2, temp2_ptr); + } + } + } + + // Perform 1D FFT over columns + for (size_t i = 0; i < kUnroll; i++) { + fftwf_complex* temp2_ptr = &temp2[i * padded_height]; + fftwf_execute_dft(plan_c2c, temp2_ptr, temp2_ptr); + } + + // Transpose output + for (size_t y = 0; y < image_height; y++) { + for (size_t i = 0; i < kUnroll; i++) { + if ((x + i) < complex_width) { + float* temp2_ptr = + reinterpret_cast<float*>(&temp2[i * padded_height + y]); + float* out_ptr = + reinterpret_cast<float*>(&out[y * complex_width + x + i]); + std::copy_n(temp2_ptr, 2, out_ptr); + } + } + } + } + + fftwf_free(temp2); + fftwf_free(temp1); +} + +void FftC2RComposite(fftwf_plan plan_c2c, fftwf_plan plan_c2r, + size_t image_height, size_t image_width, + const fftwf_complex* in, float* out) { + const size_t complex_width = image_width / 2 + 1; + + size_t padded_height = RoundUp(image_height, kAlignment); + size_t padded_size = padded_height * complex_width; + fftwf_complex* temp1 = fftwf_alloc_complex(padded_size); + + for (size_t x = 0; x < complex_width; x += kUnroll) { + // Transpose input + for (size_t y = 0; y < image_height; y++) { + for (size_t i = 0; i < kUnroll; i++) { + if ((x + i) < complex_width) { + const float* in_ptr = + reinterpret_cast<const float*>(&in[y * complex_width + x + i]); + float* temp1_ptr = + reinterpret_cast<float*>(&temp1[(x + i) * padded_height + y]); + std::copy_n(in_ptr, 2, temp1_ptr); + } + } + } + + // Perform 1D C2C FFT over columns + for (size_t i = 0; i < kUnroll; i++) { + if ((x + i) < complex_width) { + fftwf_complex* temp1_ptr = &temp1[(x + i) * padded_height]; + fftwf_execute_dft(plan_c2c, temp1_ptr, temp1_ptr); + } + } + } + + size_t paddedWidth = RoundUp(complex_width, kAlignment); + fftwf_complex* temp2 = fftwf_alloc_complex(kUnroll * paddedWidth); + + for (size_t y = 0; y < image_height; y += kUnroll) { + // Transpose input + for (size_t x = 0; x < complex_width; x++) { + for (size_t i = 0; i < kUnroll; i++) { + if ((y + i) < image_height) { + float* temp1_ptr = + reinterpret_cast<float*>(&temp1[x * padded_height + y + i]); + float* temp2_ptr = + reinterpret_cast<float*>(&temp2[i * paddedWidth + x]); + std::copy_n(temp1_ptr, 2, temp2_ptr); + } + } + } + + // Perform 1D C2R FFT over rows + for (size_t i = 0; i < kUnroll; i++) { + if ((y + i) < image_height) { + fftwf_complex* temp2_ptr = &temp2[i * paddedWidth]; + fftwf_execute_dft_c2r(plan_c2r, temp2_ptr, + reinterpret_cast<float*>(temp2_ptr)); + } + } + + // Copy output + for (size_t i = 0; i < kUnroll; i++) { + if ((y + i) < image_height) { + float* temp2_ptr = reinterpret_cast<float*>(&temp2[i * paddedWidth]); + std::copy_n(temp2_ptr, image_width, &out[(y + i) * image_width]); + } + } + } + + fftwf_free(temp2); + + fftwf_free(temp1); +} + +void ConvolveSerial(float* image, const float* kernel, size_t image_width, + size_t image_height) { + const size_t image_size = image_width * image_height; + const size_t complex_width = image_width / 2 + 1; + const size_t complex_size = complex_width * image_height; + float* temp_data = fftwf_alloc_real(image_size); + fftwf_complex* fft_image_data = fftwf_alloc_complex(complex_size); + fftwf_complex* fft_kernel_data = fftwf_alloc_complex(complex_size); + + fftwf_plan plan_r2c = + fftwf_plan_dft_r2c_1d(image_width, nullptr, nullptr, FFTW_ESTIMATE); + fftwf_plan plan_c2c_forward = fftwf_plan_dft_1d( + image_height, nullptr, nullptr, FFTW_FORWARD, FFTW_ESTIMATE); + fftwf_plan plan_c2c_backward = fftwf_plan_dft_1d( + image_height, nullptr, nullptr, FFTW_BACKWARD, FFTW_ESTIMATE); + fftwf_plan plan_c2r = + fftwf_plan_dft_c2r_1d(image_width, nullptr, nullptr, FFTW_ESTIMATE); + + FftR2CComposite(plan_r2c, plan_c2c_forward, image_height, image_width, image, + fft_image_data); + + std::copy_n(kernel, image_size, temp_data); + FftR2CComposite(plan_r2c, plan_c2c_forward, image_height, image_width, + temp_data, fft_kernel_data); + + const float fact = 1.0 / image_size; + for (size_t y = 0; y != image_height; ++y) { + for (size_t x = 0; x != complex_width; ++x) { + const size_t i = y * complex_width + x; + reinterpret_cast<std::complex<float>*>(fft_image_data)[i] *= + fact * reinterpret_cast<std::complex<float>*>(fft_kernel_data)[i]; + } + } + + FftC2RComposite(plan_c2c_backward, plan_c2r, image_height, image_width, + fft_image_data, image); + + fftwf_free(fft_image_data); + fftwf_free(fft_kernel_data); + fftwf_free(temp_data); + + fftwf_destroy_plan(plan_r2c); + fftwf_destroy_plan(plan_c2c_forward); + fftwf_destroy_plan(plan_c2c_backward); + fftwf_destroy_plan(plan_c2r); +} \ No newline at end of file -- GitLab From 21f86690899ad17e18156251dba4c6d395364c54 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 17:32:50 +0100 Subject: [PATCH 03/18] Add tests --- code/convolution.h | 1 + test/helpers.cpp | 49 ++++++++++++++++++++++++++++++++++++++- test/helpers.h | 7 ++++++ test/test_convolution.cpp | 37 +++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 test/test_convolution.cpp diff --git a/code/convolution.h b/code/convolution.h index 66e8c8c..5e0ceb4 100644 --- a/code/convolution.h +++ b/code/convolution.h @@ -23,4 +23,5 @@ void ConvolveReference(float* image, const float* kernel, size_t width, void ConvolveSerial(float* image, const float* kernel, size_t width, size_t height); + #endif diff --git a/test/helpers.cpp b/test/helpers.cpp index 60cabef..9b4b04b 100644 --- a/test/helpers.cpp +++ b/test/helpers.cpp @@ -16,6 +16,14 @@ void compareSingle(const std::vector<T>& lv, const std::vector<T>& rv, REQUIRE_THAT(lv, Catch::Matchers::WithinAbs(rv, precision)); } +template <typename T> +void compareMulti(const std::vector<T>& lv, const std::vector<T>& rv, + float precision) { + for (size_t idx = 0; idx < lv.size(); idx++) { + REQUIRE_THAT(lv[idx], Catch::Matchers::WithinAbs(rv[idx], precision)); + } +} + template <> void compareSingle(const std::vector<std::complex<float>>& lv, const std::vector<std::complex<float>>& rv, @@ -49,7 +57,7 @@ void compareArrays(const std::string& test, unsigned line, std::array<T, N> lhs, std::stringstream ss; ss << "Expected : \n"; - for (size_t idx = 0; idx < N; idx++) { + for (size_t idx = 0; idx < lv.size(); idx++) { ss << valueToString(lhs[idx]) << "\t"; } @@ -59,14 +67,53 @@ void compareArrays(const std::string& test, unsigned line, std::array<T, N> lhs, } ss << "\n"; INFO("Reason: \n" << ss.str()); + compareSingle(lv, rv, precision); } +template <typename T> +void compareVectors(const std::string& test, unsigned line, std::vector<T> lhs, + std::vector<T> rhs, float precision) { + INFO("Test case [" << test << "] failed at line " + << line); // Reported only if REQUIRE fails + + std::stringstream ss; + if (lhs.size() != rhs.size()) { + ss << " Size mismatch\n"; + ss << "Expected size : " << lhs.size() << "\n"; + ss << "Obtained size : " << rhs.size() << "\n"; + INFO("Reason: \n" << ss.str()); + } + + CHECK(lhs.size() == rhs.size()); + const size_t N = lhs.size(); + ss << "Expected : \n"; + for (size_t idx = 0; idx < N; idx++) { + ss << valueToString(lhs[idx]) << "\t"; + } + + ss << "\nObtained : \n"; + for (size_t idx = 0; idx < N; idx++) { + ss << valueToString(rhs[idx]) << "\t"; + } + ss << "\n"; + INFO("Reason: \n" << ss.str()); + compareMulti(lhs, rhs, precision); +} + template void compareArrays(const std::string& test, unsigned line, std::array<std::complex<float>, 4ul> lhs, std::array<std::complex<float>, 4ul> rhs, float precision); +template void compareVectors(const std::string& test, unsigned line, + std::vector<float> lhs, std::vector<float> rhs, + float precision); + +template void compareVectors(const std::string& test, unsigned line, + std::vector<double> lhs, std::vector<double> rhs, + float precision); + void AssertEqual(const aocommon::Matrix4x4& a, const aocommon::Matrix4x4& b, float precision) { for (size_t i = 0; i < 16; i++) { diff --git a/test/helpers.h b/test/helpers.h index 83f306f..f3ce809 100644 --- a/test/helpers.h +++ b/test/helpers.h @@ -14,6 +14,9 @@ #define COMPARE_ARRAYS(lhs, rhs, precision) \ compareArrays(Catch::getResultCapture().getCurrentTestName(), __LINE__, lhs, \ rhs, precision) +#define COMPARE_VECTORS(lhs, rhs, precision) \ + compareVectors(Catch::getResultCapture().getCurrentTestName(), __LINE__, \ + lhs, rhs, precision) template <typename T> void compareSingle(const std::vector<T>& lv, const std::vector<T>& rv, @@ -32,6 +35,10 @@ template <typename T, size_t N> void compareArrays(const std::string& test, unsigned line, std::array<T, N> lhs, std::array<T, N> rhs, float precision); +template <typename T> +void compareVectors(const std::string& test, unsigned line, std::vector<T> lhs, + std::vector<T> rhs, float precision); + void AssertEqual(const aocommon::Matrix4x4& a, const aocommon::Matrix4x4& b, float precision); diff --git a/test/test_convolution.cpp b/test/test_convolution.cpp new file mode 100644 index 0000000..4227e34 --- /dev/null +++ b/test/test_convolution.cpp @@ -0,0 +1,37 @@ +#include <convolution.h> + +#include <catch2/catch_test_macros.hpp> +#include <catch2/matchers/catch_matchers_floating_point.hpp> + +#include "helpers.h" + +TEST_CASE("test convolution", "[float]") { + // This setup will be done 4 times in total, once for each section + size_t width = 16; + size_t height = 32; + + std::vector<float> image(width * height); + std::vector<float> expected_image(width * height); + + std::vector<float> kernel(width * height); + Initialize(image.data(), width, height); + std::copy(image.begin(), image.end(), expected_image.begin()); + Initialize(kernel.data(), width, height); + + ConvolveReference(expected_image.data(), kernel.data(), width, height); + + SECTION("test correctness of reference implementation") { + ConvolveReference(image.data(), kernel.data(), width, height); + COMPARE_VECTORS(expected_image, image, 1.e-5); + } + + SECTION("test correctness of reference implementation twice") { + ConvolveReference(image.data(), kernel.data(), width, height); + COMPARE_VECTORS(expected_image, image, 1.e-5); + } + + SECTION("test correctness of serial implementation") { + ConvolveSerial(image.data(), kernel.data(), width, height); + COMPARE_VECTORS(expected_image, image, 1.e-5); + } +} \ No newline at end of file -- GitLab From 09fc57641cd73355524fb48419acc53c1056735e Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 17:45:56 +0100 Subject: [PATCH 04/18] Use HTTPS instead of SSH to clone schaapcommon --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fdbe25d..26c5e45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,7 @@ FetchContent_MakeAvailable(aocommon) # Make schaapspack available FetchContent_Declare( schaapcommon - GIT_REPOSITORY git@git.astron.nl:RD/schaapcommon.git + GIT_REPOSITORY https://git.astron.nl/RD/schaapcommon.git GIT_TAG master) FetchContent_MakeAvailable(schaapcommon) -- GitLab From 247218cff598ce95e5ed5264c1e158692952559c Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 17:48:37 +0100 Subject: [PATCH 05/18] Add missing dependency --- docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 67a2ef8..a60a0d8 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get update -qq &&\ git \ libblas-dev liblapack-dev \ libboost-date-time-dev \ + libbost-filesystem-dev \ libboost-test-dev \ libboost-dev \ libcfitsio-dev \ -- GitLab From ac9acc622bee1834db0a622e7605a4ed71306c64 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 17:49:34 +0100 Subject: [PATCH 06/18] Add fftw3 --- docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index a60a0d8..4f51d02 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,6 +18,7 @@ RUN apt-get update -qq &&\ libhdf5-dev \ libopenmpi-dev \ libpython3-dev \ + libfftw3-dev \ pkg-config \ python3-dev python3-numpy \ python3-sphinx \ -- GitLab From 0f361b708b7dc360f73009666c978b6fbe08eca1 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 17:51:33 +0100 Subject: [PATCH 07/18] Fix name of library --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 4f51d02..6b72250 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get update -qq &&\ git \ libblas-dev liblapack-dev \ libboost-date-time-dev \ - libbost-filesystem-dev \ + libboost-filesystem-dev \ libboost-test-dev \ libboost-dev \ libcfitsio-dev \ -- GitLab From 1131c9a79e460da3b7a062a8950159f2106ec0e9 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 19:56:14 +0100 Subject: [PATCH 08/18] Force pull policy --- .gitlab-ci.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d1d46da..28fda75 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -25,7 +25,9 @@ docker-base: - docker/Dockerfile .build_docker: - image: "$CI_REGISTRY_IMAGE:latest" + image: + name: "$CI_REGISTRY_IMAGE:latest" + pull_policy: always before_script: - cmake -B build . -DCMAKE_BUILD_TYPE=Release - make -C build -j @@ -33,7 +35,9 @@ docker-base: build-job: # This job runs in the build stage, which runs first. stage: build - image: "$CI_REGISTRY_IMAGE:latest" + image: + name: "$CI_REGISTRY_IMAGE:latest" + pull_policy: always script: - cmake -B build . -DCMAKE_BUILD_TYPE=Release - make -C build -j -- GitLab From 5ae0648f742bf7dda19cafb2228d3af07f5fc92b Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 19:59:13 +0100 Subject: [PATCH 09/18] Summarize convolution results --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 28fda75..d4e9793 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -155,4 +155,5 @@ collect-performance: - python3 ci/summarize-results.py --filter MatrixMultiplication results*.json result-summary-matrix-multiplication - python3 ci/summarize-results.py --filter HermitianSquare results*.json result-summary-hermitian-square - python3 ci/summarize-results.py --filter KroneckerSquare results*.json result-summary-kronecker-square + - python3 ci/summarize-results.py --filter Convolution results*.json result-summary-convolution -- GitLab From 5ea6c8fb99c1d840c8890e335b346afddd4b1c36 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 20:01:31 +0100 Subject: [PATCH 10/18] Nodes need boost --- ci/das6/compile_and_run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/das6/compile_and_run.sh b/ci/das6/compile_and_run.sh index 5d89c01..3641f71 100755 --- a/ci/das6/compile_and_run.sh +++ b/ci/das6/compile_and_run.sh @@ -11,7 +11,7 @@ COMPILER_VERSION=$2 echo RUNNING ON ${COMPILER_VERSION} AND ${ARCHITECTURE} BUILD_DIR=build-${COMPILER_VERSION}-${ARCHITECTURE} module load spack/${COMPILER_VERSION} -module load cmake +module load cmake boost cmake -B ${BUILD_DIR} . -DCMAKE_BUILD_TYPE=Release -- GitLab From a4e5c2ca5de48f44df701e08439fda963f829fae Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 20:07:28 +0100 Subject: [PATCH 11/18] Add more dependencies --- ci/das6/compile_and_run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/das6/compile_and_run.sh b/ci/das6/compile_and_run.sh index 3641f71..4b8175d 100755 --- a/ci/das6/compile_and_run.sh +++ b/ci/das6/compile_and_run.sh @@ -11,7 +11,7 @@ COMPILER_VERSION=$2 echo RUNNING ON ${COMPILER_VERSION} AND ${ARCHITECTURE} BUILD_DIR=build-${COMPILER_VERSION}-${ARCHITECTURE} module load spack/${COMPILER_VERSION} -module load cmake boost +module load cmake boost casacore fftw3 cmake -B ${BUILD_DIR} . -DCMAKE_BUILD_TYPE=Release -- GitLab From bc4fd251874b2bcb640171defb78a685bbc184b5 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Thu, 6 Feb 2025 20:13:13 +0100 Subject: [PATCH 12/18] Fix wrong module name --- ci/das6/compile_and_run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/das6/compile_and_run.sh b/ci/das6/compile_and_run.sh index 4b8175d..34d5ef9 100755 --- a/ci/das6/compile_and_run.sh +++ b/ci/das6/compile_and_run.sh @@ -11,7 +11,7 @@ COMPILER_VERSION=$2 echo RUNNING ON ${COMPILER_VERSION} AND ${ARCHITECTURE} BUILD_DIR=build-${COMPILER_VERSION}-${ARCHITECTURE} module load spack/${COMPILER_VERSION} -module load cmake boost casacore fftw3 +module load cmake boost casacore fftw cmake -B ${BUILD_DIR} . -DCMAKE_BUILD_TYPE=Release -- GitLab From 0e46a84507c32c4bc8d94848f760e6809c428815 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Fri, 7 Feb 2025 09:00:50 +0100 Subject: [PATCH 13/18] Add hdf5 --- ci/das6/compile_and_run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/das6/compile_and_run.sh b/ci/das6/compile_and_run.sh index 34d5ef9..9838a5d 100755 --- a/ci/das6/compile_and_run.sh +++ b/ci/das6/compile_and_run.sh @@ -11,7 +11,7 @@ COMPILER_VERSION=$2 echo RUNNING ON ${COMPILER_VERSION} AND ${ARCHITECTURE} BUILD_DIR=build-${COMPILER_VERSION}-${ARCHITECTURE} module load spack/${COMPILER_VERSION} -module load cmake boost casacore fftw +module load cmake boost casacore fftw hdf5 cmake -B ${BUILD_DIR} . -DCMAKE_BUILD_TYPE=Release -- GitLab From 6de58f42860c3f481b2ecbc60381ae6017ee04f0 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Fri, 7 Feb 2025 09:16:58 +0100 Subject: [PATCH 14/18] Add explicitly fftw3 --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 26c5e45..f694c8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,12 +71,14 @@ file(GLOB TEST_SOURCES "test/*.cpp") add_executable(unittests ${TEST_SOURCES} ${KERNEL_SOURCES}) find_package(OpenMP) +find_package(PkgConfig REQUIRED) +pkg_search_module(FFTW REQUIRED fftw3 IMPORTED_TARGET) # Link against Google Benchmark target_link_libraries(microbenchmarks PRIVATE benchmark::benchmark) target_include_directories(microbenchmarks PRIVATE ${aocommon_SOURCE_DIR}/include ${schaapcommon_SOURCE_DIR}/include) target_include_directories(microbenchmarks PRIVATE code) -target_link_libraries(microbenchmarks PRIVATE schaapcommon) +target_link_libraries(microbenchmarks PRIVATE schaapcommon PkgConfig::FFTW) target_compile_options(microbenchmarks PUBLIC ${COMPILER_FLAGS}) if(OpenMP_CXX_FOUND) target_link_libraries(microbenchmarks PRIVATE OpenMP::OpenMP_CXX) @@ -91,7 +93,7 @@ catch_discover_tests(unittests WORKING_DIRECTORY) target_link_libraries(unittests PRIVATE Catch2::Catch2WithMain) target_include_directories(unittests PRIVATE code) target_include_directories(unittests PRIVATE ${aocommon_SOURCE_DIR}/include ${schaapcommon_SOURCE_DIR}/include) -target_link_libraries(unittests PRIVATE schaapcommon) +target_link_libraries(unittests PRIVATE schaapcommon PkgConfig::FFTW) target_compile_options(unittests PUBLIC ${COMPILER_FLAGS}) foreach(KERNEL_SOURCE ${KERNEL_SOURCES}) -- GitLab From b590d541623bbe866c5457fdeafae072d6c0b940 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Fri, 7 Feb 2025 09:48:31 +0100 Subject: [PATCH 15/18] Add include to precompile --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f694c8f..cac174a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,7 +100,7 @@ foreach(KERNEL_SOURCE ${KERNEL_SOURCES}) get_filename_component(KERNEL_NAME ${KERNEL_SOURCE} NAME_WE) add_precompile_target(TARGET_NAME ${KERNEL_NAME} - INCLUDE_DIRS code ${aocommon_SOURCE_DIR}/include ${schaapcommon_SOURCE_DIR}/include + INCLUDE_DIRS code ${aocommon_SOURCE_DIR}/include ${schaapcommon_SOURCE_DIR}/include ${FFTW_INCLUDE_DIRS} SOURCES ${KERNEL_SOURCE} COMPILER_FLAGS ${COMPILER_FLAGS}) endforeach() -- GitLab From 070d07dbabcc3d0ea759f5071e382bd943e9eca6 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Fri, 7 Feb 2025 10:31:23 +0100 Subject: [PATCH 16/18] Add cfitsio --- ci/das6/compile_and_run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/das6/compile_and_run.sh b/ci/das6/compile_and_run.sh index 9838a5d..295945e 100755 --- a/ci/das6/compile_and_run.sh +++ b/ci/das6/compile_and_run.sh @@ -11,7 +11,7 @@ COMPILER_VERSION=$2 echo RUNNING ON ${COMPILER_VERSION} AND ${ARCHITECTURE} BUILD_DIR=build-${COMPILER_VERSION}-${ARCHITECTURE} module load spack/${COMPILER_VERSION} -module load cmake boost casacore fftw hdf5 +module load cmake boost casacore fftw hdf5 cfitsio cmake -B ${BUILD_DIR} . -DCMAKE_BUILD_TYPE=Release -- GitLab From 7a250ed25e1e8afc144cd32970b750987da21186 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Fri, 7 Feb 2025 10:42:00 +0100 Subject: [PATCH 17/18] Fix naming --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d4e9793..5f614c6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -155,5 +155,5 @@ collect-performance: - python3 ci/summarize-results.py --filter MatrixMultiplication results*.json result-summary-matrix-multiplication - python3 ci/summarize-results.py --filter HermitianSquare results*.json result-summary-hermitian-square - python3 ci/summarize-results.py --filter KroneckerSquare results*.json result-summary-kronecker-square - - python3 ci/summarize-results.py --filter Convolution results*.json result-summary-convolution + - python3 ci/summarize-results.py --filter Convolve results*.json result-summary-convolution -- GitLab From 75da170fbd79219faf2a70f4f9a22a47fd75a9dd Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Fri, 7 Feb 2025 11:08:22 +0100 Subject: [PATCH 18/18] Reduce the number of tests --- benchmarks/convolution.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/benchmarks/convolution.cpp b/benchmarks/convolution.cpp index ae82e9f..d1dbb4a 100644 --- a/benchmarks/convolution.cpp +++ b/benchmarks/convolution.cpp @@ -35,11 +35,6 @@ BENCHMARK_DEFINE_F(InitializeInput, ConvolveReference) } } BENCHMARK_REGISTER_F(InitializeInput, ConvolveReference) - ->Args({64, 32}) - ->Args({128, 64}) - ->Args({256, 126}) - ->Args({512, 754}) - ->Args({1024, 124}) ->Args({2048, 1000}) ->Args({4096, 5000}); @@ -53,10 +48,5 @@ BENCHMARK_DEFINE_F(InitializeInput, ConvolveSerial) } BENCHMARK_REGISTER_F(InitializeInput, ConvolveSerial) - ->Args({64, 32}) - ->Args({128, 64}) - ->Args({256, 126}) - ->Args({512, 754}) - ->Args({1024, 124}) ->Args({2048, 1000}) ->Args({4096, 5000}); -- GitLab