From f40fbb565ac828b425ca71f129af5b011740393d Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Wed, 5 Jun 2024 16:37:12 +0200 Subject: [PATCH] Remove OpenMP implementation --- CMakeLists.txt | 2 +- benchmarks/matrix_multiplication.cpp | 12 ----- ...trix_multiplication_batch_real_complex.cpp | 45 ------------------- test/test_matrix_multiplication.cpp | 5 --- 4 files changed, 1 insertion(+), 63 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 13a2702..ea8b5b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,7 @@ FetchContent_Declare( FetchContent_Populate(aocommon) -set(COMPILER_FLAGS "-O3;-march=native;-ggdb;-fopenmp") +set(COMPILER_FLAGS "-O3;-march=native;-ggdb") # List all kernel code file(GLOB KERNEL_SOURCES "code/*.cpp") # Add the benchmark executable diff --git a/benchmarks/matrix_multiplication.cpp b/benchmarks/matrix_multiplication.cpp index e3cbe4e..1615afb 100644 --- a/benchmarks/matrix_multiplication.cpp +++ b/benchmarks/matrix_multiplication.cpp @@ -146,14 +146,6 @@ BENCHMARK_DEFINE_F(InitializeInputBatch, BatchMatrixMultiplicationAOCommon) } } -BENCHMARK_DEFINE_F(InitializeInputBatch, - BatchMatrixMultiplicationRealComplexOpenMP) -(benchmark::State& state) { - for (auto _ : state) { - matrixMultiplyRealComplexSIMD(A, B, C, state.range(0)); - } -} - BENCHMARK_DEFINE_F(InitializeInputBatch, BatchMatrixMultiplicationRealComplex) (benchmark::State& state) { for (auto _ : state) { @@ -170,8 +162,4 @@ BENCHMARK_REGISTER_F(InitializeInputBatch, BatchMatrixMultiplicationAOCommon) BENCHMARK_REGISTER_F(InitializeInputBatch, BatchMatrixMultiplicationRealComplex) ->Range(8, 512); -BENCHMARK_REGISTER_F(InitializeInputBatch, - BatchMatrixMultiplicationRealComplexOpenMP) - ->Range(8, 512); - BENCHMARK_MAIN(); diff --git a/code/matrix_multiplication_batch_real_complex.cpp b/code/matrix_multiplication_batch_real_complex.cpp index c996076..925c5b6 100644 --- a/code/matrix_multiplication_batch_real_complex.cpp +++ b/code/matrix_multiplication_batch_real_complex.cpp @@ -1,50 +1,5 @@ #include "matrix_multiplication.h" -void matrixMultiplyRealComplexSIMD(const std::complex<float>* a, - const std::complex<float>* b, - std::complex<float>* c, size_t batch_size) { - float* a_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(a)); - float* b_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(b)); - float* c_ptr = reinterpret_cast<float*>(c); - -#pragma omp simd - for (size_t s = 0; s < batch_size; s++) { - const float a_00_real = a_ptr[s * 8 + 0]; - const float a_00_imag = a_ptr[s * 8 + 1]; - const float a_01_real = a_ptr[s * 8 + 2]; - const float a_01_imag = a_ptr[s * 8 + 3]; - const float a_10_real = a_ptr[s * 8 + 4]; - const float a_10_imag = a_ptr[s * 8 + 5]; - const float a_11_real = a_ptr[s * 8 + 6]; - const float a_11_imag = a_ptr[s * 8 + 7]; - const float b_00_real = b_ptr[s * 8 + 0]; - const float b_00_imag = b_ptr[s * 8 + 1]; - const float b_01_real = b_ptr[s * 8 + 2]; - const float b_01_imag = b_ptr[s * 8 + 3]; - const float b_10_real = b_ptr[s * 8 + 4]; - const float b_10_imag = b_ptr[s * 8 + 5]; - const float b_11_real = b_ptr[s * 8 + 6]; - const float b_11_imag = b_ptr[s * 8 + 7]; - - c_ptr[s * 8 + 0] = a_00_real * b_00_real + a_01_real * b_10_real; - c_ptr[s * 8 + 0] -= a_00_imag * b_00_imag + a_01_imag * b_10_imag; - c_ptr[s * 8 + 1] = a_00_real * b_00_imag + a_01_real * b_10_imag; - c_ptr[s * 8 + 1] += a_00_imag * b_00_real + a_01_imag * b_10_real; - c_ptr[s * 8 + 2] = a_00_real * b_01_real + a_01_real * b_11_real; - c_ptr[s * 8 + 2] -= a_00_imag * b_01_imag + a_01_imag * b_11_imag; - c_ptr[s * 8 + 3] = a_00_real * b_01_imag + a_01_real * b_11_imag; - c_ptr[s * 8 + 3] += a_00_imag * b_01_real + a_01_imag * b_11_real; - c_ptr[s * 8 + 4] = a_10_real * b_00_real + a_11_real * b_10_real; - c_ptr[s * 8 + 4] -= a_10_imag * b_00_imag + a_11_imag * b_10_imag; - c_ptr[s * 8 + 5] = a_10_real * b_00_imag + a_11_real * b_10_imag; - c_ptr[s * 8 + 5] += a_10_imag * b_00_real + a_11_imag * b_10_real; - c_ptr[s * 8 + 6] = a_10_real * b_01_real + a_11_real * b_11_real; - c_ptr[s * 8 + 6] -= a_10_imag * b_01_imag + a_11_imag * b_11_imag; - c_ptr[s * 8 + 7] = a_10_real * b_01_imag + a_11_real * b_11_imag; - c_ptr[s * 8 + 7] += a_10_imag * b_01_real + a_11_imag * b_11_real; - } -} - void matrixMultiplyRealComplex(const std::complex<float>* a, const std::complex<float>* b, std::complex<float>* c, size_t batch_size) { diff --git a/test/test_matrix_multiplication.cpp b/test/test_matrix_multiplication.cpp index c9709db..28ac6de 100644 --- a/test/test_matrix_multiplication.cpp +++ b/test/test_matrix_multiplication.cpp @@ -35,11 +35,6 @@ TEST_CASE("test complex matrix multiplication", "[float]") { COMPARE_ARRAYS(C_expected, C, 1.e-6); } - SECTION("test correctness of batch implementation") { - matrixMultiplyRealComplexSIMD(A.data(), B.data(), C.data(), 1); - - COMPARE_ARRAYS(C_expected, C, 1.e-6); - } #if defined(__AVX__) SECTION("test correctness of avx implementation") { matrixMultiplyAVX(A.data(), B.data(), C.data()); -- GitLab