From f40fbb565ac828b425ca71f129af5b011740393d Mon Sep 17 00:00:00 2001
From: mancini <mancini@astron.nl>
Date: Wed, 5 Jun 2024 16:37:12 +0200
Subject: [PATCH] Remove OpenMP SIMD batch matrix multiplication kernel

---
 CMakeLists.txt                                |  2 +-
 benchmarks/matrix_multiplication.cpp          | 12 -----
 ...trix_multiplication_batch_real_complex.cpp | 45 -------------------
 test/test_matrix_multiplication.cpp           |  5 ---
 4 files changed, 1 insertion(+), 63 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 13a2702..ea8b5b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,7 +49,7 @@ FetchContent_Declare(
 
 FetchContent_Populate(aocommon)
 
-set(COMPILER_FLAGS "-O3;-march=native;-ggdb;-fopenmp")
+set(COMPILER_FLAGS "-O3;-march=native;-ggdb")
 # List all kernel code
 file(GLOB KERNEL_SOURCES "code/*.cpp")
 # Add the benchmark executable
diff --git a/benchmarks/matrix_multiplication.cpp b/benchmarks/matrix_multiplication.cpp
index e3cbe4e..1615afb 100644
--- a/benchmarks/matrix_multiplication.cpp
+++ b/benchmarks/matrix_multiplication.cpp
@@ -146,14 +146,6 @@ BENCHMARK_DEFINE_F(InitializeInputBatch, BatchMatrixMultiplicationAOCommon)
   }
 }
 
-BENCHMARK_DEFINE_F(InitializeInputBatch,
-                   BatchMatrixMultiplicationRealComplexOpenMP)
-(benchmark::State& state) {
-  for (auto _ : state) {
-    matrixMultiplyRealComplexSIMD(A, B, C, state.range(0));
-  }
-}
-
 BENCHMARK_DEFINE_F(InitializeInputBatch, BatchMatrixMultiplicationRealComplex)
 (benchmark::State& state) {
   for (auto _ : state) {
@@ -170,8 +162,4 @@ BENCHMARK_REGISTER_F(InitializeInputBatch, BatchMatrixMultiplicationAOCommon)
 BENCHMARK_REGISTER_F(InitializeInputBatch, BatchMatrixMultiplicationRealComplex)
     ->Range(8, 512);
 
-BENCHMARK_REGISTER_F(InitializeInputBatch,
-                     BatchMatrixMultiplicationRealComplexOpenMP)
-    ->Range(8, 512);
-
 BENCHMARK_MAIN();
diff --git a/code/matrix_multiplication_batch_real_complex.cpp b/code/matrix_multiplication_batch_real_complex.cpp
index c996076..925c5b6 100644
--- a/code/matrix_multiplication_batch_real_complex.cpp
+++ b/code/matrix_multiplication_batch_real_complex.cpp
@@ -1,50 +1,5 @@
 #include "matrix_multiplication.h"
 
-void matrixMultiplyRealComplexSIMD(const std::complex<float>* a,
-                                   const std::complex<float>* b,
-                                   std::complex<float>* c, size_t batch_size) {
-  float* a_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(a));
-  float* b_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(b));
-  float* c_ptr = reinterpret_cast<float*>(c);
-
-#pragma omp simd
-  for (size_t s = 0; s < batch_size; s++) {
-    const float a_00_real = a_ptr[s * 8 + 0];
-    const float a_00_imag = a_ptr[s * 8 + 1];
-    const float a_01_real = a_ptr[s * 8 + 2];
-    const float a_01_imag = a_ptr[s * 8 + 3];
-    const float a_10_real = a_ptr[s * 8 + 4];
-    const float a_10_imag = a_ptr[s * 8 + 5];
-    const float a_11_real = a_ptr[s * 8 + 6];
-    const float a_11_imag = a_ptr[s * 8 + 7];
-    const float b_00_real = b_ptr[s * 8 + 0];
-    const float b_00_imag = b_ptr[s * 8 + 1];
-    const float b_01_real = b_ptr[s * 8 + 2];
-    const float b_01_imag = b_ptr[s * 8 + 3];
-    const float b_10_real = b_ptr[s * 8 + 4];
-    const float b_10_imag = b_ptr[s * 8 + 5];
-    const float b_11_real = b_ptr[s * 8 + 6];
-    const float b_11_imag = b_ptr[s * 8 + 7];
-
-    c_ptr[s * 8 + 0] = a_00_real * b_00_real + a_01_real * b_10_real;
-    c_ptr[s * 8 + 0] -= a_00_imag * b_00_imag + a_01_imag * b_10_imag;
-    c_ptr[s * 8 + 1] = a_00_real * b_00_imag + a_01_real * b_10_imag;
-    c_ptr[s * 8 + 1] += a_00_imag * b_00_real + a_01_imag * b_10_real;
-    c_ptr[s * 8 + 2] = a_00_real * b_01_real + a_01_real * b_11_real;
-    c_ptr[s * 8 + 2] -= a_00_imag * b_01_imag + a_01_imag * b_11_imag;
-    c_ptr[s * 8 + 3] = a_00_real * b_01_imag + a_01_real * b_11_imag;
-    c_ptr[s * 8 + 3] += a_00_imag * b_01_real + a_01_imag * b_11_real;
-    c_ptr[s * 8 + 4] = a_10_real * b_00_real + a_11_real * b_10_real;
-    c_ptr[s * 8 + 4] -= a_10_imag * b_00_imag + a_11_imag * b_10_imag;
-    c_ptr[s * 8 + 5] = a_10_real * b_00_imag + a_11_real * b_10_imag;
-    c_ptr[s * 8 + 5] += a_10_imag * b_00_real + a_11_imag * b_10_real;
-    c_ptr[s * 8 + 6] = a_10_real * b_01_real + a_11_real * b_11_real;
-    c_ptr[s * 8 + 6] -= a_10_imag * b_01_imag + a_11_imag * b_11_imag;
-    c_ptr[s * 8 + 7] = a_10_real * b_01_imag + a_11_real * b_11_imag;
-    c_ptr[s * 8 + 7] += a_10_imag * b_01_real + a_11_imag * b_11_real;
-  }
-}
-
 void matrixMultiplyRealComplex(const std::complex<float>* a,
                                const std::complex<float>* b,
                                std::complex<float>* c, size_t batch_size) {
diff --git a/test/test_matrix_multiplication.cpp b/test/test_matrix_multiplication.cpp
index c9709db..28ac6de 100644
--- a/test/test_matrix_multiplication.cpp
+++ b/test/test_matrix_multiplication.cpp
@@ -35,11 +35,6 @@ TEST_CASE("test complex matrix multiplication", "[float]") {
     COMPARE_ARRAYS(C_expected, C, 1.e-6);
   }
 
-  SECTION("test correctness of batch implementation") {
-    matrixMultiplyRealComplexSIMD(A.data(), B.data(), C.data(), 1);
-
-    COMPARE_ARRAYS(C_expected, C, 1.e-6);
-  }
 #if defined(__AVX__)
   SECTION("test correctness of avx implementation") {
     matrixMultiplyAVX(A.data(), B.data(), C.data());
-- 
GitLab