Remove OpenMP implementation

f40fbb56 · Mattia Mancini · 7e5ea4c4
Commit f40fbb56 authored 1 year ago by Mattia Mancini
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,7 +49,7 @@ FetchContent_Declare(
 FetchContent_Populate(aocommon)
-set(COMPILER_FLAGS "-O3;-march=native;-ggdb;-fopenmp")
+set(COMPILER_FLAGS "-O3;-march=native;-ggdb")
 # List all kernel code
 file(GLOB KERNEL_SOURCES "code/*.cpp")
 # Add the benchmark executable

--- a/benchmarks/matrix_multiplication.cpp
+++ b/benchmarks/matrix_multiplication.cpp
@@ -146,14 +146,6 @@ BENCHMARK_DEFINE_F(InitializeInputBatch, BatchMatrixMultiplicationAOCommon)
  }
 }
-BENCHMARK_DEFINE_F(InitializeInputBatch,
-                   BatchMatrixMultiplicationRealComplexOpenMP)
-(benchmark::State& state) {
-  for (auto _ : state) {
-    matrixMultiplyRealComplexSIMD(A, B, C, state.range(0));
-  }
-}
 BENCHMARK_DEFINE_F(InitializeInputBatch, BatchMatrixMultiplicationRealComplex)
 (benchmark::State& state) {
  for (auto _ : state) {
@@ -170,8 +162,4 @@ BENCHMARK_REGISTER_F(InitializeInputBatch, BatchMatrixMultiplicationAOCommon)
 BENCHMARK_REGISTER_F(InitializeInputBatch, BatchMatrixMultiplicationRealComplex)
    ->Range(8, 512);
-BENCHMARK_REGISTER_F(InitializeInputBatch,
-                     BatchMatrixMultiplicationRealComplexOpenMP)
-    ->Range(8, 512);
 BENCHMARK_MAIN();
--- a/code/matrix_multiplication_batch_real_complex.cpp
+++ b/code/matrix_multiplication_batch_real_complex.cpp
 #include "matrix_multiplication.h"
-void matrixMultiplyRealComplexSIMD(const std::complex<float>* a,
-                                   const std::complex<float>* b,
-                                   std::complex<float>* c, size_t batch_size) {
-  float* a_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(a));
-  float* b_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(b));
-  float* c_ptr = reinterpret_cast<float*>(c);
-#pragma omp simd
-  for (size_t s = 0; s < batch_size; s++) {
-    const float a_00_real = a_ptr[s * 8 + 0];
-    const float a_00_imag = a_ptr[s * 8 + 1];
-    const float a_01_real = a_ptr[s * 8 + 2];
-    const float a_01_imag = a_ptr[s * 8 + 3];
-    const float a_10_real = a_ptr[s * 8 + 4];
-    const float a_10_imag = a_ptr[s * 8 + 5];
-    const float a_11_real = a_ptr[s * 8 + 6];
-    const float a_11_imag = a_ptr[s * 8 + 7];
-    const float b_00_real = b_ptr[s * 8 + 0];
-    const float b_00_imag = b_ptr[s * 8 + 1];
-    const float b_01_real = b_ptr[s * 8 + 2];
-    const float b_01_imag = b_ptr[s * 8 + 3];
-    const float b_10_real = b_ptr[s * 8 + 4];
-    const float b_10_imag = b_ptr[s * 8 + 5];
-    const float b_11_real = b_ptr[s * 8 + 6];
-    const float b_11_imag = b_ptr[s * 8 + 7];
-    c_ptr[s * 8 + 0] = a_00_real * b_00_real + a_01_real * b_10_real;
-    c_ptr[s * 8 + 0] -= a_00_imag * b_00_imag + a_01_imag * b_10_imag;
-    c_ptr[s * 8 + 1] = a_00_real * b_00_imag + a_01_real * b_10_imag;
-    c_ptr[s * 8 + 1] += a_00_imag * b_00_real + a_01_imag * b_10_real;
-    c_ptr[s * 8 + 2] = a_00_real * b_01_real + a_01_real * b_11_real;
-    c_ptr[s * 8 + 2] -= a_00_imag * b_01_imag + a_01_imag * b_11_imag;
-    c_ptr[s * 8 + 3] = a_00_real * b_01_imag + a_01_real * b_11_imag;
-    c_ptr[s * 8 + 3] += a_00_imag * b_01_real + a_01_imag * b_11_real;
-    c_ptr[s * 8 + 4] = a_10_real * b_00_real + a_11_real * b_10_real;
-    c_ptr[s * 8 + 4] -= a_10_imag * b_00_imag + a_11_imag * b_10_imag;
-    c_ptr[s * 8 + 5] = a_10_real * b_00_imag + a_11_real * b_10_imag;
-    c_ptr[s * 8 + 5] += a_10_imag * b_00_real + a_11_imag * b_10_real;
-    c_ptr[s * 8 + 6] = a_10_real * b_01_real + a_11_real * b_11_real;
-    c_ptr[s * 8 + 6] -= a_10_imag * b_01_imag + a_11_imag * b_11_imag;
-    c_ptr[s * 8 + 7] = a_10_real * b_01_imag + a_11_real * b_11_imag;
-    c_ptr[s * 8 + 7] += a_10_imag * b_01_real + a_11_imag * b_11_real;
-  }
-}
 void matrixMultiplyRealComplex(const std::complex<float>* a,
                               const std::complex<float>* b,
                               std::complex<float>* c, size_t batch_size) {

--- a/test/test_matrix_multiplication.cpp
+++ b/test/test_matrix_multiplication.cpp
@@ -35,11 +35,6 @@ TEST_CASE("test complex matrix multiplication", "[float]") {
    COMPARE_ARRAYS(C_expected, C, 1.e-6);
  }
-  SECTION("test correctness of batch implementation") {
-    matrixMultiplyRealComplexSIMD(A.data(), B.data(), C.data(), 1);
-    COMPARE_ARRAYS(C_expected, C, 1.e-6);
-  }
 #if defined(__AVX__)
  SECTION("test correctness of avx implementation") {
    matrixMultiplyAVX(A.data(), B.data(), C.data());