Review notes (text before the first "diff --git" line is not part of any hunk):
  * benchmarks/access_to_memory.cpp: the "random access" index was always 0,
    the "vectorized" benchmark called the non-vectorized helper, malloc
    results were unchecked, and the fixture name could clash with the one
    used in benchmarks/matrix_multiplication.cpp.
  * code/access_to_memory.cpp: includes its own header, uses size_t loop
    counters, and ends with a newline.
  * code/access_to_memory.h: declares multiply_copy_multiple_vectorized,
    includes <cstddef>, and has a matching include-guard comment.
  * "index" lines are omitted for the three new files whose content changed,
    because the recorded blob hashes would no longer match.

diff --git a/benchmarks/access_to_memory.cpp b/benchmarks/access_to_memory.cpp
new file mode 100644
--- /dev/null
+++ b/benchmarks/access_to_memory.cpp
@@ -0,0 +1,171 @@
+#include <access_to_memory.h>
+#include <benchmark/benchmark.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+
+// Unnamed namespace: the hunk context of benchmarks/matrix_multiplication.cpp
+// shows that file also uses a fixture called InitializeInput, and with
+// BENCHMARK_MAIN() moved to main.cpp both files can end up in one binary.
+// Internal linkage keeps the two fixtures apart without changing the
+// registered benchmark names.
+namespace {
+
+constexpr size_t MB = 1024 * 1024;
+
+// Number of elements processed by one bulk multiply-copy call.
+constexpr size_t kBatchSize = 100;
+
+// Fixture owning two 40 MB float arrays filled with random values.
+class InitializeInput : public benchmark::Fixture {
+ public:
+  // Allocates and fills A and B; skips the benchmark if allocation fails.
+  void SetUp(::benchmark::State& state) override {
+    constexpr size_t bytes = 40 * MB;
+
+    array_size = bytes / sizeof(float);
+
+    // Plain malloc: no alignment beyond the allocator default is requested.
+    A = static_cast<float*>(std::malloc(bytes));
+    B = static_cast<float*>(std::malloc(bytes));
+    if (A == nullptr || B == nullptr) {
+      // Once an error is recorded the timing loop is expected to run zero
+      // iterations, so the benchmark bodies never touch the null pointers.
+      state.SkipWithError("could not allocate the benchmark arrays");
+      return;
+    }
+
+    // Initialize both arrays with random values
+    Initialize(A, array_size);
+    Initialize(B, array_size);
+  }
+
+  // Frees both arrays; std::free(nullptr) is a no-op.
+  void TearDown(::benchmark::State& /*state*/) override {
+    std::free(A);
+    std::free(B);
+    A = nullptr;
+    B = nullptr;
+  }
+
+  float* A = nullptr;
+  float* B = nullptr;
+  size_t array_size = 0;
+};
+
+// Scattered accesses driven by a deterministic pseudo-random sequence.
+BENCHMARK_F(InitializeInput, MemoryAccessRandomAccessData)
+(benchmark::State& state) {
+  // The original index, (t * array_size * 1000000000) % array_size, is a
+  // multiple of array_size, i.e. always 0 unless the product wraps. A 64-bit
+  // LCG yields reproducible, scattered positions instead.
+  std::uint64_t lcg = 0x9E3779B97F4A7C15ULL;
+  for (auto _ : state) {
+    for (size_t t = 0; t < 1000; t++) {
+      lcg = lcg * 6364136223846793005ULL + 1442695040888963407ULL;
+      // Use the high bits: the low bits of an LCG have short periods.
+      const size_t pos = static_cast<size_t>(lcg >> 33) % array_size;
+      simply_multiply(A, pos);
+    }
+  }
+}
+
+// Stride of 384000 elements between consecutive accesses.
+BENCHMARK_F(InitializeInput, MemoryAccessBigJumps)(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 1000; t++) {
+      const size_t pos = (t * 384000) % array_size;
+      simply_multiply(A, pos);
+    }
+  }
+}
+
+// Stride of 49000 elements between consecutive accesses.
+BENCHMARK_F(InitializeInput, MemoryAccessMediumJumpsAccessData)
+(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 1000; t++) {
+      const size_t pos = (t * 49000) % array_size;
+      simply_multiply(A, pos);
+    }
+  }
+}
+
+// Stride of 3 elements between consecutive accesses.
+BENCHMARK_F(InitializeInput, MemoryAccessSmallJumpsAccessData)
+(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 1000; t++) {
+      const size_t pos = (t * 3) % array_size;
+      simply_multiply(A, pos);
+    }
+  }
+}
+
+// Consecutive elements, updated in place.
+BENCHMARK_F(InitializeInput, MemoryAccessNoJump)
+(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 1000; t++) {
+      const size_t pos = (t) % array_size;
+      simply_multiply(A, pos);
+    }
+  }
+}
+
+// Consecutive elements, read from A and written to B (inline helper).
+BENCHMARK_F(InitializeInput, MemoryAccessMultiplyCopyNoJump)
+(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 1000; t++) {
+      // Keeps an operation just for the sake of comparison
+      const size_t pos = (t) % array_size;
+      multiply_copy(A, B, pos);
+    }
+  }
+}
+
+// Same as above, through the out-of-line helper: one call per element.
+BENCHMARK_F(InitializeInput, MemoryAccessMultiplyCopyMultipleCallsNoJump)
+(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 1000; t++) {
+      // Keeps an operation just for the sake of comparison
+      const size_t pos = (t) % array_size;
+      multiply_copy_not_inlined(A, B, pos);
+    }
+  }
+}
+
+// Same 1000 elements, processed as 10 calls of kBatchSize elements each.
+BENCHMARK_F(InitializeInput, MemoryAccessMultiplyCopyBulkNoJump)
+(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 10; t++) {
+      // Keeps an operation just for the sake of comparison
+      const size_t pos = (t) % array_size;
+
+      multiply_copy_multiple(A, B, pos * kBatchSize, kBatchSize);
+    }
+  }
+}
+
+// Bulk variant using the "#pragma omp simd" implementation. The original
+// called multiply_copy_multiple() here, so both bulk benchmarks timed the
+// same function.
+BENCHMARK_F(InitializeInput, MemoryAccessMultiplyCopyBulkVectorizedNoJump)
+(benchmark::State& state) {
+  for (auto _ : state) {
+    for (size_t t = 0; t < 10; t++) {
+      // Keeps an operation just for the sake of comparison
+      const size_t pos = (t) % array_size;
+
+      multiply_copy_multiple_vectorized(A, B, pos * kBatchSize, kBatchSize);
+    }
+  }
+}
+
+}  // namespace
diff --git a/benchmarks/main.cpp b/benchmarks/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..71fefa047228720dd970aefbfca70030a737f9a4
--- /dev/null
+++ b/benchmarks/main.cpp
@@ -0,0 +1,3 @@
+#include <benchmark/benchmark.h>
+
+BENCHMARK_MAIN();
diff --git a/benchmarks/matrix_multiplication.cpp b/benchmarks/matrix_multiplication.cpp
index fb0f0f7b3276f4959e7e7665a9dc6d70c5bdd0fb..75a058de04e513b0b3869881e575e5c882b3602e 100644
--- a/benchmarks/matrix_multiplication.cpp
+++ b/benchmarks/matrix_multiplication.cpp
@@ -109,5 +109,3 @@ BENCHMARK_F(InitializeInput, MatrixMultiplicationSSE)
   }
 }
 #endif
-
-BENCHMARK_MAIN();
diff --git a/code/access_to_memory.cpp b/code/access_to_memory.cpp
new file mode 100644
--- /dev/null
+++ b/code/access_to_memory.cpp
@@ -0,0 +1,30 @@
+#include "access_to_memory.h"
+
+#include <cstddef>
+
+// Out-of-line counterpart of multiply_copy():
+// out[index] = 2 * input[index] * index.
+void multiply_copy_not_inlined(float* input, float* out, size_t index) {
+  out[index] = 2.0f * input[index] * index;
+}
+
+// Applies the multiply_copy() formula to the batch_size consecutive elements
+// starting at index.
+void multiply_copy_multiple(float* input, float* out, size_t index,
+                            size_t batch_size) {
+  // size_t counter: the original int counter was compared against the
+  // unsigned batch_size (sign-compare warning, overflow for huge batches).
+  for (size_t i = 0; i < batch_size; i++) {
+    out[index + i] = 2.0f * input[index + i] * (index + i);
+  }
+}
+
+// Same loop as multiply_copy_multiple(), annotated with "#pragma omp simd".
+// The pragma is only honoured when the build enables OpenMP SIMD support.
+void multiply_copy_multiple_vectorized(float* input, float* out, size_t index,
+                                       size_t batch_size) {
+#pragma omp simd
+  for (size_t i = 0; i < batch_size; i++) {
+    out[index + i] = 2.0f * input[index + i] * (index + i);
+  }
+}
diff --git a/code/access_to_memory.h b/code/access_to_memory.h
new file mode 100644
--- /dev/null
+++ b/code/access_to_memory.h
@@ -0,0 +1,42 @@
+#ifndef ACCESS_TO_MEMORY_H_
+#define ACCESS_TO_MEMORY_H_
+
+#include <algorithm>
+#include <cstddef>
+#include <random>
+
+// Fills a[0, size) with random floats drawn uniformly from [-1, 1). The
+// generator is seeded from std::random_device, so values normally differ
+// between runs.
+inline void Initialize(float* a, size_t size) {
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<float> dis(-1.0f, 1.0f);
+  for (size_t i = 0; i < size; i++) {
+    a[i] = dis(gen);
+  }
+}
+
+// In-place update: data[index] = 2 * data[index] * index.
+inline void simply_multiply(float* data, size_t index) {
+  data[index] = 2.0f * data[index] * index;
+}
+
+// Copying update: out[index] = 2 * input[index] * index.
+inline void multiply_copy(float* input, float* out, size_t index) {
+  out[index] = 2.0f * input[index] * index;
+}
+
+// Same computation as multiply_copy(), defined in access_to_memory.cpp.
+void multiply_copy_not_inlined(float* input, float* out, size_t index);
+
+// Applies the multiply_copy() formula to the batch_size consecutive elements
+// starting at index.
+void multiply_copy_multiple(float* input, float* out, size_t index,
+                            size_t batch_size);
+
+// multiply_copy_multiple() with "#pragma omp simd" on its loop. Previously
+// defined in access_to_memory.cpp but not declared here.
+void multiply_copy_multiple_vectorized(float* input, float* out, size_t index,
+                                       size_t batch_size);
+#endif  // ACCESS_TO_MEMORY_H_