From 0d25368ee82c1fc5f4b4a8e6f7d352da84f05859 Mon Sep 17 00:00:00 2001
From: Wiebe van Breukelen <breukelen@astron.nl>
Date: Thu, 1 May 2025 11:43:30 +0200
Subject: [PATCH] Fixed AVX2 support for radec2lmn.

We have to make sure that SmartVector is actually aligned. Then we
can switch back to the aligned mode. The current implementation does not
guarantee that the data is aligned.
---
 include/Directions.h         | 31 ++++++++++++++++++++++++-------
 include/common/SmartVector.h |  6 +++++-
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/include/Directions.h b/include/Directions.h
index 1534eb8..b17b97a 100644
--- a/include/Directions.h
+++ b/include/Directions.h
@@ -14,6 +14,7 @@
 #include <common/SmartVector.h>
 
 #include <vector>
+#include <xsimd/types/xsimd_avx512f_register.hpp>
 #include <xtensor/xmath.hpp>
 #include <xtensor/xview.hpp>
 
@@ -145,16 +146,18 @@ struct radec2lmn {
 template <class C, class Tag, class Arch>
 void radec2lmn::operator()(Arch, const Direction &reference, const C &ra,
                            const C &dec, xt::xtensor<double, 2> &lmn, Tag) {
-  using b_type = xsimd::batch<double, Arch>;
+  using b_type = xsimd::batch<double>;
   std::size_t inc = b_type::size;
   std::size_t size = ra.size();
-  double sin_dec0 = std::sin(reference.dec);
-  double cos_dec0 = std::cos(reference.dec);
+  const double sin_dec0 = std::sin(reference.dec);
+  const double cos_dec0 = std::cos(reference.dec);
+
+  const ConstSmartVectorView<double> ra_view = ra.view();
   // size for which the vectorization is possible
   std::size_t vec_size = size - size % inc;
   for (std::size_t i = 0; i < vec_size; i += inc) {
-    const b_type ra_vec = b_type::load(&ra[i], Tag());
-    const b_type dec_vec = b_type::load(&dec[i], Tag());
+    const b_type ra_vec = b_type::load(&(ra_view[i]), Tag());
+    const b_type dec_vec = b_type::load(&(dec[i]), Tag());
 
     const b_type delta_ra = ra_vec - reference.ra;
 
@@ -196,8 +199,22 @@ template <>
 inline void Directions::radec2lmn<Directions::computation_strategy::MULTI_SIMD>(
     const Direction &reference, xt::xtensor<double, 2> &lmn) {
   xt::xtensor<double, 2> lmn_tmp({3, ra.size()});
-  xsimd::dispatch(xsimd::radec2lmn{})(reference, ra, dec, lmn_tmp,
-                                      xsimd::aligned_mode());
+
+  // FIXME: we have to make sure that SmartVector is actually aligned. Then we
+  // can switch back to the aligned mode. The current implementation does not
+  // guarantee that the data is aligned.
+  if (reinterpret_cast<std::uintptr_t>(ra.data()) %
+          xsimd::arch_list<xsimd::avx2>::alignment() !=
+      0) {
+    std::cerr << "Warning: ra is not aligned. Using unaligned mode."
+              << std::endl;
+    xsimd::dispatch<xsimd::arch_list<xsimd::avx2>>(xsimd::radec2lmn{})(
+        reference, ra, dec, lmn_tmp, xsimd::unaligned_mode{});
+  } else {
+    xsimd::dispatch<xsimd::arch_list<xsimd::avx2>>(xsimd::radec2lmn{})(
+        reference, ra, dec, lmn_tmp, xsimd::aligned_mode{});
+  }
+
   lmn = xt::transpose(lmn_tmp);
 }
 #endif
\ No newline at end of file
diff --git a/include/common/SmartVector.h b/include/common/SmartVector.h
index a8cd1e9..b0d4af6 100644
--- a/include/common/SmartVector.h
+++ b/include/common/SmartVector.h
@@ -18,6 +18,10 @@ using SmartVectorView =
     decltype(::xt::adapt(std::add_pointer_t<T>{}, std::array<size_t, 1UL>{}));
 ;
 
+template <typename T>
+using ConstSmartVectorView =
+    decltype(::xt::adapt(std::declval<const T *>(), std::array<size_t, 1>{}));
+
 template <typename T> class SmartVector : public std::vector<T> {
 public:
   SmartVector() : std::vector<T>() {}
@@ -25,7 +29,7 @@ public:
   SmartVector(const std::initializer_list<T> &vec) : std::vector<T>(vec) {}
   SmartVectorView<T> view() { return xt::adapt(this->data(), {this->size()}); }
 
-  const SmartVectorView<T> view() const {
+  ConstSmartVectorView<T> view() const {
     return xt::adapt(this->data(), {this->size()});
   }
 
-- 
GitLab