diff --git a/CMakeLists.txt b/CMakeLists.txt
index 74d8f4150e9a3bda53108ffeec6fe0f40f7943ac..47bc2e8735199e14d344adf37a67a6282f3a9887 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -278,7 +278,10 @@ endif()
 
 # Include aocommon
 include_directories(${CMAKE_SOURCE_DIR}/external/aocommon/include/)
+# TODO(AST-1278): Try using a new (release) version of XSimd in aocommon.
+set(XSIMD_GIT_TAG 2f5eddf8912c7e2527f0c50895c7560b964d29af)
 include(external/aocommon/CMake/FetchXTensor.cmake)
+add_compile_definitions(XTENSOR_USE_XSIMD)
 
 # Include pybind11
 set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}")
@@ -370,7 +373,7 @@ add_library(
   ddecal/gain_solvers/SolverTools.cc
   ddecal/linear_solvers/LLSSolver.cc
   ${DDE_ARMADILLO_FILES})
-target_link_libraries(DDECal xtensor)
+target_link_libraries(DDECal xsimd xtensor)
 
 # 'Base' files and steps are only included in DP3.
 add_library(
@@ -449,7 +452,7 @@ add_library(
   steps/ApplyBeam.cc
   steps/DemixerNew.cc
   steps/NullStokes.cc)
-target_link_libraries(DP3_OBJ xtensor)
+target_link_libraries(DP3_OBJ xsimd xtensor)
 
 add_library(
   ParmDB OBJECT
diff --git a/antennaflagger/Flagger.cc b/antennaflagger/Flagger.cc
index a4297c502387625e5659694a214deb4c9866a9b1..0e4f20f1bef3bbcdd57b0190968fb3df37bad1b8 100644
--- a/antennaflagger/Flagger.cc
+++ b/antennaflagger/Flagger.cc
@@ -17,30 +17,29 @@
 #include "Flagger.h"
 
 namespace {
-xt::xtensor<bool, 1> FindOutliers(float sigma, size_t max_iterations,
-                                  const xt::xtensor<float, 1>& data) {
+xt::xtensor<int, 1> FindOutliers(float sigma, size_t max_iterations,
+                                 xt::xtensor<float, 1>&& data) {
   // All non finite numbers are flagged as outlier
-  xt::xtensor<bool, 1> flags = !xt::isfinite(data);
-  xt::xtensor<float, 1> data_copy = data;
+  xt::xtensor<int, 1> flags = !xt::isfinite(data);
 
   // Take the median of the finite numbers only, otherwise the median is NaN.
   const float median = xt::median(xt::filter(data, !flags));
 
   // Overwrite NaN's with median to workaround limitation of xt::stddev.
-  xt::masked_view(data_copy, flags) = median;
+  xt::masked_view(data, flags) = median;
 
   for (size_t i = 0; i < max_iterations; ++i) {
-    const float stddev = xt::stddev(data_copy)();
+    const float stddev = xt::stddev(data)();
 
     size_t outlier_count = 0;
     const float lower_bound = median - (sigma * stddev);
     const float upper_bound = median + (sigma * stddev);
 
     for (size_t j = 0; j < data.size(); ++j) {
-      const float value = data_copy[j];
+      const float value = data[j];
       if (!flags(j) && (value < lower_bound || value > upper_bound)) {
         flags(j) = true;
-        data_copy(j) = median;
+        data(j) = median;
         ++outlier_count;
       }
     }
@@ -102,32 +101,30 @@ xt::xtensor<std::complex<float>, 3> Flagger::GroupStats(
   return stats_antenna;
 }
 
-xt::xtensor<bool, 2> Flagger::ComputeAntennaFlags(
+xt::xtensor<int, 2> Flagger::ComputeAntennaFlags(
     float sigma, int max_iterations,
     const xt::xtensor<std::complex<float>, 3>& stats) {
   const size_t n_stations = stats.shape(0);
   const size_t n_antennas_per_station = stats.shape(1);
   const size_t n_correlations = stats.shape(2);
 
-  xt::xtensor<bool, 2> flags({n_stations, n_antennas_per_station}, false);
+  xt::xtensor<int, 2> flags({n_stations, n_antennas_per_station}, false);
 
   for (size_t station = 0; station < n_stations; ++station) {
     for (size_t cor = 0; cor < n_correlations; ++cor) {
-      const auto check_sum =
+      const xt::xtensor<std::complex<float>, 1> check_sum =
           xt::log(1.0f + xt::view(stats, station, xt::all(), cor));
-      const xt::xtensor<float, 1> check_sum_real = xt::real(check_sum);
-      const xt::xtensor<float, 1> check_sum_imag = xt::imag(check_sum);
 
       xt::view(flags, station, xt::all()) |=
-          FindOutliers(sigma, max_iterations, check_sum_real) |
-          FindOutliers(sigma, max_iterations, check_sum_imag);
+          FindOutliers(sigma, max_iterations, xt::real(check_sum)) |
+          FindOutliers(sigma, max_iterations, xt::imag(check_sum));
     }
   }
 
   return flags;
 }
 
-xt::xtensor<bool, 2> Flagger::ComputeStationFlags(
+xt::xtensor<int, 2> Flagger::ComputeStationFlags(
     float sigma, int max_iterations,
     const xt::xtensor<std::complex<float>, 3>& stats) {
   const size_t n_stations = stats.shape(0);
@@ -136,7 +133,7 @@ xt::xtensor<bool, 2> Flagger::ComputeStationFlags(
   // In case of n_correlations == 4, use only XX and YY (index 0 and 3),
   // otherwise use only XX (index 0).
   const size_t n_correlations_out = n_correlations == 4 ? 2 : 1;
-  xt::xtensor<bool, 2> flags({n_stations, n_correlations_out}, false);
+  xt::xtensor<int, 2> flags({n_stations, n_correlations_out}, false);
 
   const xt::xtensor<float, 3> stats_real = xt::real(stats);
   const xt::xtensor<float, 3> stats_imag = xt::imag(stats);
@@ -164,14 +161,14 @@ xt::xtensor<bool, 2> Flagger::ComputeStationFlags(
     const float scale_real = xt::median(stats_real_cor);
     const float scale_imag = xt::median(stats_imag_cor);
 
-    const xt::xtensor<float, 1> median_real =
+    xt::xtensor<float, 1> median_real =
         xt::median(stats_real_cor, 1) / scale_real;
-    const xt::xtensor<float, 1> median_imag =
+    xt::xtensor<float, 1> median_imag =
         xt::median(stats_imag_cor, 1) / scale_imag;
 
     xt::view(flags, xt::all(), cor_out) =
-        FindOutliers(sigma, max_iterations, median_real) |
-        FindOutliers(sigma, max_iterations, median_imag);
+        FindOutliers(sigma, max_iterations, std::move(median_real)) |
+        FindOutliers(sigma, max_iterations, std::move(median_imag));
   }
 
   return flags;
@@ -200,44 +197,44 @@ void Flagger::AssertStatsComputed() const {
   assert(stats_antenna_stddev_.shape() == stats_antenna_sum_square_.shape());
 }
 
-xt::xtensor<bool, 1> Flagger::FindBadAntennas(float sigma, int max_iterations) {
+xt::xtensor<int, 1> Flagger::FindBadAntennas(float sigma, int max_iterations) {
   AssertStatsComputed();
 
   find_bad_antennas_timer_.start();
 
-  const xt::xtensor<bool, 2> flags_stddev = Flagger::ComputeAntennaFlags(
+  const xt::xtensor<int, 2> flags_stddev = Flagger::ComputeAntennaFlags(
       sigma, max_iterations, stats_antenna_stddev_);
   assert(flags_stddev.shape(0) == n_stations_);
   assert(flags_stddev.shape(1) == n_antennas_per_station_);
 
-  const xt::xtensor<bool, 2> flags_sum_square = Flagger::ComputeAntennaFlags(
+  const xt::xtensor<int, 2> flags_sum_square = Flagger::ComputeAntennaFlags(
       sigma, max_iterations, stats_antenna_sum_square_);
 
-  const xt::xtensor<bool, 1> flagged_antennas =
-      xt::flatten(flags_stddev) && xt::flatten(flags_sum_square);
+  const xt::xtensor<int, 1> flagged_antennas =
+      xt::flatten(flags_stddev) & xt::flatten(flags_sum_square);
 
   find_bad_antennas_timer_.stop();
 
   return flagged_antennas;
 }
 
-xt::xtensor<bool, 1> Flagger::FindBadStations(float sigma, int max_iterations) {
+xt::xtensor<int, 1> Flagger::FindBadStations(float sigma, int max_iterations) {
   AssertStatsComputed();
 
   find_bad_stations_timer_.start();
 
-  const xt::xtensor<bool, 2> flags_stddev = Flagger::ComputeStationFlags(
+  const xt::xtensor<int, 2> flags_stddev = Flagger::ComputeStationFlags(
       sigma, max_iterations, stats_antenna_stddev_);
   assert(flags_stddev.shape(0) == n_stations_);
   assert(flags_stddev.shape(1) == (n_correlations_ == 4 ? 2 : 1));
 
-  const xt::xtensor<bool, 2> flags_sum_square = Flagger::ComputeStationFlags(
+  const xt::xtensor<int, 2> flags_sum_square = Flagger::ComputeStationFlags(
       sigma, max_iterations, stats_antenna_sum_square_);
 
   const xt::xtensor<size_t, 1> station_indices = xt::flatten_indices(
       xt::where(xt::prod(flags_stddev & flags_sum_square, {1})));
 
-  xt::xtensor<bool, 1> antenna_flags({n_antennas_}, false);
+  xt::xtensor<int, 1> antenna_flags({n_antennas_}, false);
 
   for (size_t station : station_indices) {
     const size_t first_antenna = station * n_antennas_per_station_;
diff --git a/antennaflagger/Flagger.h b/antennaflagger/Flagger.h
index c28a8efa4f4ae5f6223ce5abc086bb8f721b5d81..3eada63d0589854c718ede86ee90244f68497de4 100644
--- a/antennaflagger/Flagger.h
+++ b/antennaflagger/Flagger.h
@@ -1,11 +1,11 @@
-// Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy)
+// Copyright (C) 2023 ASTRON (Netherlands Institute for Radio Astronomy)
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #ifndef DP3_ANTENNAFLAGGER_FLAGGER_H_
 #define DP3_ANTENNAFLAGGER_FLAGGER_H_
 
-#include <vector>
 #include <complex>
+#include <vector>
 
 #include <aocommon/xt/span.h>
 
@@ -63,17 +63,18 @@ class Flagger {
    * @return Flags as a tensor of booleans of length n_stations *
    * n_antennas_per_station.
    */
-  xt::xtensor<bool, 1> FindBadAntennas(float sigma, int max_iterations);
+  xt::xtensor<int, 1> FindBadAntennas(float sigma, int max_iterations);
 
   /**
    * Identify stations that are outliers compared to the other stations.
    * In case of n_antennas_per_station > 1, a per-station statistic over all
    * antennas in a station is used.
    *
-   * @return Flags as a tensor of booleans of length n_stations *
-   * n_antennas_per_station.
+   * @return Flags as a tensor of integers of length n_stations *
+   * n_antennas_per_station. The booleans are 'int' instead of 'bool' since
+   * XSimd otherwise converts booleans to integers and back.
    */
-  xt::xtensor<bool, 1> FindBadStations(float sigma, int max_iterations);
+  xt::xtensor<int, 1> FindBadStations(float sigma, int max_iterations);
 
   /**
    * Compute standard deviation for the real and imaginary component seperately
@@ -111,10 +112,11 @@ class Flagger {
    * The input is assumed to be in the form (n_stations, n_antennas_per_station,
    * n_correlations).
    *
-   * @return Flags as a tensor of booleans in the form (n_stations,
-   * n_antennas_per_station).
+   * @return Flags as a tensor of integers in the form (n_stations,
+   * n_antennas_per_station). The flags are 'int' instead of 'bool' since
+   * XSimd otherwise converts booleans to integers and back.
    */
-  static xt::xtensor<bool, 2> ComputeAntennaFlags(
+  static xt::xtensor<int, 2> ComputeAntennaFlags(
       float sigma, int max_iterations,
       const xt::xtensor<std::complex<float>, 3>& stats);
 
@@ -125,8 +127,10 @@ class Flagger {
    * the form (n_stations, n_antennas_per_station, n_correlations).
    *
    * @return Flags as a tensor of booleans in the form (n_stations, 1 || 2).
+   * The booleans are 'int' instead of 'bool' since XSimd otherwise converts
+   * booleans to integers and back.
    */
-  static xt::xtensor<bool, 2> ComputeStationFlags(
+  static xt::xtensor<int, 2> ComputeStationFlags(
       float sigma, int max_iterations,
       const xt::xtensor<std::complex<float>, 3>& stats);
 
diff --git a/ddecal/constraints/AntennaConstraint.h b/ddecal/constraints/AntennaConstraint.h
index d5e3e431d2e6c0f1044753456324995789476e2e..4481bf79e1d33182499072d7165ed6ef937188dc 100644
--- a/ddecal/constraints/AntennaConstraint.h
+++ b/ddecal/constraints/AntennaConstraint.h
@@ -9,6 +9,8 @@
 #include <set>
 #include <vector>
 
+#include <xtensor/xcomplex.hpp>
+#include <xtensor/xmasked_view.hpp>
 #include <xtensor/xview.hpp>
 
 namespace dp3 {
@@ -47,17 +49,24 @@ class AntennaConstraint final : public Constraint {
         xt::xtensor<size_t, 2> solution_count_per_set =
             xt::zeros<size_t>({n_directions, n_polarizations});
 
+        // Declaring these variables outside the loop allows reusing memory.
+        xt::xtensor<dcomplex, 2> solution_set;
+        xt::xtensor<std::size_t, 2> solution_set_is_finite;
+
         // Calculate the sum of solutions over the set of stations
         for (size_t antenna_index : antenna_set) {
-          auto solution_set =
+          solution_set =
               xt::view(solutions, ch, antenna_index, xt::all(), xt::all());
-          auto solution_set_is_finite = xt::isfinite(solution_set);
-          solution_per_set += solution_set_is_finite * solution_set;
+          solution_set_is_finite = xt::isfinite(solution_set);
+          xt::masked_view(solution_set, !solution_set_is_finite)
+              .fill(std::complex<double>(0.0, 0.0));
+
+          solution_per_set += solution_set;
           solution_count_per_set += solution_set_is_finite;
         }
 
         // Divide by nr of core stations to get the mean solution
-        solution_per_set /= solution_count_per_set;
+        solution_per_set /= xt::cast<double>(solution_count_per_set);
 
         // Assign all core stations to the mean solution
         for (size_t antenna_index : antenna_set) {
diff --git a/ddecal/gain_solvers/SolverTools.cc b/ddecal/gain_solvers/SolverTools.cc
index 77b29c027f3e1550fed6cb722de70869601b925d..488129eabe2b9348ea80a2016ea6e917af50c579 100644
--- a/ddecal/gain_solvers/SolverTools.cc
+++ b/ddecal/gain_solvers/SolverTools.cc
@@ -42,7 +42,10 @@ void AssignAndWeight(
     // flagged. Although the initialization to false can be avoided by directly
     // assigning flags for the first correlation, a benchmark showed that the
     // (simpler) code below gives slightly better performance.
-    xt::xtensor<bool, 2> flags(
+    // Use 'int' instead of 'bool' for flags since XSimd uses integers when
+    // or'ing flags. Using integers directly prevents unpacking booleans to
+    // integers beforehand and packing integers to booleans afterwards.
+    xt::xtensor<int, 2> flags(
         {unweighted_data.shape(0), unweighted_data.shape(1)}, false);
     for (std::size_t correlation = 0; correlation < n_correlations;
          ++correlation) {
@@ -50,8 +53,9 @@ void AssignAndWeight(
                !xt::isfinite(xt::view(unweighted_data, xt::all(), xt::all(),
                                       correlation));
       for (const std::string& name : direction_names) {
-        flags |= !xt::isfinite(xt::view(unweighted_buffer.GetData(name),
-                                        xt::all(), xt::all(), correlation));
+        flags |= xt::cast<int>(
+            !xt::isfinite(xt::view(unweighted_buffer.GetData(name), xt::all(),
+                                   xt::all(), correlation)));
       }
     }
 
diff --git a/steps/AntennaFlagger.cc b/steps/AntennaFlagger.cc
index 3b5f05a9d73ba78f479f74b5bad6cd79bb0dfe53..4ecbf7ac19a12f99440e755fb30ae9ff3852bb59 100644
--- a/steps/AntennaFlagger.cc
+++ b/steps/AntennaFlagger.cc
@@ -100,7 +100,7 @@ bool AntennaFlagger::process(std::unique_ptr<base::DPBuffer> buffer) {
   flagger_->ComputeStats(buffer->GetData(), baseline_order_);
 
   // Find outlier antennas
-  const xt::xtensor<bool, 1> antenna_flags =
+  const xt::xtensor<int, 1> antenna_flags =
       flagger_->FindBadAntennas(antenna_flagging_sigma_,
                                 antenna_flagging_max_iterations_) |
       flagger_->FindBadStations(station_flagging_sigma_,
diff --git a/steps/PreFlagger.cc b/steps/PreFlagger.cc
index 242a4eaf3dda89bc1c368c2207226aef71586d73..2f3574e9742cc38e7acb65d2c9dc85f2e9cbe8aa 100644
--- a/steps/PreFlagger.cc
+++ b/steps/PreFlagger.cc
@@ -131,7 +131,7 @@ bool PreFlagger::process(std::unique_ptr<DPBuffer> buffer) {
   assert(buffer->GetFlags().size() != 0);
   // Do the PSet steps and combine the result with the current flags.
   // Only count if the flag changes.
-  const xt::xtensor<bool, 3>* const flags =
+  const xt::xtensor<int, 3>* const flags =
       itsPSet.process(*buffer, itsCount, xt::xtensor<bool, 1>(), itsTimer);
   switch (itsMode) {
     case Mode::kSetFlag:
@@ -324,7 +324,7 @@ void PreFlagger::PSet::updateInfo(const DPInfo& info) {
 void PreFlagger::PSet::fillChannels(const DPInfo& info) {
   unsigned int nrcorr = info.ncorr();
   unsigned int nrchan = info.nchan();
-  xt::xtensor<bool, 1> selChan(std::array<size_t, 1>{nrchan});
+  xt::xtensor<int, 1> selChan(std::array<size_t, 1>{nrchan});
   if (itsStrChan.empty()) {
     selChan.fill(true);
   } else {
@@ -449,7 +449,7 @@ void PreFlagger::PSet::show(std::ostream& os, bool showName) const {
   }
 }
 
-xt::xtensor<bool, 3>* PreFlagger::PSet::process(
+xt::xtensor<int, 3>* PreFlagger::PSet::process(
     DPBuffer& out, unsigned int timeSlot, const xt::xtensor<bool, 1>& matchBL,
     common::NSTimer& timer) {
   // No need to process it if the time mismatches or if only time selection.
@@ -495,7 +495,7 @@ xt::xtensor<bool, 3>* PreFlagger::PSet::process(
     return &itsFlags;
   }
   // Convert each baseline flag to a flag per correlation/channel.
-  bool* flagPtr = itsFlags.data();
+  int* flagPtr = itsFlags.data();
   for (unsigned int i = 0; i < itsMatchBL.size(); ++i) {
     if (itsMatchBL[i]) {
       std::fill(flagPtr, flagPtr + nr, itsMatchBL[i]);
@@ -525,17 +525,17 @@ xt::xtensor<bool, 3>* PreFlagger::PSet::process(
   // are reused to AND or OR subexpressions. This can be done harmlessly
   // and saves the creation of too many arrays.
   if (!itsPSets.empty()) {
-    std::stack<xt::xtensor<bool, 3>*> results;
+    std::stack<xt::xtensor<int, 3>*> results;
     for (int oper : itsRpn) {
       if (oper >= 0) {
         results.push(itsPSets[oper]->process(out, timeSlot, itsMatchBL, timer));
       } else if (oper == OpNot) {
-        xt::xtensor<bool, 3>* left = results.top();
+        xt::xtensor<int, 3>* left = results.top();
         *left = !*left;
       } else if (oper == OpOr || oper == OpAnd) {
-        xt::xtensor<bool, 3>* right = results.top();
+        xt::xtensor<int, 3>* right = results.top();
         results.pop();
-        xt::xtensor<bool, 3>* left = results.top();
+        xt::xtensor<int, 3>* left = results.top();
         if (oper == OpOr) {
           *left |= *right;
         } else {
@@ -550,7 +550,7 @@ xt::xtensor<bool, 3>* PreFlagger::PSet::process(
       throw std::runtime_error(
           "Something went wrong while evaluating expression: results.size() != "
           "1");
-    xt::xtensor<bool, 3>* mflags = results.top();
+    xt::xtensor<int, 3>* mflags = results.top();
     itsFlags &= *mflags;
   }
   return &itsFlags;
@@ -709,7 +709,7 @@ void PreFlagger::PSet::flagAmpl(
 
   const xt::xtensor<float, 3> amplitudes = xt::abs(data);
   const float* valPtr = amplitudes.data();
-  bool* flagPtr = itsFlags.data();
+  int* flagPtr = itsFlags.data();
   for (unsigned int i = 0; i < nr; ++i) {
     bool flag = false;
     for (unsigned int j = 0; j < nrcorr; ++j) {
@@ -733,7 +733,7 @@ void PreFlagger::PSet::flagPhase(
 
   const xt::xtensor<float, 3> phases = xt::arg(data);
   const float* valPtr = phases.data();
-  bool* flagPtr = itsFlags.data();
+  int* flagPtr = itsFlags.data();
   for (unsigned int i = 0; i < nr; ++i) {
     bool flag = false;
     for (unsigned int j = 0; j < nrcorr; ++j) {
@@ -756,7 +756,7 @@ void PreFlagger::PSet::flagReal(
   const std::size_t nrcorr = data.shape(2);
 
   const std::complex<float>* valPtr = data.data();
-  bool* flagPtr = itsFlags.data();
+  int* flagPtr = itsFlags.data();
   for (unsigned int i = 0; i < nr; ++i) {
     bool flag = false;
     for (unsigned int j = 0; j < nrcorr; ++j) {
@@ -780,7 +780,7 @@ void PreFlagger::PSet::flagImag(
   const std::size_t nrcorr = data.shape(2);
 
   const std::complex<float>* valPtr = data.data();
-  bool* flagPtr = itsFlags.data();
+  int* flagPtr = itsFlags.data();
   for (unsigned int i = 0; i < nr; ++i) {
     bool flag = false;
     for (unsigned int j = 0; j < nrcorr; ++j) {
@@ -1058,10 +1058,10 @@ void PreFlagger::PSet::fillBLMatrix() {
   }
 }
 
-xt::xtensor<bool, 1> PreFlagger::PSet::handleFreqRanges(
+xt::xtensor<int, 1> PreFlagger::PSet::handleFreqRanges(
     const std::vector<double>& chanFreqs) {
   unsigned int nrchan = chanFreqs.size();
-  xt::xtensor<bool, 1> selChan({nrchan}, false);
+  xt::xtensor<int, 1> selChan({nrchan}, false);
   // A frequency range can be given as  value..value or value+-value.
   // Units can be given for each value; if one is given it applies to both.
   // Default unit is MHz.
diff --git a/steps/PreFlagger.h b/steps/PreFlagger.h
index a9b6e02078008350f200c5c7b2ce017bfba1e4f6..7fc2d729e93d13d2f9f86793cddc3981871a66a1 100644
--- a/steps/PreFlagger.h
+++ b/steps/PreFlagger.h
@@ -3,7 +3,7 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 /// @file
-/// @brief DPPP step class to flag data on channel, baseline, or time
+/// @brief DP3 step class to flag data on channel, baseline, or time
 /// @author Ger van Diepen
 
 #ifndef DP3_STEPS_PREFLAGGER_H_
@@ -23,7 +23,7 @@
 
 namespace dp3::steps {
 
-/// @brief DPPP step class to flag data on channel, baseline, or time
+/// @brief DP3 step class to flag data on channel, baseline, or time
 
 /// This class is a Step class flagging data points based on data
 /// selections given in the parset file.
@@ -128,9 +128,9 @@ class PreFlagger : public Step {
     }
 
     /// Set and return the flags.
-    xt::xtensor<bool, 3>* process(base::DPBuffer&, unsigned int timeSlot,
-                                  const xt::xtensor<bool, 1>& matchBL,
-                                  common::NSTimer& timer);
+    xt::xtensor<int, 3>* process(base::DPBuffer&, unsigned int timeSlot,
+                                 const xt::xtensor<bool, 1>& matchBL,
+                                 common::NSTimer& timer);
 
     /// Update the general info.
     /// It is used to adjust the parms if needed.
@@ -208,7 +208,7 @@ class PreFlagger : public Step {
 
     /// Handle the frequency ranges given and determine which channels
     /// have to be flagged.
-    xt::xtensor<bool, 1> handleFreqRanges(const std::vector<double>& chanFreqs);
+    xt::xtensor<int, 1> handleFreqRanges(const std::vector<double>& chanFreqs);
 
     /// Get the value and possible unit.
     /// If no unit is given, the argument is left untouched.
@@ -267,7 +267,9 @@ class PreFlagger : public Step {
     std::vector<int> itsRpn;              ///< PSet expression in RPN form
     std::vector<PSet::ShPtr> itsPSets;    ///< PSets used in itsRpn
     xt::xtensor<bool, 2> itsChanFlags;    ///< flags for channels to be flagged
-    xt::xtensor<bool, 3> itsFlags;
+    // Using 'int' instead of 'bool' in itsFlags works better with XSimd, since
+    // XSimd uses an 'int' representation anyway for boolean operations.
+    xt::xtensor<int, 3> itsFlags;
     xt::xtensor<bool, 1> itsMatchBL;  ///< true = baseline in buffer matches
   };
 
diff --git a/steps/test/unit/tPreFlagger.cc b/steps/test/unit/tPreFlagger.cc
index 3b6fe8e75fa7ed8b9884b3229b352282c1cd9673..c539fc27f5091ade3d344b6bd0ec0f7273fcf300 100644
--- a/steps/test/unit/tPreFlagger.cc
+++ b/steps/test/unit/tPreFlagger.cc
@@ -123,7 +123,7 @@ class TestInput : public dp3::steps::MockInput {
     buffer->ResizeData(shape);
     for (std::size_t i = 0; i < buffer->GetData().size(); ++i) {
       buffer->GetData().data()[i] =
-          std::complex<float>(i + itsCount * 10, i - 10 + itsCount * 6);
+          std::complex<float>(i + itsCount * 10, int(i) - 10 + itsCount * 6);
     }
 
     buffer->ResizeUvw(itsNBl);