diff --git a/CMake/FindNVML.cmake b/CMake/FindNVML.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..3d6ca4a8e9fdeb9a0253ba46a72ece9134c95c2b
--- /dev/null
+++ b/CMake/FindNVML.cmake
@@ -0,0 +1,47 @@
+# FindNVML.cmake
+
+if(${CUDA_VERSION_STRING} VERSION_LESS "9.1")
+    string(CONCAT ERROR_MSG "--> ARCHER: Current CUDA version "
+                         ${CUDA_VERSION_STRING}
+                         " is too old. Must upgrade it to 9.1 or newer.")
+    message(FATAL_ERROR ${ERROR_MSG})
+endif()
+
+# windows, including both 32-bit and 64-bit
+if(WIN32)
+    set(NVML_NAMES nvml)
+    set(NVML_LIB_DIR "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64")
+    set(NVML_INCLUDE_DIR ${CUDA_INCLUDE_DIRS})
+
+    # .lib import library full path
+    find_file(NVML_LIB_PATH
+              NO_DEFAULT_PATH
+              NAMES nvml.lib
+              PATHS ${NVML_LIB_DIR})
+
+    # .dll full path
+    find_file(NVML_DLL_PATH
+              NO_DEFAULT_PATH
+              NAMES nvml.dll
+              PATHS "C:/Program Files/NVIDIA Corporation/NVSMI")
+# linux
+elseif(UNIX AND NOT APPLE)
+    set(NVML_NAMES nvidia-ml)
+    set(NVML_LIB_DIR "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x86_64-linux-gnu")
+    set(NVML_INCLUDE_DIR ${CUDA_INCLUDE_DIRS})
+
+    find_library(NVML_LIB_PATH
+                 NO_DEFAULT_PATH
+                 NAMES ${NVML_NAMES}
+                 PATHS ${NVML_LIB_DIR})
+else()
+    message(FATAL_ERROR "Unsupported platform.")
+endif()
+
+find_path(NVML_INCLUDE_PATH
+          NO_DEFAULT_PATH
+          NAMES nvml.h
+          PATHS ${NVML_INCLUDE_DIR})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(NVML DEFAULT_MSG NVML_LIB_PATH NVML_INCLUDE_PATH)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9a5f158b2c6fa9e559652871a534edf5a4256379..d4cad499d93a397a876e1d34f48d45ec57e3d448 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -415,6 +415,7 @@ add_library(
   base/ProgressMeter.cc
   base/Simulate.cc
   base/Simulator.cc
+  base/ComponentInfo.cc
   base/SourceDBUtil.cc
   base/Stokes.cc
   base/SubtractMixed.cc
@@ -462,7 +463,8 @@ add_library(
   steps/UVWFlagger.cc
   steps/ApplyBeam.cc
   steps/DemixerNew.cc
-  steps/NullStokes.cc)
+  steps/NullStokes.cc
+  steps/SagecalPredict.cc)
 target_link_libraries(DP3_OBJ xsimd xtensor)
 
 add_library(
@@ -507,8 +509,15 @@ if(NOT "${LIBDIRAC_PREFIX}" STREQUAL "")
   pkg_search_module(LIBDIRAC libdirac)
 endif()
 if(LIBDIRAC_FOUND)
-  message("-- Found libdirac: ${LIBDIRAC_INCLUDE_DIRS}")
+  message(STATUS "Found libdirac: ${LIBDIRAC_INCLUDE_DIRS}")
   include_directories(${LIBDIRAC_INCLUDE_DIRS})
+  if(HAVE_CUDA)
+    enable_language(CUDA)
+    find_package(CUDA QUIET REQUIRED)
+    find_package(NVML REQUIRED)
+    message(STATUS "CUDA_LIBRARIES ............ = ${CUDA_LIBRARIES}")
+    include_directories(${CUDA_INCLUDE_DIRS})
+  endif()
 endif()
 
 set(DP3_LIBRARIES
@@ -527,10 +536,29 @@ set(DP3_LIBRARIES
 
 # If libdirac is found, use it
 if(LIBDIRAC_FOUND)
-  # add preprocessor def
-  add_definitions(-DHAVE_LIBDIRAC)
-  # add link flags
-  set(DP3_LIBRARIES ${DP3_LIBRARIES} ${LIBDIRAC_LINK_LIBRARIES})
+  if(HAVE_CUDA)
+    # if we use libdirac with CUDA support, we enable a different preprocessor def
+    # so as not to conflict with CPU only version of libdirac
+    add_definitions(-DHAVE_LIBDIRAC_CUDA)
+    add_definitions(-DHAVE_CUDA)
+    # add link flags
+    set(DP3_LIBRARIES ${DP3_LIBRARIES} ${LIBDIRAC_LINK_LIBRARIES})
+    # add preprocessor def
+    add_definitions(-DHAVE_CUDA)
+    set(DP3_LIBRARIES
+        ${DP3_LIBRARIES}
+        ${CUDA_LIBRARIES}
+        ${CUDA_CUBLAS_LIBRARIES}
+        ${CUDA_CUFFT_LIBRARIES}
+        ${CUDA_cusolver_LIBRARY}
+        ${CUDA_cudadevrt_LIBRARY}
+        ${NVML_LIB_PATH})
+  else()
+    # add preprocessor def
+    add_definitions(-DHAVE_LIBDIRAC)
+    # add link flags
+    set(DP3_LIBRARIES ${DP3_LIBRARIES} ${LIBDIRAC_LINK_LIBRARIES})
+  endif()
 endif()
 
 # Perform the BLAS check from aocommon
diff --git a/base/ComponentInfo.cc b/base/ComponentInfo.cc
new file mode 100644
index 0000000000000000000000000000000000000000..138fbcb8fdde4414d0f88654766060c3e69b5f9d
--- /dev/null
+++ b/base/ComponentInfo.cc
@@ -0,0 +1,60 @@
+// ComponentInfo.h: Class for getting information from model component
+// hierarchies.
+//
+// Copyright (C) 2020 ASTRON (Netherlands Institute for Radio Astronomy)
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+
+#include <iostream>
+#include <algorithm>
+
+#include "ComponentInfo.h"
+#include "GaussianSource.h"
+#include "PointSource.h"
+
+namespace dp3 {
+namespace base {
+
+ComponentInfo::ComponentInfo() {}
+
+void ComponentInfo::inspect(
+    const std::shared_ptr<const ModelComponent>& component) {
+  component->accept(*this);
+}
+
+void ComponentInfo::update(const PointSource& component) {
+  const Direction& direction = component.direction();
+  ra_ = direction.ra;
+  dec_ = direction.dec;
+  const Stokes& stokes = component.stokes();
+  sI_ = stokes.I;
+  sQ_ = stokes.Q;
+  sU_ = stokes.U;
+  sV_ = stokes.V;
+  const std::vector<double>& spec = component.spectrum();
+  // Note: we can only handle spectra with 3 terms
+  if (spec.size() > 3) {
+    throw std::runtime_error(
+        "Cannot handle spectra with more than three components");
+  }
+  const size_t spec_size = (spec.size() > 3 ? 3 : spec.size());
+  for (size_t index = 0; index < spec_size; index++) {
+    spectrum_[index] = spec[index];
+  }
+  f0_ = component.referenceFreq();
+}
+
+void ComponentInfo::visit(const PointSource& component) {
+  update(component);
+  source_type_ = kPoint;
+}
+
+void ComponentInfo::visit(const GaussianSource& component) {
+  update(component);
+  source_type_ = kGaussian;
+  g_pa_ = component.getPositionAngle();
+  g_major_ = component.getMajorAxis();
+  g_minor_ = component.getMinorAxis();
+}
+}  // namespace base
+}  // namespace dp3
diff --git a/base/ComponentInfo.h b/base/ComponentInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..a9aa6f1b1a1143469ed6fc87100902ca4a0ac216
--- /dev/null
+++ b/base/ComponentInfo.h
@@ -0,0 +1,51 @@
+// ComponentInfo.h: Class for getting information from model component
+// hierarchies.
+//
+// Copyright (C) 2020 ASTRON (Netherlands Institute for Radio Astronomy)
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef DP3_BASE_COMPONENTINFO_H
+#define DP3_BASE_COMPONENTINFO_H
+
+#include "ModelComponent.h"
+#include "ModelComponentVisitor.h"
+#include <dp3/base/Direction.h>
+#include "Stokes.h"
+#include <vector>
+
+namespace dp3 {
+namespace base {
+
+/// \brief Class for visitors that visit model component to extract information.
+
+/// @{
+
+class ComponentInfo : public ModelComponentVisitor {
+ public:
+  enum SourceType { kPoint, kGaussian };
+  ComponentInfo();
+
+  void inspect(const std::shared_ptr<const ModelComponent>& component);
+
+  double ra_, dec_;
+  double sI_, sQ_, sU_, sV_;
+  SourceType source_type_;
+  std::vector<double> spectrum_ = {0.0, 0.0, 0.0};
+  double f0_;
+
+  // Only used in Gaussians
+  double g_pa_{0.0}, g_major_{1.0}, g_minor_{1.0};
+
+ private:
+  void visit(const PointSource&) override;
+  void visit(const GaussianSource&) override;
+
+  void update(const PointSource& component);
+};
+
+/// @}
+
+}  // namespace base
+}  // namespace dp3
+
+#endif
diff --git a/base/DP3.cc b/base/DP3.cc
index e08a85346baa45f04a6592fd12a6bbf5326a6398..6db0bd5631d5eae28b369f6d0ed24dfeaf4d039b 100644
--- a/base/DP3.cc
+++ b/base/DP3.cc
@@ -43,6 +43,7 @@
 #include "../steps/PhaseShift.h"
 #include "../steps/Predict.h"
 #include "../steps/PreFlagger.h"
+#include "../steps/SagecalPredict.h"
 #include "../steps/ScaleData.h"
 #include "../steps/SetBeam.h"
 #include "../steps/Split.h"
@@ -201,6 +202,8 @@ std::shared_ptr<Step> MakeSingleStep(const std::string& type,
     step = std::make_shared<steps::DemixerNew>(parset, prefix);
   } else if (type == "grouppredict") {
     step = std::make_shared<steps::BdaGroupPredict>(parset, prefix);
+  } else if (type == "sagecalpredict") {
+    step = std::make_shared<steps::SagecalPredict>(parset, prefix);
   } else if (type == "h5parmpredict") {
     step = std::make_shared<steps::H5ParmPredict>(parset, prefix);
   } else if (type == "gaincal" || type == "calibrate") {
diff --git a/base/PointSource.cc b/base/PointSource.cc
index 608319e0ac34954fb44b73a62fdcb2bf5ef2e7d2..54352ea166878d3d9b2cbd20f99da2b066ad4ef7 100644
--- a/base/PointSource.cc
+++ b/base/PointSource.cc
@@ -98,6 +98,8 @@ Stokes PointSource::stokes(double freq) const {
   return stokes;
 }
 
+Stokes PointSource::stokes() const { return itsStokes; }
+
 void PointSource::accept(ModelComponentVisitor &visitor) const {
   visitor.visit(*this);
 }
diff --git a/base/PointSource.h b/base/PointSource.h
index 68ea3c9334a2ea02aad475de1cb294c3be4568da..b21e0b05ce1abcc1fab47fd218f02dfe17429294 100644
--- a/base/PointSource.h
+++ b/base/PointSource.h
@@ -42,6 +42,9 @@ class PointSource : public ModelComponent {
   void setRotationMeasure(double fraction, double angle, double rm);
 
   Stokes stokes(double freq) const;
+  Stokes stokes() const;
+  const std::vector<double> &spectrum() const { return itsSpectralTerms; }
+  double referenceFreq() const { return itsRefFreq; }
 
   void accept(ModelComponentVisitor &visitor) const override;
 
diff --git a/ddecal/Settings.cc b/ddecal/Settings.cc
index 7a9bfaf4aea2466bc9d960769d2d7dfe1889322c..a7c6310be908a1a564ef57707be463ae4503d517 100644
--- a/ddecal/Settings.cc
+++ b/ddecal/Settings.cc
@@ -128,6 +128,9 @@ Settings::Settings(const common::ParameterSet& _parset,
       idg_region_filename(GetString("idg.regions", "")),
       idg_image_filenames(GetStringVector("idg.images")),
 
+      // Sagecal predict
+      use_sagecal_predict(GetBool("sagecalpredict", false)),
+
       directions(GetStringVector("directions")),
       n_solutions_per_direction(
           GetSizeTVector("solutions_per_direction",
diff --git a/ddecal/Settings.h b/ddecal/Settings.h
index 937347a9dc458294262feb0dcd4b64d90a288112..ab618d412bab8484a99cb6933d958f4135bf96d9 100644
--- a/ddecal/Settings.h
+++ b/ddecal/Settings.h
@@ -142,6 +142,8 @@ struct Settings {
   const std::string idg_region_filename;
   const std::vector<std::string> idg_image_filenames;
 
+  const bool use_sagecal_predict;
+
   const std::vector<std::string> directions;
   std::vector<size_t> n_solutions_per_direction;
   const std::string source_db;
diff --git a/docs/index.rst b/docs/index.rst
index ad2e00bdf9ed013671365d2d7966ab81326cdaed..276db8477b475a5d293c1bb003993e0c997e0682 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -63,6 +63,7 @@ The following steps are possible:
     * :ref:`DDECal` to calibrate direction dependent gains.
     * :ref:`Predict` to predict the visibilities of a given sky model.
     * :ref:`H5ParmPredict` to subtract multiple directions of visibilities corrupted by an instrument model (in H5Parm) generated by DDECal.
+    * :ref:`SagecalPredict` to use SAGECal routines for model prediction, replacement for normal Predict, H5ParmPredict and also within DDECal.
     * `ApplyBeam <ApplyBeam.html>`__ to apply the LOFAR beam model, or the inverse of it.
     * :ref:`SetBeam` to set the beam keywords after prediction.
     * :ref:`ScaleData` to scale the data with a polynomial in frequency (based on SEFD of LOFAR stations).
diff --git a/docs/schemas/SagecalPredict.yml b/docs/schemas/SagecalPredict.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a6365afbf0dabfaee63f3bd25d65feb3fd5c1885
--- /dev/null
+++ b/docs/schemas/SagecalPredict.yml
@@ -0,0 +1,6 @@
+description: >-
+  Simulate visibilities for a given sky model using sagecal routines `.` This step will only work if you have libdirac available. This can be installed from https://github.com/nlesc-dirac/sagecal `.`
+inputs:
+  type:
+    type: string
+    doc: Case-insensitive step type; must be 'sagecalpredict' for model prediction, and within DDEcal, use 'ddecal.sagecalpredict=true' to enable this `.` All options for a normal predict or h5parmpredict can be used in the parameter file. Frequency smearing is always enabled.
diff --git a/steps/DDECal.cc b/steps/DDECal.cc
index ae3d27a5f7546c776f570fbf2f5cafaa593322ff..3ed1b839c76f23440f4d63a803b6c93ddbb38e1a 100644
--- a/steps/DDECal.cc
+++ b/steps/DDECal.cc
@@ -42,6 +42,7 @@
 #include "IDGPredict.h"
 #include "MsColumnReader.h"
 #include "Predict.h"
+#include "SagecalPredict.h"
 
 using aocommon::FitsReader;
 
@@ -185,7 +186,12 @@ void DDECal::initializePredictSteps(const common::ParameterSet& parset,
   auto n_solutions_iterator = itsSettings.n_solutions_per_direction.begin();
 
   for (std::vector<std::string>& direction : directions) {
-    itsSteps.push_back(std::make_shared<Predict>(parset, prefix, direction));
+    if (itsSettings.use_sagecal_predict) {
+      itsSteps.push_back(
+          std::make_shared<SagecalPredict>(parset, prefix, direction));
+    } else {
+      itsSteps.push_back(std::make_shared<Predict>(parset, prefix, direction));
+    }
     setModelNextSteps(*itsSteps.back(), direction.front(), parset, prefix);
     itsDirectionNames.push_back(prefix + direction.front());
     itsDirections.push_back(std::move(direction));
@@ -241,6 +247,11 @@ void DDECal::updateInfo(const DPInfo& infoIn) {
                    s->GetBufferSize() / itsSteps.size() / itsRequestedSolInt);
       // We increment by one so the IDGPredict will not flush in its process
       s->SetBufferSize(itsRequestedSolInt * itsSolIntCount + 1);
+    } else if (auto s = std::dynamic_pointer_cast<SagecalPredict>(step)) {
+      // Divide the full thread count to each step
+      s->setNThreads(std::min<std::size_t>(
+          (getInfo().nThreads() + itsSteps.size() - 1) / itsSteps.size(),
+          getInfo().nThreads()));
     } else if (!std::dynamic_pointer_cast<MsColumnReader>(step)) {
       throw std::runtime_error("DDECal received an invalid first model step");
     }
diff --git a/steps/SagecalPredict.cc b/steps/SagecalPredict.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d8707dfca9c56f363068ddfc287b3024a9587833
--- /dev/null
+++ b/steps/SagecalPredict.cc
@@ -0,0 +1,1258 @@
+// Copyright (C) 2023 ASTRON (Netherlands Institute for Radio Astronomy)
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <stddef.h>
+#include <string>
+#include <sstream>
+#include <utility>
+#include <vector>
+#include <iostream>
+
+#include "../common/ParameterSet.h"
+#include "../common/Timer.h"
+#include "../common/StreamUtil.h"
+
+#include "../parmdb/ParmDBMeta.h"
+#include "../parmdb/PatchInfo.h"
+#include "../parmdb/SkymodelToSourceDB.h"
+
+#include <dp3/base/DPInfo.h>
+#include <dp3/base/DPBuffer.h>
+#include "../base/FlagCounter.h"
+#include "../base/Simulate.h"
+#include "../base/Simulator.h"
+#include "../base/Stokes.h"
+#include "../base/PointSource.h"
+#include "../base/GaussianSource.h"
+#include "../base/Telescope.h"
+#include "../base/ComponentInfo.h"
+
+#include "../parmdb/SourceDB.h"
+#include <casacore/casa/Arrays/Cube.h>
+
+#include <casacore/tables/Tables/Table.h>
+#include <casacore/tables/Tables/TableRecord.h>
+#include <casacore/measures/Measures/MDirection.h>
+#include <casacore/measures/Measures/Precession.h>
+#include <casacore/measures/Measures/Nutation.h>
+#include <casacore/casa/Quanta.h>
+#include <casacore/casa/Quanta/Quantum.h>
+
+#include <schaapcommon/h5parm/h5parm.h>
+
+#include "SagecalPredict.h"
+
+using dp3::base::DPBuffer;
+using dp3::base::DPInfo;
+using dp3::common::operator<<;
+
+using schaapcommon::h5parm::H5Parm;
+using schaapcommon::h5parm::SolTab;
+typedef JonesParameters::CorrectType CorrectType;
+
+namespace dp3 {
+namespace steps {
+
+SagecalPredict::SagecalPredict(const common::ParameterSet& parset,
+                               const std::string& prefix, MsType input_type)
+    : ms_type_(input_type),
+      name_(prefix),
+      operation_(Operation::kReplace),
+      h5_name_(parset.getString(prefix + "applycal.parmdb", "")),
+      directions_list_(parset.getStringVector(prefix + "directions",
+                                              std::vector<std::string>())),
+      solset_name_(parset.getString(prefix + "applycal.solset", "")),
+      soltab_name_(parset.getString(prefix + "applycal.correction", "")),
+      invert_(false),
+      parm_on_disk_(!h5_name_.empty()),
+      use_amp_phase_(false),
+      sigma_mmse_(0.0),
+      interp_type_(JonesParameters::InterpolationType::NEAREST),
+      timeslots_per_parmupdate_(0),
+      timestep_(0),
+      time_interval_(-1),
+      time_last_(-1) {
+  init(parset, prefix, std::vector<std::string>());
+}
+
+SagecalPredict::SagecalPredict(const common::ParameterSet& parset,
+                               const std::string& prefix,
+                               const std::vector<std::string>& source_patterns,
+                               MsType input_type)
+    : ms_type_(input_type),
+      name_(prefix),
+      operation_(Operation::kReplace),
+      h5_name_(parset.getString(prefix + "applycal.parmdb", "")),
+      directions_list_(std::vector<std::string>()),
+      solset_name_(parset.getString(prefix + "applycal.solset", "")),
+      soltab_name_(parset.getString(prefix + "applycal.correction", "")),
+      invert_(false),
+      parm_on_disk_(!h5_name_.empty()),
+      use_amp_phase_(false),
+      sigma_mmse_(0.0),
+      interp_type_(JonesParameters::InterpolationType::NEAREST),
+      timeslots_per_parmupdate_(0),
+      timestep_(0),
+      time_interval_(-1),
+      time_last_(-1) {
+  init(parset, prefix, source_patterns);
+}
+
+SagecalPredict::~SagecalPredict() {
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+
+  for (size_t patch_index = 0; patch_index < patch_list_.size();
+       ++patch_index) {
+    delete[] iodata_.cluster_arr_[patch_index].ll;
+    delete[] iodata_.cluster_arr_[patch_index].mm;
+    delete[] iodata_.cluster_arr_[patch_index].nn;
+    delete[] iodata_.cluster_arr_[patch_index].sI;
+    delete[] iodata_.cluster_arr_[patch_index].sQ;
+    delete[] iodata_.cluster_arr_[patch_index].sU;
+    delete[] iodata_.cluster_arr_[patch_index].sV;
+    delete[] iodata_.cluster_arr_[patch_index].stype;
+    // Extra data
+    for (int ci = 0; ci < iodata_.cluster_arr_[patch_index].N; ci++) {
+      if (iodata_.cluster_arr_[patch_index].stype[ci] == STYPE_GAUSSIAN) {
+        exinfo_gaussian* exg = static_cast<exinfo_gaussian*>(
+            iodata_.cluster_arr_[patch_index].ex[ci]);
+        if (exg) {
+          delete exg;
+        }
+      }
+    }
+    delete[] iodata_.cluster_arr_[patch_index].ex;
+    delete[] iodata_.cluster_arr_[patch_index].sI0;
+    delete[] iodata_.cluster_arr_[patch_index].sQ0;
+    delete[] iodata_.cluster_arr_[patch_index].sU0;
+    delete[] iodata_.cluster_arr_[patch_index].sV0;
+    delete[] iodata_.cluster_arr_[patch_index].f0;
+    delete[] iodata_.cluster_arr_[patch_index].spec_idx;
+    delete[] iodata_.cluster_arr_[patch_index].spec_idx1;
+    delete[] iodata_.cluster_arr_[patch_index].spec_idx2;
+    delete[] iodata_.cluster_arr_[patch_index].ra;
+    delete[] iodata_.cluster_arr_[patch_index].dec;
+  }
+
+  if (beam_mode_ != DOBEAM_NONE) {
+    for (size_t ci = 0; ci < beam_data_.n_stations; ci++) {
+      delete[] beam_data_.xx[ci];
+      delete[] beam_data_.yy[ci];
+      delete[] beam_data_.zz[ci];
+    }
+    if (beam_mode_ == DOBEAM_FULL || beam_mode_ == DOBEAM_ELEMENT ||
+        beam_mode_ == DOBEAM_FULL_WB || beam_mode_ == DOBEAM_ELEMENT_WB) {
+      free_elementcoeffs(beam_data_.ecoeff);
+    }
+  }
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+}
+
+void SagecalPredict::SetOperation(const std::string& operation) {
+  if (operation == "replace") {
+    operation_ = Operation::kReplace;
+  } else if (operation == "add") {
+    operation_ = Operation::kAdd;
+  } else if (operation == "subtract") {
+    operation_ = Operation::kSubtract;
+  } else {
+    throw std::invalid_argument("Invalid operation " + operation);
+  }
+}
+
+unsigned int SagecalPredict::nPol(const std::string& parmName) {
+  if (!sol_tab_.HasAxis("pol")) {
+    return 1;
+  } else {
+    return sol_tab_.GetAxis("pol").size;
+  }
+}
+
+void SagecalPredict::setCorrectType(std::vector<std::string>& solTabs) {
+  if (soltab_name_ == "fulljones") {
+    if (solTabs.size() != 2) {
+      throw std::runtime_error(
+          "The soltab parameter requires two soltabs for fulljones "
+          "correction (amplitude and phase)");
+    }
+    sol_tab_ = h5_parm_.GetSolTab(solTabs[0]);
+    sol_tab2_ = h5_parm_.GetSolTab(solTabs[1]);
+    soltab_name_ = solTabs[0] + "," + solTabs[1];
+    corr_type_ = CorrectType::FULLJONES;
+  } else if (soltab_name_ == "gain") {
+    sol_tab_ = h5_parm_.GetSolTab(solTabs[0]);
+    if (solTabs.size() == 2) {
+      sol_tab2_ = h5_parm_.GetSolTab(solTabs[1]);
+      soltab_name_ = solTabs[0] + "," + solTabs[1];
+      corr_type_ = CorrectType::GAIN;
+    } else {
+      soltab_name_ = solTabs[0];
+      corr_type_ = JonesParameters::StringToCorrectType(sol_tab_.GetType());
+    }
+  } else {
+    sol_tab_ = h5_parm_.GetSolTab(soltab_name_);
+    corr_type_ = JonesParameters::StringToCorrectType(sol_tab_.GetType());
+  }
+  if (corr_type_ == CorrectType::PHASE && nPol("") == 1) {
+    corr_type_ = CorrectType::SCALARPHASE;
+  }
+  if (corr_type_ == CorrectType::AMPLITUDE && nPol("") == 1) {
+    corr_type_ = CorrectType::SCALARAMPLITUDE;
+  }
+}
+
+void SagecalPredict::init(
+    const common::ParameterSet& parset, const std::string& prefix,
+    const std::vector<std::string>& given_source_patterns) {
+  if (ms_type_ == MsType::kBda) {
+    throw std::runtime_error("Does not support BDA data");
+  }
+  // Override 'applycal.correction' if substeps like 'applycal.steps=[step1,..]'
+  // and 'applycal.step1.correction=.. , applycal.step2.correction=..
+  // are defined.
+  // Try to find 'applycal.stepname.correction'
+  // where 'stepname' given as 'applycal.steps=[*]'
+  if (parset.isDefined(prefix + "applycal.steps")) {
+    common::ParameterValue step_names(
+        parset.getString(prefix + "applycal.steps"));
+    std::vector<std::string> substep_names;
+    if (step_names.isVector()) {
+      substep_names = step_names.getStringVector();
+    } else {
+      substep_names.push_back(step_names.getString());
+    }
+    // We can only handle one or two substeps (amplitude and/or phase)
+    if (substep_names.size() > 2) {
+      throw std::runtime_error(
+          "Only one or two types of gain application can be done at a time"
+          "applycal.steps should specify only one or two steps");
+    }
+    // update soltab_names_ from substeps
+    if (!substep_names.empty()) {
+      for (auto substep_name : substep_names) {
+        if (!substep_name.empty()) {
+          auto substep_soltab_name = parset.getString(
+              prefix + "applycal." + substep_name + ".correction");
+          soltab_names_.push_back(substep_soltab_name);
+        }
+      }
+      // update soltab_name_
+      soltab_name_ = "gain";
+    }
+  }
+
+  std::vector<std::string> source_patterns;
+  // Order of precedence for getting the sources list:
+  // 1) parset.directions key, 2) h5parm directions, 3) parset.sources key
+  // withing DDECal, 3) will be invoked because source_patterns in the
+  // constructor
+  if (!given_source_patterns.empty()) {
+    source_patterns = given_source_patterns;
+  } else {
+    source_patterns =
+        parset.getStringVector(prefix + "sources", std::vector<std::string>());
+  }
+  if (!source_patterns.empty() && !directions_list_.empty()) {
+    throw std::runtime_error(
+        "Both sources and directions specified, use only one of them");
+  }
+
+  // Try to get HDF5 file if any
+  if (!parm_on_disk_ && !directions_list_.empty()) {
+    throw std::runtime_error(
+        "H5parm name not specified but directions specified, give both of "
+        "them");
+  }
+  std::vector<std::string> h5directions;
+  if (parm_on_disk_) {
+    h5_parm_ = H5Parm(h5_name_, false);
+    // Check to see soltab is initialized at the constructor
+    if (soltab_name_.empty()) {
+      soltab_name_ = parset.getString(prefix + "applycal.correction");
+    }
+    // soltab_names_ will be already updated if 'substeps' are defined
+    if (soltab_names_.empty()) {
+      soltab_names_ = parset.getStringVector(
+          prefix + "applycal.soltab",
+          std::vector<std::string>{"amplitude000", "phase000"});
+    }
+    setCorrectType(soltab_names_);
+    h5directions = sol_tab_.GetStringAxis("dir");
+    if (h5directions.empty())
+      throw std::runtime_error("H5Parm has empty dir axis");
+    // Also do sanity check for time,freq axes
+    if (sol_tab_.HasAxis("freq") && sol_tab_.HasAxis("time") &&
+        sol_tab_.GetAxisIndex("freq") < sol_tab_.GetAxisIndex("time"))
+      throw std::runtime_error("H5Parm fastest varying axis should be freq");
+  }
+  missing_ant_behavior_ = JonesParameters::StringToMissingAntennaBehavior(
+      parset.getString(prefix + "applycal.missingantennabehavior", "error"));
+
+  if (!h5directions.empty() && directions_list_.empty()) {
+    directions_list_ = h5directions;
+  }
+
+  if (!directions_list_.empty() && source_patterns.empty()) {
+    // Build source patterns from the given directions list
+    // only if source_patterns is empty
+    for (size_t ci = 0; ci < directions_list_.size(); ci++) {
+      std::string dir = directions_list_[ci];
+      std::vector<std::string> dir_vec;
+      if (dir.size() <= 2 || dir[0] != '[' || dir[dir.size() - 1] != ']')
+        throw std::runtime_error(
+            "Invalid direction string: expecting array between square "
+            "brackets, "
+            "e.g. [a, b, c, ...]");
+      dir_vec =
+          common::stringtools::tokenize(dir.substr(1, dir.size() - 2), ",");
+      for (auto source : dir_vec) {
+        // only include source if it is not already in the source_patterns
+        if (find(source_patterns.begin(), source_patterns.end(), source) ==
+            source_patterns.end()) {
+          source_patterns.push_back(source);
+        }
+      }
+    }
+  }
+
+  // Reconcile source patterns and h5parm directions, if h5parm is given
+  if (parm_on_disk_) {
+    for (auto dir : source_patterns) {
+      if (find(h5directions.begin(), h5directions.end(), "[" + dir + "]") ==
+          h5directions.end()) {
+        throw std::runtime_error("Direction " + dir + " not found in " +
+                                 h5_name_);
+      }
+    }
+  }
+
+  name_ = prefix;
+  SetOperation(parset.getString(prefix + "operation", "replace"));
+  source_db_name_ = parset.getString(prefix + "sourcedb");
+
+  patch_list_.clear();
+
+  base::SourceDB source_db{source_db_name_, source_patterns,
+                           base::SourceDB::FilterMode::kPattern};
+
+  try {
+    patch_list_ = source_db.MakePatchList();
+    if (patch_list_.empty()) {
+      std::stringstream ss;
+      ss << source_patterns;
+      throw std::runtime_error("Couldn't find patch for directions " +
+                               ss.str());
+    }
+  } catch (std::exception& exception) {
+    throw std::runtime_error(std::string("Something went wrong while reading "
+                                         "the source model. The error was: ") +
+                             exception.what());
+  }
+
+  source_list_ = makeSourceList(patch_list_);
+  any_orientation_is_absolute_ = source_db.CheckAnyOrientationIsAbsolute();
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+  const bool apply_beam = parset.getBool(prefix + "usebeammodel", false);
+  const bool use_channel_freq = parset.getBool(prefix + "usechannelfreq", true);
+  beam_mode_ = DOBEAM_NONE;
+  if (apply_beam) {
+    const std::string& beam_mode_string =
+        parset.getString(prefix + "beammode", "full");
+    if (beam_mode_string == "full" || beam_mode_string == "default") {
+      beam_mode_ = use_channel_freq ? DOBEAM_FULL_WB : DOBEAM_FULL;
+    } else if (beam_mode_string == "arrayfactor" ||
+               beam_mode_string == "array_factor") {
+      beam_mode_ = use_channel_freq ? DOBEAM_ARRAY_WB : DOBEAM_ARRAY;
+    } else if (beam_mode_string == "element") {
+      beam_mode_ = use_channel_freq ? DOBEAM_ELEMENT_WB : DOBEAM_ELEMENT;
+    }
+  }
+  if (parm_on_disk_) {
+    ignore_list_.resize(patch_list_.size());
+    memset(ignore_list_.data(), 0, sizeof(int) * patch_list_.size());
+  }
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+}
+
+void SagecalPredict::updateFromH5(const double startTime) {
+  time_last_ = startTime + time_interval_ * (-0.5 + timeslots_per_parmupdate_);
+
+  const double lastMSTime =
+      info().startTime() + info().ntime() * time_interval_;
+  if (time_last_ > lastMSTime &&
+      !casacore::nearAbs(time_last_, lastMSTime, 1e-3)) {
+    time_last_ = lastMSTime;
+  }
+
+  std::vector<double> times(info().ntime());
+#pragma GCC ivdep
+  for (size_t t = 0; t < times.size(); ++t) {
+    times[t] = info().startTime() + (t + 0.5) * info().timeInterval();
+  }
+
+  const size_t n_chan = info().chanFreqs().size();
+  const size_t n_dir = directions_list_.size();
+
+  size_t dir_index = 0;
+  for (auto dir : directions_list_) {
+    hsize_t direction_index = sol_tab_.GetDirIndex(dir);
+    std::unique_ptr<JonesParameters> Jones_params_ =
+        std::make_unique<JonesParameters>(
+            info().chanFreqs(), times, info().antennaNames(), corr_type_,
+            interp_type_, direction_index, &sol_tab_, &sol_tab2_, invert_,
+            sigma_mmse_, parm_expressions_.size(), missing_ant_behavior_);
+
+    // shape : ncorr x n_stations x ntime*nfreq (ncorr:2,4)
+    const casacore::Cube<casacore::Complex>& gains = Jones_params_->GetParms();
+    const size_t n_corr = gains.shape()[0];
+    const size_t n_stat = gains.shape()[1];
+    const size_t n_timefreq = gains.shape()[2];
+    const size_t n_time = n_timefreq / n_chan;
+
+    if (!(n_corr == 2 || n_corr == 4)) {
+      throw std::runtime_error("H5 solutions correlations has to be 2 or 4.");
+    }
+
+    // If not allocated, setup memory
+    if (params_.size() == 0) {
+      // storage: 8*n_dir*n_stat*n_time*n_chan
+      params_.resize(8 * n_dir * n_stat * n_time * n_chan);
+      memset(params_.data(), 0,
+             sizeof(double) * 8 * n_dir * n_stat * n_time * n_chan);
+    } else if (!dir_index) {  // if already allocated, reset at the first dir
+      memset(params_.data(), 0,
+             sizeof(double) * 8 * n_dir * n_stat * n_time * n_chan);
+    }
+
+    // Copy to params
+    for (size_t chan = 0; chan < n_chan; chan++) {
+      for (size_t t = 0; t < n_time; t++) {
+        const size_t time_freq_offset = t * n_chan + chan;
+        if (n_corr == 2) {
+#pragma GCC ivdep
+          for (size_t ant = 0; ant < n_stat; ant++) {
+            const casacore::Complex* xx = &gains(0, ant, time_freq_offset);
+            const casacore::Complex* yy = &gains(1, ant, time_freq_offset);
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant] =
+                xx->real();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    1] = xx->imag();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    6] = yy->real();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    7] = yy->imag();
+          }
+        } else if (n_corr == 4) {
+#pragma GCC ivdep
+          for (size_t ant = 0; ant < n_stat; ant++) {
+            const casacore::Complex* xx = &gains(0, ant, time_freq_offset);
+            const casacore::Complex* xy = &gains(1, ant, time_freq_offset);
+            const casacore::Complex* yx = &gains(2, ant, time_freq_offset);
+            const casacore::Complex* yy = &gains(3, ant, time_freq_offset);
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant] =
+                xx->real();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    1] = xx->imag();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    2] = xy->real();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    3] = xy->imag();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    4] = yx->real();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    5] = yx->imag();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    6] = yy->real();
+            params_[chan * 8 * n_dir * n_stat * n_time +
+                    t * 8 * n_dir * n_stat + dir_index * 8 * n_stat + 8 * ant +
+                    7] = yy->imag();
+          }
+        }
+      }
+    }
+    dir_index++;
+  }
+}
+
+bool SagecalPredict::process(std::unique_ptr<DPBuffer> buffer) {
+  timer_.start();
+  // Determine the various sizes.
+  const size_t nDr = patch_list_.size();
+  const size_t nBl = info().nbaselines();
+  const size_t nCh = info().nchan();
+  const size_t nCr = info().ncorr();
+
+  buffer->ResizeData({nBl, nCh, nCr});
+  buffer->MakeIndependent(kDataField);
+
+  if (parm_on_disk_ && (buffer->getTime() > time_last_)) {
+    timestep_ = 0;
+    // Update solutions (also aux info such as time_last_ ...)
+    updateFromH5(buffer->getTime());
+  } else {
+    timestep_++;
+  }
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+  loadData(buffer);
+
+  // update time
+  if (beam_mode_ != DOBEAM_NONE) {
+    beam_data_.time_utc[0] = (buffer->getTime() / 86400.0 + 2400000.5);
+    // precess source positions
+    if (!beam_data_.sources_prec) {
+      casacore::Precession prec(casacore::Precession::IAU2000);
+      casacore::RotMatrix rotat_prec(prec(beam_data_.time_utc[0] - 2400000.5));
+      casacore::Nutation nut(casacore::Nutation::IAU2000);
+      casacore::RotMatrix rotat_nut(nut(beam_data_.time_utc[0] - 2400000.5));
+
+      casacore::RotMatrix rotat = rotat_prec * rotat_nut;
+      rotat.transpose();
+      for (size_t cl = 0; cl < nDr; cl++) {
+#pragma GCC ivdep
+        for (int ci = 0; ci < iodata_.cluster_arr_[cl].N; ci++) {
+          casacore::MVDirection pos(
+              casacore::Quantity(iodata_.cluster_arr_[cl].ra[ci], "rad"),
+              casacore::Quantity(iodata_.cluster_arr_[cl].dec[ci], "rad"));
+          casacore::MVDirection newdir = rotat * pos;
+          iodata_.cluster_arr_[cl].ra[ci] = newdir.get()[0];
+          iodata_.cluster_arr_[cl].dec[ci] = newdir.get()[1];
+        }
+      }
+      casacore::MVDirection pos(casacore::Quantity(beam_data_.p_ra0, "rad"),
+                                casacore::Quantity(beam_data_.p_dec0, "rad"));
+      casacore::MVDirection newdir = rotat * pos;
+      beam_data_.p_ra0 = newdir.get()[0];
+      beam_data_.p_dec0 = newdir.get()[1];
+      beam_data_.sources_prec = true;
+    }
+  }
+
+  int operation = SIMUL_ONLY;
+  if (operation_ == Operation::kAdd) {
+    operation = SIMUL_ADD;
+  } else if (operation_ == Operation::kSubtract) {
+    operation = SIMUL_SUB;
+  }
+  const int tile_size = 1;
+  const double time_smear_factor = 1.0;
+#ifdef HAVE_LIBDIRAC /* mutually exclusive with HAVE_LIBDIRAC_CUDA */
+  if (!parm_on_disk_) {
+    if (beam_mode_ == DOBEAM_NONE) {
+      predict_visibilities_multifreq(
+          iodata_.u_.data(), iodata_.v_.data(), iodata_.w_.data(),
+          iodata_.data_.data(), iodata_.n_stations, iodata_.n_baselines,
+          tile_size, iodata_.baseline_arr_.data(), iodata_.cluster_arr_.data(),
+          nDr, iodata_.freqs_.data(), iodata_.n_channels, iodata_.fdelta,
+          time_smear_factor, phase_ref_.dec, n_threads_, operation);
+    } else {
+      predict_visibilities_multifreq_withbeam(
+          iodata_.u_.data(), iodata_.v_.data(), iodata_.w_.data(),
+          iodata_.data_.data(), iodata_.n_stations, iodata_.n_baselines,
+          tile_size, iodata_.baseline_arr_.data(), iodata_.cluster_arr_.data(),
+          nDr, iodata_.freqs_.data(), iodata_.n_channels, iodata_.fdelta,
+          time_smear_factor, phase_ref_.dec, beam_data_.p_ra0,
+          beam_data_.p_dec0, iodata_.f0, beam_data_.sx.data(),
+          beam_data_.sy.data(), beam_data_.time_utc.data(),
+          beam_data_.n_elem.data(), beam_data_.xx.data(), beam_data_.yy.data(),
+          beam_data_.zz.data(), &beam_data_.ecoeff, beam_mode_, n_threads_,
+          operation);
+    }
+  } else {
+    if (beam_mode_ == DOBEAM_NONE) {
+      predict_visibilities_multifreq_withsol(
+          iodata_.u_.data(), iodata_.v_.data(), iodata_.w_.data(),
+          &params_[timestep_ * 8 * nDr * nCh * info().nantenna()],
+          iodata_.data_.data(), ignore_list_.data(), iodata_.n_stations,
+          iodata_.n_baselines, tile_size, iodata_.baseline_arr_.data(),
+          iodata_.cluster_arr_.data(), nDr, iodata_.freqs_.data(),
+          iodata_.n_channels, iodata_.fdelta, time_smear_factor, phase_ref_.dec,
+          n_threads_, operation, -1, 0.0, false);
+
+    } else {
+      predict_visibilities_multifreq_withsol_withbeam(
+          iodata_.u_.data(), iodata_.v_.data(), iodata_.w_.data(),
+          &params_[timestep_ * 8 * nDr * nCh * info().nantenna()],
+          iodata_.data_.data(), ignore_list_.data(), iodata_.n_stations,
+          iodata_.n_baselines, tile_size, iodata_.baseline_arr_.data(),
+          iodata_.cluster_arr_.data(), nDr, iodata_.freqs_.data(),
+          iodata_.n_channels, iodata_.fdelta, time_smear_factor, phase_ref_.dec,
+          beam_data_.p_ra0, beam_data_.p_dec0, iodata_.f0, beam_data_.sx.data(),
+          beam_data_.sy.data(), beam_data_.time_utc.data(),
+          beam_data_.n_elem.data(), beam_data_.xx.data(), beam_data_.yy.data(),
+          beam_data_.zz.data(), &beam_data_.ecoeff, beam_mode_, n_threads_,
+          operation, -1, 0.0, false);
+    }
+  }
+#endif                    /* HAVE_LIBDIRAC */
+#ifdef HAVE_LIBDIRAC_CUDA /* mutually exclusive with HAVE_LIBDIRAC */
+  if (!parm_on_disk_) {
+    predict_visibilities_multifreq_withbeam_gpu(
+        iodata_.u_.data(), iodata_.v_.data(), iodata_.w_.data(),
+        iodata_.data_.data(), iodata_.n_stations, iodata_.n_baselines,
+        tile_size, iodata_.baseline_arr_.data(), iodata_.cluster_arr_.data(),
+        nDr, iodata_.freqs_.data(), iodata_.n_channels, iodata_.fdelta,
+        time_smear_factor, phase_ref_.dec, beam_data_.p_ra0, beam_data_.p_dec0,
+        iodata_.f0, beam_data_.sx.data(), beam_data_.sy.data(),
+        beam_data_.time_utc.data(), beam_data_.n_elem.data(),
+        beam_data_.xx.data(), beam_data_.yy.data(), beam_data_.zz.data(),
+        &beam_data_.ecoeff, beam_mode_, n_threads_, operation);
+  } else {
+    predict_visibilities_withsol_withbeam_gpu(
+        iodata_.u_.data(), iodata_.v_.data(), iodata_.w_.data(),
+        &params_[timestep_ * 8 * nDr * nCh * info().nantenna()],
+        iodata_.data_.data(), ignore_list_.data(), iodata_.n_stations,
+        iodata_.n_baselines, tile_size, iodata_.baseline_arr_.data(),
+        iodata_.cluster_arr_.data(), nDr, iodata_.freqs_.data(),
+        iodata_.n_channels, iodata_.fdelta, time_smear_factor, phase_ref_.dec,
+        beam_data_.p_ra0, beam_data_.p_dec0, iodata_.f0, beam_data_.sx.data(),
+        beam_data_.sy.data(), beam_data_.time_utc.data(),
+        beam_data_.n_elem.data(), beam_data_.xx.data(), beam_data_.yy.data(),
+        beam_data_.zz.data(), &beam_data_.ecoeff, beam_mode_, n_threads_,
+        operation, -1, 0.0, false);
+  }
+#endif /* HAVE_LIBDIRAC_CUDA */
+  writeData(buffer);
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+
+  timer_.stop();
+  getNextStep()->process(std::move(buffer));
+
+  return false;
+}
+
+void SagecalPredict::updateInfo(const DPInfo& _info) {
+  const size_t n_directions = patch_list_.size();
+  const size_t n_stations = _info.nantenna();
+  const size_t n_given_baselines = _info.nbaselines();
+  const size_t n_channels = _info.nchan();
+  const size_t n_correlations = _info.ncorr();
+
+  if (n_correlations != 4) {
+    throw std::invalid_argument("Can only predict with all 4 correlations.");
+  }
+
+  if (n_threads_ == 0) {
+    setNThreads(_info.nThreads());
+  }
+
+  // Some data will not have autocorrelations, in that case
+  // n_baselines = n_stations*(n_stations-1)/2, otherwise
+  // n_baselines = n_stations*(n_stations-1)/2+n_stations
+  // we only need baselines without autocorrelations
+  size_t n_baselines = n_given_baselines;
+  if (n_given_baselines == n_stations * (n_stations - 1) / 2 + n_stations) {
+    n_baselines = n_stations * (n_stations - 1) / 2;
+  }
+
+  Step::updateInfo(_info);
+  try {
+    casacore::MDirection dirJ2000(casacore::MDirection::Convert(
+        _info.phaseCenter(), casacore::MDirection::J2000)());
+    casacore::Quantum<casacore::Vector<double>> angles = dirJ2000.getAngle();
+    // casacore::Quantum<casacore::Vector<double>> angles =
+    // _info.phaseCenter().getAngle();
+    phase_ref_ =
+        base::Direction(angles.getBaseValue()[0], angles.getBaseValue()[1]);
+  } catch (casacore::AipsError&) {
+    throw std::runtime_error("Casacore error.");
+  }
+
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+  try {
+    iodata_.cluster_arr_.resize(n_directions);
+    iodata_.baseline_arr_.resize(n_baselines);
+    iodata_.data_.resize(n_baselines * n_correlations * n_channels * 2);
+    iodata_.u_.resize(n_baselines);
+    iodata_.v_.resize(n_baselines);
+    iodata_.w_.resize(n_baselines);
+    iodata_.freqs_.resize(n_channels);
+    iodata_.n_baselines = n_baselines;
+    iodata_.n_stations = n_stations;
+    iodata_.n_channels = n_channels;
+  } catch (const std::bad_alloc& e) {
+    throw std::runtime_error("Allocating memory failure");
+  }
+
+  for (size_t patch_index = 0; patch_index < n_directions; ++patch_index) {
+    // Count sources of this patch
+    size_t n_sources =
+        std::count_if(source_list_.begin(), source_list_.end(),
+                      [&](const std::pair<std::shared_ptr<base::ModelComponent>,
+                                          std::shared_ptr<base::Patch>>& item) {
+                        return item.second == patch_list_[patch_index];
+                      });
+
+    iodata_.cluster_arr_[patch_index].id = patch_index;
+    iodata_.cluster_arr_[patch_index].nchunk = 1;
+    iodata_.cluster_arr_[patch_index].N = n_sources;
+    try {
+      iodata_.cluster_arr_[patch_index].ll = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].mm = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].nn = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].sI = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].sQ = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].sU = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].sV = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].stype = new unsigned char[n_sources];
+      // Extra data (stored as a generic pointer)
+      iodata_.cluster_arr_[patch_index].ex = new void*[n_sources];
+      iodata_.cluster_arr_[patch_index].sI0 = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].sQ0 = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].sU0 = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].sV0 = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].f0 = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].spec_idx = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].spec_idx1 = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].spec_idx2 = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].ra = new double[n_sources];
+      iodata_.cluster_arr_[patch_index].dec = new double[n_sources];
+
+      iodata_.cluster_arr_[patch_index].p =
+          new int[iodata_.cluster_arr_[patch_index].nchunk];
+    } catch (const std::bad_alloc& e) {
+      throw std::runtime_error("Allocating memory failure");
+    }
+
+    dp3::base::ComponentInfo component_info;
+    size_t src_index = 0;
+    for (size_t scan_index = 0; scan_index < source_list_.size();
+         ++scan_index) {
+      if (source_list_[scan_index].second == patch_list_[patch_index]) {
+        auto source = source_list_[scan_index].first;
+        component_info.inspect(source);
+        // Insert this source
+        iodata_.cluster_arr_[patch_index].ra[src_index] = component_info.ra_;
+        iodata_.cluster_arr_[patch_index].dec[src_index] = component_info.dec_;
+        iodata_.cluster_arr_[patch_index].sI[src_index] = component_info.sI_;
+        iodata_.cluster_arr_[patch_index].sQ[src_index] = component_info.sQ_;
+        iodata_.cluster_arr_[patch_index].sU[src_index] = component_info.sU_;
+        iodata_.cluster_arr_[patch_index].sV[src_index] = component_info.sV_;
+        iodata_.cluster_arr_[patch_index].sI0[src_index] = component_info.sI_;
+        iodata_.cluster_arr_[patch_index].sQ0[src_index] = component_info.sQ_;
+        iodata_.cluster_arr_[patch_index].sU0[src_index] = component_info.sU_;
+        iodata_.cluster_arr_[patch_index].sV0[src_index] = component_info.sV_;
+        iodata_.cluster_arr_[patch_index].stype[src_index] =
+            (component_info.source_type_ == dp3::base::ComponentInfo::kPoint
+                 ? STYPE_POINT
+                 : STYPE_GAUSSIAN);
+
+        iodata_.cluster_arr_[patch_index].spec_idx[src_index] =
+            component_info.spectrum_[0];
+        iodata_.cluster_arr_[patch_index].spec_idx1[src_index] =
+            component_info.spectrum_[1] / log(10.0);
+        iodata_.cluster_arr_[patch_index].spec_idx2[src_index] =
+            component_info.spectrum_[2] / (log(10.0) * log(10.0));
+        iodata_.cluster_arr_[patch_index].f0[src_index] = component_info.f0_;
+        if (iodata_.cluster_arr_[patch_index].stype[src_index] ==
+            STYPE_GAUSSIAN) {
+          exinfo_gaussian* exg = new exinfo_gaussian;
+          // The following will be updated later
+          exg->eX = component_info.g_major_;
+          exg->eY = component_info.g_minor_;
+          exg->eP = component_info.g_pa_;
+          iodata_.cluster_arr_[patch_index].ex[src_index] =
+              static_cast<void*>(exg);
+        } else {
+          iodata_.cluster_arr_[patch_index].ex[src_index] = nullptr;
+        }
+        src_index++;
+      }
+    }
+    assert(src_index == n_sources);
+  }
+  const std::vector<double>& freqs = info().chanFreqs();
+  const std::vector<double>& widths = info().chanWidths();
+  iodata_.f0 = 0.0;
+  iodata_.fdelta = 0.0;
+  for (size_t ch = 0; ch < n_channels; ch++) {
+    iodata_.freqs_[ch] = freqs[ch];
+    iodata_.f0 += freqs[ch];
+    iodata_.fdelta += widths[ch];
+  }
+  iodata_.f0 /= (double)n_channels;
+
+  int param_offset = 0;
+  for (size_t patch_index = 0; patch_index < n_directions; ++patch_index) {
+    for (int chunk = 0; chunk < iodata_.cluster_arr_[patch_index].nchunk;
+         chunk++) {
+      iodata_.cluster_arr_[patch_index].p[chunk] =
+          8 * param_offset * iodata_.n_stations;
+      param_offset++;
+    }
+  }
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+  // Update source positions
+  for (size_t cl = 0; cl < n_directions; cl++) {
+#pragma GCC ivdep
+    for (int ci = 0; ci < iodata_.cluster_arr_[cl].N; ci++) {
+      const double c_dec = cos(iodata_.cluster_arr_[cl].dec[ci]);
+      const double s_dec = sin(iodata_.cluster_arr_[cl].dec[ci]);
+      const double c_dec0 = cos(phase_ref_.dec);
+      const double s_dec0 = sin(phase_ref_.dec);
+      const double c_radiff =
+          cos(iodata_.cluster_arr_[cl].ra[ci] - phase_ref_.ra);
+      const double s_radiff =
+          sin(iodata_.cluster_arr_[cl].ra[ci] - phase_ref_.ra);
+      iodata_.cluster_arr_[cl].ll[ci] = c_dec * s_radiff;
+      iodata_.cluster_arr_[cl].mm[ci] =
+          s_dec * c_dec0 - c_dec * s_dec0 * c_radiff;
+      iodata_.cluster_arr_[cl].nn[ci] =
+          sqrt(1.0 -
+               iodata_.cluster_arr_[cl].ll[ci] *
+                   iodata_.cluster_arr_[cl].ll[ci] -
+               iodata_.cluster_arr_[cl].mm[ci] *
+                   iodata_.cluster_arr_[cl].mm[ci]) -
+          1.0;
+      if (iodata_.cluster_arr_[cl].stype[ci] == STYPE_GAUSSIAN) {
+        exinfo_gaussian* exg =
+            static_cast<exinfo_gaussian*>(iodata_.cluster_arr_[cl].ex[ci]);
+        exg->eX /= (2.0 * (sqrt(2.0 * log(2.0))));
+        exg->eY /= (2.0 * (sqrt(2.0 * log(2.0))));
+        exg->eP += M_PI_2;
+        if (any_orientation_is_absolute_) {
+          const double phi = acos(iodata_.cluster_arr_[cl].nn[ci] + 1.0);
+          const double xi = atan2(-iodata_.cluster_arr_[cl].ll[ci],
+                                  iodata_.cluster_arr_[cl].mm[ci]);
+          /* negate angles */
+          exg->cxi = cos(xi);
+          exg->sxi = sin(-xi);
+          exg->cphi = cos(phi);
+          exg->sphi = sin(-phi);
+          if (iodata_.cluster_arr_[cl].nn[ci] + 1.0 < PROJ_CUT) {
+            /* only then consider projection */
+            exg->use_projection = 1;
+          } else {
+            exg->use_projection = 0;
+          }
+        } else {
+          exg->use_projection = 0;
+        }
+      }
+    }
+  }
+
+  // If beam calculation is enabled
+  if (beam_mode_ != DOBEAM_NONE) {
+    readAuxData(_info);
+  }
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+
+  // Setup reading the H5 solutions
+  if (parm_on_disk_) {
+    time_interval_ = _info.timeInterval();
+    // Read in solutions for all timeslots
+    timeslots_per_parmupdate_ = info().ntime();
+    if (corr_type_ == CorrectType::GAIN ||
+        corr_type_ == CorrectType::FULLJONES) {
+      use_amp_phase_ = true;
+    }
+    if (corr_type_ == CorrectType::GAIN) {
+      if (use_amp_phase_) {
+        parm_expressions_.push_back("Gain:0:0:Ampl");
+        parm_expressions_.push_back("Gain:0:0:Phase");
+        parm_expressions_.push_back("Gain:1:1:Ampl");
+        parm_expressions_.push_back("Gain:1:1:Phase");
+      } else {
+        parm_expressions_.push_back("Gain:0:0:Real");
+        parm_expressions_.push_back("Gain:0:0:Imag");
+        parm_expressions_.push_back("Gain:1:1:Real");
+        parm_expressions_.push_back("Gain:1:1:Imag");
+      }
+    } else if (corr_type_ == CorrectType::FULLJONES) {
+      if (use_amp_phase_) {
+        parm_expressions_.push_back("Gain:0:0:Ampl");
+        parm_expressions_.push_back("Gain:0:0:Phase");
+        parm_expressions_.push_back("Gain:0:1:Ampl");
+        parm_expressions_.push_back("Gain:0:1:Phase");
+        parm_expressions_.push_back("Gain:1:0:Ampl");
+        parm_expressions_.push_back("Gain:1:0:Phase");
+        parm_expressions_.push_back("Gain:1:1:Ampl");
+        parm_expressions_.push_back("Gain:1:1:Phase");
+      } else {
+        parm_expressions_.push_back("Gain:0:0:Real");
+        parm_expressions_.push_back("Gain:0:0:Imag");
+        parm_expressions_.push_back("Gain:0:1:Real");
+        parm_expressions_.push_back("Gain:0:1:Imag");
+        parm_expressions_.push_back("Gain:1:0:Real");
+        parm_expressions_.push_back("Gain:1:0:Imag");
+        parm_expressions_.push_back("Gain:1:1:Real");
+        parm_expressions_.push_back("Gain:1:1:Imag");
+      }
+    } else if (corr_type_ == CorrectType::TEC) {
+      if (nPol("TEC") == 1) {
+        parm_expressions_.push_back("TEC");
+      } else {
+        parm_expressions_.push_back("TEC:0");
+        parm_expressions_.push_back("TEC:1");
+      }
+    } else if (corr_type_ == CorrectType::CLOCK) {
+      if (nPol("Clock") == 1) {
+        parm_expressions_.push_back("Clock");
+      } else {
+        parm_expressions_.push_back("Clock:0");
+        parm_expressions_.push_back("Clock:1");
+      }
+    } else if (corr_type_ == CorrectType::ROTATIONANGLE) {
+      parm_expressions_.push_back("{Common,}RotationAngle");
+    } else if (corr_type_ == CorrectType::SCALARPHASE) {
+      parm_expressions_.push_back("{Common,}ScalarPhase");
+    } else if (corr_type_ == CorrectType::ROTATIONMEASURE) {
+      parm_expressions_.push_back("RotationMeasure");
+    } else if (corr_type_ == CorrectType::SCALARAMPLITUDE) {
+      parm_expressions_.push_back("{Common,}ScalarAmplitude");
+    } else if (corr_type_ == CorrectType::PHASE) {
+      parm_expressions_.push_back("Phase:0");
+      parm_expressions_.push_back("Phase:1");
+    } else if (corr_type_ == CorrectType::AMPLITUDE) {
+      parm_expressions_.push_back("Amplitude:0");
+      parm_expressions_.push_back("Amplitude:1");
+    } else {
+      throw std::runtime_error(
+          "Correction type " +
+          JonesParameters::CorrectTypeToString(corr_type_) + " unknown");
+    }
+  }
+}
+
+void SagecalPredict::finish() { getNextStep()->finish(); }
+
+void SagecalPredict::show(std::ostream& os) const {
+#ifdef HAVE_LIBDIRAC
+  os << "SagecalPredict " << name_ << '\n';
+#endif
+#ifdef HAVE_LIBDIRAC_CUDA
+  os << "SagecalPredict (GPU) " << name_ << '\n';
+#endif
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+  os << "  sourcedb:                " << source_db_name_ << '\n';
+  os << "   number of directions:      " << patch_list_.size() << '\n';
+  os << "   number of components:   " << source_list_.size() << '\n';
+  os << "   correct freq smearing:  " << std::boolalpha << true << '\n';
+  os << "  apply beam:              ";
+  switch (beam_mode_) {
+    case DOBEAM_NONE:
+      os << std::boolalpha << false << '\n';
+      break;
+    case DOBEAM_FULL:
+      os << "mode: full beam" << '\n';
+      break;
+    case DOBEAM_FULL_WB:
+      os << "mode: full beam" << '\n';
+      os << "  use channelfreq: true" << '\n';
+      break;
+    case DOBEAM_ARRAY:
+      os << "mode: array beam" << '\n';
+      break;
+    case DOBEAM_ARRAY_WB:
+      os << "mode: array beam" << '\n';
+      os << "  use channelfreq: true" << '\n';
+      break;
+    case DOBEAM_ELEMENT:
+      os << "mode: element beam" << '\n';
+      break;
+    case DOBEAM_ELEMENT_WB:
+      os << "mode: element beam" << '\n';
+      os << "  use channelfreq: true" << '\n';
+      break;
+  }
+  os << "  operation:   ";
+  switch (operation_) {
+    case Operation::kReplace:
+      os << "replace";
+      break;
+    case Operation::kAdd:
+      os << "add";
+      break;
+    case Operation::kSubtract:
+      os << "subtract";
+      break;
+  }
+  os << '\n';
+  os << "  threads: " << n_threads_ << '\n';
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+  if (parm_on_disk_) {
+    os << "H5 name " << h5_name_ << "\n";
+    os << " SolSet " << h5_parm_.GetSolSetName() << "\n";
+    os << " SolTab " << soltab_name_ << "\n";
+    os << "Time interval " << time_interval_ << "\n";
+    os << "Timeslots per parmupdate " << timeslots_per_parmupdate_ << "\n";
+  }
+}
+
+void SagecalPredict::showTimings(std::ostream& os,
+                                 [[maybe_unused]] double duration) const {
+  os << "  ";
+  base::FlagCounter::showPerc1(os, timer_.getElapsed(), duration);
+  os << " SagecalPredict " << name_ << '\n';
+}
+
+base::Direction SagecalPredict::GetFirstDirection() const {
+  return patch_list_.front()->direction();
+}
+
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+void SagecalPredict::loadData(std::unique_ptr<dp3::base::DPBuffer>& buffer) {
+  const size_t nBl = info().nbaselines();
+  const size_t nCh = info().nchan();
+  const size_t nCr = info().ncorr();
+
+  assert(iodata_.n_baselines >= nBl);
+  assert(iodata_.n_stations == nSt);
+  assert(iodata_.n_channels == nCh);
+  assert(4 == nCr);
+
+  const aocommon::xt::Span<double, 2>& uvw = buffer->GetUvw();
+  size_t row0 = 0;
+  // load data, skipping autocorrelations
+  for (size_t bl = 0; bl < nBl; bl++) {
+    uint ant1 = info().getAnt1()[bl];
+    uint ant2 = info().getAnt2()[bl];
+    if (ant1 != ant2) {
+      // shape nBl x 3
+      iodata_.u_[row0] = uvw(bl, 0);
+      iodata_.v_[row0] = uvw(bl, 1);
+      iodata_.w_[row0] = uvw(bl, 2);
+
+      iodata_.baseline_arr_[row0].sta1 = ant1;
+      iodata_.baseline_arr_[row0].sta2 = ant2;
+      iodata_.baseline_arr_[row0].flag = 0;
+
+      // shape nCr x nCh x nBl
+      const casacore::Complex* ms_data =
+          &(buffer->GetCasacoreData().data()[bl * nCr * nCh]);
+#pragma GCC ivdep
+      for (uint ch = 0; ch < nCh; ch++) {
+        const casacore::Complex* ptr = &(ms_data[nCr * ch]);
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8] = ptr[0].real();
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 1] =
+            ptr[0].imag();
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 2] =
+            ptr[1].real();
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 3] =
+            ptr[1].imag();
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 4] =
+            ptr[2].real();
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 5] =
+            ptr[2].imag();
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 6] =
+            ptr[3].real();
+        iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 7] =
+            ptr[3].imag();
+      }
+      row0++;
+    }
+  }
+
+  /* rescale u,v,w by 1/c */
+  const double inv_c = 1.0 / casacore::C::c;
+  my_dscal(iodata_.n_baselines, inv_c, iodata_.u_.data());
+  my_dscal(iodata_.n_baselines, inv_c, iodata_.v_.data());
+  my_dscal(iodata_.n_baselines, inv_c, iodata_.w_.data());
+}
+
+void SagecalPredict::writeData(std::unique_ptr<DPBuffer>& buffer) {
+  const size_t nBl = info().nbaselines();
+  const size_t nCh = info().nchan();
+
+  assert(iodata_.n_baselines >= nBl);
+  assert(iodata_.n_channels == nCh);
+
+  casacore::Cube<casacore::Complex>& data = buffer->GetCasacoreData();
+  size_t row0 = 0;
+  // load data, skipping autocorrelations
+  for (size_t bl = 0; bl < nBl; bl++) {
+    uint ant1 = info().getAnt1()[bl];
+    uint ant2 = info().getAnt2()[bl];
+    if (ant1 != ant2) {
+#pragma GCC ivdep
+      for (uint ch = 0; ch < nCh; ch++) {
+        data(0, ch, bl) = casacore::Complex(
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8],
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 1]);
+        data(1, ch, bl) = casacore::Complex(
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 2],
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 3]);
+        data(2, ch, bl) = casacore::Complex(
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 4],
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 5]);
+        data(3, ch, bl) = casacore::Complex(
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 6],
+            iodata_.data_[iodata_.n_baselines * 8 * ch + row0 * 8 + 7]);
+      }
+      row0++;
+    }
+  }
+}
+
+void SagecalPredict::readAuxData(const DPInfo& _info) {
+  beam_data_.n_stations = _info.nantenna();
+  casacore::Table ms(_info.msName());
+  const size_t tile_size = 1;
+  beam_data_.isDipole = false;
+  try {
+    beam_data_.time_utc.resize(tile_size);
+    beam_data_.n_elem.resize(beam_data_.n_stations);
+    beam_data_.sx.resize(beam_data_.n_stations);
+    beam_data_.sy.resize(beam_data_.n_stations);
+    beam_data_.sz.resize(beam_data_.n_stations);
+    beam_data_.xx.resize(beam_data_.n_stations);
+    beam_data_.yy.resize(beam_data_.n_stations);
+    beam_data_.zz.resize(beam_data_.n_stations);
+  } catch (const std::bad_alloc& e) {
+    throw std::runtime_error("Allocating memory failure");
+  }
+
+  if (!ms.keywordSet().isDefined("LOFAR_ANTENNA_FIELD"))
+    throw std::runtime_error("subtable LOFAR_ANTENNA_FIELD is missing");
+  casacore::Table antfield(ms.keywordSet().asTable("LOFAR_ANTENNA_FIELD"));
+  casacore::ROArrayColumn<double> position(antfield, "POSITION");
+  casacore::ROArrayColumn<double> offset(antfield, "ELEMENT_OFFSET");
+  casacore::ROArrayColumn<double> coord(antfield, "COORDINATE_AXES");
+  casacore::ROArrayColumn<bool> eflag(antfield, "ELEMENT_FLAG");
+  casacore::ROArrayColumn<double> tileoffset(antfield, "TILE_ELEMENT_OFFSET");
+  casacore::Table _field(ms.keywordSet().asTable("FIELD"));
+  // check if TILE_ELEMENT_OFFSET has any rows, of no rows present,
+  //      we know this is LBA
+  bool isHBA = tileoffset.hasContent(0);
+  /* read positions, also setup memory for element coords */
+  for (size_t ci = 0; ci < beam_data_.n_stations; ci++) {
+    casacore::Array<double> _pos = position(ci);
+    double* tx = _pos.data();
+    beam_data_.sz[ci] = tx[2];
+
+    casacore::MPosition stnpos(casacore::MVPosition(tx[0], tx[1], tx[2]),
+                               casacore::MPosition::ITRF);
+    casacore::Array<double> _radpos = stnpos.getAngle("rad").getValue();
+    tx = _radpos.data();
+
+    beam_data_.sx[ci] = tx[0];
+    beam_data_.sy[ci] = tx[1];
+    beam_data_.n_elem[ci] = offset.shape(ci)[1];
+  }
+
+  if (isHBA) {
+    double cones[16];
+    for (int ci = 0; ci < 16; ci++) {
+      cones[ci] = 1.0;
+    }
+    double tempT[3 * 16];
+    /* now read in element offsets, also transform them to local coordinates */
+    for (size_t ci = 0; ci < beam_data_.n_stations; ci++) {
+      casacore::Array<double> _off = offset(ci);
+      double* off = _off.data();
+      casacore::Array<double> _coord = coord(ci);
+      double* coordmat = _coord.data();
+      casacore::Array<bool> _eflag = eflag(ci);
+      bool* ef = _eflag.data();
+      casacore::Array<double> _toff = tileoffset(ci);
+      double* toff = _toff.data();
+
+      double* tempC = new double[3 * beam_data_.n_elem[ci]];
+      my_dgemm('T', 'N', beam_data_.n_elem[ci], 3, 3, 1.0, off, 3, coordmat, 3,
+               0.0, tempC, beam_data_.n_elem[ci]);
+      my_dgemm('T', 'N', 16, 3, 3, 1.0, toff, 3, coordmat, 3, 0.0, tempT, 16);
+
+      /* now inspect the element flag table to see if any of the dipoles are
+       * flagged */
+      size_t fcount = 0;
+      for (int cj = 0; cj < beam_data_.n_elem[ci]; cj++) {
+        if (ef[2 * cj] == 1 || ef[2 * cj + 1] == 1) {
+          fcount++;
+        }
+      }
+
+      beam_data_.xx[ci] = new double[16 * (beam_data_.n_elem[ci] - fcount)];
+      beam_data_.yy[ci] = new double[16 * (beam_data_.n_elem[ci] - fcount)];
+      beam_data_.zz[ci] = new double[16 * (beam_data_.n_elem[ci] - fcount)];
+      /* copy unflagged coords, 16 times for each dipole */
+      fcount = 0;
+      for (int cj = 0; cj < beam_data_.n_elem[ci]; cj++) {
+        if (!(ef[2 * cj] == 1 || ef[2 * cj + 1] == 1)) {
+          my_dcopy(16, &tempT[0], 1, &(beam_data_.xx[ci][fcount]), 1);
+          my_daxpy(16, cones, tempC[cj], &(beam_data_.xx[ci][fcount]));
+          my_dcopy(16, &tempT[16], 1, &(beam_data_.yy[ci][fcount]), 1);
+          my_daxpy(16, cones, tempC[cj + beam_data_.n_elem[ci]],
+                   &(beam_data_.yy[ci][fcount]));
+          my_dcopy(16, &tempT[24], 1, &(beam_data_.zz[ci][fcount]), 1);
+          my_daxpy(16, cones, tempC[cj + 2 * beam_data_.n_elem[ci]],
+                   &(beam_data_.zz[ci][fcount]));
+          fcount += 16;
+        }
+      }
+      beam_data_.n_elem[ci] = fcount;
+
+      delete[] tempC;
+    }
+  } else { /* LBA */
+    /* read in element offsets, also transform them to local coordinates */
+    for (size_t ci = 0; ci < beam_data_.n_stations; ci++) {
+      casacore::Array<double> _off = offset(ci);
+      double* off = _off.data();
+      casacore::Array<double> _coord = coord(ci);
+      double* coordmat = _coord.data();
+      casacore::Array<bool> _eflag = eflag(ci);
+      bool* ef = _eflag.data();
+
+      double* tempC = new double[3 * beam_data_.n_elem[ci]];
+      my_dgemm('T', 'N', beam_data_.n_elem[ci], 3, 3, 1.0, off, 3, coordmat, 3,
+               0.0, tempC, beam_data_.n_elem[ci]);
+
+      /* now inspect the element flag table to see if any of the dipoles are
+       * flagged */
+      size_t fcount = 0;
+      for (int cj = 0; cj < beam_data_.n_elem[ci]; cj++) {
+        if (ef[2 * cj] == 1 || ef[2 * cj + 1] == 1) {
+          fcount++;
+        }
+      }
+
+      beam_data_.xx[ci] = new double[(beam_data_.n_elem[ci] - fcount)];
+      beam_data_.yy[ci] = new double[(beam_data_.n_elem[ci] - fcount)];
+      beam_data_.zz[ci] = new double[(beam_data_.n_elem[ci] - fcount)];
+      /* copy unflagged coords for each dipole */
+      fcount = 0;
+      for (int cj = 0; cj < beam_data_.n_elem[ci]; cj++) {
+        if (!(ef[2 * cj] == 1 || ef[2 * cj + 1] == 1)) {
+          beam_data_.xx[ci][fcount] = tempC[cj];
+          beam_data_.yy[ci][fcount] = tempC[cj + beam_data_.n_elem[ci]];
+          beam_data_.zz[ci][fcount] = tempC[cj + 2 * beam_data_.n_elem[ci]];
+          fcount++;
+        }
+      }
+      beam_data_.n_elem[ci] = fcount;
+      delete[] tempC;
+    }
+  }
+  /* read beam pointing direction */
+  casacore::ROArrayColumn<double> point_dir(_field, "REFERENCE_DIR");
+  casacore::Array<double> pdir = point_dir(0);
+  double* pc = pdir.data();
+  beam_data_.p_ra0 = pc[0];
+  beam_data_.p_dec0 = pc[1];
+
+  if (beam_mode_ == DOBEAM_FULL || beam_mode_ == DOBEAM_ELEMENT) {
+    set_elementcoeffs((iodata_.f0 < 100e6 ? ELEM_LBA : ELEM_HBA), iodata_.f0,
+                      &beam_data_.ecoeff);
+  } else if (beam_mode_ == DOBEAM_FULL_WB || beam_mode_ == DOBEAM_ELEMENT_WB) {
+    set_elementcoeffs_wb((iodata_.f0 < 100e6 ? ELEM_LBA : ELEM_HBA),
+                         iodata_.freqs_.data(), iodata_.n_channels,
+                         &beam_data_.ecoeff);
+  }
+}
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+
+}  // namespace steps
+}  // namespace dp3
diff --git a/steps/SagecalPredict.h b/steps/SagecalPredict.h
new file mode 100644
index 0000000000000000000000000000000000000000..57213ab3f2b35961df904e3afb566798460ea8c0
--- /dev/null
+++ b/steps/SagecalPredict.h
@@ -0,0 +1,200 @@
+// Copyright (C) 2023 ASTRON (Netherlands Institute for Radio Astronomy)
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+/// @file
+/// @brief Interface to sagecal model prediction
+
+#ifndef DP3_STEPS_SAGECALPREDICT_H_
+#define DP3_STEPS_SAGECALPREDICT_H_
+
+#include <dp3/steps/Step.h>
+#include <dp3/base/DP3.h>
+#include <dp3/base/DPBuffer.h>
+#include "../base/ModelComponent.h"
+#include "../base/Patch.h"
+#include "../base/PredictBuffer.h"
+#include "../base/SourceDBUtil.h"
+#include "../common/ParameterSet.h"
+#include "ApplyCal.h"
+#include "ResultStep.h"
+
+#include "../base/ComponentInfo.h"
+
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+#include <Dirac_radio.h>
+// Remove 'complex' def here as we do not need it afterwards
+#undef complex
+#endif
+
+namespace dp3 {
+namespace steps {
+
+class SagecalPredict : public ModelDataStep {
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+  class IOData {
+   public:
+    IOData(){};
+    std::vector<double> data_;
+    std::vector<double> u_;
+    std::vector<double> v_;
+    std::vector<double> w_;
+    std::vector<double> freqs_;
+    std::vector<clus_source_t> cluster_arr_;
+    std::vector<baseline_t> baseline_arr_;
+
+    size_t n_baselines, n_stations, n_channels;
+    double f0;
+    double fdelta;
+
+   private:
+  };
+  class BeamData {
+   public:
+    BeamData() { sources_prec = false; };
+    size_t n_stations;            /* N */
+    std::vector<double> time_utc; /* time coord UTC (s), size tileszx1,
+                       convert from MJD (s) to JD (days) */
+    std::vector<int> n_elem;      /* no of elements in each station, size Nx1 */
+    /* position (ITRF) of stations (m)
+     later changed to logitude,latitude,height (rad,rad,m) */
+    std::vector<double> sx; /* x: size Nx1 */
+    std::vector<double> sy; /* y: ... */
+    std::vector<double> sz; /* z: ... */
+    /* x,y,z coords of elements, projected, converted to ITRF (m) */
+    std::vector<double*> xx; /* x coord pointer, size Nx1, each *x: x coord of
+                    station, size Nelem[]x1 */
+    std::vector<double*> yy; /* y ... */
+    std::vector<double*> zz; /* z ... */
+    /* pointing center of beams (only one) (could be different from phase
+     * center) */
+    double p_ra0;
+    double p_dec0;
+
+    /* flag to indicate this beam is only a dipole */
+    bool isDipole;
+
+    elementcoeff ecoeff;
+    bool sources_prec;
+
+   private:
+  };
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+
+ public:
+  SagecalPredict(const common::ParameterSet&, const std::string& prefix,
+                 MsType input_type = MsType::kRegular);
+  // The following constructor is to be used within DDECal
+  SagecalPredict(const common::ParameterSet&, const std::string& prefix,
+                 const std::vector<std::string>& given_source_patterns,
+                 MsType input_type = MsType::kRegular);
+
+  ~SagecalPredict() override;
+
+  common::Fields getRequiredFields() const override {
+    // We need UVW and data
+    common::Fields fields = kUvwField;
+    if ((operation_ == Operation::kAdd) ||
+        (operation_ == Operation::kSubtract)) {
+      fields |= kDataField;
+    }
+    return fields;
+  }
+
+  common::Fields getProvidedFields() const override {
+    // We only modify the output data
+    common::Fields fields = kDataField;
+    return fields;
+  }
+
+  ///
+  /// buffer, copies the buffer and change its data.
+  bool process(std::unique_ptr<dp3::base::DPBuffer> buffer) override;
+
+  /// Update the general info.
+  void updateInfo(const base::DPInfo&) override;
+
+  void finish() override;
+
+  void show(std::ostream&) const override;
+
+  void showTimings(std::ostream& os, double duration) const override;
+
+  base::Direction GetFirstDirection() const override;
+
+  void setNThreads(const size_t n_threads) { n_threads_ = n_threads; }
+
+ private:
+  enum class Operation { kReplace, kAdd, kSubtract };
+
+  const MsType ms_type_;
+
+  void SetOperation(const std::string& operation);
+
+  unsigned int nPol(const std::string& parmName);
+
+  void setCorrectType(std::vector<std::string>& solTabs);
+
+  void updateFromH5(const double startTime);
+
+  /// The actual constructor
+  void init(const common::ParameterSet&, const std::string& prefix,
+            const std::vector<std::string>& source_patterns);
+
+  std::string name_;     ///< The name of the step (or prefix)
+  Operation operation_;  ///< Operation to use on the DATA column
+
+  std::string h5_name_;  ///< HDF5 parameter file
+  std::vector<std::string>
+      directions_list_;  ///< list of directions (one or more patches)
+  std::vector<std::shared_ptr<base::Patch>> patch_list_;
+  std::string source_db_name_;
+  std::vector<std::pair<std::shared_ptr<base::ModelComponent>,
+                        std::shared_ptr<base::Patch>>>
+      source_list_;
+  bool any_orientation_is_absolute_{false};  ///< Any of the Gaussian sources
+                                             ///< has absolute orientation
+
+  // H5 solutions handling
+  schaapcommon::h5parm::H5Parm h5_parm_;
+  std::string solset_name_;
+  std::string soltab_name_;
+  bool invert_{false};
+  bool parm_on_disk_{false};
+  bool use_amp_phase_{false};
+  double sigma_mmse_;
+  JonesParameters::CorrectType corr_type_;
+  JonesParameters::InterpolationType interp_type_;
+  JonesParameters::MissingAntennaBehavior missing_ant_behavior_;
+  schaapcommon::h5parm::SolTab sol_tab_;
+  schaapcommon::h5parm::SolTab sol_tab2_;
+  std::vector<casacore::String> parm_expressions_;
+  unsigned int timeslots_per_parmupdate_;
+  unsigned int timestep_;
+  double time_interval_;
+  double time_last_;
+  std::vector<std::string> soltab_names_;
+
+  std::vector<double> params_;
+
+  base::Direction phase_ref_;
+
+  common::NSTimer timer_;
+
+  // This should be set in updateInfo()
+  size_t n_threads_{0};
+
+#if defined(HAVE_LIBDIRAC) || defined(HAVE_LIBDIRAC_CUDA)
+  IOData iodata_;
+  BeamData beam_data_;
+  void loadData(std::unique_ptr<dp3::base::DPBuffer>& buffer);
+  void writeData(std::unique_ptr<dp3::base::DPBuffer>& buffer);
+  void readAuxData(const base::DPInfo&);
+  int beam_mode_{DOBEAM_NONE};
+  std::vector<int> ignore_list_;
+#endif /* HAVE_LIBDIRAC || HAVE_LIBDIRAC_CUDA */
+};
+
+}  // namespace steps
+}  // namespace dp3
+
+#endif  // header guard
diff --git a/steps/test/integration/CMakeLists.txt b/steps/test/integration/CMakeLists.txt
index 12a06804aad3498dbc72b394475fd5a632e90445..b94b6196f986f187bdf134268af0aefe52d02a8d 100644
--- a/steps/test/integration/CMakeLists.txt
+++ b/steps/test/integration/CMakeLists.txt
@@ -16,3 +16,7 @@ add_python_tests(
   tPredict
   tReadOnly
   tSplit)
+
+if(LIBDIRAC_FOUND)
+  add_python_tests(tSagecalPredict)
+endif()
diff --git a/steps/test/integration/tSagecalPredict.py b/steps/test/integration/tSagecalPredict.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3522c248412047d60b87eeccce54f00c4404834
--- /dev/null
+++ b/steps/test/integration/tSagecalPredict.py
@@ -0,0 +1,133 @@
+# Copyright (C) 2021 ASTRON (Netherlands Institute for Radio Astronomy)
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import pytest
+import shutil
+import os
+import uuid
+from subprocess import check_call, check_output
+
+# Append current directory to system path in order to import testconfig
+import sys
+
+sys.path.append(".")
+
+import testconfig as tcf
+from utils import assert_taql, untar_ms
+
+"""
+Similar to tPredict.py, testing SagecalPredict using pytest.
+"""
+
+# Note we re-use the same MS for output
+MSIN = "tNDPPP-generic.MS"
+# But we have a different reference MS for comparison
+MSPREDICT = "tSagecalPredict.tab"
+CWD = os.getcwd()
+
+
+@pytest.fixture(autouse=True)
+def source_env():
+    os.chdir(CWD)
+    tmpdir = str(uuid.uuid4())
+    os.mkdir(tmpdir)
+    os.chdir(tmpdir)
+
+    untar_ms(f"{tcf.RESOURCEDIR}/{MSIN}.tgz")
+    untar_ms(f"{tcf.SRCDIR}/{MSPREDICT}.tgz")
+
+    # Tests are executed here
+    yield
+
+    # Post-test: clean up
+    os.chdir(CWD)
+    shutil.rmtree(tmpdir)
+
+
+def test_with_beam_replace():
+    check_call(
+        [
+            tcf.DP3EXE,
+            f"msin={MSIN}",
+            "msout=.",
+            "msout.datacolumn=MODEL_DATA",
+            "steps=[predict]",
+            "predict.type=sagecalpredict",
+            f"predict.sourcedb={MSIN}/sky",
+            "predict.usebeammodel=true",
+            "predict.operation=replace",
+        ]
+    )
+
+    # Compare the MODEL_DATA column of the output MS with the PREDICT_beam of reference MS.
+    taql_command = f"select t1.MODEL_DATA, t2.PREDICT_beam from {MSIN} t1, {MSPREDICT} t2 where not all(near(t1.MODEL_DATA,t2.PREDICT_beam,5e-2) || (isnan(t1.MODEL_DATA) && isnan(t2.PREDICT_beam)))"
+    assert_taql(taql_command)
+
+
+def test_with_rapthor_workflow():
+    """
+    This is a long test, but it tries to cover all modes of use, includeing
+    in DDECal and in h5parmpredict
+    1) scalar phace cal 2) complex gain cal 3) apply solutions and predict
+    """
+    check_call(
+        [
+            tcf.DP3EXE,
+            f"msin={MSIN}",
+            "msout=.",
+            "steps=[ddecal]",
+            "ddecal.type=ddecal",
+            f"ddecal.sourcedb={MSIN}/sky",
+            "ddecal.usebeammodel=true",
+            "ddecal.beammode=array_factor",
+            "ddecal.sagecalpredict=true",
+            "ddecal.mode=scalarphase",
+            "ddecal.maxiter=50",
+            "ddecal.nchan=8",
+            "ddecal.stepsize=1e-3",
+            "ddecal.solint=10",
+            "ddecal.h5parm=solutions_stage1.h5",
+        ]
+    )
+    check_call(
+        [
+            tcf.DP3EXE,
+            f"msin={MSIN}",
+            "msout=.",
+            "steps=[ddecal]",
+            "ddecal.type=ddecal",
+            f"ddecal.sourcedb={MSIN}/sky",
+            "ddecal.usebeammodel=true",
+            "ddecal.beammode=array_factor",
+            "ddecal.sagecalpredict=true",
+            "ddecal.mode=complexgain",
+            "ddecal.maxiter=50",
+            "ddecal.nchan=8",
+            "ddecal.stepsize=1e-3",
+            "ddecal.solint=10",
+            "ddecal.h5parm=solutions_stage2.h5",
+            "ddecal.applycal.steps=[fastphase]",
+            "ddecal.applycal.fastphase.correction=phase000",
+            "ddecal.applycal.parmdb=solutions_stage1.h5",
+        ]
+    )
+    check_call(
+        [
+            tcf.DP3EXE,
+            f"msin={MSIN}",
+            "msout=.",
+            "msout.datacolumn=MODEL_DATA",
+            "steps=[predict]",
+            "predict.type=sagecalpredict",
+            f"predict.sourcedb={MSIN}/sky",
+            "predict.usebeammodel=true",
+            "predict.beammode=array_factor",
+            "predict.operation=replace",
+            "predict.applycal.correction=phase000",
+            "predict.applycal.parmdb=solutions_stage2.h5",
+        ]
+    )
+
+    # Compare the MODEL_DATA column of the output MS with the PREDICT_nobeam of reference MS.
+    taql_command = f"select t1.MODEL_DATA, t2.PREDICT_nobeam from {MSIN} t1, {MSPREDICT} t2 where not all(near(t1.MODEL_DATA,t2.PREDICT_nobeam,5e-2) || (isnan(t1.MODEL_DATA) && isnan(t2.PREDICT_nobeam)))"
+    assert_taql(taql_command)
diff --git a/steps/test/integration/tSagecalPredict.tab.tgz b/steps/test/integration/tSagecalPredict.tab.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..83ea84413b1586dd7081af525bfb8a41709d557d
Binary files /dev/null and b/steps/test/integration/tSagecalPredict.tab.tgz differ