diff --git a/idg/Imager.cc b/idg/Imager.cc
index 98da748c47a296bc313cd647fc05f2a0ffe4446b..c86a4739e065a155e3b87d801bb0e3c3ec05de83 100644
--- a/idg/Imager.cc
+++ b/idg/Imager.cc
@@ -97,8 +97,7 @@ xt::xtensor<T, 2> ComputeTaper(size_t size) {
   return taper;
 }
 
-void WeighVisibilities(size_t n_threads,
-                       const xt::xtensor<float, 2>& weight_map,
+void WeighVisibilities(const xt::xtensor<float, 2>& weight_map,
                        xt::xtensor<float, 3>& uv_pixels,
                        xt::xtensor<std::complex<float>, 3>& visibilities) {
   const size_t n_baselines = visibilities.shape()[0];
@@ -108,13 +107,15 @@ void WeighVisibilities(size_t n_threads,
   assert(uv_pixels.shape()[1] == n_baselines);
   assert(uv_pixels.shape()[2] == n_channels);
 
-  aocommon::StaticFor<size_t> loop(n_threads);
-  loop.Run(0, n_baselines, [&](size_t bl, size_t /*thread*/) {
-    for (size_t chan = 0; chan < n_channels; ++chan) {
-      for (size_t cor = 0; cor < n_correlations; ++cor) {
-        const size_t x = roundf(uv_pixels(0, bl, chan));
-        const size_t y = roundf(uv_pixels(1, bl, chan));
-        visibilities(bl, chan, cor) *= weight_map(y, x);
+  aocommon::StaticFor<size_t> loop;
+  loop.Run(0, n_baselines, [&](size_t start, size_t end, size_t) {
+    for (size_t bl = start; bl < end; bl++) {
+      for (size_t chan = 0; chan < n_channels; ++chan) {
+        for (size_t cor = 0; cor < n_correlations; ++cor) {
+          const size_t x = roundf(uv_pixels(0, bl, chan));
+          const size_t y = roundf(uv_pixels(1, bl, chan));
+          visibilities(bl, chan, cor) *= weight_map(y, x);
+        }
       }
     }
   });
@@ -136,7 +137,7 @@ float ComputeN(float l, float m) {
 }
 
 void VisibilitiesToSubgrids(
-    size_t n_threads, float image_size,
+    float image_size,
     float w_step,  // in lambda
     size_t grid_size, const xt::xtensor<double, 1>& frequencies,
     const xt::xtensor<double, 2>& uvw,
@@ -160,65 +161,67 @@ void VisibilitiesToSubgrids(
 
   const xt::xtensor<float, 1> wavenumbers = ComputeWavenumbers(frequencies);
 
-  aocommon::StaticFor<size_t> loop(n_threads);
-  loop.Run(0, n_baselines, [&](size_t bl, size_t /*thread*/) {
-    // Load metadata
-    const SubgridMetadata m = metadata(bl);
-    const float w_offset_in_lambda = w_step * (-m.tile_z + 0.5f);
-
-    // Compute u and v offset in wavelenghts
-    const float u_offset =
-        (m.subgrid_x + subgrid_size / 2.0f - grid_size / 2.0f) *
-        (2 * M_PI / image_size);
-    const float v_offset =
-        (m.subgrid_y + subgrid_size / 2.0f - grid_size / 2.0f) *
-        (2 * M_PI / image_size);
-    const float w_offset = 2 * M_PI * w_offset_in_lambda;
-
-    // Iterate all pixels in subgrid
-    for (size_t y = 0; y < subgrid_size; ++y) {
-      for (size_t x = 0; x < subgrid_size; ++x) {
-        // Compute l,m,n for phase offset and phase index calculation.
-        const float l_offset = ComputeL(x, subgrid_size, image_size);
-        const float m_offset = ComputeM(y, subgrid_size, image_size);
-        const float n_offset = ComputeN(l_offset, m_offset);
-
-        // Compute phase offset
-        const float phase_offset =
-            u_offset * l_offset + v_offset * m_offset + w_offset * n_offset;
-
-        // Initialize pixel (XX and YY)
-        std::complex<float> pixel = {0, 0};
-
-        // Load UVW coordinate
-        const float u = static_cast<float>(uvw(0, bl));
-        const float v = static_cast<float>(uvw(1, bl));
-        const float w = static_cast<float>(uvw(2, bl));
-
-        // Compute phase index
-        const float phase_index = u * l_offset + v * m_offset + w * n_offset;
-
-        // Update pixel for every channel
-        for (size_t chan = 0; chan < n_channels; ++chan) {
-          const float phase = phase_offset - (phase_index * wavenumbers[chan]);
-          const std::complex<float> phasor = {cosf(phase), sinf(phase)};
-          pixel +=
-              (visibilities(bl, chan, 0) + visibilities(bl, chan, 1)) * phasor;
-        }  // end for channel
-
-        // Compute shifted position in subgrid
-        const size_t x_dst = (x + (subgrid_size / 2)) % subgrid_size;
-        const size_t y_dst = (y + (subgrid_size / 2)) % subgrid_size;
-
-        // Copmute Stokes I pixel
-        subgrids(bl, y_dst, x_dst) = pixel * 0.5f * taper(y, x);
-      }  // end for x
-    }    // end for y
-  });    // end for bl
+  aocommon::StaticFor<size_t> loop;
+  loop.Run(0, n_baselines, [&](size_t start, size_t end, size_t) {
+    for (size_t bl = start; bl < end; bl++) {
+      // Load metadata
+      const SubgridMetadata m = metadata(bl);
+      const float w_offset_in_lambda = w_step * (-m.tile_z + 0.5f);
+
+      // Compute u and v offset in wavelenghts
+      const float u_offset =
+          (m.subgrid_x + subgrid_size / 2.0f - grid_size / 2.0f) *
+          (2 * M_PI / image_size);
+      const float v_offset =
+          (m.subgrid_y + subgrid_size / 2.0f - grid_size / 2.0f) *
+          (2 * M_PI / image_size);
+      const float w_offset = 2 * M_PI * w_offset_in_lambda;
+
+      // Iterate all pixels in subgrid
+      for (size_t y = 0; y < subgrid_size; ++y) {
+        for (size_t x = 0; x < subgrid_size; ++x) {
+          // Compute l,m,n for phase offset and phase index calculation.
+          const float l_offset = ComputeL(x, subgrid_size, image_size);
+          const float m_offset = ComputeM(y, subgrid_size, image_size);
+          const float n_offset = ComputeN(l_offset, m_offset);
+
+          // Compute phase offset
+          const float phase_offset =
+              u_offset * l_offset + v_offset * m_offset + w_offset * n_offset;
+
+          // Initialize pixel (XX and YY)
+          std::complex<float> pixel = {0, 0};
+
+          // Load UVW coordinate
+          const float u = static_cast<float>(uvw(0, bl));
+          const float v = static_cast<float>(uvw(1, bl));
+          const float w = static_cast<float>(uvw(2, bl));
+
+          // Compute phase index
+          const float phase_index = u * l_offset + v * m_offset + w * n_offset;
+
+          // Update pixel for every channel
+          for (size_t chan = 0; chan < n_channels; ++chan) {
+            const float phase =
+                phase_offset - (phase_index * wavenumbers[chan]);
+            const std::complex<float> phasor = {cosf(phase), sinf(phase)};
+            pixel += (visibilities(bl, chan, 0) + visibilities(bl, chan, 1)) *
+                     phasor;
+          }  // end for channel
+
+          // Compute shifted position in subgrid
+          const size_t x_dst = (x + (subgrid_size / 2)) % subgrid_size;
+          const size_t y_dst = (y + (subgrid_size / 2)) % subgrid_size;
+
+          // Copmute Stokes I pixel
+          subgrids(bl, y_dst, x_dst) = pixel * 0.5f * taper(y, x);
+        }  // end for x
+      }    // end for y
+    }
+  });  // end for bl
 }  // end VisibilitiesToSubgrids
 
-void ApplySubgridFFT(size_t n_threads,
-                     xt::xtensor<std::complex<float>, 3>& subgrids, int sign) {
+void ApplySubgridFFT(xt::xtensor<std::complex<float>, 3>& subgrids, int sign) {
   const size_t n_subgrids = subgrids.shape()[0];
   const size_t size = subgrids.shape()[1];
   fftwf_plan plan;
@@ -226,16 +229,18 @@ void ApplySubgridFFT(size_t n_threads,
   fftwf_complex* out = in;
   plan = fftwf_plan_dft_2d(size, size, in, out, sign, FFTW_ESTIMATE);
 
-  aocommon::StaticFor<size_t> loop(n_threads);
-  loop.Run(0, n_subgrids, [&](size_t i, size_t /*thread*/) {
-    fftwf_complex* subgrid =
-        reinterpret_cast<fftwf_complex*>(&subgrids(i, 0, 0));
-    fftwf_execute_dft(plan, subgrid, subgrid);
-    const float scale =
-        1.0f / (static_cast<float>(size) * static_cast<float>(size));
-    for (size_t y = 0; y < size; ++y) {
-      for (size_t x = 0; x < size; ++x) {
-        subgrids(i, y, x) *= scale;
+  aocommon::StaticFor<size_t> loop;
+  loop.Run(0, n_subgrids, [&](size_t start, size_t end, size_t) {
+    for (size_t s = start; s < end; s++) {
+      fftwf_complex* subgrid =
+          reinterpret_cast<fftwf_complex*>(&subgrids(s, 0, 0));
+      fftwf_execute_dft(plan, subgrid, subgrid);
+      const float scale =
+          1.0f / (static_cast<float>(size) * static_cast<float>(size));
+      for (size_t y = 0; y < size; ++y) {
+        for (size_t x = 0; x < size; ++x) {
+          subgrids(s, y, x) *= scale;
+        }
       }
     }
   });
@@ -243,25 +248,21 @@ void ApplySubgridFFT(size_t n_threads,
   fftwf_destroy_plan(plan);
 }  // end ApplySubgridFFT
 
-void SubgridsToGrid(size_t n_threads, size_t subgrid_size, size_t grid_size,
+void SubgridsToGrid(size_t subgrid_size, size_t grid_size,
                     const xt::xtensor<SubgridMetadata, 1>& metadata,
                     const xt::xtensor<std::complex<float>, 3>& subgrids,
                     xt::xtensor<std::complex<float>, 2>& grid) {
-  aocommon::StaticFor<size_t> loop(n_threads);
+  aocommon::StaticFor<size_t> loop;
 
   const size_t nr_subgrids = metadata.size();
 
-  loop.Run(0, n_threads, [&](size_t thread, size_t /*thread*/) {
-    // Iterate over subgrid rows, starting at a row that belongs to this
-    // thread and stepping by the number of threads
+  loop.Run(0, subgrid_size, [&](size_t start, size_t end, size_t) {
     for (size_t s = 0; s < nr_subgrids; s++) {
       const SubgridMetadata m = metadata[s];
       const size_t subgrid_y = m.subgrid_y;
       const size_t subgrid_x = m.subgrid_x;
 
-      size_t start_y =
-          (n_threads - (subgrid_y % n_threads) + thread) % n_threads;
-      for (size_t y = start_y; y < subgrid_size; y += n_threads) {
+      for (size_t y = start; y < end; y++) {
         for (size_t x = 0; x < subgrid_size; x++) {
           const size_t y_dst = (subgrid_y + y + grid_size / 2) % grid_size;
           const size_t x_dst = (subgrid_x + y + grid_size / 2) % grid_size;
@@ -272,14 +273,14 @@ void SubgridsToGrid(size_t n_threads, size_t subgrid_size, size_t grid_size,
   });
 }
 
-void SubgridsToTiles(size_t n_threads, size_t tile_size, size_t grid_size,
+void SubgridsToTiles(size_t tile_size, size_t grid_size,
                      const xt::xtensor<SubgridMetadata, 1>& metadata,
                      const xt::xtensor<std::complex<float>, 3>& subgrids,
                      xt::xtensor<std::complex<float>, 3>& tiles) {
   const size_t subgrid_size = subgrids.shape()[1];
   assert(subgrid_size == subgrids.shape()[2]);
 
-  aocommon::StaticFor<size_t> loop(n_threads);
+  aocommon::StaticFor<size_t> loop;
 
   // Precompute phasor
   xt::xtensor<float, 2> phase({subgrid_size, subgrid_size});
@@ -287,23 +288,25 @@ void SubgridsToTiles(size_t n_threads, size_t tile_size, size_t grid_size,
   xt::xtensor<float, 2> phasor_imag({subgrid_size, subgrid_size});
   assert(phase.shape()[1] == subgrid_size);
 
-  loop.Run(0, subgrid_size, [&](size_t y, size_t /*thread*/) {
-    for (size_t x = 0; x < subgrid_size; ++x) {
-      phase(y, x) =
-          M_PI *
-          (static_cast<float>(x) + static_cast<float>(y) - subgrid_size) /
-          subgrid_size;
-    }
+  loop.Run(0, subgrid_size, [&](size_t start, size_t end, size_t) {
+    for (size_t y = start; y < end; y++) {
+      for (size_t x = 0; x < subgrid_size; ++x) {
+        phase(y, x) =
+            M_PI *
+            (static_cast<float>(x) + static_cast<float>(y) - subgrid_size) /
+            subgrid_size;
+      }
 
-    for (size_t x = 0; x < subgrid_size; ++x) {
-      phasor_real(y, x) = cosf(phase(y, x));
-      phasor_imag(y, x) = sinf(phase(y, x));
+      for (size_t x = 0; x < subgrid_size; ++x) {
+        phasor_real(y, x) = cosf(phase(y, x));
+        phasor_imag(y, x) = sinf(phase(y, x));
+      }
     }
   });  // end for y
 
   const size_t n_baselines = subgrids.shape()[0];
 
-  loop.Run(0, n_threads, [&](size_t, size_t thread) {
+  loop.Run(0, subgrid_size, [&](size_t start, size_t end, size_t) {
     for (size_t bl = 0; bl < n_baselines; ++bl) {
       const SubgridMetadata m = metadata[bl];
 
@@ -312,11 +315,8 @@ void SubgridsToTiles(size_t n_threads, size_t tile_size, size_t grid_size,
       const size_t subgrid_x = m.subgrid_x - m.tile_x;
       const size_t subgrid_y = m.subgrid_y - m.tile_y;
 
-      // Iterate over subgrid rows, starting at a row that belongs to this
-      // thread and stepping by the number of threads
-      size_t start_y =
-          (n_threads - (subgrid_y % n_threads) + thread) % n_threads;
-      for (size_t y = start_y; y < subgrid_size; y += n_threads) {
+      // Iterate over subgrid rows
+      for (size_t y = start; y < end; y++) {
         // Iterate all columns of subgrid
         for (size_t x = 0; x < subgrid_size; ++x) {
           // Compute position in subgrid
@@ -440,32 +440,37 @@ void ApplyGridFFT(size_t n_threads, xt::xtensor<std::complex<float>, 2>& in,
   fftwf_plan plan =
       fftwf_plan_dft_1d(size, nullptr, nullptr, sign, FFTW_ESTIMATE);
 
-  aocommon::StaticFor<size_t> loop(n_threads);
+  aocommon::StaticFor<size_t> loop;
 
   xt::xtensor<std::complex<float>, 2> temp({size, size}, 0);
 
   // FFT over rows
-  loop.Run(0, size, [&](size_t i, size_t /*thread*/) {
-    fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(&in(i, 0));
-    fftwf_execute_dft(plan, ptr, ptr);
+  loop.Run(0, size, [&](size_t start, size_t end, size_t) {
+    for (size_t y = start; y < end; y++) {
+      fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(&in(y, 0));
+
+      fftwf_execute_dft(plan, ptr, ptr);
+    }
   });
 
   // Iterate all columns
-  loop.Run(0, size, [&](size_t x, size_t /*thread*/) {
+  loop.Run(0, size, [&](size_t start, size_t end, size_t) {
     xt::xtensor<std::complex<float>, 1> temp({size}, 0);
 
-    // Copy column
-    for (size_t y = 0; y < size; ++y) {
-      temp(y) = in(y, x);
-    }
+    for (size_t x = start; x < end; x++) {
+      // Copy column
+      for (size_t y = 0; y < size; ++y) {
+        temp(y) = in(y, x);
+      }
 
-    // FFT over column
-    fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(temp.data());
-    fftwf_execute_dft(plan, ptr, ptr);
+      // FFT over column
+      fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(temp.data());
+      fftwf_execute_dft(plan, ptr, ptr);
 
-    // Store the result
-    for (size_t y = 0; y < size; ++y) {
-      out(y, x) = temp(y);
+      // Store the result
+      for (size_t y = 0; y < size; ++y) {
+        out(y, x) = temp(y);
+      }
     }
   });
 
@@ -502,8 +507,8 @@ void ApplyPhasor(size_t tile_idx, float image_size, float w_step, float tile_z,
   }    // end for y
 }  // end ApplyPhasor
 
-void PaddedTilesToGrid(aocommon::StaticFor<size_t>& loop, size_t n_tiles,
-                       size_t tile_offset,
+void PaddedTilesToGrid(size_t n_threads, aocommon::StaticFor<size_t>& loop,
+                       size_t n_tiles, size_t tile_offset,
                        const xt::xtensor<TileMetadata, 1>& metadata,
                        const xt::xtensor<std::complex<float>, 3>& tiles,
                        xt::xtensor<std::complex<float>, 2>& grid) {
@@ -511,9 +516,8 @@ void PaddedTilesToGrid(aocommon::StaticFor<size_t>& loop, size_t n_tiles,
   assert(tile_size == tiles.shape()[2]);
   const size_t grid_size = grid.shape()[0];
   assert(grid_size == grid.shape()[1]);
-  const size_t n_threads = loop.NThreads();
 
-  loop.Run(0, n_threads, [&](size_t thread, size_t) {
+  loop.Run(0, n_threads, [&](size_t, size_t, size_t thread) {
     for (size_t i = 0; i < n_tiles; ++i) {
       const TileMetadata m = metadata[tile_offset + i];
       const size_t x0 = m.tile_x;
@@ -554,7 +558,7 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step,
   fftwf_plan plan_backward = fftwf_plan_dft_1d(
       padded_tile_size, nullptr, nullptr, FFTW_BACKWARD, FFTW_ESTIMATE);
 
-  aocommon::StaticFor<size_t> loop(n_threads);
+  aocommon::StaticFor<size_t> loop;
 
   const size_t n_tiles = tiles.shape()[0];
   size_t current_n_tiles = 0;
@@ -563,7 +567,8 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step,
     current_n_tiles = std::min(n_threads, n_tiles - tile_offset);
 
     loop.Run(tile_offset, tile_offset + current_n_tiles,
-             [&](size_t tile_id, size_t thread) {
+             [&](size_t, size_t, size_t thread) {
+               const size_t tile_id = tile_offset + thread;
                const TileMetadata m = metadata[tile_id];
                const size_t padded_tile_id = thread;
                ZeroTile(padded_tile_id, padded_tiles);
@@ -576,7 +581,7 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step,
              });
 
     // Add current batch of tiles to grid
-    PaddedTilesToGrid(loop, current_n_tiles, tile_offset, metadata,
+    PaddedTilesToGrid(n_threads, loop, current_n_tiles, tile_offset, metadata,
                       padded_tiles, grid);
   }  // end for tile_offset
 }  // end TilesToGrid
@@ -585,8 +590,7 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step,
 namespace dp3 {
 namespace idg {
 
-Imager::Imager(size_t n_threads, size_t grid_size)
-    : n_threads_(n_threads), grid_size_(grid_size) {
+Imager::Imager(size_t grid_size) : grid_size_(grid_size) {
   if (kApplyWeighting) {
     weight_map_ = xt::xtensor<float, 2>({grid_size, grid_size}, 0.0f);
   }
@@ -632,66 +636,69 @@ void Imager::Initialize(const xt::xtensor<double, 1>& frequencies,
   // Initialize per baseline metadata
   subgrid_metadata_.resize({n_baselines});
   uv_pixels_.resize({2, n_baselines, n_channels});
-  aocommon::StaticFor<size_t> loop(1);
-  loop.Run(0, n_baselines, [&](size_t bl, size_t /*thread*/) {
-    const double u_meters = uvw(bl, 0);
-    const double v_meters = uvw(bl, 1);
-    const double w_meters = uvw(bl, 2);
-
-    // Compute u and v in pixels
-    for (size_t chan = 0; chan < n_channels; ++chan) {
-      uv_pixels_(0, bl, chan) =
-          u_meters * image_size_ * (frequencies[chan] / detail::c());
-      uv_pixels_(1, bl, chan) =
-          v_meters * image_size_ * (frequencies[chan] / detail::c());
-    }
+  aocommon::StaticFor<size_t> loop;
+  loop.Run(0, n_baselines, [&](size_t start, size_t end, size_t) {
+    for (size_t bl = start; bl < end; bl++) {
+      const double u_meters = uvw(bl, 0);
+      const double v_meters = uvw(bl, 1);
+      const double w_meters = uvw(bl, 2);
+
+      // Compute u and v in pixels
+      for (size_t chan = 0; chan < n_channels; ++chan) {
+        uv_pixels_(0, bl, chan) =
+            u_meters * image_size_ * (frequencies[chan] / detail::c());
+        uv_pixels_(1, bl, chan) =
+            v_meters * image_size_ * (frequencies[chan] / detail::c());
+      }
 
-    // W in lambda
-    const float w_lambda = w_meters * (frequencies[0] / detail::c()) * w_step_;
-
-    // Compute min and max u and v values
-    const float u0 = uv_pixels_(0, bl, 0);
-    const float u1 = uv_pixels_(0, bl, n_channels - 1);
-    const float v0 = uv_pixels_(1, bl, 0);
-    const float v1 = uv_pixels_(1, bl, n_channels - 1);
-    const float u_min = std::min(u0, u1);
-    const float u_max = std::max(u0, u1);
-    const float v_min = std::min(v0, v1);
-    const float v_max = std::max(v0, v1);
-
-    // Middle point of subgrid in pixels
-    int subgrid_x = roundf((u_max + u_min) / 2);
-    int subgrid_y = roundf((v_max + v_min) / 2);
-
-    // Shift middle of subgrid from middle of grid to top left
-    subgrid_x += (grid_size_ / 2);
-    subgrid_y += (grid_size_ / 2);
-
-    // Shift from middle of subgrid to top left of subgrid
-    subgrid_x -= (subgrid_size_ / 2);
-    subgrid_y -= (subgrid_size_ / 2);
-
-    // Compute tile indices
-    size_t tile_x = subgrid_x / kTileSize;
-    size_t tile_y = subgrid_y / kTileSize;
-    const size_t tile_z = -floorf(w_lambda);
-    const size_t tile_index =
-        tile_z * n_tiles_xy * n_tiles_xy + tile_y * n_tiles_xy + tile_x;
-
-    // Compute top left padded tile coordinate
-    tile_x *= kTileSize;
-    tile_y *= kTileSize;
-
-    // Store metadata
-    detail::SubgridMetadata metadata = {
-        .subgrid_x = static_cast<size_t>(subgrid_x),
-        .subgrid_y = static_cast<size_t>(subgrid_y),
-        .tile_x = tile_x,
-        .tile_y = tile_y,
-        .tile_z = tile_z,
-        .tile_index = tile_index};
-
-    subgrid_metadata_[bl] = metadata;
+      // W in lambda
+      const float w_lambda =
+          w_meters * (frequencies[0] / detail::c()) * w_step_;
+
+      // Compute min and max u and v values
+      const float u0 = uv_pixels_(0, bl, 0);
+      const float u1 = uv_pixels_(0, bl, n_channels - 1);
+      const float v0 = uv_pixels_(1, bl, 0);
+      const float v1 = uv_pixels_(1, bl, n_channels - 1);
+      const float u_min = std::min(u0, u1);
+      const float u_max = std::max(u0, u1);
+      const float v_min = std::min(v0, v1);
+      const float v_max = std::max(v0, v1);
+
+      // Middle point of subgrid in pixels
+      int subgrid_x = roundf((u_max + u_min) / 2);
+      int subgrid_y = roundf((v_max + v_min) / 2);
+
+      // Shift middle of subgrid from middle of grid to top left
+      subgrid_x += (grid_size_ / 2);
+      subgrid_y += (grid_size_ / 2);
+
+      // Shift from middle of subgrid to top left of subgrid
+      subgrid_x -= (subgrid_size_ / 2);
+      subgrid_y -= (subgrid_size_ / 2);
+
+      // Compute tile indices
+      size_t tile_x = subgrid_x / kTileSize;
+      size_t tile_y = subgrid_y / kTileSize;
+      const size_t tile_z = -floorf(w_lambda);
+      const size_t tile_index =
+          tile_z * n_tiles_xy * n_tiles_xy + tile_y * n_tiles_xy + tile_x;
+
+      // Compute top left padded tile coordinate
+      tile_x *= kTileSize;
+      tile_y *= kTileSize;
+
+      // Store metadata
+      detail::SubgridMetadata metadata = {
+          .subgrid_x = static_cast<size_t>(subgrid_x),
+          .subgrid_y = static_cast<size_t>(subgrid_y),
+          .tile_x = tile_x,
+          .tile_y = tile_y,
+          .tile_z = tile_z,
+          .tile_index = tile_index};
+
+      subgrid_metadata_[bl] = metadata;
+    }
   });
 
   // Initialize weight map
@@ -792,23 +799,22 @@ void Imager::Invert(xt::xtensor<std::complex<float>, 3>& visibilities) {
   assert(n_channels == frequencies_.size());
   assert(n_correlations == 2);
   if (kApplyWeighting) {
-    detail::WeighVisibilities(n_threads_, weight_map_, uv_pixels_,
-                              visibilities);
+    detail::WeighVisibilities(weight_map_, uv_pixels_, visibilities);
   }
-  detail::VisibilitiesToSubgrids(
-      n_threads_, image_size_, w_step_, padded_grid_size_, frequencies_, uvw_,
-      visibilities, taper_, subgrid_metadata_, subgrids_);
-  detail::ApplySubgridFFT(n_threads_, subgrids_, FFTW_BACKWARD);
-  detail::SubgridsToTiles(n_threads_, padded_tile_size_, padded_grid_size_,
+  detail::VisibilitiesToSubgrids(image_size_, w_step_, padded_grid_size_,
+                                 frequencies_, uvw_, visibilities, taper_,
+                                 subgrid_metadata_, subgrids_);
+  detail::ApplySubgridFFT(subgrids_, FFTW_BACKWARD);
+  detail::SubgridsToTiles(padded_tile_size_, padded_grid_size_,
                           subgrid_metadata_, subgrids_, tiles_);
-  detail::TilesToGrid(n_threads_, image_size_, w_step_, padded_tile_size_,
+  detail::TilesToGrid(NThreads(), image_size_, w_step_, padded_tile_size_,
                       wpadded_tile_size_, padded_grid_size_, tile_metadata_,
                       tiles_, padded_grid_);
 }
 
 xt::xtensor<float, 2> Imager::GetImage() {
   xt::xtensor<std::complex<float>, 2> padded_image(padded_grid_.shape());
-  detail::ApplyGridFFT(n_threads_, padded_grid_, padded_image, FFTW_FORWARD);
+  detail::ApplyGridFFT(padded_grid_, padded_image, FFTW_FORWARD);
   xt::xtensor<float, 2> image({grid_size_, grid_size_}, 0.0f);
 
   xt::xtensor<float, 1> taper =
@@ -824,20 +830,26 @@ xt::xtensor<float, 2> Imager::GetImage() {
   const size_t x_start = y_start;
   const size_t x_end = y_end;
 
-  aocommon::StaticFor<size_t> loop(n_threads_);
-  loop.Run(y_start, y_end, [&](size_t y, size_t /*thread*/) {
-    for (size_t x = x_start; x < x_end; ++x) {
-      const size_t y_src = (y + (padded_grid_size_ / 2)) % padded_grid_size_;
-      const size_t x_src = (x + (padded_grid_size_ / 2)) % padded_grid_size_;
-      const size_t y_dst = y - y_start;
-      const size_t x_dst = x - x_start;
-      image(y_dst, x_dst) = padded_image(y_src, x_src).real() *
-                            inverse_taper[y_dst] * inverse_taper[x_dst];
+  aocommon::StaticFor<size_t> loop;
+  loop.Run(y_start, y_end, [&](size_t start, size_t end, size_t) {
+    for (size_t y = start; y < end; ++y) {
+      for (size_t x = x_start; x < x_end; ++x) {
+        const size_t y_src = (y + (padded_grid_size_ / 2)) % padded_grid_size_;
+        const size_t x_src = (x + (padded_grid_size_ / 2)) % padded_grid_size_;
+        const size_t y_dst = y - y_start;
+        const size_t x_dst = x - x_start;
+        image(y_dst, x_dst) = padded_image(y_src, x_src).real() *
+                              inverse_taper[y_dst] * inverse_taper[x_dst];
+      }
     }
   });
 
   return image;
 }
 
+size_t Imager::NThreads() const {
+  return aocommon::ThreadPool::GetInstance().NThreads();
+}
+
 }  // namespace idg
 }  // namespace dp3
diff --git a/idg/Imager.h b/idg/Imager.h
index 6a88ae7f1e4b9db614be4659c6da5db0625066d7..de42a02405555a2849ccdc815cee54e530b29517 100644
--- a/idg/Imager.h
+++ b/idg/Imager.h
@@ -32,7 +32,7 @@ namespace idg {
 
 class Imager {
  public:
-  Imager(size_t n_threads, size_t grid_size);
+  Imager(size_t grid_size);
 
   void Initialize(const xt::xtensor<double, 1>& frequencies,
                   const xt::xtensor<double, 2>& uvw,
@@ -43,13 +43,14 @@ class Imager {
   xt::xtensor<float, 2> GetImage();
 
  private:
+  size_t NThreads() const;
+
   const float kPadding = 1.20;
   const float kWKernelSize = 8.0;      // in pixels
   const float kTaperKernelSize = 7.0;  // in pixels
   const size_t kTileSize = 128;
   const bool kApplyWeighting = false;
 
-  size_t n_threads_;
   size_t grid_size_;
   size_t padded_grid_size_;
   float image_size_;
diff --git a/steps/IDGImage.cc b/steps/IDGImage.cc
index 8f64643954d242d3e694643fb81372edc20870c3..4d4eb9d066c04023516754eddc489e3a0373defc 100644
--- a/steps/IDGImage.cc
+++ b/steps/IDGImage.cc
@@ -29,7 +29,7 @@ void IDGImage::updateInfo(const base::DPInfo& info) {
   n_channels_ = info.nchan();
   const std::vector<double>& frequencies = info.chanFreqs();
   frequencies_ = xt::adapt(frequencies, {frequencies.size()});
-  imager_ = std::make_unique<idg::Imager>(info.nThreads(), grid_size_);
+  imager_ = std::make_unique<idg::Imager>(grid_size_);
 }
 
 void IDGImage::finish() {