diff --git a/idg/Imager.cc b/idg/Imager.cc index 98da748c47a296bc313cd647fc05f2a0ffe4446b..c86a4739e065a155e3b87d801bb0e3c3ec05de83 100644 --- a/idg/Imager.cc +++ b/idg/Imager.cc @@ -97,8 +97,7 @@ xt::xtensor<T, 2> ComputeTaper(size_t size) { return taper; } -void WeighVisibilities(size_t n_threads, - const xt::xtensor<float, 2>& weight_map, +void WeighVisibilities(const xt::xtensor<float, 2>& weight_map, xt::xtensor<float, 3>& uv_pixels, xt::xtensor<std::complex<float>, 3>& visibilities) { const size_t n_baselines = visibilities.shape()[0]; @@ -108,13 +107,15 @@ void WeighVisibilities(size_t n_threads, assert(uv_pixels.shape()[1] == n_baselines); assert(uv_pixels.shape()[2] == n_channels); - aocommon::StaticFor<size_t> loop(n_threads); - loop.Run(0, n_baselines, [&](size_t bl, size_t /*thread*/) { - for (size_t chan = 0; chan < n_channels; ++chan) { - for (size_t cor = 0; cor < n_correlations; ++cor) { - const size_t x = roundf(uv_pixels(0, bl, chan)); - const size_t y = roundf(uv_pixels(1, bl, chan)); - visibilities(bl, chan, cor) *= weight_map(y, x); + aocommon::StaticFor<size_t> loop; + loop.Run(0, n_baselines, [&](size_t start, size_t end, size_t) { + for (size_t bl = start; bl < end; bl++) { + for (size_t chan = 0; chan < n_channels; ++chan) { + for (size_t cor = 0; cor < n_correlations; ++cor) { + const size_t x = roundf(uv_pixels(0, bl, chan)); + const size_t y = roundf(uv_pixels(1, bl, chan)); + visibilities(bl, chan, cor) *= weight_map(y, x); + } } } }); @@ -136,7 +137,7 @@ float ComputeN(float l, float m) { } void VisibilitiesToSubgrids( - size_t n_threads, float image_size, + float image_size, float w_step, // in lambda size_t grid_size, const xt::xtensor<double, 1>& frequencies, const xt::xtensor<double, 2>& uvw, @@ -160,65 +161,67 @@ void VisibilitiesToSubgrids( const xt::xtensor<float, 1> wavenumbers = ComputeWavenumbers(frequencies); - aocommon::StaticFor<size_t> loop(n_threads); - loop.Run(0, n_baselines, [&](size_t bl, size_t /*thread*/) { - // Load metadata - const SubgridMetadata m = metadata(bl); - const float w_offset_in_lambda = w_step * (-m.tile_z + 0.5f); - - // Compute u and v offset in wavelenghts - const float u_offset = - (m.subgrid_x + subgrid_size / 2.0f - grid_size / 2.0f) * - (2 * M_PI / image_size); - const float v_offset = - (m.subgrid_y + subgrid_size / 2.0f - grid_size / 2.0f) * - (2 * M_PI / image_size); - const float w_offset = 2 * M_PI * w_offset_in_lambda; - - // Iterate all pixels in subgrid - for (size_t y = 0; y < subgrid_size; ++y) { - for (size_t x = 0; x < subgrid_size; ++x) { - // Compute l,m,n for phase offset and phase index calculation. - const float l_offset = ComputeL(x, subgrid_size, image_size); - const float m_offset = ComputeM(y, subgrid_size, image_size); - const float n_offset = ComputeN(l_offset, m_offset); - - // Compute phase offset - const float phase_offset = - u_offset * l_offset + v_offset * m_offset + w_offset * n_offset; - - // Initialize pixel (XX and YY) - std::complex<float> pixel = {0, 0}; - - // Load UVW coordinate - const float u = static_cast<float>(uvw(0, bl)); - const float v = static_cast<float>(uvw(1, bl)); - const float w = static_cast<float>(uvw(2, bl)); - - // Compute phase index - const float phase_index = u * l_offset + v * m_offset + w * n_offset; - - // Update pixel for every channel - for (size_t chan = 0; chan < n_channels; ++chan) { - const float phase = phase_offset - (phase_index * wavenumbers[chan]); - const std::complex<float> phasor = {cosf(phase), sinf(phase)}; - pixel += - (visibilities(bl, chan, 0) + visibilities(bl, chan, 1)) * phasor; - } // end for channel - - // Compute shifted position in subgrid - const size_t x_dst = (x + (subgrid_size / 2)) % subgrid_size; - const size_t y_dst = (y + (subgrid_size / 2)) % subgrid_size; - - // Copmute Stokes I pixel - subgrids(bl, y_dst, x_dst) = pixel * 0.5f * taper(y, x); - } // end for x - } // end for y - }); // end for bl + aocommon::StaticFor<size_t> loop; + loop.Run(0, n_baselines, [&](size_t start, size_t end, size_t) { + for (size_t bl = start; bl < end; bl++) { + // Load metadata + const SubgridMetadata m = metadata(bl); + const float w_offset_in_lambda = w_step * (-m.tile_z + 0.5f); + + // Compute u and v offset in wavelenghts + const float u_offset = + (m.subgrid_x + subgrid_size / 2.0f - grid_size / 2.0f) * + (2 * M_PI / image_size); + const float v_offset = + (m.subgrid_y + subgrid_size / 2.0f - grid_size / 2.0f) * + (2 * M_PI / image_size); + const float w_offset = 2 * M_PI * w_offset_in_lambda; + + // Iterate all pixels in subgrid + for (size_t y = 0; y < subgrid_size; ++y) { + for (size_t x = 0; x < subgrid_size; ++x) { + // Compute l,m,n for phase offset and phase index calculation. + const float l_offset = ComputeL(x, subgrid_size, image_size); + const float m_offset = ComputeM(y, subgrid_size, image_size); + const float n_offset = ComputeN(l_offset, m_offset); + + // Compute phase offset + const float phase_offset = + u_offset * l_offset + v_offset * m_offset + w_offset * n_offset; + + // Initialize pixel (XX and YY) + std::complex<float> pixel = {0, 0}; + + // Load UVW coordinate + const float u = static_cast<float>(uvw(0, bl)); + const float v = static_cast<float>(uvw(1, bl)); + const float w = static_cast<float>(uvw(2, bl)); + + // Compute phase index + const float phase_index = u * l_offset + v * m_offset + w * n_offset; + + // Update pixel for every channel + for (size_t chan = 0; chan < n_channels; ++chan) { + const float phase = + phase_offset - (phase_index * wavenumbers[chan]); + const std::complex<float> phasor = {cosf(phase), sinf(phase)}; + pixel += (visibilities(bl, chan, 0) + visibilities(bl, chan, 1)) * + phasor; + } // end for channel + + // Compute shifted position in subgrid + const size_t x_dst = (x + (subgrid_size / 2)) % subgrid_size; + const size_t y_dst = (y + (subgrid_size / 2)) % subgrid_size; + + // Copmute Stokes I pixel + subgrids(bl, y_dst, x_dst) = pixel * 0.5f * taper(y, x); + } // end for x + } // end for y + } + }); // end for bl } // end VisibilitiesToSubgrids -void ApplySubgridFFT(size_t n_threads, - xt::xtensor<std::complex<float>, 3>& subgrids, int sign) { +void ApplySubgridFFT(xt::xtensor<std::complex<float>, 3>& subgrids, int sign) { const size_t n_subgrids = subgrids.shape()[0]; const size_t size = subgrids.shape()[1]; fftwf_plan plan; @@ -226,16 +229,18 @@ void ApplySubgridFFT(size_t n_threads, fftwf_complex* out = in; plan = fftwf_plan_dft_2d(size, size, in, out, sign, FFTW_ESTIMATE); - aocommon::StaticFor<size_t> loop(n_threads); - loop.Run(0, n_subgrids, [&](size_t i, size_t /*thread*/) { - fftwf_complex* subgrid = - reinterpret_cast<fftwf_complex*>(&subgrids(i, 0, 0)); - fftwf_execute_dft(plan, subgrid, subgrid); - const float scale = - 1.0f / (static_cast<float>(size) * static_cast<float>(size)); - for (size_t y = 0; y < size; ++y) { - for (size_t x = 0; x < size; ++x) { - subgrids(i, y, x) *= scale; + aocommon::StaticFor<size_t> loop; + loop.Run(0, n_subgrids, [&](size_t start, size_t end, size_t) { + for (size_t s = start; s < end; s++) { + fftwf_complex* subgrid = + reinterpret_cast<fftwf_complex*>(&subgrids(s, 0, 0)); + fftwf_execute_dft(plan, subgrid, subgrid); + const float scale = + 1.0f / (static_cast<float>(size) * static_cast<float>(size)); + for (size_t y = 0; y < size; ++y) { + for (size_t x = 0; x < size; ++x) { + subgrids(s, y, x) *= scale; + } } } }); @@ -243,25 +248,21 @@ void ApplySubgridFFT(size_t n_threads, fftwf_destroy_plan(plan); } // end ApplySubgridFFT -void SubgridsToGrid(size_t n_threads, size_t subgrid_size, size_t grid_size, +void SubgridsToGrid(size_t subgrid_size, size_t grid_size, const xt::xtensor<SubgridMetadata, 1>& metadata, const xt::xtensor<std::complex<float>, 3>& subgrids, xt::xtensor<std::complex<float>, 2>& grid) { - aocommon::StaticFor<size_t> loop(n_threads); + aocommon::StaticFor<size_t> loop; const size_t nr_subgrids = metadata.size(); - loop.Run(0, n_threads, [&](size_t thread, size_t /*thread*/) { - // Iterate over subgrid rows, starting at a row that belongs to this - // thread and stepping by the number of threads + loop.Run(0, subgrid_size, [&](size_t start, size_t end, size_t) { for (size_t s = 0; s < nr_subgrids; s++) { const SubgridMetadata m = metadata[s]; const size_t subgrid_y = m.subgrid_y; const size_t subgrid_x = m.subgrid_x; - size_t start_y = - (n_threads - (subgrid_y % n_threads) + thread) % n_threads; - for (size_t y = start_y; y < subgrid_size; y += n_threads) { + for (size_t y = start; y < end; y++) { for (size_t x = 0; x < subgrid_size; x++) { const size_t y_dst = (subgrid_y + y + grid_size / 2) % grid_size; const size_t x_dst = (subgrid_x + y + grid_size / 2) % grid_size; @@ -272,14 +273,14 @@ void SubgridsToGrid(size_t n_threads, size_t subgrid_size, size_t grid_size, }); } -void SubgridsToTiles(size_t n_threads, size_t tile_size, size_t grid_size, +void SubgridsToTiles(size_t tile_size, size_t grid_size, const xt::xtensor<SubgridMetadata, 1>& metadata, const xt::xtensor<std::complex<float>, 3>& subgrids, xt::xtensor<std::complex<float>, 3>& tiles) { const size_t subgrid_size = subgrids.shape()[1]; assert(subgrid_size == subgrids.shape()[2]); - aocommon::StaticFor<size_t> loop(n_threads); + aocommon::StaticFor<size_t> loop; // Precompute phasor xt::xtensor<float, 2> phase({subgrid_size, subgrid_size}); @@ -287,23 +288,25 @@ void SubgridsToTiles(size_t n_threads, size_t tile_size, size_t grid_size, xt::xtensor<float, 2> phasor_imag({subgrid_size, subgrid_size}); assert(phase.shape()[1] == subgrid_size); - loop.Run(0, subgrid_size, [&](size_t y, size_t /*thread*/) { - for (size_t x = 0; x < subgrid_size; ++x) { - phase(y, x) = - M_PI * - (static_cast<float>(x) + static_cast<float>(y) - subgrid_size) / - subgrid_size; - } + loop.Run(0, subgrid_size, [&](size_t start, size_t end, size_t) { + for (size_t y = start; y < end; y++) { + for (size_t x = 0; x < subgrid_size; ++x) { + phase(y, x) = + M_PI * + (static_cast<float>(x) + static_cast<float>(y) - subgrid_size) / + subgrid_size; + } - for (size_t x = 0; x < subgrid_size; ++x) { - phasor_real(y, x) = cosf(phase(y, x)); - phasor_imag(y, x) = sinf(phase(y, x)); + for (size_t x = 0; x < subgrid_size; ++x) { + phasor_real(y, x) = cosf(phase(y, x)); + phasor_imag(y, x) = sinf(phase(y, x)); + } } }); // end for y const size_t n_baselines = subgrids.shape()[0]; - loop.Run(0, n_threads, [&](size_t, size_t thread) { + loop.Run(0, subgrid_size, [&](size_t start, size_t end, size_t) { for (size_t bl = 0; bl < n_baselines; ++bl) { const SubgridMetadata m = metadata[bl]; @@ -312,11 +315,8 @@ void SubgridsToTiles(size_t n_threads, size_t tile_size, size_t grid_size, const size_t subgrid_x = m.subgrid_x - m.tile_x; const size_t subgrid_y = m.subgrid_y - m.tile_y; - // Iterate over subgrid rows, starting at a row that belongs to this - // thread and stepping by the number of threads - size_t start_y = - (n_threads - (subgrid_y % n_threads) + thread) % n_threads; - for (size_t y = start_y; y < subgrid_size; y += n_threads) { + // Iterate over subgrid rows + for (size_t y = start; y < end; y++) { // Iterate all columns of subgrid for (size_t x = 0; x < subgrid_size; ++x) { // Compute position in subgrid @@ -440,32 +440,37 @@ void ApplyGridFFT(size_t n_threads, xt::xtensor<std::complex<float>, 2>& in, fftwf_plan plan = fftwf_plan_dft_1d(size, nullptr, nullptr, sign, FFTW_ESTIMATE); - aocommon::StaticFor<size_t> loop(n_threads); + aocommon::StaticFor<size_t> loop; xt::xtensor<std::complex<float>, 2> temp({size, size}, 0); // FFT over rows - loop.Run(0, size, [&](size_t i, size_t /*thread*/) { - fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(&in(i, 0)); - fftwf_execute_dft(plan, ptr, ptr); + loop.Run(0, size, [&](size_t start, size_t end, size_t) { + for (size_t y = start; y < end; y++) { + fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(&in(y, 0)); + + fftwf_execute_dft(plan, ptr, ptr); + } }); // Iterate all columns - loop.Run(0, size, [&](size_t x, size_t /*thread*/) { + loop.Run(0, size, [&](size_t start, size_t end, size_t) { xt::xtensor<std::complex<float>, 1> temp({size}, 0); - // Copy column - for (size_t y = 0; y < size; ++y) { - temp(y) = in(y, x); - } + for (size_t x = start; x < end; x++) { + // Copy column + for (size_t y = 0; y < size; ++y) { + temp(y) = in(y, x); + } - // FFT over column - fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(temp.data()); - fftwf_execute_dft(plan, ptr, ptr); + // FFT over column + fftwf_complex* ptr = reinterpret_cast<fftwf_complex*>(temp.data()); + fftwf_execute_dft(plan, ptr, ptr); - // Store the result - for (size_t y = 0; y < size; ++y) { - out(y, x) = temp(y); + // Store the result + for (size_t y = 0; y < size; ++y) { + out(y, x) = temp(y); + } } }); @@ -502,8 +507,8 @@ void ApplyPhasor(size_t tile_idx, float image_size, float w_step, float tile_z, } // end for y } // end ApplyPhasor -void PaddedTilesToGrid(aocommon::StaticFor<size_t>& loop, size_t n_tiles, - size_t tile_offset, +void PaddedTilesToGrid(size_t n_threads, aocommon::StaticFor<size_t>& loop, + size_t n_tiles, size_t tile_offset, const xt::xtensor<TileMetadata, 1>& metadata, const xt::xtensor<std::complex<float>, 3>& tiles, xt::xtensor<std::complex<float>, 2>& grid) { @@ -511,9 +516,8 @@ void PaddedTilesToGrid(aocommon::StaticFor<size_t>& loop, size_t n_tiles, assert(tile_size == tiles.shape()[2]); const size_t grid_size = grid.shape()[0]; assert(grid_size == grid.shape()[1]); - const size_t n_threads = loop.NThreads(); - loop.Run(0, n_threads, [&](size_t thread, size_t) { + loop.Run(0, n_threads, [&](size_t, size_t, size_t thread) { for (size_t i = 0; i < n_tiles; ++i) { const TileMetadata m = metadata[tile_offset + i]; const size_t x0 = m.tile_x; @@ -554,7 +558,7 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step, fftwf_plan plan_backward = fftwf_plan_dft_1d( padded_tile_size, nullptr, nullptr, FFTW_BACKWARD, FFTW_ESTIMATE); - aocommon::StaticFor<size_t> loop(n_threads); + aocommon::StaticFor<size_t> loop; const size_t n_tiles = tiles.shape()[0]; size_t current_n_tiles = 0; @@ -563,7 +567,8 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step, current_n_tiles = std::min(n_threads, n_tiles - tile_offset); loop.Run(tile_offset, tile_offset + current_n_tiles, - [&](size_t tile_id, size_t thread) { + [&](size_t, size_t, size_t thread) { + const size_t tile_id = tile_offset + thread; const TileMetadata m = metadata[tile_id]; const size_t padded_tile_id = thread; ZeroTile(padded_tile_id, padded_tiles); @@ -576,7 +581,7 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step, }); // Add current batch of tiles to grid - PaddedTilesToGrid(loop, current_n_tiles, tile_offset, metadata, + PaddedTilesToGrid(n_threads, loop, current_n_tiles, tile_offset, metadata, padded_tiles, grid); } // end for tile_offset } // end TilesToGrid @@ -585,8 +590,7 @@ void TilesToGrid(size_t n_threads, float image_size, float w_step, namespace dp3 { namespace idg { -Imager::Imager(size_t n_threads, size_t grid_size) - : n_threads_(n_threads), grid_size_(grid_size) { +Imager::Imager(size_t grid_size) : grid_size_(grid_size) { if (kApplyWeighting) { weight_map_ = xt::xtensor<float, 2>({grid_size, grid_size}, 0.0f); } @@ -632,66 +636,69 @@ void Imager::Initialize(const xt::xtensor<double, 1>& frequencies, // Initialize per baseline metadata subgrid_metadata_.resize({n_baselines}); uv_pixels_.resize({2, n_baselines, n_channels}); - aocommon::StaticFor<size_t> loop(1); - loop.Run(0, n_baselines, [&](size_t bl, size_t /*thread*/) { - const double u_meters = uvw(bl, 0); - const double v_meters = uvw(bl, 1); - const double w_meters = uvw(bl, 2); - - // Compute u and v in pixels - for (size_t chan = 0; chan < n_channels; ++chan) { - uv_pixels_(0, bl, chan) = - u_meters * image_size_ * (frequencies[chan] / detail::c()); - uv_pixels_(1, bl, chan) = - v_meters * image_size_ * (frequencies[chan] / detail::c()); - } + aocommon::StaticFor<size_t> loop; + loop.Run(0, n_baselines, [&](size_t start, size_t end, size_t) { + for (size_t bl = start; bl < end; bl++) { + const double u_meters = uvw(bl, 0); + const double v_meters = uvw(bl, 1); + const double w_meters = uvw(bl, 2); + + // Compute u and v in pixels + for (size_t chan = 0; chan < n_channels; ++chan) { + uv_pixels_(0, bl, chan) = + u_meters * image_size_ * (frequencies[chan] / detail::c()); + uv_pixels_(1, bl, chan) = + v_meters * image_size_ * (frequencies[chan] / detail::c()); + } - // W in lambda - const float w_lambda = w_meters * (frequencies[0] / detail::c()) * w_step_; - - // Compute min and max u and v values - const float u0 = uv_pixels_(0, bl, 0); - const float u1 = uv_pixels_(0, bl, n_channels - 1); - const float v0 = uv_pixels_(1, bl, 0); - const float v1 = uv_pixels_(1, bl, n_channels - 1); - const float u_min = std::min(u0, u1); - const float u_max = std::max(u0, u1); - const float v_min = std::min(v0, v1); - const float v_max = std::max(v0, v1); - - // Middle point of subgrid in pixels - int subgrid_x = roundf((u_max + u_min) / 2); - int subgrid_y = roundf((v_max + v_min) / 2); - - // Shift middle of subgrid from middle of grid to top left - subgrid_x += (grid_size_ / 2); - subgrid_y += (grid_size_ / 2); - - // Shift from middle of subgrid to top left of subgrid - subgrid_x -= (subgrid_size_ / 2); - subgrid_y -= (subgrid_size_ / 2); - - // Compute tile indices - size_t tile_x = subgrid_x / kTileSize; - size_t tile_y = subgrid_y / kTileSize; - const size_t tile_z = -floorf(w_lambda); - const size_t tile_index = - tile_z * n_tiles_xy * n_tiles_xy + tile_y * n_tiles_xy + tile_x; - - // Compute top left padded tile coordinate - tile_x *= kTileSize; - tile_y *= kTileSize; - - // Store metadata - detail::SubgridMetadata metadata = { - .subgrid_x = static_cast<size_t>(subgrid_x), - .subgrid_y = static_cast<size_t>(subgrid_y), - .tile_x = tile_x, - .tile_y = tile_y, - .tile_z = tile_z, - .tile_index = tile_index}; - - subgrid_metadata_[bl] = metadata; + // W in lambda + const float w_lambda = + w_meters * (frequencies[0] / detail::c()) * w_step_; + + // Compute min and max u and v values + const float u0 = uv_pixels_(0, bl, 0); + const float u1 = uv_pixels_(0, bl, n_channels - 1); + const float v0 = uv_pixels_(1, bl, 0); + const float v1 = uv_pixels_(1, bl, n_channels - 1); + const float u_min = std::min(u0, u1); + const float u_max = std::max(u0, u1); + const float v_min = std::min(v0, v1); + const float v_max = std::max(v0, v1); + + // Middle point of subgrid in pixels + int subgrid_x = roundf((u_max + u_min) / 2); + int subgrid_y = roundf((v_max + v_min) / 2); + + // Shift middle of subgrid from middle of grid to top left + subgrid_x += (grid_size_ / 2); + subgrid_y += (grid_size_ / 2); + + // Shift from middle of subgrid to top left of subgrid + subgrid_x -= (subgrid_size_ / 2); + subgrid_y -= (subgrid_size_ / 2); + + // Compute tile indices + size_t tile_x = subgrid_x / kTileSize; + size_t tile_y = subgrid_y / kTileSize; + const size_t tile_z = -floorf(w_lambda); + const size_t tile_index = + tile_z * n_tiles_xy * n_tiles_xy + tile_y * n_tiles_xy + tile_x; + + // Compute top left padded tile coordinate + tile_x *= kTileSize; + tile_y *= kTileSize; + + // Store metadata + detail::SubgridMetadata metadata = { + .subgrid_x = static_cast<size_t>(subgrid_x), + .subgrid_y = static_cast<size_t>(subgrid_y), + .tile_x = tile_x, + .tile_y = tile_y, + .tile_z = tile_z, + .tile_index = tile_index}; + + subgrid_metadata_[bl] = metadata; + } }); // Initialize weight map @@ -792,23 +799,22 @@ void Imager::Invert(xt::xtensor<std::complex<float>, 3>& visibilities) { assert(n_channels == frequencies_.size()); assert(n_correlations == 2); if (kApplyWeighting) { - detail::WeighVisibilities(n_threads_, weight_map_, uv_pixels_, - visibilities); + detail::WeighVisibilities(weight_map_, uv_pixels_, visibilities); } - detail::VisibilitiesToSubgrids( - n_threads_, image_size_, w_step_, padded_grid_size_, frequencies_, uvw_, - visibilities, taper_, subgrid_metadata_, subgrids_); - detail::ApplySubgridFFT(n_threads_, subgrids_, FFTW_BACKWARD); - detail::SubgridsToTiles(n_threads_, padded_tile_size_, padded_grid_size_, + detail::VisibilitiesToSubgrids(image_size_, w_step_, padded_grid_size_, + frequencies_, uvw_, visibilities, taper_, + subgrid_metadata_, subgrids_); + detail::ApplySubgridFFT(subgrids_, FFTW_BACKWARD); + detail::SubgridsToTiles(padded_tile_size_, padded_grid_size_, subgrid_metadata_, subgrids_, tiles_); - detail::TilesToGrid(n_threads_, image_size_, w_step_, padded_tile_size_, + detail::TilesToGrid(NThreads(), image_size_, w_step_, padded_tile_size_, wpadded_tile_size_, padded_grid_size_, tile_metadata_, tiles_, padded_grid_); } xt::xtensor<float, 2> Imager::GetImage() { xt::xtensor<std::complex<float>, 2> padded_image(padded_grid_.shape()); - detail::ApplyGridFFT(n_threads_, padded_grid_, padded_image, FFTW_FORWARD); + detail::ApplyGridFFT(padded_grid_, padded_image, FFTW_FORWARD); xt::xtensor<float, 2> image({grid_size_, grid_size_}, 0.0f); xt::xtensor<float, 1> taper = @@ -824,20 +830,26 @@ xt::xtensor<float, 2> Imager::GetImage() { const size_t x_start = y_start; const size_t x_end = y_end; - aocommon::StaticFor<size_t> loop(n_threads_); - loop.Run(y_start, y_end, [&](size_t y, size_t /*thread*/) { - for (size_t x = x_start; x < x_end; ++x) { - const size_t y_src = (y + (padded_grid_size_ / 2)) % padded_grid_size_; - const size_t x_src = (x + (padded_grid_size_ / 2)) % padded_grid_size_; - const size_t y_dst = y - y_start; - const size_t x_dst = x - x_start; - image(y_dst, x_dst) = padded_image(y_src, x_src).real() * - inverse_taper[y_dst] * inverse_taper[x_dst]; + aocommon::StaticFor<size_t> loop; + loop.Run(y_start, y_end, [&](size_t start, size_t end, size_t) { + for (size_t y = start; y < end; ++y) { + for (size_t x = x_start; x < x_end; ++x) { + const size_t y_src = (y + (padded_grid_size_ / 2)) % padded_grid_size_; + const size_t x_src = (x + (padded_grid_size_ / 2)) % padded_grid_size_; + const size_t y_dst = y - y_start; + const size_t x_dst = x - x_start; + image(y_dst, x_dst) = padded_image(y_src, x_src).real() * + inverse_taper[y_dst] * inverse_taper[x_dst]; + } } }); return image; } +size_t Imager::NThreads() const { + return aocommon::ThreadPool::GetInstance().NThreads(); +} + } // namespace idg } // namespace dp3 diff --git a/idg/Imager.h b/idg/Imager.h index 6a88ae7f1e4b9db614be4659c6da5db0625066d7..de42a02405555a2849ccdc815cee54e530b29517 100644 --- a/idg/Imager.h +++ b/idg/Imager.h @@ -32,7 +32,7 @@ namespace idg { class Imager { public: - Imager(size_t n_threads, size_t grid_size); + Imager(size_t grid_size); void Initialize(const xt::xtensor<double, 1>& frequencies, const xt::xtensor<double, 2>& uvw, @@ -43,13 +43,14 @@ class Imager { xt::xtensor<float, 2> GetImage(); private: + size_t NThreads() const; + const float kPadding = 1.20; const float kWKernelSize = 8.0; // in pixels const float kTaperKernelSize = 7.0; // in pixels const size_t kTileSize = 128; const bool kApplyWeighting = false; - size_t n_threads_; size_t grid_size_; size_t padded_grid_size_; float image_size_; diff --git a/steps/IDGImage.cc b/steps/IDGImage.cc index 8f64643954d242d3e694643fb81372edc20870c3..4d4eb9d066c04023516754eddc489e3a0373defc 100644 --- a/steps/IDGImage.cc +++ b/steps/IDGImage.cc @@ -29,7 +29,7 @@ void IDGImage::updateInfo(const base::DPInfo& info) { n_channels_ = info.nchan(); const std::vector<double>& frequencies = info.chanFreqs(); frequencies_ = xt::adapt(frequencies, {frequencies.size()}); - imager_ = std::make_unique<idg::Imager>(info.nThreads(), grid_size_); + imager_ = std::make_unique<idg::Imager>(grid_size_); } void IDGImage::finish() {