Commit 88a07700 authored by Bram Veenboer's avatar Bram Veenboer

Merge branch 'fix-interface' into 'master'

Fix Proxy interface

See merge request !54
parents 5fde8334 fdba5bf0
Pipeline #8178 canceled with stages
......@@ -73,13 +73,15 @@ BufferSetImpl::BufferSetImpl(Type architecture)
m_avg_aterm_correction(0, 0, 0, 0),
m_grid(new Grid(0, 0, 0, 0)),
m_proxy(create_proxy(architecture)),
m_shift{0.0},
m_shift(3),
m_get_image_watch(Stopwatch::create()),
m_set_image_watch(Stopwatch::create()),
m_avg_beam_watch(Stopwatch::create()),
m_plan_watch(Stopwatch::create()),
m_gridding_watch(Stopwatch::create()),
m_degridding_watch(Stopwatch::create()) {}
m_degridding_watch(Stopwatch::create()) {
m_shift.zero();
}
BufferSetImpl::~BufferSetImpl() {
// Free all objects allocated via the proxy before destroying the proxy.
......@@ -241,9 +243,9 @@ void BufferSetImpl::init(size_t size, float cell_size, float max_w,
std::cout << "nr_w_layers: " << nr_w_layers << std::endl;
#endif
m_shift[0] = shiftl;
m_shift[1] = shiftm;
m_shift[2] = shiftp;
m_shift(0) = shiftl;
m_shift(1) = shiftm;
m_shift(2) = shiftp;
m_kernel_size = taper_kernel_size + w_kernel_size + a_term_kernel_size;
......@@ -268,8 +270,8 @@ void BufferSetImpl::init(size_t size, float cell_size, float max_w,
m_grid.reset(new Grid(nr_w_layers, 4, m_padded_size, m_padded_size));
m_grid->zero();
m_proxy->set_grid(m_grid, m_subgridsize, m_image_size, m_w_step,
m_shift.data());
m_proxy->set_grid(m_grid);
m_proxy->init_wtiles(m_subgridsize);
m_taper_subgrid.resize(m_subgridsize);
m_taper_grid.resize(m_padded_size);
......@@ -585,6 +587,7 @@ void BufferSetImpl::get_image(double* image) {
m_get_image_watch->Start();
// Flush all pending operations on the grid
m_proxy->flush_wtiles(m_subgridsize, m_image_size, m_w_step, m_shift);
m_proxy->get_grid();
double runtime = -omp_get_wtime();
......
......@@ -76,7 +76,7 @@ class BufferSetImpl : public virtual BufferSet {
float get_cell_size() const { return m_cell_size; }
float get_w_step() const { return m_w_step; }
const std::array<float, 3>& get_shift() const { return m_shift; }
const idg::Array1D<float>& get_shift() const { return m_shift; }
float get_kernel_size() const { return m_kernel_size; }
const Array2D<float>& get_spheroidal() const { return m_spheroidal; }
const std::shared_ptr<Grid>& get_grid() const { return m_grid; }
......@@ -116,7 +116,7 @@ class BufferSetImpl : public virtual BufferSet {
float m_image_size;
float m_cell_size;
float m_w_step;
std::array<float, 3> m_shift;
Array1D<float> m_shift;
size_t m_size;
size_t m_padded_size;
float m_kernel_size;
......
......@@ -249,8 +249,7 @@ void run() {
// Iterate all cycles
for (unsigned cycle = 0; cycle < nr_cycles; cycle++) {
// Set grid
float w_step = 0.0;
proxy.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
proxy.set_grid(grid);
// Iterate all time blocks
for (unsigned time_offset = 0; time_offset < total_nr_timesteps;
......@@ -329,8 +328,8 @@ void run() {
clog << ">>> Run fft" << endl;
double runtime_fft = -omp_get_wtime();
if (!disable_fft) {
proxy.transform(idg::FourierDomainToImageDomain, *grid);
proxy.transform(idg::ImageDomainToFourierDomain, *grid);
proxy.transform(idg::FourierDomainToImageDomain);
proxy.transform(idg::ImageDomainToFourierDomain);
}
runtimes_fft.push_back(runtime_fft + omp_get_wtime());
clog << endl;
......
......@@ -449,7 +449,7 @@ void run_master() {
// Run FFT
runtimes_grid_fft[cycle] = -omp_get_wtime();
proxy.transform(idg::FourierDomainToImageDomain, *grid);
proxy.transform(idg::FourierDomainToImageDomain);
runtimes_grid_fft[cycle] += omp_get_wtime();
// Reduce grids
......@@ -477,7 +477,7 @@ void run_master() {
// Run FFT
runtimes_grid_fft[cycle] -= omp_get_wtime();
proxy.transform(idg::ImageDomainToFourierDomain, *grid);
proxy.transform(idg::ImageDomainToFourierDomain);
runtimes_grid_fft[cycle] += omp_get_wtime();
}
runtime_imaging += omp_get_wtime();
......@@ -626,7 +626,7 @@ void run_worker() {
grid = proxy.get_grid();
// Run FFT
proxy.transform(idg::FourierDomainToImageDomain, *grid);
proxy.transform(idg::FourierDomainToImageDomain);
// Reduce grids
reduce_grids(grid, rank, world_size);
......@@ -640,7 +640,7 @@ void run_worker() {
proxy.set_grid(grid);
// Run FFT
proxy.transform(idg::ImageDomainToFourierDomain, *grid);
proxy.transform(idg::ImageDomainToFourierDomain);
// Subtract model visibilities
// not implemented
......
......@@ -84,9 +84,11 @@ def gridding(
p.gridding(
w_step, shift, cell_size, kernel_size, subgrid_size,
frequencies, visibilities, uvw, baselines,
grid, aterms, aterms_offsets, spheroidal)
aterms, aterms_offsets, spheroidal)
p.get_grid(grid)
util.plot_grid(grid, scaling='log')
p.transform(idg.FourierDomainToImageDomain, grid)
p.transform(idg.FourierDomainToImageDomain)
p.get_grid(grid)
util.plot_grid(grid)
#util.plot_grid(grid, pol=0)
......@@ -97,11 +99,11 @@ def gridding(
def degridding(
p, w_step, shift, cell_size, kernel_size, subgrid_size, frequencies, visibilities,
uvw, baselines, grid, aterms, aterms_offsets, spheroidal):
p.transform(idg.ImageDomainToFourierDomain, grid)
p.transform(idg.ImageDomainToFourierDomain)
p.degridding(
w_step, shift, cell_size, kernel_size, subgrid_size,
frequencies, visibilities, uvw, baselines,
grid, aterms, aterms_offsets, spheroidal)
aterms, aterms_offsets, spheroidal)
#util.plot_visibilities(visibilities)
......
......@@ -103,8 +103,7 @@ int test01() {
idg::proxy::cpu::Reference proxy;
// Set grid
float w_step = 0.0;
proxy.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
proxy.set_grid(grid);
// Create plan
clog << ">>> Create plan" << endl;
......@@ -119,7 +118,7 @@ int test01() {
proxy.gridding(plan, w_offset, shift, cell_size, kernel_size, subgrid_size,
frequencies, visibilities_ref, uvw, baselines, aterms,
aterms_offsets, spheroidal);
proxy.transform(idg::FourierDomainToImageDomain, *grid);
proxy.transform(idg::FourierDomainToImageDomain);
float grid_error = get_accuracy(grid_size * grid_size * nr_correlations,
(std::complex<float> *)grid->data(),
......@@ -128,10 +127,11 @@ int test01() {
// Predict visibilities
clog << ">>> Predict visibilities" << endl;
proxy.transform(idg::ImageDomainToFourierDomain, *grid_ref);
proxy.set_grid(grid_ref);
proxy.transform(idg::ImageDomainToFourierDomain);
// Set reference grid
proxy.set_grid(grid_ref, subgrid_size, image_size, w_step, shift.data());
proxy.set_grid(grid_ref);
proxy.degridding(plan, w_offset, shift, cell_size, kernel_size, subgrid_size,
frequencies, visibilities, uvw, baselines, aterms,
......
......@@ -178,9 +178,8 @@ int compare_to_reference(float tol = 1000 *
}
// Bind the grids to the respective proxies
float w_step = 0.0;
optimized.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
reference.set_grid(grid_ref, subgrid_size, image_size, w_step, shift.data());
optimized.set_grid(grid);
reference.set_grid(grid_ref);
// Set w-terms to zero
for (unsigned bl = 0; bl < nr_baselines; bl++) {
......@@ -200,14 +199,14 @@ int compare_to_reference(float tol = 1000 *
#if TEST_GRIDDING
// Run gridder
std::clog << ">>> Run gridding" << std::endl;
optimized.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
optimized.set_grid(grid);
optimized.gridding(plan, w_offset, shift, cell_size, kernel_size,
subgrid_size, frequencies, visibilities, uvw, baselines,
aterms, aterms_offsets, spheroidal);
optimized.get_grid();
std::clog << ">>> Run reference gridding" << std::endl;
reference.set_grid(grid_ref, subgrid_size, image_size, w_step, shift.data());
reference.set_grid(grid_ref);
reference.gridding(plan, w_offset, shift, cell_size, kernel_size,
subgrid_size, frequencies, visibilities, uvw, baselines,
aterms, aterms_offsets, spheroidal);
......@@ -217,27 +216,25 @@ int compare_to_reference(float tol = 1000 *
grid->data(), grid_ref->data());
// Use the same grid for both degridding calls
reference.set_grid(optimized.get_grid(), subgrid_size, image_size, w_step,
shift.data());
reference.set_grid(optimized.get_grid());
#else
optimized.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
reference.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
optimized.set_grid(grid);
reference.set_grid(grid);
#endif
reference.set_grid(optimized.get_grid(), subgrid_size, image_size, w_step,
shift.data());
reference.set_grid(optimized.get_grid());
#if TEST_DEGRIDDING
// Run degridder
std::clog << ">>> Run degridding" << std::endl;
visibilities.zero();
visibilities_ref.zero();
optimized.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
optimized.set_grid(grid);
optimized.degridding(plan, w_offset, shift, cell_size, kernel_size,
subgrid_size, frequencies, visibilities, uvw, baselines,
aterms, aterms_offsets, spheroidal);
std::clog << ">>> Run reference degridding" << std::endl;
reference.set_grid(grid, subgrid_size, image_size, w_step, shift.data());
reference.set_grid(grid);
reference.degridding(plan, w_offset, shift, cell_size, kernel_size,
subgrid_size, frequencies, visibilities_ref, uvw,
baselines, aterms, aterms_offsets, spheroidal);
......
......@@ -21,7 +21,7 @@ namespace cpu {
// Constructor
CPU::CPU(std::vector<std::string> libraries) : kernels(libraries) {
#if defined(DEBUG)
std::cout << __func__ << std::endl;
std::cout << "CPU::" << __func__ << std::endl;
#endif
powerSensor = get_power_sensor(sensor_host);
......@@ -31,7 +31,7 @@ CPU::CPU(std::vector<std::string> libraries) : kernels(libraries) {
// Destructor
CPU::~CPU() {
#if defined(DEBUG)
std::cout << __func__ << std::endl;
std::cout << "CPU::" << __func__ << std::endl;
#endif
// Delete power sensor
......@@ -65,29 +65,23 @@ std::unique_ptr<Plan> CPU::make_plan(
}
}
void CPU::set_grid(std::shared_ptr<Grid> grid) {
Proxy::set_grid(grid);
m_wtiles = WTiles();
}
void CPU::set_grid(std::shared_ptr<Grid> grid, int subgrid_size,
float image_size, float w_step, const float *shift) {
Proxy::set_grid(grid, subgrid_size, image_size, w_step, shift);
void CPU::init_wtiles(float subgrid_size) {
m_wtiles = WTiles(kernel::cpu::InstanceCPU::kNrWTiles,
kernel::cpu::InstanceCPU::kWTileSize);
kernels.init_wtiles(subgrid_size);
}
std::shared_ptr<Grid> CPU::get_grid() {
void CPU::flush_wtiles(int subgrid_size, float image_size, float w_step,
const Array1D<float> &shift) {
// flush all pending Wtiles
WTileUpdateInfo wtile_flush_info = m_wtiles.clear();
if (wtile_flush_info.wtile_ids.size()) {
int grid_size = m_grid->get_x_dim();
kernels.run_adder_wtiles_to_grid(
m_grid_size, m_subgrid_size, m_image_size, m_w_step, m_shift,
grid_size, subgrid_size, image_size, w_step, shift.data(),
wtile_flush_info.wtile_ids.size(), wtile_flush_info.wtile_ids.data(),
wtile_flush_info.wtile_coordinates.data(), m_grid->data());
}
return m_grid;
}
unsigned int CPU::compute_jobsize(const Plan &plan,
......@@ -646,44 +640,57 @@ void CPU::do_calibrate_update_hessian_vector_product2(
station_nr, aterms, derivative_aterms, parameter_vector);
}
void CPU::do_transform(DomainAtoDomainB direction,
Array3D<std::complex<float>> &grid) {
void CPU::do_transform(DomainAtoDomainB direction) {
#if defined(DEBUG)
std::cout << __func__ << std::endl;
std::cout << "FFT (direction: " << direction << ")" << std::endl;
#endif
try {
int sign = (direction == FourierDomainToImageDomain) ? 1 : -1;
const auto &grid = get_grid();
// Constants
auto grid_size = grid.get_x_dim();
unsigned int nr_w_layers = grid->get_w_dim();
unsigned int nr_correlations = grid->get_z_dim();
unsigned int grid_size = grid->get_y_dim();
// Performance measurement
report.initialize(0, 0, grid_size);
kernels.set_report(report);
State states[2];
states[0] = powerSensor->read();
// FFT shift
if (direction == FourierDomainToImageDomain) {
kernels.shift(grid); // TODO: integrate into adder?
} else {
kernels.shift(grid); // TODO: remove
}
for (unsigned int w = 0; w < nr_w_layers; w++) {
int sign = (direction == FourierDomainToImageDomain) ? 1 : -1;
// Run FFT
kernels.run_fft(grid_size, grid_size, 1, grid.data(), sign);
// Grid pointer
idg::Array3D<std::complex<float>> grid_ptr(grid->data(w), nr_correlations,
grid_size, grid_size);
// FFT shift
if (direction == FourierDomainToImageDomain)
kernels.shift(grid); // TODO: remove
else
kernels.shift(grid); // TODO: integrate into splitter?
// Constants
auto grid_size = grid->get_x_dim();
// End measurement
states[1] = powerSensor->read();
report.update_host(states[0], states[1]);
State states[2];
states[0] = powerSensor->read();
// FFT shift
if (direction == FourierDomainToImageDomain) {
kernels.shift(grid_ptr); // TODO: integrate into adder?
} else {
kernels.shift(grid_ptr); // TODO: remove
}
// Run FFT
kernels.run_fft(grid_size, grid_size, 1, grid->data(), sign);
// FFT shift
if (direction == FourierDomainToImageDomain)
kernels.shift(grid_ptr); // TODO: remove
else
kernels.shift(grid_ptr); // TODO: integrate into splitter?
// End measurement
states[1] = powerSensor->read();
report.update_host(states[0], states[1]);
}
// Report performance
report.print_total();
......
......@@ -47,13 +47,10 @@ class CPU : public Proxy {
const Array1D<unsigned int>& aterms_offsets,
Plan::Options options) override;
using Proxy::set_grid; // prevents hiding set_grid overloads in Proxy
virtual void set_grid(std::shared_ptr<Grid> grid);
virtual void init_wtiles(float subgrid_size) override;
virtual void set_grid(std::shared_ptr<Grid> grid, int subgrid_size,
float image_size, float w_step,
const float* shift) override;
virtual std::shared_ptr<Grid> get_grid() override;
virtual void flush_wtiles(int subgrid_size, float image_size, float w_step,
const Array1D<float>& shift) override;
private:
unsigned int compute_jobsize(const Plan& plan,
......@@ -122,8 +119,7 @@ class CPU : public Proxy {
const Array4D<Matrix2x2<std::complex<float>>>& derivative_aterms,
Array2D<float>& parameter_vector) override;
virtual void do_transform(DomainAtoDomainB direction,
Array3D<std::complex<float>>& grid) override;
virtual void do_transform(DomainAtoDomainB direction) override;
virtual void do_compute_avg_beam(
const unsigned int nr_antennas, const unsigned int nr_channels,
......
......@@ -18,75 +18,16 @@ Generic::Generic(ProxyInfo info) : CUDA(info) {
#if defined(DEBUG)
std::cout << "Generic::" << __func__ << std::endl;
#endif
// Initialize host PowerSensor
hostPowerSensor = get_power_sensor(sensor_host);
}
// Destructor
Generic::~Generic() { delete hostPowerSensor; }
/* High level routines */
void Generic::do_transform(DomainAtoDomainB direction,
Array3D<std::complex<float>>& grid) {
Generic::~Generic() {
#if defined(DEBUG)
std::cout << __func__ << std::endl;
std::cout << "Transform direction: " << direction << std::endl;
std::cout << "Generic::" << __func__ << std::endl;
#endif
}
// Constants
auto grid_size = grid.get_x_dim();
// Load device
InstanceCUDA& device = get_device(0);
// Initialize
cu::Stream& stream = device.get_execute_stream();
// Device memory
cu::DeviceMemory& d_grid = device.retrieve_device_grid();
// Performance measurements
report.initialize(0, 0, grid_size);
device.set_report(report);
State powerStates[4];
powerStates[0] = hostPowerSensor->read();
powerStates[2] = device.measure();
// Perform fft shift
device.shift(grid);
// Copy grid to device
device.copy_htod(stream, d_grid, grid.data(), grid.bytes());
// Execute fft
device.launch_grid_fft(d_grid, grid_size, direction);
// Copy grid to host
device.copy_dtoh(stream, grid.data(), d_grid, grid.bytes());
stream.synchronize();
// Perform fft shift
device.shift(grid);
// Perform fft scaling
std::complex<float> scale =
std::complex<float>(2.0 / (grid_size * grid_size), 0);
if (direction == FourierDomainToImageDomain) {
device.scale(grid, scale);
}
// End measurements
stream.synchronize();
powerStates[1] = hostPowerSensor->read();
powerStates[3] = device.measure();
// Report performance
report.update_host(powerStates[0], powerStates[1]);
report.print_total();
report.print_device(powerStates[2], powerStates[3]);
} // end transform
/* High level routines */
void Generic::run_gridding(
const Plan& plan, const float w_step, const Array1D<float>& shift,
const float cell_size, const unsigned int kernel_size,
......@@ -537,12 +478,6 @@ void Generic::do_degridding(
} // end degridding
void Generic::set_grid(std::shared_ptr<Grid> grid) {
set_grid(grid, 0, 0.0, 0.0, nullptr);
}
void Generic::set_grid(std::shared_ptr<Grid> grid, int /* subgrid_size */,
float /* image_size */, float /* w_step */,
const float* /* shift */) {
m_grid = grid;
InstanceCUDA& device = get_device(0);
device.allocate_device_grid(grid->bytes());
......
......@@ -51,11 +51,6 @@ class Generic : public CUDA {
const Array1D<unsigned int>& aterms_offsets,
const Array2D<float>& spheroidal) override;
virtual void do_transform(DomainAtoDomainB direction,
Array3D<std::complex<float>>& grid) override;
powersensor::PowerSensor* hostPowerSensor;
void run_gridding(
const Plan& plan, const float w_step, const Array1D<float>& shift,
const float cell_size, const unsigned int kernel_size,
......@@ -79,10 +74,7 @@ class Generic : public CUDA {
const Array2D<float>& spheroidal);
public:
virtual void set_grid(std::shared_ptr<Grid> grid);
virtual void set_grid(std::shared_ptr<Grid> grid, int subgrid_size,
float image_size, float w_step,
const float* shift) override;
virtual void set_grid(std::shared_ptr<Grid> grid) override;
virtual std::shared_ptr<Grid> get_grid() override;
......
......@@ -34,74 +34,6 @@ Unified::~Unified() {
#endif
}
void Unified::do_transform(DomainAtoDomainB direction,
Array3D<std::complex<float>>& grid) {
#if defined(DEBUG)
std::cout << "Unified::" << __func__ << std::endl;
cout << "Transform direction: " << direction << endl;
#endif
// TODO: fix this method
// (1) does the m_grid_tiled need to be untiled before use?
// (2) even though m_grid_tiled is Unified Memory, cuFFT fails
// Constants
auto nr_correlations = grid.get_z_dim();
;
auto grid_size = grid.get_x_dim();
// Load device
InstanceCUDA& device = get_device(0);
// Get UnifiedMemory object for grid data
cu::UnifiedMemory u_grid(device.get_context(), m_grid_tiled->data(),
m_grid_tiled->bytes());
// Initialize
cu::Stream& stream = device.get_execute_stream();
// Performance measurements
report.initialize(0, 0, grid_size);
device.set_report(report);
State powerStates[4];
powerStates[0] = hostPowerSensor->read();
powerStates[2] = device.measure();
// Perform fft shift
double time_shift = -omp_get_wtime();
device.shift(grid);
time_shift += omp_get_wtime();
// Execute fft
device.launch_grid_fft_unified(grid_size, nr_correlations, grid, direction);
stream.synchronize();
// Perform fft shift
time_shift = -omp_get_wtime();
device.shift(grid);
time_shift += omp_get_wtime();
// Perform fft scaling
double time_scale = -omp_get_wtime();
complex<float> scale = complex<float>(2.0 / (grid_size * grid_size), 0);
if (direction == FourierDomainToImageDomain) {
device.scale(grid, scale);
}
time_scale += omp_get_wtime();
// End measurements
stream.synchronize();
powerStates[1] = hostPowerSensor->read();
powerStates[3] = device.measure();
// Report performance
report.update_fft_shift(time_shift);
report.update_fft_scale(time_scale);