Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
I
idg
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
7
Issues
7
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
8
Merge Requests
8
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Test Cases
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ResearchAndDevelopment
idg
Commits
88a07700
Commit
88a07700
authored
Jan 19, 2021
by
Bram Veenboer
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'fix-interface' into 'master'
Fix Proxy interface See merge request
!54
parents
5fde8334
fdba5bf0
Pipeline
#8178
canceled with stages
Changes
25
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
25 changed files
with
328 additions
and
400 deletions
+328
-400
idg-api/BufferSet.cpp
idg-api/BufferSet.cpp
+10
-7
idg-api/BufferSetImpl.h
idg-api/BufferSetImpl.h
+2
-2
idg-bin/examples/cxx/common/common.h
idg-bin/examples/cxx/common/common.h
+3
-4
idg-bin/examples/cxx/distributed/common.h
idg-bin/examples/cxx/distributed/common.h
+4
-4
idg-bin/examples/python/common.py
idg-bin/examples/python/common.py
+6
-4
idg-bin/tests/cxx/CPU/Reference/main.cpp
idg-bin/tests/cxx/CPU/Reference/main.cpp
+5
-5
idg-bin/tests/cxx/common/common.h
idg-bin/tests/cxx/common/common.h
+10
-13
idg-lib/src/CPU/common/CPU.cpp
idg-lib/src/CPU/common/CPU.cpp
+42
-35
idg-lib/src/CPU/common/CPU.h
idg-lib/src/CPU/common/CPU.h
+4
-8
idg-lib/src/CUDA/Generic/Generic.cpp
idg-lib/src/CUDA/Generic/Generic.cpp
+4
-69
idg-lib/src/CUDA/Generic/Generic.h
idg-lib/src/CUDA/Generic/Generic.h
+1
-9
idg-lib/src/CUDA/Unified/Unified.cpp
idg-lib/src/CUDA/Unified/Unified.cpp
+0
-74
idg-lib/src/CUDA/Unified/Unified.h
idg-lib/src/CUDA/Unified/Unified.h
+1
-8
idg-lib/src/CUDA/common/CUDA.cpp
idg-lib/src/CUDA/common/CUDA.cpp
+72
-0
idg-lib/src/CUDA/common/CUDA.h
idg-lib/src/CUDA/common/CUDA.h
+4
-0
idg-lib/src/CUDA/common/InstanceCUDA.cpp
idg-lib/src/CUDA/common/InstanceCUDA.cpp
+4
-0
idg-lib/src/Hybrid/CUDA/GenericOptimized/GenericOptimized.cpp
...lib/src/Hybrid/CUDA/GenericOptimized/GenericOptimized.cpp
+20
-24
idg-lib/src/Hybrid/CUDA/GenericOptimized/GenericOptimized.h
idg-lib/src/Hybrid/CUDA/GenericOptimized/GenericOptimized.h
+4
-6
idg-lib/src/Hybrid/CUDA/UnifiedOptimized/UnifiedOptimized.cpp
...lib/src/Hybrid/CUDA/UnifiedOptimized/UnifiedOptimized.cpp
+2
-2
idg-lib/src/Hybrid/CUDA/UnifiedOptimized/UnifiedOptimized.h
idg-lib/src/Hybrid/CUDA/UnifiedOptimized/UnifiedOptimized.h
+1
-1
idg-lib/src/common/Proxy.cpp
idg-lib/src/common/Proxy.cpp
+16
-33
idg-lib/src/common/Proxy.h
idg-lib/src/common/Proxy.h
+8
-16
idg-lib/src/common/ProxyC.cpp
idg-lib/src/common/ProxyC.cpp
+25
-25
idg-lib/src/common/ProxyC.h
idg-lib/src/common/ProxyC.h
+1
-3
idg-lib/src/python/Proxy.py
idg-lib/src/python/Proxy.py
+79
-48
No files found.
idg-api/BufferSet.cpp
View file @
88a07700
...
...
@@ -73,13 +73,15 @@ BufferSetImpl::BufferSetImpl(Type architecture)
m_avg_aterm_correction
(
0
,
0
,
0
,
0
),
m_grid
(
new
Grid
(
0
,
0
,
0
,
0
)),
m_proxy
(
create_proxy
(
architecture
)),
m_shift
{
0.0
}
,
m_shift
(
3
)
,
m_get_image_watch
(
Stopwatch
::
create
()),
m_set_image_watch
(
Stopwatch
::
create
()),
m_avg_beam_watch
(
Stopwatch
::
create
()),
m_plan_watch
(
Stopwatch
::
create
()),
m_gridding_watch
(
Stopwatch
::
create
()),
m_degridding_watch
(
Stopwatch
::
create
())
{}
m_degridding_watch
(
Stopwatch
::
create
())
{
m_shift
.
zero
();
}
BufferSetImpl
::~
BufferSetImpl
()
{
// Free all objects allocated via the proxy before destroying the proxy.
...
...
@@ -241,9 +243,9 @@ void BufferSetImpl::init(size_t size, float cell_size, float max_w,
std
::
cout
<<
"nr_w_layers: "
<<
nr_w_layers
<<
std
::
endl
;
#endif
m_shift
[
0
]
=
shiftl
;
m_shift
[
1
]
=
shiftm
;
m_shift
[
2
]
=
shiftp
;
m_shift
(
0
)
=
shiftl
;
m_shift
(
1
)
=
shiftm
;
m_shift
(
2
)
=
shiftp
;
m_kernel_size
=
taper_kernel_size
+
w_kernel_size
+
a_term_kernel_size
;
...
...
@@ -268,8 +270,8 @@ void BufferSetImpl::init(size_t size, float cell_size, float max_w,
m_grid
.
reset
(
new
Grid
(
nr_w_layers
,
4
,
m_padded_size
,
m_padded_size
));
m_grid
->
zero
();
m_proxy
->
set_grid
(
m_grid
,
m_subgridsize
,
m_image_size
,
m_w_step
,
m_shift
.
data
()
);
m_proxy
->
set_grid
(
m_grid
);
m_proxy
->
init_wtiles
(
m_subgridsize
);
m_taper_subgrid
.
resize
(
m_subgridsize
);
m_taper_grid
.
resize
(
m_padded_size
);
...
...
@@ -585,6 +587,7 @@ void BufferSetImpl::get_image(double* image) {
m_get_image_watch
->
Start
();
// Flush all pending operations on the grid
m_proxy
->
flush_wtiles
(
m_subgridsize
,
m_image_size
,
m_w_step
,
m_shift
);
m_proxy
->
get_grid
();
double
runtime
=
-
omp_get_wtime
();
...
...
idg-api/BufferSetImpl.h
View file @
88a07700
...
...
@@ -76,7 +76,7 @@ class BufferSetImpl : public virtual BufferSet {
float
get_cell_size
()
const
{
return
m_cell_size
;
}
float
get_w_step
()
const
{
return
m_w_step
;
}
const
std
::
array
<
float
,
3
>&
get_shift
()
const
{
return
m_shift
;
}
const
idg
::
Array1D
<
float
>&
get_shift
()
const
{
return
m_shift
;
}
float
get_kernel_size
()
const
{
return
m_kernel_size
;
}
const
Array2D
<
float
>&
get_spheroidal
()
const
{
return
m_spheroidal
;
}
const
std
::
shared_ptr
<
Grid
>&
get_grid
()
const
{
return
m_grid
;
}
...
...
@@ -116,7 +116,7 @@ class BufferSetImpl : public virtual BufferSet {
float
m_image_size
;
float
m_cell_size
;
float
m_w_step
;
std
::
array
<
float
,
3
>
m_shift
;
Array1D
<
float
>
m_shift
;
size_t
m_size
;
size_t
m_padded_size
;
float
m_kernel_size
;
...
...
idg-bin/examples/cxx/common/common.h
View file @
88a07700
...
...
@@ -249,8 +249,7 @@ void run() {
// Iterate all cycles
for
(
unsigned
cycle
=
0
;
cycle
<
nr_cycles
;
cycle
++
)
{
// Set grid
float
w_step
=
0.0
;
proxy
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
());
proxy
.
set_grid
(
grid
);
// Iterate all time blocks
for
(
unsigned
time_offset
=
0
;
time_offset
<
total_nr_timesteps
;
...
...
@@ -329,8 +328,8 @@ void run() {
clog
<<
">>> Run fft"
<<
endl
;
double
runtime_fft
=
-
omp_get_wtime
();
if
(
!
disable_fft
)
{
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
,
*
grid
);
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
,
*
grid
);
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
);
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
);
}
runtimes_fft
.
push_back
(
runtime_fft
+
omp_get_wtime
());
clog
<<
endl
;
...
...
idg-bin/examples/cxx/distributed/common.h
View file @
88a07700
...
...
@@ -449,7 +449,7 @@ void run_master() {
// Run FFT
runtimes_grid_fft
[
cycle
]
=
-
omp_get_wtime
();
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
,
*
grid
);
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
);
runtimes_grid_fft
[
cycle
]
+=
omp_get_wtime
();
// Reduce grids
...
...
@@ -477,7 +477,7 @@ void run_master() {
// Run FFT
runtimes_grid_fft
[
cycle
]
-=
omp_get_wtime
();
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
,
*
grid
);
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
);
runtimes_grid_fft
[
cycle
]
+=
omp_get_wtime
();
}
runtime_imaging
+=
omp_get_wtime
();
...
...
@@ -626,7 +626,7 @@ void run_worker() {
grid
=
proxy
.
get_grid
();
// Run FFT
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
,
*
grid
);
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
);
// Reduce grids
reduce_grids
(
grid
,
rank
,
world_size
);
...
...
@@ -640,7 +640,7 @@ void run_worker() {
proxy
.
set_grid
(
grid
);
// Run FFT
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
,
*
grid
);
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
);
// Subtract model visibilities
// not implemented
...
...
idg-bin/examples/python/common.py
View file @
88a07700
...
...
@@ -84,9 +84,11 @@ def gridding(
p
.
gridding
(
w_step
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities
,
uvw
,
baselines
,
grid
,
aterms
,
aterms_offsets
,
spheroidal
)
aterms
,
aterms_offsets
,
spheroidal
)
p
.
get_grid
(
grid
)
util
.
plot_grid
(
grid
,
scaling
=
'log'
)
p
.
transform
(
idg
.
FourierDomainToImageDomain
,
grid
)
p
.
transform
(
idg
.
FourierDomainToImageDomain
)
p
.
get_grid
(
grid
)
util
.
plot_grid
(
grid
)
#util.plot_grid(grid, pol=0)
...
...
@@ -97,11 +99,11 @@ def gridding(
def
degridding
(
p
,
w_step
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities
,
uvw
,
baselines
,
grid
,
aterms
,
aterms_offsets
,
spheroidal
):
p
.
transform
(
idg
.
ImageDomainToFourierDomain
,
grid
)
p
.
transform
(
idg
.
ImageDomainToFourierDomain
)
p
.
degridding
(
w_step
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities
,
uvw
,
baselines
,
grid
,
aterms
,
aterms_offsets
,
spheroidal
)
aterms
,
aterms_offsets
,
spheroidal
)
#util.plot_visibilities(visibilities)
...
...
idg-bin/tests/cxx/CPU/Reference/main.cpp
View file @
88a07700
...
...
@@ -103,8 +103,7 @@ int test01() {
idg
::
proxy
::
cpu
::
Reference
proxy
;
// Set grid
float
w_step
=
0.0
;
proxy
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
());
proxy
.
set_grid
(
grid
);
// Create plan
clog
<<
">>> Create plan"
<<
endl
;
...
...
@@ -119,7 +118,7 @@ int test01() {
proxy
.
gridding
(
plan
,
w_offset
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities_ref
,
uvw
,
baselines
,
aterms
,
aterms_offsets
,
spheroidal
);
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
,
*
grid
);
proxy
.
transform
(
idg
::
FourierDomainToImageDomain
);
float
grid_error
=
get_accuracy
(
grid_size
*
grid_size
*
nr_correlations
,
(
std
::
complex
<
float
>
*
)
grid
->
data
(),
...
...
@@ -128,10 +127,11 @@ int test01() {
// Predict visibilities
clog
<<
">>> Predict visibilities"
<<
endl
;
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
,
*
grid_ref
);
proxy
.
set_grid
(
grid_ref
);
proxy
.
transform
(
idg
::
ImageDomainToFourierDomain
);
// Set reference grid
proxy
.
set_grid
(
grid_ref
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
);
proxy
.
set_grid
(
grid_ref
);
proxy
.
degridding
(
plan
,
w_offset
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities
,
uvw
,
baselines
,
aterms
,
...
...
idg-bin/tests/cxx/common/common.h
View file @
88a07700
...
...
@@ -178,9 +178,8 @@ int compare_to_reference(float tol = 1000 *
}
// Bind the grids to the respective proxies
float
w_step
=
0.0
;
optimized
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
());
reference
.
set_grid
(
grid_ref
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
());
optimized
.
set_grid
(
grid
);
reference
.
set_grid
(
grid_ref
);
// Set w-terms to zero
for
(
unsigned
bl
=
0
;
bl
<
nr_baselines
;
bl
++
)
{
...
...
@@ -200,14 +199,14 @@ int compare_to_reference(float tol = 1000 *
#if TEST_GRIDDING
// Run gridder
std
::
clog
<<
">>> Run gridding"
<<
std
::
endl
;
optimized
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
);
optimized
.
set_grid
(
grid
);
optimized
.
gridding
(
plan
,
w_offset
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities
,
uvw
,
baselines
,
aterms
,
aterms_offsets
,
spheroidal
);
optimized
.
get_grid
();
std
::
clog
<<
">>> Run reference gridding"
<<
std
::
endl
;
reference
.
set_grid
(
grid_ref
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
);
reference
.
set_grid
(
grid_ref
);
reference
.
gridding
(
plan
,
w_offset
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities
,
uvw
,
baselines
,
aterms
,
aterms_offsets
,
spheroidal
);
...
...
@@ -217,27 +216,25 @@ int compare_to_reference(float tol = 1000 *
grid
->
data
(),
grid_ref
->
data
());
// Use the same grid for both degridding calls
reference
.
set_grid
(
optimized
.
get_grid
(),
subgrid_size
,
image_size
,
w_step
,
shift
.
data
());
reference
.
set_grid
(
optimized
.
get_grid
());
#else
optimized
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
);
reference
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
);
optimized
.
set_grid
(
grid
);
reference
.
set_grid
(
grid
);
#endif
reference
.
set_grid
(
optimized
.
get_grid
(),
subgrid_size
,
image_size
,
w_step
,
shift
.
data
());
reference
.
set_grid
(
optimized
.
get_grid
());
#if TEST_DEGRIDDING
// Run degridder
std
::
clog
<<
">>> Run degridding"
<<
std
::
endl
;
visibilities
.
zero
();
visibilities_ref
.
zero
();
optimized
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
);
optimized
.
set_grid
(
grid
);
optimized
.
degridding
(
plan
,
w_offset
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities
,
uvw
,
baselines
,
aterms
,
aterms_offsets
,
spheroidal
);
std
::
clog
<<
">>> Run reference degridding"
<<
std
::
endl
;
reference
.
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
);
reference
.
set_grid
(
grid
);
reference
.
degridding
(
plan
,
w_offset
,
shift
,
cell_size
,
kernel_size
,
subgrid_size
,
frequencies
,
visibilities_ref
,
uvw
,
baselines
,
aterms
,
aterms_offsets
,
spheroidal
);
...
...
idg-lib/src/CPU/common/CPU.cpp
View file @
88a07700
...
...
@@ -21,7 +21,7 @@ namespace cpu {
// Constructor
CPU
::
CPU
(
std
::
vector
<
std
::
string
>
libraries
)
:
kernels
(
libraries
)
{
#if defined(DEBUG)
std
::
cout
<<
__func__
<<
std
::
endl
;
std
::
cout
<<
"CPU::"
<<
__func__
<<
std
::
endl
;
#endif
powerSensor
=
get_power_sensor
(
sensor_host
);
...
...
@@ -31,7 +31,7 @@ CPU::CPU(std::vector<std::string> libraries) : kernels(libraries) {
// Destructor
CPU
::~
CPU
()
{
#if defined(DEBUG)
std
::
cout
<<
__func__
<<
std
::
endl
;
std
::
cout
<<
"CPU::"
<<
__func__
<<
std
::
endl
;
#endif
// Delete power sensor
...
...
@@ -65,29 +65,23 @@ std::unique_ptr<Plan> CPU::make_plan(
}
}
void
CPU
::
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
)
{
Proxy
::
set_grid
(
grid
);
m_wtiles
=
WTiles
();
}
void
CPU
::
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
,
int
subgrid_size
,
float
image_size
,
float
w_step
,
const
float
*
shift
)
{
Proxy
::
set_grid
(
grid
,
subgrid_size
,
image_size
,
w_step
,
shift
);
void
CPU
::
init_wtiles
(
float
subgrid_size
)
{
m_wtiles
=
WTiles
(
kernel
::
cpu
::
InstanceCPU
::
kNrWTiles
,
kernel
::
cpu
::
InstanceCPU
::
kWTileSize
);
kernels
.
init_wtiles
(
subgrid_size
);
}
std
::
shared_ptr
<
Grid
>
CPU
::
get_grid
()
{
void
CPU
::
flush_wtiles
(
int
subgrid_size
,
float
image_size
,
float
w_step
,
const
Array1D
<
float
>
&
shift
)
{
// flush all pending Wtiles
WTileUpdateInfo
wtile_flush_info
=
m_wtiles
.
clear
();
if
(
wtile_flush_info
.
wtile_ids
.
size
())
{
int
grid_size
=
m_grid
->
get_x_dim
();
kernels
.
run_adder_wtiles_to_grid
(
m_grid_size
,
m_subgrid_size
,
m_image_size
,
m_w_step
,
m_shift
,
grid_size
,
subgrid_size
,
image_size
,
w_step
,
shift
.
data
()
,
wtile_flush_info
.
wtile_ids
.
size
(),
wtile_flush_info
.
wtile_ids
.
data
(),
wtile_flush_info
.
wtile_coordinates
.
data
(),
m_grid
->
data
());
}
return
m_grid
;
}
unsigned
int
CPU
::
compute_jobsize
(
const
Plan
&
plan
,
...
...
@@ -646,44 +640,57 @@ void CPU::do_calibrate_update_hessian_vector_product2(
station_nr
,
aterms
,
derivative_aterms
,
parameter_vector
);
}
void
CPU
::
do_transform
(
DomainAtoDomainB
direction
,
Array3D
<
std
::
complex
<
float
>>
&
grid
)
{
void
CPU
::
do_transform
(
DomainAtoDomainB
direction
)
{
#if defined(DEBUG)
std
::
cout
<<
__func__
<<
std
::
endl
;
std
::
cout
<<
"FFT (direction: "
<<
direction
<<
")"
<<
std
::
endl
;
#endif
try
{
int
sign
=
(
direction
==
FourierDomainToImageDomain
)
?
1
:
-
1
;
const
auto
&
grid
=
get_grid
()
;
// Constants
auto
grid_size
=
grid
.
get_x_dim
();
unsigned
int
nr_w_layers
=
grid
->
get_w_dim
();
unsigned
int
nr_correlations
=
grid
->
get_z_dim
();
unsigned
int
grid_size
=
grid
->
get_y_dim
();
// Performance measurement
report
.
initialize
(
0
,
0
,
grid_size
);
kernels
.
set_report
(
report
);
State
states
[
2
];
states
[
0
]
=
powerSensor
->
read
();
// FFT shift
if
(
direction
==
FourierDomainToImageDomain
)
{
kernels
.
shift
(
grid
);
// TODO: integrate into adder?
}
else
{
kernels
.
shift
(
grid
);
// TODO: remove
}
for
(
unsigned
int
w
=
0
;
w
<
nr_w_layers
;
w
++
)
{
int
sign
=
(
direction
==
FourierDomainToImageDomain
)
?
1
:
-
1
;
// Run FFT
kernels
.
run_fft
(
grid_size
,
grid_size
,
1
,
grid
.
data
(),
sign
);
// Grid pointer
idg
::
Array3D
<
std
::
complex
<
float
>>
grid_ptr
(
grid
->
data
(
w
),
nr_correlations
,
grid_size
,
grid_size
);
// FFT shift
if
(
direction
==
FourierDomainToImageDomain
)
kernels
.
shift
(
grid
);
// TODO: remove
else
kernels
.
shift
(
grid
);
// TODO: integrate into splitter?
// Constants
auto
grid_size
=
grid
->
get_x_dim
();
// End measurement
states
[
1
]
=
powerSensor
->
read
();
report
.
update_host
(
states
[
0
],
states
[
1
]);
State
states
[
2
];
states
[
0
]
=
powerSensor
->
read
();
// FFT shift
if
(
direction
==
FourierDomainToImageDomain
)
{
kernels
.
shift
(
grid_ptr
);
// TODO: integrate into adder?
}
else
{
kernels
.
shift
(
grid_ptr
);
// TODO: remove
}
// Run FFT
kernels
.
run_fft
(
grid_size
,
grid_size
,
1
,
grid
->
data
(),
sign
);
// FFT shift
if
(
direction
==
FourierDomainToImageDomain
)
kernels
.
shift
(
grid_ptr
);
// TODO: remove
else
kernels
.
shift
(
grid_ptr
);
// TODO: integrate into splitter?
// End measurement
states
[
1
]
=
powerSensor
->
read
();
report
.
update_host
(
states
[
0
],
states
[
1
]);
}
// Report performance
report
.
print_total
();
...
...
idg-lib/src/CPU/common/CPU.h
View file @
88a07700
...
...
@@ -47,13 +47,10 @@ class CPU : public Proxy {
const
Array1D
<
unsigned
int
>&
aterms_offsets
,
Plan
::
Options
options
)
override
;
using
Proxy
::
set_grid
;
// prevents hiding set_grid overloads in Proxy
virtual
void
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
);
virtual
void
init_wtiles
(
float
subgrid_size
)
override
;
virtual
void
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
,
int
subgrid_size
,
float
image_size
,
float
w_step
,
const
float
*
shift
)
override
;
virtual
std
::
shared_ptr
<
Grid
>
get_grid
()
override
;
virtual
void
flush_wtiles
(
int
subgrid_size
,
float
image_size
,
float
w_step
,
const
Array1D
<
float
>&
shift
)
override
;
private:
unsigned
int
compute_jobsize
(
const
Plan
&
plan
,
...
...
@@ -122,8 +119,7 @@ class CPU : public Proxy {
const
Array4D
<
Matrix2x2
<
std
::
complex
<
float
>>>&
derivative_aterms
,
Array2D
<
float
>&
parameter_vector
)
override
;
virtual
void
do_transform
(
DomainAtoDomainB
direction
,
Array3D
<
std
::
complex
<
float
>>&
grid
)
override
;
virtual
void
do_transform
(
DomainAtoDomainB
direction
)
override
;
virtual
void
do_compute_avg_beam
(
const
unsigned
int
nr_antennas
,
const
unsigned
int
nr_channels
,
...
...
idg-lib/src/CUDA/Generic/Generic.cpp
View file @
88a07700
...
...
@@ -18,75 +18,16 @@ Generic::Generic(ProxyInfo info) : CUDA(info) {
#if defined(DEBUG)
std
::
cout
<<
"Generic::"
<<
__func__
<<
std
::
endl
;
#endif
// Initialize host PowerSensor
hostPowerSensor
=
get_power_sensor
(
sensor_host
);
}
// Destructor
Generic
::~
Generic
()
{
delete
hostPowerSensor
;
}
/* High level routines */
void
Generic
::
do_transform
(
DomainAtoDomainB
direction
,
Array3D
<
std
::
complex
<
float
>>&
grid
)
{
Generic
::~
Generic
()
{
#if defined(DEBUG)
std
::
cout
<<
__func__
<<
std
::
endl
;
std
::
cout
<<
"Transform direction: "
<<
direction
<<
std
::
endl
;
std
::
cout
<<
"Generic::"
<<
__func__
<<
std
::
endl
;
#endif
}
// Constants
auto
grid_size
=
grid
.
get_x_dim
();
// Load device
InstanceCUDA
&
device
=
get_device
(
0
);
// Initialize
cu
::
Stream
&
stream
=
device
.
get_execute_stream
();
// Device memory
cu
::
DeviceMemory
&
d_grid
=
device
.
retrieve_device_grid
();
// Performance measurements
report
.
initialize
(
0
,
0
,
grid_size
);
device
.
set_report
(
report
);
State
powerStates
[
4
];
powerStates
[
0
]
=
hostPowerSensor
->
read
();
powerStates
[
2
]
=
device
.
measure
();
// Perform fft shift
device
.
shift
(
grid
);
// Copy grid to device
device
.
copy_htod
(
stream
,
d_grid
,
grid
.
data
(),
grid
.
bytes
());
// Execute fft
device
.
launch_grid_fft
(
d_grid
,
grid_size
,
direction
);
// Copy grid to host
device
.
copy_dtoh
(
stream
,
grid
.
data
(),
d_grid
,
grid
.
bytes
());
stream
.
synchronize
();
// Perform fft shift
device
.
shift
(
grid
);
// Perform fft scaling
std
::
complex
<
float
>
scale
=
std
::
complex
<
float
>
(
2.0
/
(
grid_size
*
grid_size
),
0
);
if
(
direction
==
FourierDomainToImageDomain
)
{
device
.
scale
(
grid
,
scale
);
}
// End measurements
stream
.
synchronize
();
powerStates
[
1
]
=
hostPowerSensor
->
read
();
powerStates
[
3
]
=
device
.
measure
();
// Report performance
report
.
update_host
(
powerStates
[
0
],
powerStates
[
1
]);
report
.
print_total
();
report
.
print_device
(
powerStates
[
2
],
powerStates
[
3
]);
}
// end transform
/* High level routines */
void
Generic
::
run_gridding
(
const
Plan
&
plan
,
const
float
w_step
,
const
Array1D
<
float
>&
shift
,
const
float
cell_size
,
const
unsigned
int
kernel_size
,
...
...
@@ -537,12 +478,6 @@ void Generic::do_degridding(
}
// end degridding
void
Generic
::
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
)
{
set_grid
(
grid
,
0
,
0.0
,
0.0
,
nullptr
);
}
void
Generic
::
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
,
int
/* subgrid_size */
,
float
/* image_size */
,
float
/* w_step */
,
const
float
*
/* shift */
)
{
m_grid
=
grid
;
InstanceCUDA
&
device
=
get_device
(
0
);
device
.
allocate_device_grid
(
grid
->
bytes
());
...
...
idg-lib/src/CUDA/Generic/Generic.h
View file @
88a07700
...
...
@@ -51,11 +51,6 @@ class Generic : public CUDA {
const
Array1D
<
unsigned
int
>&
aterms_offsets
,
const
Array2D
<
float
>&
spheroidal
)
override
;
virtual
void
do_transform
(
DomainAtoDomainB
direction
,
Array3D
<
std
::
complex
<
float
>>&
grid
)
override
;
powersensor
::
PowerSensor
*
hostPowerSensor
;
void
run_gridding
(
const
Plan
&
plan
,
const
float
w_step
,
const
Array1D
<
float
>&
shift
,
const
float
cell_size
,
const
unsigned
int
kernel_size
,
...
...
@@ -79,10 +74,7 @@ class Generic : public CUDA {
const
Array2D
<
float
>&
spheroidal
);
public:
virtual
void
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
);
virtual
void
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
,
int
subgrid_size
,
float
image_size
,
float
w_step
,
const
float
*
shift
)
override
;
virtual
void
set_grid
(
std
::
shared_ptr
<
Grid
>
grid
)
override
;
virtual
std
::
shared_ptr
<
Grid
>
get_grid
()
override
;
...
...
idg-lib/src/CUDA/Unified/Unified.cpp
View file @
88a07700
...
...
@@ -34,74 +34,6 @@ Unified::~Unified() {
#endif
}
void
Unified
::
do_transform
(
DomainAtoDomainB
direction
,
Array3D
<
std
::
complex
<
float
>>&
grid
)
{
#if defined(DEBUG)
std
::
cout
<<
"Unified::"
<<
__func__
<<
std
::
endl
;
cout
<<
"Transform direction: "
<<
direction
<<
endl
;
#endif
// TODO: fix this method
// (1) does the m_grid_tiled need to be untiled before use?
// (2) even though m_grid_tiled is Unified Memory, cuFFT fails
// Constants
auto
nr_correlations
=
grid
.
get_z_dim
();
;
auto
grid_size
=
grid
.
get_x_dim
();
// Load device
InstanceCUDA
&
device
=
get_device
(
0
);
// Get UnifiedMemory object for grid data
cu
::
UnifiedMemory
u_grid
(
device
.
get_context
(),
m_grid_tiled
->
data
(),
m_grid_tiled
->
bytes
());
// Initialize
cu
::
Stream
&
stream
=
device
.
get_execute_stream
();
// Performance measurements
report
.
initialize
(
0
,
0
,
grid_size
);
device
.
set_report
(
report
);
State
powerStates
[
4
];
powerStates
[
0
]
=
hostPowerSensor
->
read
();
powerStates
[
2
]
=
device
.
measure
();
// Perform fft shift
double
time_shift
=
-
omp_get_wtime
();
device
.
shift
(
grid
);
time_shift
+=
omp_get_wtime
();
// Execute fft
device
.
launch_grid_fft_unified
(
grid_size
,
nr_correlations
,
grid
,
direction
);
stream
.
synchronize
();
// Perform fft shift
time_shift
=
-
omp_get_wtime
();
device
.
shift
(
grid
);
time_shift
+=
omp_get_wtime
();
// Perform fft scaling
double
time_scale
=
-
omp_get_wtime
();
complex
<
float
>
scale
=
complex
<
float
>
(
2.0
/
(
grid_size
*
grid_size
),
0
);
if
(
direction
==
FourierDomainToImageDomain
)
{
device
.
scale
(
grid
,
scale
);
}
time_scale
+=
omp_get_wtime
();
// End measurements
stream
.
synchronize
();
powerStates
[
1
]
=
hostPowerSensor
->
read
();
powerStates
[
3
]
=
device
.
measure
();
// Report performance
report
.
update_fft_shift
(
time_shift
);
report
.
update_fft_scale
(
time_scale
);