Skip to content
Snippets Groups Projects
Commit f39d7dfc authored by John Romein's avatar John Romein
Browse files

bug 225:

Mainly work on intro.
parent 57f1d7e3
No related branches found
No related tags found
No related merge requests found
No preview for this file type
...@@ -58,7 +58,7 @@ ...@@ -58,7 +58,7 @@
inkscape:window-width="1600" inkscape:window-width="1600"
inkscape:window-height="1125" inkscape:window-height="1125"
inkscape:window-x="0" inkscape:window-x="0"
inkscape:window-y="25" /> inkscape:window-y="0" />
<metadata <metadata
id="metadata8414"> id="metadata8414">
<rdf:RDF> <rdf:RDF>
...@@ -324,7 +324,7 @@ ...@@ -324,7 +324,7 @@
font-style="normal" font-style="normal"
y="515.58063" y="515.58063"
x="261.26437" x="261.26437"
xml:space="preserve">IONProc</text> xml:space="preserve">CNProc</text>
<text <text
style="font-size:5.89105511px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#a14000;stroke:#a14000;stroke-width:0.0007671in;font-family:Helvetica" style="font-size:5.89105511px;font-style:normal;font-weight:bold;text-anchor:middle;fill:#a14000;stroke:#a14000;stroke-width:0.0007671in;font-family:Helvetica"
id="text8539" id="text8539"
......
...@@ -101,7 +101,7 @@ newstring ...@@ -101,7 +101,7 @@ newstring
hjc hjc
vjc vjc
x 32.7 x 32.7
y 7.496 y 7.492
lcolor 1 0.827450931 0.125490189 lcolor 1 0.827450931 0.125490189
: 1 : 1
...@@ -110,23 +110,23 @@ copystring ...@@ -110,23 +110,23 @@ copystring
: 2 : 2
copystring copystring
y 6.696 y 6.692
: 3 : 3
copystring copystring
y 6.296 y 6.292
: 4 : 4
copystring copystring
y 5.896 y 5.892
: 5 : 5
copystring copystring
y 5.496 y 5.492
: 6 : 6
copystring copystring
y 5.096 y 5.092
: 7 : 7
newcurve newcurve
...@@ -141,7 +141,7 @@ newcurve ...@@ -141,7 +141,7 @@ newcurve
marktype box marktype box
marksize 3.4 .32 marksize 3.4 .32
gray 0 gray 0
cfill 1 0 1 cfill 1 .5 1
pts pts
32.7 7.1 32.7 7.1
...@@ -187,7 +187,7 @@ newcurve ...@@ -187,7 +187,7 @@ newcurve
newcurve newcurve
marktype circle marktype circle
marksize 1.2 marksize 1.3
gray 0 gray 0
fill 0 fill 0
pts pts
...@@ -262,7 +262,7 @@ marktype xbar ...@@ -262,7 +262,7 @@ marktype xbar
newcurve newcurve
marktype xbar marktype xbar
marksize 3.4 marksize 3.4
cfill 1 0 1 cfill 1 .5 1
pts pts
32 1.76 32 1.76
36 2.07 36 2.07
...@@ -428,8 +428,8 @@ newcurve ...@@ -428,8 +428,8 @@ newcurve
marktype none marktype none
rarrow rarrow
asize .6 .1 asize .6 .1
(* color 1 0 1 (* color 1 .5 1
acfill 1 0 1 *) acfill 1 .5 1 *)
pts pts
68 5.15 68 5.15
66 5.48 66 5.48
......
...@@ -9,54 +9,9 @@ Single ...@@ -9,54 +9,9 @@ Single
1200 2 1200 2
0 32 #ffd21f 0 32 #ffd21f
0 33 #ffffc0 0 33 #ffffc0
0 34 #ff410d 0 34 #569d1b
0 35 #c0c0c0 0 35 #c0c0c0
0 36 #f0c0c0 0 36 #f0c0c0
6 7125 1425 8475 2775
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
7200 2700 7200 2100 7800 2100 7800 1500 8400 1500 8400 2700
7200 2700
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
7800 2700 7800 2100 8400 2100
4 1 0 40 -1 18 30 0.0000 4 360 360 7500 2550 A\001
4 1 0 40 -1 18 30 0.0000 4 360 360 8100 1950 A\001
-6
6 5925 2625 7275 3975
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
6000 3900 6000 3300 6600 3300 6600 2700 7200 2700 7200 3900
6000 3900
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
6600 3900 6600 3300 7200 3300
4 1 0 40 -1 18 30 0.0000 4 360 360 6300 3750 A\001
4 1 0 40 -1 18 30 0.0000 4 360 360 6900 3150 A\001
-6
6 4725 3825 6075 5175
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
4800 5100 4800 4500 5400 4500 5400 3900 6000 3900 6000 5100
4800 5100
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
5400 5100 5400 4500 6000 4500
4 1 0 40 -1 18 30 0.0000 4 360 360 5100 4950 A\001
4 1 0 40 -1 18 30 0.0000 4 360 360 5700 4350 A\001
-6
6 3525 5025 4875 6375
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
3600 6300 3600 5700 4200 5700 4200 5100 4800 5100 4800 6300
3600 6300
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
4200 6300 4200 5700 4800 5700
4 1 0 40 -1 18 30 0.0000 4 360 360 3900 6150 A\001
4 1 0 40 -1 18 30 0.0000 4 360 360 4500 5550 A\001
-6
6 2325 6225 3675 7575
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
2400 7500 2400 6900 3000 6900 3000 6300 3600 6300 3600 7500
2400 7500
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
3000 7500 3000 6900 3600 6900
4 1 0 40 -1 18 30 0.0000 4 360 360 2700 7350 A\001
4 1 0 40 -1 18 30 0.0000 4 360 360 3300 6750 A\001
-6
6 7125 2625 8475 3975 6 7125 2625 8475 3975
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 2 2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 2
7200 3300 8400 3300 7200 3300 8400 3300
...@@ -143,6 +98,31 @@ Single ...@@ -143,6 +98,31 @@ Single
2 1 0 3 0 35 40 -1 -1 0.000 0 0 -1 1 0 2 2 1 0 3 0 35 40 -1 -1 0.000 0 0 -1 1 0 2
2 1 3.00 150.00 150.00 2 1 3.00 150.00 150.00
6600 8550 7275 8550 6600 8550 7275 8550
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
2400 7500 2400 6900 3000 6900 3000 6300 3600 6300 3600 7500
2400 7500
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
3000 7500 3000 6900 3600 6900
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
3600 6300 3600 5700 4200 5700 4200 5100 4800 5100 4800 6300
3600 6300
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
4200 6300 4200 5700 4800 5700
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
4800 5100 4800 4500 5400 4500 5400 3900 6000 3900 6000 5100
4800 5100
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
5400 5100 5400 4500 6000 4500
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
7200 2700 7200 2100 7800 2100 7800 1500 8400 1500 8400 2700
7200 2700
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
7800 2700 7800 2100 8400 2100
2 3 0 5 0 34 50 -1 20 0.000 0 0 -1 0 0 7
6000 3900 6000 3300 6600 3300 6600 2700 7200 2700 7200 3900
6000 3900
2 1 0 2 0 7 40 -1 -1 0.000 0 0 -1 0 0 3
6600 3900 6600 3300 7200 3300
4 1 0 40 -1 18 30 1.5708 4 360 1605 9450 5175 station\001 4 1 0 40 -1 18 30 1.5708 4 360 1605 9450 5175 station\001
4 1 0 40 -1 18 30 0.0000 4 360 1605 5625 8700 station\001 4 1 0 40 -1 18 30 0.0000 4 360 1605 5625 8700 station\001
4 1 0 40 -1 16 30 0.0000 4 345 270 2700 8100 0\001 4 1 0 40 -1 16 30 0.0000 4 345 270 2700 8100 0\001
...@@ -165,3 +145,13 @@ Single ...@@ -165,3 +145,13 @@ Single
4 0 0 40 -1 16 30 0.0000 4 345 270 8625 1950 9\001 4 0 0 40 -1 16 30 0.0000 4 345 270 8625 1950 9\001
4 0 0 40 -1 16 30 0.0000 4 345 270 8625 4950 4\001 4 0 0 40 -1 16 30 0.0000 4 345 270 8625 4950 4\001
4 0 0 40 -1 16 30 0.0000 4 345 270 8625 6150 2\001 4 0 0 40 -1 16 30 0.0000 4 345 270 8625 6150 2\001
4 1 32 40 -1 18 30 0.0000 4 360 360 2700 7350 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 3300 6750 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 3900 6150 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 4500 5550 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 5100 4950 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 5700 4350 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 6300 3750 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 6900 3150 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 7500 2550 A\001
4 1 32 40 -1 18 30 0.0000 4 360 360 8100 1950 A\001
...@@ -49,19 +49,22 @@ combines the signals from many thousands of simple antennas. ...@@ -49,19 +49,22 @@ combines the signals from many thousands of simple antennas.
Its revolutionary design allows observations in a frequency range that has Its revolutionary design allows observations in a frequency range that has
hardly been studied before. hardly been studied before.
This paper focuses on another novel feature: where traditional telescopes Another novel feature of LOFAR is the elaborate use of \emph{software\/} to
combine data using customized hardware, we process the data in \emph{software}. process data, where traditional telescopes use customized hardware.
This dramatically increases flexibility and substantially reduces costs, but This dramatically increases flexibility and substantially reduces costs, but
the high processing and bandwidth requirements compel the use of a the high processing and bandwidth requirements compel the use of a
supercomputer. supercomputer.
The antenna signals are centrally combined, filtered, optionally beam-formed, The antenna signals are centrally combined, filtered, optionally beam-formed,
and correlated by an IBM Blue Gene/P. and correlated by an IBM Blue Gene/P.
This paper describes the implementation of the so-called correlator.
To meet the real-time requirements, the application is highly optimized, and To meet the real-time requirements, the application is highly optimized, and
reaches exceptionally high computational and I/O efficiencies. reaches exceptionally high computational and I/O efficiencies.
This allows us to use only half the planned amount of resources, \emph{and\/} Additionally, we study the scalability of the system, and show that it scales
process 50\% more telescope data, significantly improving the effectiveness well beyond the requirements.
of the entire telescope. The optimizations allows us to use only half the planned amount of resources,
\emph{and\/} process 50\% more telescope data, significantly improving the
effectiveness of the entire telescope.
\end{abstract} \end{abstract}
...@@ -73,17 +76,19 @@ range. ...@@ -73,17 +76,19 @@ range.
It is the first of a new generation of radio telescopes, that breaks with It is the first of a new generation of radio telescopes, that breaks with
the concepts of traditional telescopes in several ways. the concepts of traditional telescopes in several ways.
Rather than using large, expensive dishes, LOFAR uses many thousands of Rather than using large, expensive dishes, LOFAR uses many thousands of
simple antennas that have no movable parts~\cite{Butcher:04,deVos:09}, see Figure~\ref{fig:lba-field}. simple antennas that have no movable parts~\cite{Butcher:04,deVos:09} (see
Figure~\ref{fig:lba-field}).
Essentially, it is a distributed sensor network that monitors the sky Essentially, it is a distributed sensor network that monitors the sky
and combines all signals centrally. and combines all signals centrally.
This concept requires much more signal processing, but the additional costs This concept requires much more signal processing, but the additional costs
of silicon are easily offset by cost savings in steel that would be needed for dishes. of silicon are easily offset by cost savings in steel that would be needed for dishes.
Moreover, LOFAR can observe the sky in many directions simultaneously and Moreover, LOFAR can observe the sky in many directions simultaneously and
switch directions instantaneously. switch directions instantaneously.
In several ways, LOFAR will be the largest telescope of the world, In several ways, LOFAR will be the largest telescope of the world, and will
and will enable groundbreaking research in several areas of astronomy and particle enable groundbreaking research in several areas of astronomy and particle
physics~\cite{Bruyn:02}. The different goals and observation types require physics~\cite{Bruyn:02}.
several different processing pipelines, however. The different goals and observation types require several different processing
pipelines, however.
\begin{figure}[t] \begin{figure}[t]
\vspace{-4mm} \vspace{-4mm}
...@@ -103,8 +108,8 @@ However, the desire for a flexible and reconfigurable instrument with ...@@ -103,8 +108,8 @@ However, the desire for a flexible and reconfigurable instrument with
different processing pipelines for different observation types demands a different processing pipelines for different observation types demands a
software solution. software solution.
The availability of sufficiently powerful supercomputers allows this. The availability of sufficiently powerful supercomputers allows this.
We have to perform all processing in real time, %We have to perform all processing in real time,
since the data streams simply are too large to store. %since the data streams simply are too large to store.
\begin{figure*} \begin{figure*}
\begin{minipage}[b]{11cm} \begin{minipage}[b]{11cm}
...@@ -122,13 +127,7 @@ since the data streams simply are too large to store. ...@@ -122,13 +127,7 @@ since the data streams simply are too large to store.
The most common mode of operation for LOFAR is the The most common mode of operation for LOFAR is the
\emph{standard imaging pipeline}, which is used to generate sky images. \emph{standard imaging pipeline}, which is used to generate sky images.
This mode filters and correlates the data sent by the stations. This mode filters and correlates the data sent by the stations in the field.
This paper describes the implementation and
performance characteristics of the real-time part of this pipeline on an IBM Blue Gene/P (BG/P)
supercomputer.
We present a highly-optimized implementation that achieves very high
computational performance: the correlator sustains 96\% of the theoretical
floating-point peak performance during the computational phase.
%The \emph{Epoch of Reionization (EoR)\/} pipeline should detect the faint, %The \emph{Epoch of Reionization (EoR)\/} pipeline should detect the faint,
%very first sky objects. %very first sky objects.
%It is a similar pipeline, but with even higher computational demands, due to %It is a similar pipeline, but with even higher computational demands, due to
...@@ -136,9 +135,26 @@ floating-point peak performance during the computational phase. ...@@ -136,9 +135,26 @@ floating-point peak performance during the computational phase.
Several \emph{pulsar pipelines\/} are being developed as well, that either Several \emph{pulsar pipelines\/} are being developed as well, that either
search large sky regions to find unknown pulsars or, once found, sensitively search large sky regions to find unknown pulsars or, once found, sensitively
observe their characteristics. observe their characteristics.
We also started development of a \emph{transient pipeline}, that observes the
sky for transient events.
The pipelines share common components, shortening their development time. The pipelines share common components, shortening their development time.
The software also supports multiple simultaneous observations, even of different The software also supports multiple simultaneous observations, even of
types. different types.
The first part of each pipeline runs in real time, since the receivers produce
too much data to store on disk.
Only after substantial reduction of the data volume, intermediate products
are written to disk.
In this paper, we focus on the real-time part of the standard imaging pipeline,
commonly called ``the correlator''.
We first present an integral overview of the correlator and a discussion of
the optimizations that we implemented.
The main contribution of this paper is an in-depth study of all performance
aspects, real-time behavior, and scalability characteristics of the correlator
as a whole (previous papers only focused on single aspects and did not
comprise a scalability study).
We also enumerate the conditions that are necessary to obtain good,
real-time performance, and assert that none of these conditions can be ignored.
The receivers produce hundreds of gigabits per second. The receivers produce hundreds of gigabits per second.
To handle the high data rate, To handle the high data rate,
...@@ -151,24 +167,33 @@ Additionally, we developed a low-overhead network protocol~\cite{Romein:09a} ...@@ -151,24 +167,33 @@ Additionally, we developed a low-overhead network protocol~\cite{Romein:09a}
for communication between I/O~nodes and compute nodes, since we were not able for communication between I/O~nodes and compute nodes, since we were not able
to achieve the required internal input and output data rates with the standard to achieve the required internal input and output data rates with the standard
network software. network software.
The correlator achieves very high computational performance: it sustains
% @@@ TODO, change back to ``we'' 96\% of the theoretical floating-point peak performance during the
In earlier work, Romein et al.~\cite{Romein:06}, presented an initial prototype computational phase~\cite{Romein:06}.
implementation of the LOFAR processing on our previous platform, the The application scales to data rates well beyond the requirements;
Blue Gene/L. The work presented in this paper differs in several in fact, the performance is so good that it led to the decision to change
important ways. First, we have now implemented the complete I/O path: the LOFAR specifications: the instrument can observe over 50\% more sources
from the LOFAR stations in the field to the Blue Gene/P, or frequencies simultaneously than in its original design specifications,
from the I/O~nodes to the compute nodes, the internal greatly enhancing the efficiency of the entire instrument.
data transpose, and finally the transfer %This paper describes the implementation of the LOFAR correlator and shows
to the storage cluster. In contrast, in~\cite{Romein:06} %how the optimized system scales to sizes beyond what is required
we focused only on computing, and not on I/O. %In earlier work, we presented an initial prototype
Second, we now have a fully functional \emph{real time} processing pipeline %implementation of the LOFAR processing on our previous platform, the
for the most important standard imaging mode, and prototypes for other %Blue Gene/L~\cite{Romein:06}.
pipelines, showing the flexibility of our design. Finally, for the %The work presented in this paper differs in several
first time, we perform a scalability analysis, proving that we can %important ways. First, we have now implemented the complete I/O path:
support the requirements of the full LOFAR instrument, both in %from the LOFAR stations in the field to the Blue Gene/P,
terms of computing and I/O, in real time. In fact, we even surpass the %from the I/O~nodes to the compute nodes, the internal
requirements. %data transpose, and finally the transfer
%to the storage cluster. In contrast, in~\cite{Romein:06}
%we focused only on computing, and not on I/O.
%Second, we now have a fully functional \emph{real time} processing pipeline
%for the most important standard imaging mode, and prototypes for other
%%pipelines, showing the flexibility of our design. Finally, for the
%first time, we perform a scalability analysis, proving that we can
%support the requirements of the full LOFAR instrument, both in
%terms of computing and I/O, in real time. In fact, we even surpass the
%requirements.
This paper is structured as follows. This paper is structured as follows.
We mention related work in Section~\ref{sec:related-work}. We mention related work in Section~\ref{sec:related-work}.
...@@ -266,7 +291,8 @@ studying the origin of high-energy ...@@ -266,7 +291,8 @@ studying the origin of high-energy
Neither the source, nor the physical process that accelerates such particles Neither the source, nor the physical process that accelerates such particles
is known. is known.
Third, LOFAR's ability to continuously monitor a large fraction of the sky Third, LOFAR's ability to continuously monitor a large fraction of the sky
makes it uniquely suited to find new \emph{pulsars} and to study \emph{transient sources}. makes it uniquely suited to find new \emph{pulsars\/} and to study
\emph{transient sources}.
Since LOFAR has no moving parts, it can instantaneously switch focus to Since LOFAR has no moving parts, it can instantaneously switch focus to
some galactic event. some galactic event.
Fourth, \emph{Deep Extragalactic Surveys\/} will be carried out to find the Fourth, \emph{Deep Extragalactic Surveys\/} will be carried out to find the
...@@ -313,10 +339,9 @@ used for all observations. ...@@ -313,10 +339,9 @@ used for all observations.
The roll-out of the stations is currently in progress. The roll-out of the stations is currently in progress.
Each station is equipped with 48--96 LBAs and 48--96 HBA tiles. Each station is equipped with 48--96 LBAs and 48--96 HBA tiles.
A station also features a cabinet where initial processing is done. A station also features a cabinet where initial processing is done, like
Typical operations that are performed here include analog-to-digital analog-to-digital conversion, filtering, frequency selection, and combination
conversion, filtering, frequency selection, and combination of the signals of the signals from the different receivers.
from the different receivers.
One of the distinctive properties of LOFAR is that the receivers are One of the distinctive properties of LOFAR is that the receivers are
omni-directional, and that multiple, simultaneous observation directions are omni-directional, and that multiple, simultaneous observation directions are
supported. supported.
...@@ -715,12 +740,11 @@ Such a chunk is the unit of data that is sent to the compute node for ...@@ -715,12 +740,11 @@ Such a chunk is the unit of data that is sent to the compute node for
further processing. Since processing a chunk typically takes much further processing. Since processing a chunk typically takes much
longer than one second, the chunks are distributed round robin over a longer than one second, the chunks are distributed round robin over a
group of processor cores, as illustrated by group of processor cores, as illustrated by
Figure~\ref{fig:round-robin}. Subsequent chunks are processed by Figure~\ref{fig:round-robin}.
different processor cores. A core must finish its work before it is Subsequent chunks are processed by different processor cores.
time to process the next chunk. A core first receives data from the A core first receives data from the I/O~node, processes them, sends back the
I/O~node (green in the figure, the left side of the bars), processes results, and idles until the I/O~node sends new data.
them (yellow, middle part of the bars), sends back the results (red, A core must finish its work before it is time to process the next chunk.
right side of the bars), and idles until the I/O~node sends new data.
For simplicity, Figure~\ref{fig:round-robin} shows the processing of For simplicity, Figure~\ref{fig:round-robin} shows the processing of
three subbands on six cores. three subbands on six cores.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment