diff --git a/.gitattributes b/.gitattributes
index 42aa792159d3f31d61bbf352b5a42563ac674277..2cbdedc39fdf8b198f73a5f67b3bfe838ae43a6e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2602,6 +2602,7 @@ doc/papers/2011/europar/lofar.bib -text
 doc/papers/2011/europar/lofar.pdf -text
 doc/papers/2011/europar/lofar.tex -text
 doc/papers/2011/europar/pencilbeams.svg -text
+doc/papers/2011/europar/processing.fig -text
 doc/papers/2011/europar/stations-beams.jgr -text
 doc/papers/references.bib -text
 /lofar_config.h.cmake -text
diff --git a/doc/papers/2011/europar/ION-processing.pdf b/doc/papers/2011/europar/ION-processing.pdf
index 2489b70892e81c82e3cccba231c1489bfeae7ddf..fa611a67b2ce2a77d1fc34bc888f7a765dedff66 100644
Binary files a/doc/papers/2011/europar/ION-processing.pdf and b/doc/papers/2011/europar/ION-processing.pdf differ
diff --git a/doc/papers/2011/europar/Makefile b/doc/papers/2011/europar/Makefile
index c2a9d7160b4ca0155f54dfb856b5435b166e02b1..4f0f5ff824e0278bdf377f921c73cc07ed485aef 100644
--- a/doc/papers/2011/europar/Makefile
+++ b/doc/papers/2011/europar/Makefile
@@ -2,7 +2,7 @@ TEX_SOURCES =	lofar.tex
 
 BIB_SOURCES =	lofar.bib
 
-FIG_SOURCES =	delay.fig lofar-stations.fig
+FIG_SOURCES =	delay.fig lofar-stations.fig processing.fig
 
 JGR_SOURCES =	stations-beams.jgr execution_times.jgr coherent-dedispersion.jgr dispersed-signal.jgr
 
@@ -32,7 +32,7 @@ TEXFONTS =	:
 		jgraph $< > $@
 
 %.pdf:		%.eps
-		epstopdf --filter < $< > $@
+		epstopdf --outfile=$@ $<
 
 %.pdf:		%.fig
 		fig2dev -L pdf $< $@
@@ -47,6 +47,9 @@ TEXFONTS =	:
 %-bw.jpg:	%.jpg
 		convert $< -colorspace Gray $@
 
+%-bw.png:	%.png
+		convert $< -colorspace Gray $@
+
 lofar.pdf:	$(TEX_SOURCES) $(STY_SOURCES) $(BIB_SOURCES) $(FIGURES)
 		TEXINPUTS=$(TEXINPUTS) TEXFONTS=$(TEXFONTS) pdflatex lofar
 		bibtex lofar
diff --git a/doc/papers/2011/europar/lofar.pdf b/doc/papers/2011/europar/lofar.pdf
index cf15506ee97f911ece867d53e672a839320e00ed..efb479e9254370efcb433246ba88b066cadde563 100644
Binary files a/doc/papers/2011/europar/lofar.pdf and b/doc/papers/2011/europar/lofar.pdf differ
diff --git a/doc/papers/2011/europar/lofar.tex b/doc/papers/2011/europar/lofar.tex
index 6a13a703bf1bcf26092b857a3fd6b5c55e0d6933..372a80bd1c7cefc37bacd0637374fb837417096c 100644
--- a/doc/papers/2011/europar/lofar.tex
+++ b/doc/papers/2011/europar/lofar.tex
@@ -75,26 +75,35 @@ LOFAR imaging pipeline \cite{Romein:10a}
 \label{Sec:LOFAR}
 
 \begin{figure}[ht]
+\comment{
 \subfigure[A field with low-band antennas (dipoles).]{
   \makebox[35mm][c]{
-     \includegraphics[width=0.3\textwidth]{LBA-field.jpg}
+     \includegraphics[width=0.27\textwidth]{LBA-field.jpg}
      \label{fig:lbafield}
   }
 }
 \hfill
+}
 \subfigure[Locations of the stations.]{
-  \makebox[40mm][c]{
+  \makebox[37mm][c]{
      \includegraphics[width=0.35\textwidth]{lofar-stations.pdf}
      \label{fig:map}
   }
 }
 \hfill
 \subfigure[The left antenna receives the wave later.]{
-  \makebox[30mm][c]{
-    \includegraphics[width=0.25\textwidth]{delay.pdf}
+  \makebox[28mm][c]{
+    \includegraphics[width=0.20\textwidth]{delay.pdf}
     \label{fig:delay}
   }
 }
+\hfill
+\subfigure[Tied-array beams (hexagons) formed within two station beams (ellipses).]{
+  \makebox[50mm][c]{
+    \includegraphics[width=0.3\textwidth]{pencilbeams.pdf}
+    \label{fig:pencilbeams}
+  }  
+}
 \caption{LOFAR antennas}
 \end{figure}
 
@@ -129,22 +138,7 @@ The BG/P contains several networks. A fast \emph{3-dimensional torus\/} connects
 \subsection{External I/O}
 \label{Sec:Networks}
 
-\begin{figure}[ht]
-\begin{minipage}[t]{0.47\textwidth}
-\includegraphics[width=\textwidth]{ION-processing.pdf}
-\caption{Data flow diagram for the I/O nodes.}
-\label{fig:ion-processing}
-\end{minipage}
-\hfill
-\begin{minipage}[t]{0.4\textwidth}
-\center
-\includegraphics[width=0.8\textwidth]{pencilbeams.pdf}
-\caption{Tied-array beams (hexagons) formed within a station beam (ellipse).}
-\label{fig:pencilbeams}
-\end{minipage}
-\end{figure}
-
-We customised the I/O node software stack~\cite{Yoshii:10} and run a multi-threaded program on each I/O~node which is responsible for two tasks: the handling of input, and the handling of output (see Figure \ref{fig:ion-processing}). Even though the I/O nodes each have a 10~Gb/s Ethernet interface, they do not have enough computation power to handle 10~Gb/s of data. The overhead of handling IRQs, IP, and UDP/TCP put a high load on the 850~MHz cores of the I/O nodes, limiting performance. An I/O node can output at most 3.1~Gb/s, unless it has to handle station input (3.1~Gb/s), in which case it can output at most 1.1~Gb/s. We implemented a low-overhead communication protocol called FCNP~\cite{Romein:09a} to efficiently transport data to and from the compute nodes. Each I/O node forwards its data to the compute nodes, which perform all of the necessary processing. Once the compute nodes have finished processing the data, the results are sent back to the I/O nodes. The I/O nodes forward these results to our storage cluster, which can sustain a throughput up to 80~Gb/s. The I/O node drops output data if it cannot be sent, in order to keep the system running at real time.
+We customised the I/O node software stack~\cite{Yoshii:10} and run a multi-threaded program on each I/O~node which is responsible for the handling of both the input and the output. Even though the I/O nodes each have a 10~Gb/s Ethernet interface, they do not have enough computation power to handle 10~Gb/s of data. The overhead of handling IRQs, IP, and UDP/TCP puts a high load on the 850~MHz cores of the I/O nodes, limiting performance. An I/O node can output at most 3.1~Gb/s, unless it has to handle station input (3.1~Gb/s), in which case it can output at most 1.1~Gb/s. We implemented a low-overhead communication protocol called FCNP~\cite{Romein:09a} to efficiently transport data to and from the compute nodes. Each I/O node forwards its data to the compute nodes, which perform all of the necessary processing. Once the compute nodes have finished processing the data, the results are sent back to the I/O nodes. The I/O nodes forward these results to our storage cluster, which can sustain a throughput of up to 80~Gb/s. The I/O node drops output data if it cannot be sent, in order to keep the system running in real time.
 
 \comment{
   BG/P explanation:
@@ -163,23 +157,30 @@ The BG/P, which receives the signals from all stations, again performs delay com
 
 Different tied-array beams are created by adding the signals from the individual stations using different delays. The delays that have to be applied to obtain a tied-array beam depends on the relative positions of the stations and the relative direction of the tied-array beam with respect to the station beam. The delays are applied in two phases. First, the streams are aligned by shifting them a whole number of samples with respect to each other, which resolves delay differences up to the granularity of a single sample. Then, the remaining sub-sample delays are compensated for by shifting the phase of the signal. In order to obtain different tied-array beams, only the sub-sample delays have to be adjusted. A phase shift is performed by applying a complex multiplication. To form a beam, the beam former gathers the streams of samples from the stations, multiplies them with precomputed weights representing the required phase shift, and adds the streams together. The same weights are applied to both the X and the Y polarisations. The resulting data stream is called the \emph{XY polarisations}, and consists of 32-bit complex floating point numbers (complex floats).
 
-The XY polarisations can optionally be converted into \emph{Stokes IQUV} parameters, which represent the polarisation aspects in an alternative way. The Stokes parameters are defined as $I = X\overline{X} + Y\overline{Y}$, $Q = X\overline{X} - Y\overline{Y}$, $U = 2\mathrm{Re}(X\overline{Y})$, $V = 2\mathrm{Im}(X\overline{Y})$, with each parameter being a 32-bit floats.
+The XY polarisations can optionally be converted into \emph{Stokes IQUV} parameters, which represent the polarisation aspects in an alternative way. The Stokes parameters are defined as $I = X\overline{X} + Y\overline{Y}$, $Q = X\overline{X} - Y\overline{Y}$, $U = 2\mathrm{Re}(X\overline{Y})$, $V = 2\mathrm{Im}(X\overline{Y})$, with each parameter being a 32-bit float.
 
-Both the XY polarisations and the Stokes IQUV parameters require up to 6.2~Gb/s per beam, which severely limits the number of beams that can be produced, and which represents a time resolution which is not always necessary. For example, in sky surveys, it is desirable to create many beams. The data rate per beam thus has to be lowered. For many-beam observations, we convert the XY polarisations into just the \emph{Stokes I} parameter, which represents the amplitude of the signal in the X and Y polarisations combined. The resulting data rate is 1.5~Gb/s per beam, but we also allow time-wise integration to further reduce the data rate with an integer factor, allowing even more beams to be created.
+Both the XY polarisations and the Stokes IQUV parameters require up to 6.2~Gb/s per beam, which severely limits the number of beams that can be produced, and which represents a time resolution which is not always necessary. For example, in sky surveys, it is desirable to create many beams. The data rate per beam thus has to be lowered. For many-beam observations, we convert the XY polarisations into just the \emph{Stokes I} parameter, which represents the amplitude of the signal in the X and Y polarisations combined. The resulting data rate is 1.5~Gb/s per beam, but we also allow temporal integration to further reduce the data rate by an integer factor, allowing even more beams to be created. For each beam, each polarisation or Stokes parameter is stored in a separate file. If too many I/O nodes are limited to 1.1~Gb/s of output, full polarisation or Stokes parameter streams are too wide to transport. In such cases, we split the streams into \emph{slices} of 83 or 124 subbands per substream instead of 248.
 
-For each beam, each polarisation or Stokes parameter is stored in a separate file. If too many I/O nodes are limited to 1.1~Gb/s of output, full polarisation or Stokes parameter streams are too wide to transport. In such cases, we split the streams into \emph{slices} of 83 or 124 subbands per substream instead of 248.
+Finally, our software can produce the Stokes parameters (I or IQUV) of an \emph{incoherent} beam, which is an accumulation of the uncompensated station signals. The incoherent beam is less sensitive than a tied-array beam, but it maintains the wide field-of-view of the stations. The incoherent beam is used to detect the presence of sources, but does not reveal their location within the station beams.
 
 % TODO: incoherent stokes
 \section{Pulsar Pipeline}
 
 %To observe known pulsars, our beam former is put in the high-resolution mode, in which Complex Voltages or Stokes IQUV parameters are recorded at full bandwidth in order to closely study the shapes of the individual pulses.
 
-In this section, we will describe in detail how the full signal-processing pipeline operates, in and around the beam former. The use of a software pipeline allows us to reconfigure the components and design of our standard imaging pipeline, described in \cite{Romein:10a}. In fact, both pipelines can be run simultaneously.
+In this section, we will describe in detail how the full signal-processing pipeline operates, in and around the beam former. The use of a software pipeline allows us to reconfigure the components and design of our standard imaging pipeline, described in \cite{Romein:10a}. Both pipelines can be run simultaneously. Figure \ref{fig:processing} gives an overview of our system.
+
+\begin{figure}[ht]
+\center
+\includegraphics[width=0.8\textwidth]{processing.pdf}
+\caption{Data flow diagram describing three pipelines.}
+\label{fig:processing}
+\end{figure}
 
 \subsection{Input from Stations}
-The first step in the pipeline is receiving and collecting from the stations on the I/O nodes. Each I/O node receives the data of (at most) one station, and stores the received data in a circular buffer (recall Figure \ref{fig:ion-processing}). If necessary, the read pointer of the circular buffer is shifted a number of samples to reflect the coarse-grain delay compensation that will be necessary to align the streams from different stations.
+The first step in the pipeline is receiving and collecting data from the stations on the I/O nodes. Each I/O node receives the data of (at most) one station, and stores the received data in a circular buffer. If necessary, the read pointer of the circular buffer is shifted a number of samples to reflect the coarse-grain delay compensation that will be necessary to align the streams from different stations.
 
-The station data are split into chunks of one subband and approximately 0.25 seconds. The chunk size is chosen such that the compute cores have enough memory to perform all of the necessary processing. Due to the BG/P design, an I/O node sends chunks to its own compute cores only. The compute cores exchange the chunks they obtain from their I/O node using an all-to-all exchange. 
+The station data are split into chunks of one subband and 0.25 seconds. The chunk size is chosen such that the compute cores have enough memory to perform all of the necessary processing. Due to the BG/P design, an I/O node sends chunks to its own compute cores only. The compute cores exchange the chunks they obtain from their I/O node using an all-to-all exchange. 
 
 \subsection{First All-to-all Exchange}
 
@@ -189,13 +190,15 @@ The communications in the all-to-all exchange are asynchronous, which allows a c
 
 \subsection{Signal Processing}
 
-Once a compute core receives a chunk, it can start processing. First, we convert the station data from 16-bit little-endian integers to 32-bit big-endian floats, in order to be able to do further processing using the powerful dual FPU units present in each core. The data doubles in size, which is the main reason why we implement it \emph{after} the exchange.
-
-Next, the data are filtered by applying a Poly-Phase Filter (PPF) bank, which consists of a Finite Impulse Response (FIR) filter and a Fast-Fourier Transform (FFT). The FFT allows the chunk, which represents a subband of 195~kHz, to be split into narrower subbands (\emph{channels}). A higher frequency resolution allows more precise corrections in the frequency domain, such as the removal of radio interference at very specific frequencies.
-
-Next, fine-grain delay compensation is performed to align the chunks from the different stations to the same source at which the stations are pointed. The fine-grain delay compensation is performed as a phase rotation, which is implemented as one complex multiplication per sample. The exact delays are computed for the begin time and end time of a chunk, and interpolated in frequency and time for each individual sample. %TODO: why a frequency-dependent component?
+Once a compute core receives a chunk, it performs a sequence of processing steps:
 
-Next, a band pass correction is applied to adjust the signal strengths in all channels. This is necessary, because the stations introduce a bias in the signal strengths across the channels within a subband.
+\begin{description}
+\item[Conversion] of the data from 16-bit little-endian integers to 32-bit big-endian floats, in order to be able to do further processing using the powerful dual FPUs present in each core. The data doubles in size, which is the main reason why we perform the conversion \emph{after} the exchange.
+\item[Poly-Phase Filter] (PPF) bank, which consists of a Finite Impulse Response (FIR) filter and a Fast Fourier Transform (FFT), filters the data. The FFT allows the chunk, which represents a subband of 195~kHz, to be split into narrower subbands (\emph{channels}). A higher frequency resolution allows more precise corrections in the frequency domain, such as the removal of radio interference at specific frequencies.
+\item[Clock correction] compensates for known clock offsets between stations.
+\item[Phase (fine-grain) delay compensation] is performed to align the chunks from the different stations. The fine-grain delay compensation is performed as a phase rotation, which is implemented as one complex multiplication per sample. The delays are both frequency and time dependent.
+\item[Band pass] correction is applied to adjust the signal strengths in all channels, because the stations introduce a bias in the signal strengths across the channels within a subband.
+\end{description}
 
 Up to this point, processing chunks from different stations can be done independently, but from here on, the data from all stations are required. The first all-to-all exchange thus ends here.
 
@@ -205,7 +208,7 @@ The beam former creates the beams as described in Section \ref{Sec:Beamforming}.
 
 The delays are applied to the station data through complex multiplications and additions, programmed in assembly. In order to take full advantage of the L1 cache and the available registers, data is processed in sets of 6 stations, producing 3 beams, or a subset thereof to cover the remaining stations and beams. While the exact ideal set size in which the data is to be processed depends on the architecture at hand, we have shown in previous work that similar tradeoffs exist for similar problems across different architectures~\cite{Nieuwpoort:09,BAR}.
 
-Because each beam is an accumulation of the data from all stations, the bandwidth of each beam is equal to the bandwidth of data from a single station, which is 6.2~Gb/s now that the samples are 32-bit floats. Once the beams are formed, they are kept as XY polarisations or transformed into the Stokes IQUV or the Stokes I parameters. In the latter case, the beams can also be integrated time-wise, in which groups of samples of fixed size are accumulated to reduce the resulting data rate.
+Because each beam is an accumulation of the data from all stations, the bandwidth of each beam is equal to the bandwidth of data from a single station, which is 6.2~Gb/s now that the samples are 32-bit floats. Once the beams are formed, they are kept as XY polarisations or transformed into the Stokes IQUV or the Stokes I parameters. In the latter case, the beams can also be integrated temporally to reduce the resulting data rate.
 
 The beam former transforms chunks representing station data into chunks representing beam data. Because a chunk representing station data contained data for only one subband, the chunks representing different subbands of the same beam are still spread out over the full BG/P. Chunks corresponding to the same beam are brought together using a second all-to-all exchange.
 
@@ -231,7 +234,7 @@ Figure \ref{fig:dispersed-signal} illustrates pulses of pulsar J0034-0534 at fou
 \end{minipage}
 \end{figure}
 
-Dedispersion is performed in the frequency domain, effectively by doing a 4096-point Fourier transform (FFT) that splits a 12~kHz channel into 3~Hz subchannels. The phases of the observed samples are corrected by applying a chirp function, i.e., by multiplication with precomputed, channel-dependent, complex weights. These multiplications are programmed in assembly, to reduce the computational costs. A backward FFT is done to revert to 12~kHz channels.
+Dedispersion is performed in the frequency domain, effectively by doing a 4096-point FFT that splits a 12~kHz channel into 3~Hz subchannels. The phases of the observed samples are corrected by applying a chirp function, i.e., by multiplication with precomputed, channel-dependent, complex weights. These multiplications are programmed in assembly, to reduce the computational costs. A backward FFT is done to revert to 12~kHz channels.
 
 Figure~\ref{fig:dedispersion-result} shows the observed effectiveness of channel-level dedispersion, which improves the effective time resolution from 0.51~ms to 0.082~ms, revealing a more detailed pulse and a better signal-to-noise ratio. Dedispersion thus contributes significantly to the data quality, but it also comes at a significant computational cost due to the two FFTs it requires. It demonstrates the power of using a \emph{software\/} telescope: the pipeline component was implemented, verified, and optimised in only one month time.
 
@@ -270,7 +273,7 @@ We will focus our performance analysis on edge cases that are of astronomical in
 \subsection{Overall Performance}
 
 % TODO: getallen kloppen niet.. 13 beams is 80.6 Gb/s, en met 70 Gb/s zouden we 11 beams aan moeten kunnen
-Figure \ref{fig:stations-beams} shows the maximum number of beams that can be created when using a various number of stations, in each of the three modes: Complex Voltages, Stokes IQUV, and Stokes I. In both the Complex Voltages and the Stokes IQUV modes, the pipeline is I/O bound. Each beam is 6.2~Gb/s wide. We can make at most 12 beams without exceeding the available 80~Gb/s to our storage cluster. The available bandwidth decreases down to 70~Gb/s due to the fact that an I/O node can only output 1.1~Gb/s if it also has to process station data. The granularity with which the output can be distributed over the I/O nodes, as well as scheduling details, determine the actual number of beams that can be created, but in all cases, the beam former can create at least 10 beams at LOFAR's full observational bandwidth.
+Figure \ref{fig:stations-beams} shows the maximum number of beams that can be created when using a varying number of stations, in each of the three modes: XY polarisations, Stokes IQUV, and Stokes I. In both the XY polarisations and the Stokes IQUV modes, the pipeline is I/O bound. Each beam is 6.2~Gb/s wide. We can make at most 12 beams without exceeding the available 80~Gb/s to our storage cluster. The available bandwidth decreases down to 70~Gb/s due to the fact that an I/O node can only output 1.1~Gb/s if it also has to process station data. The granularity with which the output can be distributed over the I/O nodes, as well as scheduling details, determine the actual number of beams that can be created, but in all cases, the beam former can create at least 10 beams at LOFAR's full observational bandwidth.
 
 In the Stokes I mode, we applied several integration factors (1, 2, 4, 8, and 12) in order to show the trade-off between beam quality and the number of beams. Integration factors higher than 12 does not allow significantly more beams to be created, but could be used in order to further reduce the total output rate. For low integration factors, the beam former is again limited by the available output bandwidth. Once the Stokes I streams are integrated sufficiently, the system becomes bounded by the compute nodes: if only signals from a few stations have to be combined, the beam former is limited by the amount of available memory required to store the beams. If more input has to be combined, the beam former becomes limited by the CPU power available in the compute cores. For observations for which a high integration factor is acceptable, the beam former is able to create 155 up to 543 tied-array beams, depending on the number of stations used. For observations which need a high time resolution and thus a low integration factor, the beam former is still able to create at least 42 tied-array beams.
 
diff --git a/doc/papers/2011/europar/processing.fig b/doc/papers/2011/europar/processing.fig
new file mode 100644
index 0000000000000000000000000000000000000000..6ca855089727fbfeb07193d46e26170aec176cb0
--- /dev/null
+++ b/doc/papers/2011/europar/processing.fig
@@ -0,0 +1,309 @@
+#FIG 3.2  Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter  
+150.00
+Single
+-2
+1200 2
+0 32 #dedfff
+0 33 #ffffde
+0 34 #deffde
+0 35 #ffe0ff
+0 36 #d0a000
+0 37 #606060
+0 38 #e0e0e0
+6 8460 9360 10590 9840
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 10575 9825 8475 9825 8475 9375 10575 9375 10575 9825
+4 1 25 40 -1 18 16 0.0000 4 210 1875 9525 9675 circular buffer\001
+-6
+6 8835 12885 10215 13365
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 10200 13350 8850 13350 8850 12900 10200 12900 10200 13350
+4 1 25 40 -1 18 16 0.0000 4 210 1140 9525 13200 FIR filter\001
+-6
+6 11760 9435 14340 9915
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 14325 9900 11775 9900 11775 9450 14325 9450 14325 9900
+4 1 25 40 -1 18 16 0.0000 4 270 2355 13050 9750 best-effort queue\001
+-6
+6 8745 17325 10275 17805
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 10260 17790 8760 17790 8760 17340 10260 17340 10260 17790
+4 1 25 40 -1 18 16 0.0000 4 270 1305 9510 17640 bandpass\001
+-6
+6 11100 17325 13380 17805
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 13365 17790 11115 17790 11115 17340 13365 17340 13365 17790
+4 1 25 40 -1 18 16 0.0000 4 270 2115 12240 17640 superstation BF\001
+-6
+6 14400 17250 16500 17850
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 16425 17775 14475 17775 14475 17325 16425 17325 16425 17775
+4 1 13 40 -1 18 16 0.0000 4 270 1710 15450 17625 tied-array BF\001
+-6
+6 18285 12435 20115 12915
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 20100 12900 18300 12900 18300 12450 20100 12450 20100 12900
+4 1 13 40 -1 18 16 0.0000 4 255 1560 19200 12750 transpose 2\001
+-6
+6 15135 14460 17565 14940
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 17550 14925 15150 14925 15150 14475 17550 14475 17550 14925
+4 1 13 40 -1 18 16 0.0000 4 225 2340 16350 14775 coh. Stokes IQUV\001
+-6
+6 15135 13785 17565 14265
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 17550 14250 15150 14250 15150 13800 17550 13800 17550 14250
+4 1 13 40 -1 18 16 0.0000 4 210 1710 16350 14100 coh. Stokes I\001
+-6
+6 15135 13110 17565 13590
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 17550 13575 15150 13575 15150 13125 17550 13125 17550 13575
+4 1 13 40 -1 18 16 0.0000 4 225 2235 16350 13425 inc. Stokes IQUV\001
+-6
+6 18510 9450 19890 9915
+2 4 0 3 21 35 45 -1 20 0.000 0 0 7 0 0 5
+	 19875 9900 18525 9900 18525 9465 19875 9465 19875 9900
+4 1 21 40 -1 18 16 0.0000 4 270 900 19200 9765 trigger\001
+-6
+6 18510 10275 19890 10740
+2 4 0 3 21 35 45 -1 20 0.000 0 0 7 0 0 5
+	 19875 10725 18525 10725 18525 10290 19875 10290 19875 10725
+4 1 21 40 -1 18 16 0.0000 4 210 975 19200 10590 inv. FIR\001
+-6
+6 18510 11100 19890 11565
+2 4 0 3 21 35 45 -1 20 0.000 0 0 7 0 0 5
+	 19875 11550 18525 11550 18525 11115 19875 11115 19875 11550
+4 1 21 40 -1 18 16 0.0000 4 210 1050 19200 11415 inv. FFT\001
+-6
+6 18585 13785 19965 14265
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 19950 14250 18600 14250 18600 13800 19950 13800 19950 14250
+4 1 13 40 -1 18 16 0.0000 4 270 1185 19275 14100 integrate\001
+-6
+6 15075 12375 17625 12975
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 17550 12900 15150 12900 15150 12450 17550 12450 17550 12900
+4 1 13 40 -1 18 16 0.0000 4 210 1605 16350 12750 inc. Stokes I\001
+-6
+6 17235 16485 18615 16965
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 18600 16950 17250 16950 17250 16500 18600 16500 18600 16950
+4 1 13 40 -1 18 16 0.0000 4 270 675 17925 16800 chirp\001
+-6
+6 17175 15600 18675 16200
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 18600 16125 17250 16125 17250 15675 18600 15675 18600 16125
+4 1 13 40 -1 18 16 0.0000 4 210 1050 17925 15975 inv. FFT\001
+-6
+6 17235 17310 18615 17790
+2 4 0 3 13 34 45 -1 20 0.000 0 0 7 0 0 5
+	 18600 17775 17250 17775 17250 17325 18600 17325 18600 17775
+4 1 13 40 -1 18 16 0.0000 4 210 525 17925 17625 FFT\001
+-6
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 8775 9525 9375
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 8
+	 9525 11025 9600 10950 9750 10950 9825 10875 9825 10125 9750 10050
+	 9600 10050 9525 9975
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 9825 9525 10350
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 10800 9525 11925
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 12375 9525 12900
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 13350 9525 13725
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 10500 10800 8550 10800 8550 10350 10500 10350 10500 10800
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 9975 14175 9075 14175 9075 13725 9975 13725 9975 14175
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 14175 9525 14925
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 8
+	 9525 14400 9600 14325 9750 14325 9825 14250 9825 12675 9750 12600
+	 9600 12600 9525 12525
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 15375 9525 16125
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 8
+	 9525 15600 9600 15525 9750 15525 9825 15450 9825 14700 9750 14625
+	 9600 14625 9525 14550
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 8
+	 9525 16800 9600 16725 9750 16725 9825 16650 9825 15900 9750 15825
+	 9600 15825 9525 15750
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 9525 16575 9525 17325
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 12225 11925 12225 10875
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 8
+	 12225 11250 12300 11175 12450 11175 12525 11100 12525 10350 12450 10275
+	 12300 10275 12225 10200
+2 4 0 3 4 30 45 -1 20 0.000 0 0 7 0 0 5
+	 12975 10890 11475 10890 11475 10425 12975 10425 12975 10890
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 12225 10425 12225 9900
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 14325 9675 15600 9675
+2 4 0 3 13 34 70 -1 20 0.000 0 0 7 0 0 5
+	 15075 11325 8175 11325 8175 9150 15075 9150 15075 11325
+2 4 0 3 4 30 70 -1 20 0.000 0 0 7 0 0 5
+	 17550 11325 15225 11325 15225 9150 17550 9150 17550 11325
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 17100 9900 15600 9900 15600 9450 17100 9450 17100 9900
+2 3 2 3 4 30 65 -1 -1 1.000 0 0 -1 0 0 5
+	 11325 13425 13425 13425 13425 10200 11325 10200 11325 13425
+2 4 0 3 4 30 45 -1 20 0.000 0 0 7 0 0 5
+	 12975 13215 11475 13215 11475 12750 12975 12750 12975 13215
+2 4 0 3 4 30 45 -1 20 0.000 0 0 7 0 0 5
+	 12975 12390 11475 12390 11475 11925 12975 11925 12975 12390
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 12225 12750 12225 12375
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 10425 16575 8625 16575 8625 16125 10425 16125 10425 16575
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 10650 15375 8400 15375 8400 14925 10650 14925 10650 15375
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 10275 17550 11100 17550
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 6
+	 10500 17550 10425 17475 10425 17100 10350 17025 9600 17025 9525 16950
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 5
+	2 1 2.00 150.00 120.00
+	 19200 12150 19125 12075 13800 12075 13725 12000 13725 9900
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 7
+	2 1 2.00 150.00 120.00
+	 13725 17550 13800 17475 13800 17100 13725 17025 12300 17025 12225 16950
+	 12225 13200
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 13350 17550 14475 17550
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 8
+	 13575 17550 13500 17475 13500 17250 13425 17175 10800 17175 10725 17250
+	 10725 17475 10650 17550
+2 4 0 3 25 33 45 -1 20 0.000 0 0 7 0 0 5
+	 10425 12375 8625 12375 8625 11940 10425 11940 10425 12375
+2 3 2 4 13 35 65 -1 -1 1.000 0 0 -1 0 0 5
+	 13950 17925 20325 17925 20325 12300 13950 12300 13950 17925
+2 1 0 3 1 32 67 -1 -1 0.000 0 0 -1 0 0 2
+	 17550 11475 17700 11475
+2 1 0 3 1 32 67 -1 -1 0.000 0 0 -1 0 0 3
+	 17550 11475 17700 11475 17700 11250
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 7 0 0 8
+	 16650 17550 16575 17475 16575 17250 16500 17175 14250 17175 14175 17250
+	 14175 17475 14100 17550
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 16425 17550 17250 17550
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 -1 0 0 4
+	 16800 15900 16875 15975 16875 17475 16800 17550
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 19200 12450 19200 11550
+2 2 2 4 21 35 65 -1 -1 1.000 0 0 -1 0 0 5
+	 17925 9300 20325 9300 20325 11850 17925 11850 17925 9300
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 3
+	2 1 2.00 150.00 120.00
+	 14775 14775 14850 14700 15150 14700
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 19200 9450 19200 8700
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 19200 10275 19200 9900
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 19200 11100 19200 10725
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 -1 0 0 5
+	 17550 12675 17925 12675 18000 12750 18000 13950 18075 14025
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 -1 0 0 3
+	 17550 13350 17925 13350 18000 13425
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 17550 14025 18600 14025
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 -1 0 0 6
+	 18225 14025 18300 13950 18300 13725 18375 13650 19125 13650 19200 13575
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 19200 13800 19200 12900
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 -1 0 0 3
+	 17550 14700 17925 14700 18000 14625
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 3
+	2 1 2.00 150.00 120.00
+	 14775 13425 14850 13350 15150 13350
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 4
+	2 1 2.00 150.00 120.00
+	 14775 14250 14775 12750 14850 12675 15150 12675
+2 1 0 3 1 32 67 -1 -1 0.000 0 0 -1 0 0 2
+	 20475 11250 20475 12375
+2 1 0 3 1 7 60 -1 -1 0.000 0 0 -1 0 0 6
+	 14775 15225 14850 15150 17925 15150 18000 15075 18000 14100 18075 14025
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 14925 15150 15150 15150
+2 4 0 3 1 32 69 -1 20 0.000 0 0 7 0 0 5
+	 20475 12150 17700 12150 17700 9150 20475 9150 20475 12150
+2 4 0 3 1 32 70 -1 20 0.000 0 0 7 0 0 5
+	 20475 18075 8175 18075 8175 11475 20475 11475 20475 18075
+2 2 0 0 0 32 68 -1 20 0.000 0 0 -1 0 0 5
+	 17625 11475 20475 11475 20475 12225 17625 12225 17625 11475
+2 1 0 3 1 33 40 -1 -1 0.000 0 0 -1 1 0 6
+	2 1 2.00 150.00 120.00
+	 16800 15900 14850 15900 14775 15825 14775 14100 14850 14025 15150 14025
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 17250 15900 15525 15900
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 17925 17325 17925 16950
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 17925 16500 17925 16125
+2 1 0 3 1 33 60 -1 -1 0.000 0 0 -1 1 0 2
+	2 1 2.00 150.00 120.00
+	 16875 17025 16875 16650
+3 0 0 3 25 7 50 -1 -1 0.000 0 0 0 9
+	 8775 13125 8700 13125 8625 13200 8700 13500 8475 13575 8700 13650
+	 8625 13950 8700 14025 8775 14025
+	 0.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+	 0.000
+3 0 0 3 25 7 50 -1 -1 0.000 0 0 0 9
+	 18750 15900 18825 15900 18900 16050 18825 16650 19125 16725 18825 16800
+	 18900 17325 18825 17550 18750 17550
+	 0.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
+	 0.000
+4 1 25 40 -1 18 16 0.0000 4 210 525 9525 14025 FFT\001
+4 1 4 40 -1 18 16 0.0000 4 270 1185 12225 10725 integrate\001
+4 1 25 40 -1 18 16 1.5708 4 210 1260 8475 13575 PPF bank\001
+4 1 4 60 -1 19 16 0.0000 4 210 630 16350 11175 node\001
+4 1 13 60 -1 19 16 0.0000 4 210 1080 14400 11175 I/O node\001
+4 1 13 60 -1 19 16 0.0000 4 210 660 14400 10875 BG/P\001
+4 1 25 40 -1 18 16 0.0000 4 210 1260 16350 9750 disk write\001
+4 1 25 40 -1 18 16 0.0000 4 270 1725 9510 10650 sample delay\001
+4 1 4 60 -1 19 16 0.0000 4 255 975 16350 10875 storage\001
+4 1 4 40 -1 18 16 0.0000 4 210 1185 12225 13050 correlate\001
+4 1 4 40 -1 18 16 0.0000 4 270 1185 12210 12240 integrate\001
+4 1 25 40 -1 18 16 0.0000 4 270 1590 9525 16425 phase delay\001
+4 1 25 40 -1 18 16 0.0000 4 210 2145 9525 15225 clock correction\001
+4 1 25 40 -1 18 17 0.0000 4 270 1650 9527 12220 transpose 1\001
+4 2 1 60 -1 19 16 0.0000 4 210 1560 9450 8925 from station\001
+4 1 4 60 -1 19 16 1.5708 4 270 2085 13275 11550 imaging     mode\001
+4 1 25 40 -1 18 16 1.5708 4 270 1770 19350 16725 dedispersion\001
+4 1 1 60 -1 19 16 0.0000 4 270 2550 16425 11775 BG/P compute node\001
+4 1 21 60 -1 19 16 1.5708 4 210 1515 18300 10575 UHEP mode\001
+4 0 13 60 -1 19 16 1.5708 4 270 2730 20175 17775 beam-forming modes\001
+4 0 1 60 -1 19 16 0.0000 4 210 885 19350 8925 to TBB\001
diff --git a/doc/papers/2011/europar/stations-beams.jgr b/doc/papers/2011/europar/stations-beams.jgr
index 5aa178f28ee52d5b9d4ecf5d382a55bec61f6964..ae41dd9a56de291b6df68c9f22534d807e69dd64 100644
--- a/doc/papers/2011/europar/stations-beams.jgr
+++ b/doc/papers/2011/europar/stations-beams.jgr
@@ -1,6 +1,7 @@
 newgraph
   xaxis
     label : number of stations
+    mhash 5
 
     min 0
     max 64
@@ -71,7 +72,7 @@ legend
   x 38 y 20
   linelength 5
 
-newstring : Complex Voltages / Stokes IQUV
+newstring : XY polarisations / Stokes IQUV
   x 2 y 2.5
   hjl vjc