diff --git a/.gitattributes b/.gitattributes
index 9c0ccc6789ef34299aa3f3410b496789260faa9b..a996021327876b7ba710a916d84894b7b665b857 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2596,6 +2596,7 @@ doc/papers/2011/europar/Makefile -text
 doc/papers/2011/europar/coherent-dedispersion.jgr -text
 doc/papers/2011/europar/colinear.fig -text
 doc/papers/2011/europar/dataflow-1st-transpose.svg -text
+doc/papers/2011/europar/dataflow-2.svg -text
 doc/papers/2011/europar/dataflow-2nd-transpose.svg -text
 doc/papers/2011/europar/dataflow.svg -text
 doc/papers/2011/europar/delay.fig -text
diff --git a/doc/papers/2011/europar/Makefile b/doc/papers/2011/europar/Makefile
index 0e8f4d46c308d906cb1b9d41c9da1e4ac62db665..8a470c6b58bf72fbf8f8f5aef54337085d6e612a 100644
--- a/doc/papers/2011/europar/Makefile
+++ b/doc/papers/2011/europar/Makefile
@@ -14,7 +14,7 @@ PNG_SOURCES =
 
 STY_SOURCES =	
 
-SVG_SOURCES =	pencilbeams.svg LBAfield-delay.svg dataflow-1st-transpose.svg dataflow-2nd-transpose.svg
+SVG_SOURCES =	pencilbeams.svg LBAfield-delay.svg dataflow-2.svg
 
 AUX_FILES =	$(TEX_SOURCES:%.tex=%.aux)
 GEN_FIGURES =	$(FIG_SOURCES:%.fig=%.pdf) $(JGR_SOURCES:%.jgr=%.pdf) $(SVG_SOURCES:%.svg=%.pdf)
diff --git a/doc/papers/2011/europar/dataflow-2.svg b/doc/papers/2011/europar/dataflow-2.svg
new file mode 100644
index 0000000000000000000000000000000000000000..dfb12a3f862bcad4cd3bee7f324390852d43f7b3
--- /dev/null
+++ b/doc/papers/2011/europar/dataflow-2.svg
@@ -0,0 +1,856 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   sodipodi:docname="dataflow-2.svg"
+   inkscape:version="0.47pre4 r22446"
+   version="1.1"
+   id="svg3175"
+   height="256.00192"
+   width="592.65717">
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="1.9238383"
+     inkscape:cx="284.14261"
+     inkscape:cy="119.91868"
+     inkscape:document-units="px"
+     inkscape:current-layer="layer1"
+     showgrid="false"
+     inkscape:snap-midpoints="false"
+     showguides="true"
+     inkscape:guide-bbox="true"
+     inkscape:snap-to-guides="false"
+     inkscape:window-width="1048"
+     inkscape:window-height="1659"
+     inkscape:window-x="0"
+     inkscape:window-y="0"
+     inkscape:window-maximized="1">
+    <sodipodi:guide
+       id="guide5285"
+       position="734.02499,-131.01005"
+       orientation="1,0" />
+    <sodipodi:guide
+       id="guide5492"
+       position="106.31206,73.005875"
+       orientation="1,0" />
+    <sodipodi:guide
+       id="guide5494"
+       position="151.88826,84.767475"
+       orientation="1,0" />
+    <sodipodi:guide
+       id="guide5496"
+       position="0.075576,209.45516"
+       orientation="0,1" />
+    <sodipodi:guide
+       id="guide5498"
+       position="-4.602572,137.72356"
+       orientation="0,1" />
+    <sodipodi:guide
+       orientation="0,1"
+       position="122.30049,18.976029"
+       id="guide7234" />
+  </sodipodi:namedview>
+  <defs
+     id="defs3177">
+    <marker
+       style="overflow:visible"
+       id="EmptyTriangleOutM"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="EmptyTriangleOutM">
+      <path
+         transform="matrix(0.4,0,0,0.4,-1.8,0)"
+         style="fill:#ffffff;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
+         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
+         id="path4209" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Mend">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
+         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
+         id="path4054" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mend"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+         id="path4072" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Send"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Send">
+      <path
+         transform="matrix(-0.3,0,0,-0.3,0.69,0)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+         id="path4078" />
+    </marker>
+    <inkscape:perspective
+       id="perspective3183"
+       inkscape:persp3d-origin="372.04724 : 350.78739 : 1"
+       inkscape:vp_z="744.09448 : 526.18109 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_x="0 : 526.18109 : 1"
+       sodipodi:type="inkscape:persp3d" />
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective3195" />
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective3195-8" />
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective3254" />
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective5101" />
+    <marker
+       style="overflow:visible"
+       id="Arrow2Send-5"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Send">
+      <path
+         transform="matrix(-0.3,0,0,-0.3,0.69,0)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+         id="path4078-9" />
+    </marker>
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective5131" />
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective5295" />
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective5327" />
+    <inkscape:perspective
+       sodipodi:type="inkscape:persp3d"
+       inkscape:vp_x="0 : 0.5 : 1"
+       inkscape:vp_y="0 : 1000 : 0"
+       inkscape:vp_z="1 : 0.5 : 1"
+       inkscape:persp3d-origin="0.5 : 0.33333333 : 1"
+       id="perspective5355" />
+    <marker
+       style="overflow:visible"
+       id="Arrow1Mend9"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow1Mend9">
+      <path
+         transform="matrix(-0.4,0,0,-0.4,-4,0)"
+         style="fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:1pt;marker-start:none"
+         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
+         id="path7071" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mend7"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend7">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7853" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendh"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendh">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7856" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendc"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendc">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7859" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mend2"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend2">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7862" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendn"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendn">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7865" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mends"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mends">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7868" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendw"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendw">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7871" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mend21"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend21">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7874" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2MendJ"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2MendJ">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="font-size:12px;fill:#0000ff;fill-rule:evenodd;stroke:#0000ff;stroke-width:0.625;stroke-linejoin:round"
+         id="path7877" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2MendR"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2MendR">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5628" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendp"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendp">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5631" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mend3"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mend3">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5634" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendy"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendy">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5637" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2MendN"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2MendN">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5640" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2MendI"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2MendI">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5643" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendp5"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendp5">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5646" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2Mendc8"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2Mendc8">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5649" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow2MendM"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="Arrow2MendM">
+      <path
+         transform="scale(-0.6,-0.6)"
+         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+         style="stroke-linejoin:round;font-size:12px;fill-rule:evenodd;stroke:#585858;stroke-width:0.625;fill:#585858"
+         id="path5652" />
+    </marker>
+  </defs>
+  <metadata
+     id="metadata3180">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     transform="translate(-78.933143,-368.02794)"
+     id="layer1"
+     inkscape:groupmode="layer"
+     inkscape:label="Layer 1">
+    <g
+       id="g5390">
+      <rect
+         style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
+         id="rect3185"
+         width="74.347229"
+         height="32.324883"
+         x="171.37289"
+         y="398.36707"
+         ry="13.738075" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)"
+         d="m 80.933143,414.54872 c 82.331197,0 82.919277,0 82.919277,0"
+         id="path4040" />
+      <path
+         id="path5076"
+         d="m 248.5148,414.54872 c 82.3312,0 82.91928,0 82.91928,0"
+         style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)" />
+      <rect
+         ry="13.738075"
+         y="398.36707"
+         x="338.5387"
+         height="32.324883"
+         width="74.347229"
+         id="rect5091"
+         style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
+      <path
+         style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)"
+         d="m 415.68062,414.54872 c 82.3312,0 82.91927,0 82.91927,0"
+         id="path5121" />
+      <rect
+         style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
+         id="rect5145"
+         width="74.347229"
+         height="32.324883"
+         x="506.12036"
+         y="398.36707"
+         ry="13.738075" />
+      <path
+         id="path5147"
+         d="m 583.26226,414.54872 c 82.3312,0 82.91928,0 82.91928,0"
+         style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)" />
+      <text
+         xml:space="preserve"
+         style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+         x="261.95966"
+         y="430.54706"
+         id="text5279"
+         sodipodi:linespacing="125%"><tspan
+           sodipodi:role="line"
+           x="261.95966"
+           y="430.54706"
+           id="tspan5283"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya">all stations,</tspan><tspan
+           sodipodi:role="line"
+           x="261.95966"
+           y="445.54706"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya"
+           id="tspan5317">1 subband</tspan></text>
+      <text
+         xml:space="preserve"
+         style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+         x="90.45768"
+         y="430.54706"
+         id="text5279-8"
+         sodipodi:linespacing="125%"><tspan
+           sodipodi:role="line"
+           id="tspan5281-9"
+           x="90.45768"
+           y="430.54706"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya">1 station,</tspan><tspan
+           sodipodi:role="line"
+           x="90.45768"
+           y="445.54706"
+           id="tspan5283-7"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya">all subbands</tspan></text>
+      <text
+         xml:space="preserve"
+         style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+         x="422.79733"
+         y="430.54706"
+         id="text5279-0"
+         sodipodi:linespacing="125%"><tspan
+           sodipodi:role="line"
+           x="422.79733"
+           y="430.54706"
+           id="tspan5283-9"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya">all beams,</tspan><tspan
+           sodipodi:role="line"
+           x="422.79733"
+           y="445.54706"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya"
+           id="tspan5381">all pol/stokes,</tspan><tspan
+           sodipodi:role="line"
+           x="422.79733"
+           y="460.54706"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya"
+           id="tspan5317-2">1 subband</tspan></text>
+      <text
+         xml:space="preserve"
+         style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+         x="595.90021"
+         y="430.54706"
+         id="text5279-0-5"
+         sodipodi:linespacing="125%"><tspan
+           sodipodi:role="line"
+           x="595.90021"
+           y="430.54706"
+           id="tspan5283-9-4"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya">1 beam,</tspan><tspan
+           sodipodi:role="line"
+           x="595.90021"
+           y="445.54706"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya"
+           id="tspan5379">1 pol/stokes,</tspan><tspan
+           sodipodi:role="line"
+           x="595.90021"
+           y="460.54706"
+           style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya"
+           id="tspan5317-2-0">all subbands</tspan></text>
+    </g>
+    <text
+       id="text5375"
+       y="432.76749"
+       x="478.21066"
+       style="font-size:40px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+       xml:space="preserve"><tspan
+         y="432.76749"
+         x="478.21066"
+         id="tspan5377"
+         sodipodi:role="line" /></text>
+    <text
+       id="text5413"
+       y="117.77221"
+       x="152.8195"
+       style="font-size:40px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+       xml:space="preserve"><tspan
+         y="117.77221"
+         x="152.8195"
+         id="tspan5415"
+         sodipodi:role="line" /></text>
+    <rect
+       style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
+       id="rect5233"
+       width="74.347229"
+       height="32.324883"
+       x="171.37289"
+       y="470.11282"
+       ry="13.738075" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)"
+       d="m 80.933143,486.29448 c 82.331197,0 82.919277,0 82.919277,0"
+       id="path5235" />
+    <path
+       id="path5237"
+       d="m 248.5148,486.29448 c 82.3312,0 82.91928,0 82.91928,0"
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)" />
+    <rect
+       ry="13.738075"
+       y="470.11282"
+       x="338.5387"
+       height="32.324883"
+       width="74.347229"
+       id="rect5239"
+       style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)"
+       d="m 415.68062,486.29448 c 82.3312,0 82.91927,0 82.91927,0"
+       id="path5241" />
+    <rect
+       style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
+       id="rect5243"
+       width="74.347229"
+       height="32.324883"
+       x="506.12036"
+       y="470.11282"
+       ry="13.738075" />
+    <path
+       id="path5245"
+       d="m 583.26226,486.29448 c 82.3312,0 82.91928,0 82.91928,0"
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)" />
+    <rect
+       style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
+       id="rect5265"
+       width="74.347229"
+       height="32.324883"
+       x="171.37289"
+       y="588.90497"
+       ry="13.738075" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)"
+       d="m 80.933143,605.08664 c 82.331197,0 82.919277,0 82.919277,0"
+       id="path5267" />
+    <path
+       id="path5269"
+       d="m 248.5148,605.08664 c 82.3312,0 82.91928,0 82.91928,0"
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)" />
+    <rect
+       ry="13.738075"
+       y="588.90497"
+       x="338.5387"
+       height="32.324883"
+       width="74.347229"
+       id="rect5271"
+       style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
+    <path
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)"
+       d="m 415.68062,605.08664 c 82.3312,0 82.91927,0 82.91927,0"
+       id="path5273" />
+    <rect
+       style="color:#000000;fill:#ff0000;fill-opacity:1;stroke:#ff6565;stroke-width:5.5999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
+       id="rect5275"
+       width="74.347229"
+       height="32.324883"
+       x="506.12036"
+       y="588.90497"
+       ry="13.738075" />
+    <path
+       id="path5277"
+       d="m 583.26226,605.08664 c 82.3312,0 82.91928,0 82.91928,0"
+       style="fill:none;stroke:#000000;stroke-width:4;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Send)" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5510"
+       d="m 186.53163,605.52295 42.6358,-114.80606"
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2MendR);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none" />
+    <text
+       sodipodi:linespacing="125%"
+       id="text5514"
+       y="550.24097"
+       x="230.8214"
+       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+       xml:space="preserve"><tspan
+         style="font-size:20px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya"
+         y="550.24097"
+         x="230.8214"
+         id="tspan5516"
+         sodipodi:role="line">x64</tspan></text>
+    <path
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2Mendy);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none"
+       d="m 186.34785,487.53502 33.64705,109.1775"
+       id="path7230"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path7232"
+       d="m 186.34786,415.86277 c 0,0 41.90069,66.28946 41.90069,66.28946"
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2Mend3);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none" />
+    <path
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2Mendp);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none"
+       d="m 186.34786,484.74312 c 0,0 34.62357,-62.6509 34.62357,-62.6509"
+       id="path7236"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2MendM);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none"
+       d="m 186.53163,605.52295 43.111,-185.6225"
+       id="path7238"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path7240"
+       d="m 186.34785,415.94808 43.3709,185.08424"
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2Mendc8);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none" />
+    <path
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2Mendp5);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none"
+       d="m 186.16408,414.94389 c 0,0 35.64469,-0.23709 35.64469,-0.23709"
+       id="path7242"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path7244"
+       d="m 186.16408,486.61614 c 0,0 40.06294,-0.23709 40.06294,-0.23709"
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2MendI);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none" />
+    <path
+       style="stroke-linejoin:miter;marker-end:url(#Arrow2MendN);stroke-opacity:1;stroke-dashoffset:0;stroke:#585858;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:2.50000000000000000, 5;stroke-width:2.50000000000000000;fill:none"
+       d="m 186.16408,605.33479 c 0,0 35.1249,-0.23709 35.1249,-0.23709"
+       id="path7246"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+       x="399.1593"
+       y="550.24097"
+       id="text8557"
+       sodipodi:linespacing="125%"><tspan
+         sodipodi:role="line"
+         id="tspan8559"
+         x="399.1593"
+         y="550.24097"
+         style="font-size:20px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya">x64</tspan></text>
+    <text
+       sodipodi:linespacing="125%"
+       id="text8561"
+       y="550.24097"
+       x="567.49719"
+       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
+       xml:space="preserve"><tspan
+         style="font-size:20px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:AlArabiya;-inkscape-font-specification:AlArabiya"
+         y="550.24097"
+         x="567.49719"
+         id="tspan8563"
+         sodipodi:role="line">x64</tspan></text>
+    <text
+       sodipodi:linespacing="125%"
+       id="text8565"
+       y="376.81461"
+       x="170.23744"
+       style="font-size:12px;font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:AlArabiya;-inkscape-font-specification:AlArabiya Italic"
+       xml:space="preserve"><tspan
+         y="376.81461"
+         x="170.23744"
+         id="tspan8567"
+         sodipodi:role="line">first transpose</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:12px;font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:AlArabiya;-inkscape-font-specification:AlArabiya Italic"
+       x="338.54105"
+       y="376.81461"
+       id="text8590"
+       sodipodi:linespacing="125%"><tspan
+         sodipodi:role="line"
+         id="tspan8592"
+         x="338.54105"
+         y="376.81461">beam forming</tspan></text>
+    <text
+       sodipodi:linespacing="125%"
+       id="text8594"
+       y="376.81461"
+       x="495.55865"
+       style="font-size:12px;font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:AlArabiya;-inkscape-font-specification:AlArabiya Italic"
+       xml:space="preserve"><tspan
+         y="376.81461"
+         x="495.55865"
+         id="tspan8596"
+         sodipodi:role="line">second transpose</tspan></text>
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5417"
+       d="m 208.54651,513.48784 c 0,61.33572 0,66.11387 0,66.11387"
+       style="fill:none;stroke:#ff0000;stroke-width:5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5, 15;stroke-dashoffset:0" />
+    <path
+       style="fill:none;stroke:#ff0000;stroke-width:5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5, 15;stroke-dashoffset:0"
+       d="m 375.71231,513.48784 c 0,61.33572 0,66.11387 0,66.11387"
+       id="path5433"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5435"
+       d="m 543.29398,513.48784 c 0,61.33572 0,66.11387 0,66.11387"
+       style="fill:none;stroke:#ff0000;stroke-width:5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5, 15;stroke-dashoffset:0" />
+    <path
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2MendR)"
+       d="M 520.7593,605.52295 563.3951,490.71689"
+       id="path5868"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5870"
+       d="m 520.57552,487.53502 33.64705,109.1775"
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2Mendy)" />
+    <path
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2Mend3)"
+       d="m 520.57553,415.86277 c 0,0 41.90069,66.28946 41.90069,66.28946"
+       id="path5872"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5874"
+       d="m 520.57553,484.74312 c 0,0 34.62357,-62.6509 34.62357,-62.6509"
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2Mendp)" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5876"
+       d="m 520.7593,605.52295 43.111,-185.6225"
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2MendM)" />
+    <path
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2Mendc8)"
+       d="m 520.57552,415.94808 43.3709,185.08424"
+       id="path5878"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5880"
+       d="m 520.39175,414.94389 c 0,0 35.64469,-0.23709 35.64469,-0.23709"
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2Mendp5)" />
+    <path
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2MendI)"
+       d="m 520.39175,486.61614 c 0,0 40.06294,-0.23709 40.06294,-0.23709"
+       id="path5882"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       id="path5884"
+       d="m 520.39175,605.33479 c 0,0 35.1249,-0.23709 35.1249,-0.23709"
+       style="fill:none;stroke:#585858;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.5, 5;stroke-dashoffset:0;marker-end:url(#Arrow2MendN)" />
+  </g>
+</svg>
diff --git a/doc/papers/2011/europar/lofar.pdf b/doc/papers/2011/europar/lofar.pdf
index 1e8c27dca597a67076596d19999c6fda43beed47..003e1fd772901b6ec0de75bbb27e3667db5d80d8 100644
Binary files a/doc/papers/2011/europar/lofar.pdf and b/doc/papers/2011/europar/lofar.pdf differ
diff --git a/doc/papers/2011/europar/lofar.tex b/doc/papers/2011/europar/lofar.tex
index ddd119c3b14d8a0ae4160011803f4a8d95cb7366..952803230032ad3886628f5997908ad2606645b1 100644
--- a/doc/papers/2011/europar/lofar.tex
+++ b/doc/papers/2011/europar/lofar.tex
@@ -1,8 +1,11 @@
 \documentclass{llncs}
-\usepackage{graphicx, subfigure, amsmath, xspace, txfonts}
+\usepackage{graphicx, subfigure, amsmath, xspace, txfonts, algorithm}
+\usepackage[noend]{algorithmic}
 \usepackage[usenames]{color}
 \usepackage{mathptmx}
 
+\algsetup{indent=2em}
+
 \definecolor{Gold}{rgb}{1,0.84,0}
 \newcommand{\circlenumber}[1]{%
   \begin{picture}(10,10)%
@@ -69,14 +72,35 @@ Another novelty is the elaborate use of software to process the telescope data i
 
 For processing LOFAR data, we use an IBM BlueGene/P (BG/P) supercomputer. The LOFAR antennas are grouped into stations, and each station sends its data (up to 198 Gb/s for all stations) to the BG/P. Inside the BG/P, the data are split and combined using both real-time signal-processing routines as well as two all-to-all exchanges. The output data streams are sufficiently reduced in size in order to be able to stream them out of the BG/P and store them on disks in our storage cluster.
 
-\begin{figure}[ht]
-\subfigure[The left antenna receives the wave later.]{
-  \makebox[35mm][c]{
-     \includegraphics[width=0.27\textwidth]{LBAfield-delay.pdf}
-     \label{fig:delay}
-  }
-}
-\hfill
+In this paper, we will present the LOFAR \emph{beam former}: a collection of software pipelines that allow the LOFAR telescope to be pointed at hundreds of sources simultaneously. A \emph{beam} is a 1D stream of data representing the signal from a certain area in the sky; a beam former thus differs from a correlator, which creates 2D snapshot images of the sky. Put simply, a beam former performs a weighted addition of the input signals, while a correlator multiplies the input signals.
+
+It is LOFAR's unique design that allows us to point at many sources at once. Traditional telescopes use dishes which provide a narrow field-of-view, and thus are only sensitive to the source they are pointed at. LOFAR's antennas are omnidirectional. Groups of antennas (\emph{stations}) are sensitive to a wide field-of-view around the source. These \emph{station beams} are sent to the BG/P. The BG/P generates linear combinations of the station input data resulting in \emph{tied-array beams}, each of which represents a shift in pointing within the wide field-of-view of the stations.
+
+The primary scientific use case driving the work presented in this paper is pulsar research. A pulsar is a rapidly rotating, highly magnetised neutron star, which emits electromagnetic radiation from its poles. Similar to the behaviour of a lighthouse, the radiation is visible to us only if one of the poles points towards the Earth, and subsequently appears to us as a very regular series of pulses, with a period as low as 1.4~ms~\cite{Hessels:06}. Pulsars are weak radio sources, and their individual pulses often do not rise above the background noise that fills our universe. Our beam former can track several pulsars at LOFAR's full observational bandwidth, producing either complex voltages or Stokes IQUV data. Alternatively, the beam former is capable of efficiently performing sky surveys to discover new pulsars (or other radio sources) by covering the sky with hundreds of tied-array beams using our Stokes I pipeline. % Author note (translated from Dutch): this paragraph is not strictly necessary, but interesting.
+
+The main contributions of this paper are threefold. First, we demonstrate the power of a \emph{software\/} telescope; its flexibility allows us to add new functionality with modest effort. Second, we show how the use of supercomputer technology enables new science in astronomy and particle physics. Third, we elaborately analyse the performance of our application and the effectiveness of our optimisations. 
+
+In this paper, we will show how a software solution and the use of a massively parallel machine allow us to achieve these feats. We provide an in-depth study of all performance aspects, real-time behaviour, and scaling characteristics. This paper is organised as follows. First, we will describe the key characteristics of the IBM BlueGene/P supercomputer in Section \ref{Sec:bluegene}. Section \ref{Sec:pipelines} describes the implementation of our pipelines, followed by the performance analysis in Section \ref{Sec:performance}. We briefly discuss related work in Section \ref{Sec:related-work}, and conclude in Section \ref{Sec:conclusions}.
+
+\section{IBM BlueGene/P}
+\label{Sec:bluegene}
+
+We use an IBM BlueGene/P (BG/P) supercomputer for the real-time processing of station data. We will describe the key features of the BG/P, but more information can be found elsewhere~\cite{IBM:08}. Furthermore, we will describe how our BG/P is connected to its input and output systems.
+
+\subsection{System Description}
+
+Our system consists of 3 racks, with 12,480 processor cores that provide 42.4 TFLOPS peak processing power. One chip contains four PowerPC~450 cores, running at a modest 850~MHz clock speed to reduce power consumption and to increase package density. Each core has two floating-point units (FPUs) that provide support for operations on complex numbers. The chips are organised in \emph{psets}, each of which consists of 64 cores for computation (\emph{compute cores}) and one chip for communication (\emph{I/O node}). Each compute core runs a fast, simple, single-process kernel,  and has access to 512 MiB of memory. The I/O nodes consist of the same hardware as the compute nodes, but additionally have a 10~Gb/s Ethernet interface connected. They run Linux, which allows the I/O nodes to do full multitasking. One rack contains 64 psets, which is equal to 4096 compute cores and 64 I/O nodes.
+
+The BG/P contains several networks. A fast \emph{3-dimensional torus\/} connects all compute nodes and is used for point-to-point and all-to-all communications over 3.4~Gb/s links. The torus uses DMA to offload the CPUs and allows asynchronous communication. The \emph{collective network\/} is used for communication within a pset between an I/O node and the compute nodes, using 6.8~Gb/s links. In both networks, data is routed through compute nodes using a shortest path. Additional networks exist for fast barriers, initialisation, diagnostics, and debugging.
+
+\subsection{External I/O}
+\label{Sec:Networks}
+
+We customised the I/O node software stack~\cite{Yoshii:10} and run a multi-threaded program on each I/O~node which is responsible for the handling of both the input and the output. Even though the I/O nodes each have a 10~Gb/s Ethernet interface, they do not have enough computation power to handle 10~Gb/s of data. The overhead of handling IRQs, IP, and UDP/TCP puts a high load on the 850~MHz cores of the I/O nodes, limiting performance. An I/O node can output at most 3.1~Gb/s, unless it has to handle station input (3.1~Gb/s per station), in which case it can output at most 1.1~Gb/s. We implemented a low-overhead communication protocol called FCNP~\cite{Romein:09a} to efficiently transport data to and from the compute nodes, which perform the required signal processing. The I/O nodes forward the results to our storage cluster, which can sustain a throughput up to 80~Gb/s.
+
+\section{LOFAR and Beam Forming}
+
+\begin{figure}[t]
 \subfigure[Locations of the stations.]{
   \makebox[35mm][c]{
      \includegraphics[width=0.35\textwidth]{lofar-stations.pdf}
@@ -84,15 +108,13 @@ For processing LOFAR data, we use an IBM BlueGene/P (BG/P) supercomputer. The LO
   }
 }
 \hfill
-\comment{
 \subfigure[The left antenna receives the wave later.]{
-  \makebox[28mm][c]{
-    \includegraphics[width=0.20\textwidth]{delay.pdf}
-    \label{fig:delay}
+  \makebox[35mm][c]{
+     \includegraphics[width=0.27\textwidth]{LBAfield-delay.pdf}
+     \label{fig:delay}
   }
 }
 \hfill
-}
 \subfigure[Tied-array beams (hexagons) formed within two station beams (ellipse).]{
   \makebox[40mm][c]{
     \includegraphics[width=0.3\textwidth]{pencilbeams.pdf}
@@ -102,61 +124,16 @@ For processing LOFAR data, we use an IBM BlueGene/P (BG/P) supercomputer. The LO
 \caption{LOFAR antennas}
 \end{figure}
 
-In this paper, we will present the LOFAR \emph{beam former}: a collection of software pipelines that allow the LOFAR telescope to be aimed at hundreds of directions simultaneously. This feat is made possible by LOFAR's unique design. % and the resources offered by the BG/P. The
-The LOFAR antennas are omnidirectional, which provides a wide field-of-view when groups of antennas (\emph{stations}) are pointed at a source. Traditional radio telescopes have to pinpoint their dishes, resulting in a narrow field-of-view. Our beam former calculates linear combinations of the station data streams to form many \emph{tied-array beams} within the wide field-of-view of the stations.
-
-Traditional radio telescopes are pointed by rotating their dishes towards a source, resulting in a sharp but narrow field-of-view. The LOFAR antennas are omnidirectional, and are pointed through signal-processing techniques. The antenna signals are combined in groups called \emph{stations}, which provide wide fields-of-view. The LOFAR beam former produces many linear combinations of the station data streams to point at different sources within the wide fields-of-view provided by the stations. The linear combinations, called \emph{tied-array beams}, can be output as-is (\emph{XY polarisations}) or converted to Stokes parameters, which represent the polarisation aspects of the signal. The Stokes parameters can be integrated temporally to reduce the bandwidth per beam, which allows a great increase the number of beams that can be formed. % Finally, we can form an \emph{incoherent} tied-array beam, which retains the wide field-of-view of the stations by not pointing it at a specific source.
-Finally, our software can produce the Stokes parameters of an \emph{incoherent} beam, which is an accumulation of unweighted station signals. The incoherent beam is less sensitive than a tied-array beam, but it maintains the wide field-of-view of the stations. The incoherent beam is used to detect the presence of sources, but does not reveal their location within the station beams.
-
-%In this paper, we will present the LOFAR \emph{beam former}: a collection of software pipelines that allow the LOFAR telescope to be aimed at hundreds of directions simultaneously. This feat is made possible by LOFAR's unique design. % and the resources offered by the BG/P. The
-%Traditional radio telescopes are pointed by rotating their dishes towards a source, resulting in a sharp but narrow field-of-view. The LOFAR antennas are omnidirectional, and are pointed through signal-processing techniques. The antenna signals are combined in groups called \emph{stations}, which provide wide fields-of-view. The LOFAR beam former produces many linear combinations of the station data streams to point at different sources within the wide fields-of-view provided by the stations. The linear combinations, called \emph{tied-array beams}, can be output as-is (\emph{XY polarisations}) or converted to Stokes parameters, which represent the polarisation aspects of the signal. The Stokes parameters can be integrated temporally to reduce the bandwidth per beam, which allows a great increase the number of beams that can be formed. % Finally, we can form an \emph{incoherent} tied-array beam, which retains the wide field-of-view of the stations by not pointing it at a specific source.
-%Finally, our software can produce the Stokes parameters of an \emph{incoherent} beam, which is an accumulation of unweighted station signals. The incoherent beam is less sensitive than a tied-array beam, but it maintains the wide field-of-view of the stations. The incoherent beam is used to detect the presence of sources, but does not reveal their location within the station beams.
-
-%Traditional radio telescopes are aimed by focussing their dishes on a source, resulting in a sharp but narrow field-of-view per dish. The LOFAR antennas are omnidirectional, and do not have to be moved or rotated. A station is focussed on a source by taking advantage of the fact that the speed of electromagnetic waves is finite, causing an electromagnetic wave to arrive at different antennas at different times (see Figure \ref{fig:delay}). A process called \emph{delay compensation} counters the differences in arrival times by delaying the signals from different antennas such that they are synchronised with respect to the observed source. The synchronised signals from all antennas are accumulated (by the station \emph{beam former}) and sent to the BG/P. The resulting \emph{station beam} has a wide field-of-view around the source.
-
-% Tied array beam uitleggen!
-
-%It contains samples for both the X and the Y polarisations as 16-bit complex integers, resulting in 3.1~Gb/s of data per station.
-
-%The BG/P, which receives the signals from all stations, again performs delay compensation and beam forming, this time in software. The BG/P beam former can focus on sources anywhere in the fields of view of the station beams, creating \emph{tied-array beams} (beams). An example is shown in Figure \ref{fig:pencilbeams}, in which station beams (represented by an ellipse) contains several tied-array beams (represented by hexagons). The actual width of the station beams, as well as the width of the tied-array beams, depends on the number as well as the locations of the stations used. Hundreds of tied-array beams are typically needed to fully cover the field of view of a station beam. Different tied-array beams are created by adding the signals from the individual stations using different delays, which depend on the relative positions of the stations and the relative direction of the tied-array beam with respect to the station beam. The BG/P applies delay compensation in two steps. First, coarse-grain compensation is performed by shifting the samples from different stations with respect to each other. Then, for each tied-array beam, the remaining sub-sample delays are compensated for by shifting the phases of the signals. The phase of each sample is changed through a complex multiplication with a precomputed weight. Tied-array beams are thus a linear combination of the (shifted) signals from the stations.
-\comment{
-TODO ideeen:
-- psets prevent global scheduling across psets, data has to be routed through i/o nodes
-}
-
-%Our beam former supports several pipelines: \emph{XY polarisations}, \emph{Stokes IQUV}, and \emph{Stokes I}. The XY polarisations pipeline outputs the raw tied-array beams, which consist of two 3.1~Gb/s streams of 32-bit complex floating points numbers (floats), one stream for each polarisation. The Stokes IQUV pipeline applies a domain transformation to each sample of the raw tied-array beams, which is useful for polarisation-related studies. The four Stokes parameters, calculated through $I = X\overline{X} + Y\overline{Y}$, $Q = X\overline{X} - Y\overline{Y}$, $U = 2\mathrm{Re}(X\overline{Y})$, $V = 2\mathrm{Im}(X\overline{Y})$, with each parameter being a 32-bit float, resulting in four 1.5~Gb/s streams. The Stokes I pipeline calculates only the first Stokes parameter, which represents the signal strength in both polarisations. The Stokes I pipeline supports temporal integration to trade time resolution for a reduced bandwidth per beam, allowing more beams to be created.
-
-%Finally, our software can produce the Stokes parameters (I or IQUV) of an \emph{incoherent} beam, which is an accumulation of the uncompensated station signals. The incoherent beam is less sensitive than a tied-array beam, but it maintains the wide field-of-view of the stations. The incoherent beam is used to detect the presence of sources, but does not reveal their location within the station beams.
+The LOFAR antennas are grouped in \emph{stations}. The stations are strategically placed, with 20 stations forming the centre of the array (the \emph{core}) and 24 stations at increasing distances from the core, spanning five nations (see Figure \ref{fig:map}). A core station can act as two individual stations in some observational modes, resulting in a total of 64 stations. A station is able to produce 248 frequency subbands of 195~kHz in the sensitivity range from 10~MHz to 250~MHz. Each sample consists of two complex 16-bit integers, representing the amplitude and phase of the X and Y polarisations of the antennas.
 
-The primary scientific use case driving the work presented in this paper is pulsar research. A pulsar is a rapidly rotating, highly magnetised neutron star, which emits electromagnetic radiation from its poles. Similar to the behaviour of a lighthouse, the radiation is visible to us only if one of the poles points towards the Earth, and subsequently appears to us as a very regular series of pulses, with a period as low as 1.4~ms~\cite{Hessels:06}. Pulsars are weak radio sources, and their individual pulses often do not rise above the background noise that fills our universe. Our beam former can focus on several pulsars at LOFAR's full observational bandwidth, producing either XY polarisation or Stokes IQUV data. Alternatively, the beam former is capable of efficiently performing sky surveys to discover new pulsars (or other radio sources) by covering the sky with hundreds of tied-array beams.
+Even though the antennas are omnidirectional, they can be pointed because the speed of electromagnetic waves is finite. Signals emitted by a source reach different antennas at different times (see Figure \ref{fig:delay}). A process called \emph{delay compensation} delays the signals such that they align (are \emph{coherent}) for the desired source. Beam forming subsequently adds the aligned signals. The stations perform delay compensation and beam forming to combine the antenna signals into a station beam with a wide field-of-view. The BG/P subsequently combines the signals from different stations in order to form tied-array beams within the sensitive area of the station beams (see Figure \ref{fig:pencilbeams}). In the BG/P, the samples from different stations are shifted with respect to each other to compensate for delays at sample-level granularity. Sub-sample delay compensation is performed by a complex multiplication per sample, which shifts the phase of each sample. The weights used in the complex multiplication depend on the location of the stations, the observational frequency of the sample, and the sky coordinates of the tied-array beam. The beam former thus creates tied-array beams by adding the station signals using different weights for each beam.
 
-% The delays are applied in two phases. First, the streams are aligned by shifting them a whole number of samples with respect to each other, which resolves delay differences up to the granularity of a single sample. Then, the remaining sub-sample delays are compensated for by shifting the phase of the signal. In order to obtain different tied-array beams, only the sub-sample delays have to be adjusted. A phase shift is performed by applying a complex multiplication. To form a beam, the beam former gathers the streams of samples from the stations, multiplies them with precomputed weights representing the required phase shift, and adds the streams together. The same weights are applied to both the X and the Y polarisations. The resulting data stream is called the \emph{XY polarisations}, and consists of 32-bit complex floating point numbers (complex floats).
-
-
-%The stations can be configured to observe in several directions in parallel, but have to divide their output bandwidth among them. In this paper, we present the \emph{beam former}, which allows the telescope to be aimed in tens of directions simultaneously at LOFAR's full observational bandwidth, and in hundreds of directions at reduced bandwidth. Both feats cannot be matched by any other telescope. The data streams corresponding to each observational direction, called \emph{tied-array beams}, are generated through (weighted) summations of the station inputs, which are demultiplexed using an all-to-all exchange, and routed to the storage cluster.
-
-In this paper, we will show how a software solution and the use of a massively parallel machine allows us to achieve these feats. We provide an in-depth study on all performance aspects, real-time behaviour, and scaling characteristics. This paper is organised as follows. First, we will describe the key characteristics of the IBM BlueGene/P supercomputer in Section \ref{Sec:bluegene}. Section \ref{Sec:pipelines} describes the implementation of our pipelines, followed by the performance analysis in Section \ref{Sec:performance}. We briefly discuss related work in Section \ref{Sec:related-work}, and conclude in Section \ref{Sec:conclusions}.
-
-\section{IBM BlueGene/P}
-\label{Sec:bluegene}
-
-We use an IBM BlueGene/P (BG/P) supercomputer for the real-time processing of station data. We will describe the key features of the BG/P, but more information can be found elsewhere~\cite{IBM:08}. Furthermore, we will describe how our BG/P is connected to its input and output systems.
-
-\subsection{System Description}
-
-Our system consists of 3 racks, with 12,480 processor cores that provide 42.4 TFLOPS peak processing power. One chip contains four PowerPC~450 cores, running at a modest 850~MHz clock speed to reduce power consumption and to increase package density. Each core has two floating-point units (FPUs) that provide support for operations on complex numbers. The chips are organised in \emph{psets}, each of which consists of 64 cores for computation (\emph{compute cores}) and one chip for communication (\emph{I/O node}). Each compute core runs a fast, simple, single-process kernel,  and has access to 512 MiB of memory. The I/O nodes consist of the same hardware as the compute nodes, but additionally have a 10~Gb/s Ethernet interface connected. They run Linux, which allows the I/O nodes to do full multitasking. One rack contains 64 psets, which is equal to 4096 compute cores and 64 I/O nodes.
-
-The BG/P contains several networks. A fast \emph{3-dimensional torus\/} connects all compute nodes and is used for point-to-point and all-to-all communications over 3.4~Gb/s links. The torus uses DMA to offload the CPUs and allows asynchronous communication. The \emph{collective network\/} is used for communication within a pset between an I/O node and the compute nodes, using 6.8~Gb/s links. In both networks, data is routed through compute nodes using a shortest path. Additional networks exist for fast barriers, initialisation, diagnostics, and debugging.
-
-\subsection{External I/O}
-\label{Sec:Networks}
-
-We customised the I/O node software stack~\cite{Yoshii:10} and run a multi-threaded program on each I/O~node which is responsible for the handling of both the input and the output. Even though the I/O nodes each have a 10~Gb/s Ethernet interface, they do not have enough computation power to handle 10~Gb/s of data. The overhead of handling IRQs, IP, and UDP/TCP puts a high load on the 850~MHz cores of the I/O nodes, limiting performance. An I/O node can output at most 3.1~Gb/s, unless it has to handle station input (3.1~Gb/s), in which case it can output at most 1.1~Gb/s. We implemented a low-overhead communication protocol called FCNP~\cite{Romein:09a} to efficiently transport data to and from the compute nodes, which perform the required signal processing. The I/O nodes forward the results to our storage cluster, which can sustain a throughput up to 80~Gb/s.
+Our beam former supports several LOFAR pipelines. The \emph{complex voltages} pipeline stores the tied-array beams as is. The \emph{Stokes IQUV} pipeline transforms the complex voltages into Stokes parameters representing various polarisation aspects of the signal. The \emph{Stokes I} pipeline stores just the signal strength for each beam, and can be integrated temporally in order to increase the number of tied-array beams that can be formed. Finally, our software can produce the Stokes parameters of an \emph{incoherent} beam, which is an accumulation of unweighted station signals. The incoherent beam is less sensitive than a tied-array beam, but it maintains the wide field-of-view of the stations. The incoherent beam is produced in parallel with other pipelines, and is used to detect the presence of sources, but does not reveal their location within the station beams.
 
 \section{Beam Former Pipelines}
 \label{Sec:pipelines}
 
-In this section, we will describe in detail how the full signal-processing pipeline operates, in and around the beam former. The use of a software pipeline allows us to reconfigure the components and design of our standard imaging pipeline, described in \cite{Romein:10a}. Due to the flexibility of software, we can run several pipelines in parallel on the same data, as long as resource requirements are met. Figure \ref{fig:processing} gives an overview of our system. Our software is written in C++, with core routines ported to assembly to obtain maximal performance.
+In this section, we will describe in detail how the full signal-processing pipelines operate, in and around the beam former. The use of a software pipeline allows us to reconfigure the components and design of our standard imaging pipeline, described in \cite{Romein:10a}. Due to the flexibility of software, we can run several pipelines in parallel on the same data, as long as resource requirements are met. Figure \ref{fig:processing} gives an overview of our system. Our software is written in C++, with core routines ported to assembly to obtain maximal performance.
 
 \begin{figure}[ht]
 \center
@@ -166,25 +143,12 @@ In this section, we will describe in detail how the full signal-processing pipel
 \end{figure}
 
 \subsection{Input from Stations}
-The BG/P receives data from up to 64 stations. The stations are strategically placed, with 20 stations acting as its centre (the \emph{core}) and 24 stations at increasing distances from the core, spanning five nations (see Figure \ref{fig:map}). A core station can act as two individual stations in some observational modes. A station is able to produce 248 frequency subbands of 195~kHz out of the sensitivity range of 10~MHz to 250~MHz. Each sample consists of two complex 16-bit integers, representing the amplitude and phase of the X and Y polarisations of the antennas. The resulting data stream from a station is a 3.1~Gb/s UDP stream, which is sent to an I/O node in our BG/P.
-
-Figure \ref{fig:dataflow-1} shows the data flow across two psets that process station input. At the I/O nodes, the station data are split into chunks of one subband and 0.25 seconds. The chunk size is chosen such that the compute cores have enough memory to perform all of the necessary processing. Due to the BG/P design, an I/O node sends chunks to its own compute cores only. For each chunk, the I/O node send 62 subbands each to 4 compute cores, chosen round robin.
+Input data arrives at as many I/O nodes as there are stations used. The beam former however needs data from all stations together in order to form tied-array beams. The station data thus have to be rearranged inside the BG/P, to collect the data from different stations but also to split it along different dimensions in order to distribute the workload. At the I/O nodes, the station data are split into chunks of one subband and 0.25 seconds. The chunk size is chosen such that the compute cores have enough memory to perform all of the necessary processing. Due to the BG/P design, an I/O node sends chunks to its own compute cores only. The compute cores then exchange these chunks using an all-to-all exchange, shown in Figure \ref{fig:dataflow}.
 
 \begin{figure}[ht]
-\subfigure[From station input to the first exchange.]{
-  \makebox[60mm][c]{
-     \includegraphics[width=0.45\textwidth]{dataflow-1st-transpose.pdf}
-     \label{fig:dataflow-1}
-  }
-}
-\hfill
-\subfigure[From the second exchange to output to disk.]{
-  \makebox[60mm][c]{
-     \includegraphics[width=0.45\textwidth]{dataflow-2nd-transpose.pdf}
-     \label{fig:dataflow-2}
-  }
-}
-\caption{A subset of the data flow for a beam formed by two stations. The dotted lines depict chunks, and sb abbreviates subbands. The actual data flow involves all 64 psets.}
+\center
+\includegraphics[width=0.8\textwidth]{dataflow-2.pdf}
+\caption{The data flow and data ordering in our pipelines.}
 \label{fig:dataflow}
 \end{figure}
 
@@ -192,41 +156,30 @@ Figure \ref{fig:dataflow-1} shows the data flow across two psets that process st
 
 The first all-to-all exchange allows the compute cores to distribute the chunks from a single station, and to collect all the chunks of the same subband from all of the stations. The exchange is performed over the fast 3D-torus network, but with up to 198~Gb/s of station data to be exchanged, special care still has to be taken to avoid network bottlenecks. It is impossible to optimise for short network paths due to the physical distances between the different psets across a BG/P rack. Instead, we optimised the data exchange by creating as many paths as possible between compute cores that have to exchange data. Within each pset, we employ a virtual mapping such that the number of possible routes between communicating cores in different psets is maximised.
 
-The communications in the all-to-all exchange are asynchronous, which allows a compute core to start processing a subband from a station as soon as it arrives, up to the point that data from all stations are required. Communication and computation are thus overlapped as much as possible.
-
-\subsection{Signal Processing}
+The all-to-all exchange is asynchronous. Once a compute core receives a complete chunk from a single subband, it performs a sequence of processing steps on it. The first step is a conversion from 16-bit little-endian integers into 32-bit big-endian floats, to be able to use the BlueGene's powerful FPUs. Figure \ref{fig:processing} shows which steps are performed before the tied-array beam forming occurs. Of note is the Fast Fourier Transform (FFT), which divides the 195~kHz subbands into (typically) 12~kHz channels. We use the efficient \emph{Vienna} version of FFTW~\cite{Lorenz:05}. The superstation beam former is a simplified version of our beam former, used to combine two stations as if they were one, and is mainly used in our imaging pipeline to reduce the workload. Once the chunks from all stations are received and processed asynchronously, the processed data are ready to be beam formed.
 
-Once a compute core receives a chunk from a single subband, it performs a sequence of processing steps on it, up to and including the \emph{superstation beam former} in Figure \ref{fig:processing}. The latter is used to combine two stations as if it were one, to increase sensitivity without increasing the load of the rest of the pipeline. Of note is also the Fast Fourier Transform (FFT), which divides the 195~kHz subbands into (typically) 12~kHz channels. We use the efficient \emph{Vienna} FFT library~\cite{...}. Once the chunks from all stations are received and processed asynchronously, the processed data are ready to be beam formed. %TODO: improve vienna wording
+\subsection{Beam Forming}
 
-\subsection{Beam Forming and Stokes Calculations}
+The beam former combines the chunks from all stations, producing a chunk for each tied-array beam. Each beam is formed using different weights for the frequency of the channel, the locations of the stations, and the beam coordinates. The positional weights are precomputed by the I/O nodes and sent along with the data to avoid a duplicated effort by the compute nodes. The delays are applied to the station data through complex multiplications and additions, on both the X and the Y polarisation samples from the stations.
 
-The beam former adds the chunks of all stations, producing a chunk for each tied-array beam. Each beam is formed using different weights for the frequency of the channel, the locations of the stations, and the beam coordinates. The positional weights are precomputed by the I/O nodes and sent along with the data to avoid a duplicated effort by the compute nodes.
-
-\comment{
-First, the different weights required for the different tied-array beams are computed, based on the station positions and the beam directions. Note that the data in the chunks are already delay compensated with respect to the source at which the stations are pointed. Any delay compensation performed by the beam former is therefore to compensate the delay differences between the desired beams and the station's source. The reason for this two-stage approach is flexibility. By already compensating for the station's source in the previous step, the resulting data can not only be fed to the beam former pipelines, but also to other pipelines, such as the imaging pipeline. Because we have a software pipeline, we can implement and connect different processing pipelines with only a small increase in complexity.
-}
+%The delays are applied to the station data through complex multiplications and additions, programmed in assembly. In order to take full advantage of the L1 cache and the available registers, data is processed in sets of 6 stations, producing 3 beams, in portions of 128 samples, or a subset thereof to cover the remainders. While the exact ideal set size in which the data is to be processed is platform specific, we have shown in previous work that similar tradeoffs exist for similar problems across different architectures~\cite{Nieuwpoort:09}.
 
-%TODO VVV
-The delays are applied to the station data through complex multiplications and additions, programmed in assembly. In order to take full advantage of the L1 cache and the available registers, data is processed in sets of 6 stations, producing 3 beams, in portions of 128 samples, or a subset thereof to cover the remainders. While the exact ideal set size in which the data is to be processed is platform specific, we have shown in previous work that similar tradeoffs exist for similar problems across different architectures~\cite{Nieuwpoort:09}.
+\begin{algorithm}
+\caption{Beam former}
+\label{lst:beam-forming}
+FOR Channel IN 1 .. NrChannels DO \\
+  FOR Station IN 1 .. NrStations STEP 6 DO \\
+    FOR Time IN 1 .. NrTimes STEP 128 DO \\
+      FOR Beam IN 1 .. NrBeams STEP 3 DO \\ 
+        BeamForm6StationsAnd128TimesTo3BeamsAssembly(...)
+\end{algorithm}        
 
-\comment{
-\begin{table}
-\center
-\begin{tabular}{l|r|r|r}
-Pipeline & Sample type & Streams/beam & Data rate/stream \\
-\hline
-XY polarisations & complex float & 2 & 3.1 Gb/s \\
-Stokes IQUV      & float         & 4 & 1.5 Gb/s \\
-Stokes I         & float         & 1 & 1.5 Gb/s
-\end{tabular}
-\caption{Output of the beam former pipelines.}
-\end{table}
-}
-Because each beam is an accumulation of the data from all stations, the bandwidth of each beam is equal to the bandwidth of data from a single station, which is 6.2~Gb/s now that the samples are 32-bit floats. Once the beams are formed, they are kept as XY polarisations or transformed into the Stokes IQUV or the Stokes I parameters. In the latter case, the beams can also be integrated temporally to reduce the resulting data rate. Finally, an incoherent beam can be created in parallel, and converted into either Stokes I or Stokes IQUV parameters.
+All time-consuming pipeline components are written in assembly, to achieve maximum performance.  The assembly code minimises the number of memory accesses, minimises load delays, minimises FPU pipeline stalls, and maximises instruction-level parallelism.  We learnt that optimal performance is often achieved by combining multiple iterations of a multi-dimensional loop, as shown in Listing~\ref{lst:beam-forming}. This is much more efficient than creating all beams one at a time, due to better reuse of data loaded from main memory.  Finding the most efficient way to group work is a combination of careful analysis and, unfortunately, trial-and-error. The coherent beam former achieves 85\% of the FPU peak performance, not as high as the 96\% of the correlator~\cite{Romein:10a}, but still XXX times more than the C++ reference implementation. 
+%Because each beam is an accumulation of the data from all stations, the bandwidth of each beam is equal to the bandwidth of data from a single station, which is 6.2~Gb/s now that the samples are 32-bit floats. Once the beams are formed, they are kept as complex voltages or transformed into the Stokes IQUV or the Stokes I parameters. In the latter case, the beams can also be integrated temporally to reduce the resulting data rate. Finally, an incoherent beam can be created in parallel, and converted into either Stokes I or Stokes IQUV parameters.
 
-Our beam former supports several pipelines: \emph{XY polarisations}, \emph{Stokes IQUV}, and \emph{Stokes I}. The XY polarisations pipeline outputs the raw tied-array beams, which consist of two 3.1~Gb/s streams of 32-bit complex floating points numbers (floats), one stream for each polarisation. The Stokes IQUV pipeline applies a domain transformation to each sample of the raw tied-array beams, which is useful for polarisation-related studies. The four Stokes parameters, calculated through $I = X\overline{X} + Y\overline{Y}$, $Q = X\overline{X} - Y\overline{Y}$, $U = 2\mathrm{Re}(X\overline{Y})$, $V = 2\mathrm{Im}(X\overline{Y})$, with each parameter being a 32-bit float, resulting in four 1.5~Gb/s streams. The Stokes I pipeline calculates only the first Stokes parameter, which represents the signal strength in both polarisations. The Stokes I pipeline supports temporal integration to trade time resolution for a reduced bandwidth per beam, allowing more beams to be created.
+%Our beam former supports several pipelines: \emph{complex voltages}, \emph{Stokes IQUV}, and \emph{Stokes I}. The complex voltages pipeline outputs the raw tied-array beams, which consist of two 3.1~Gb/s streams of 32-bit complex floating points numbers (floats), one stream for each polarisation. The Stokes IQUV pipeline applies a domain transformation to each sample of the raw tied-array beams, which is useful for polarisation-related studies. The four Stokes parameters, calculated through $I = X\overline{X} + Y\overline{Y}$, $Q = X\overline{X} - Y\overline{Y}$, $U = 2\mathrm{Re}(X\overline{Y})$, $V = 2\mathrm{Im}(X\overline{Y})$, with each parameter being a 32-bit float, resulting in four 1.5~Gb/s streams. The Stokes I pipeline calculates only the first Stokes parameter, which represents the signal strength in both polarisations. The Stokes I pipeline supports temporal integration to trade time resolution for a reduced bandwidth per beam, allowing more beams to be created.
 
-The beam former transforms chunks representing station data into chunks representing beam data. Because a chunk representing station data contained data for only one subband, the chunks representing different subbands of the same beam are still spread out over the full BG/P. Chunks corresponding to the same beam are brought together using a second all-to-all exchange.
+%The beam former transforms chunks representing station data into chunks representing beam data. Because a chunk representing station data contained data for only one subband, the chunks representing different subbands of the same beam are still spread out over the full BG/P. Chunks corresponding to the same beam are brought together using a second all-to-all exchange.
 
 \subsection{Channel-level Dedispersion}
 
@@ -254,36 +207,25 @@ Dedispersion is performed in the frequency domain, effectively by doing a 4096-p
 
 Figure~\ref{fig:dedispersion-result} shows the observed effectiveness of channel-level dedispersion, which improves the effective time resolution from 0.51~ms to 0.082~ms, revealing a more detailed pulse and a better signal-to-noise ratio. Dedispersion thus contributes significantly to the data quality, but it also comes at a significant computational cost due to the two FFTs it requires. It demonstrates the power of using a \emph{software\/} telescope: the pipeline component was implemented, verified, and optimised in only one month time.
 
+\subsection{Stokes Calculations}
+
+The beams are optionally converted into Stokes IQUV or Stokes I parameters, again using assembly routines to achieve optimal performance. The Stokes parameters are calculated through $I = X\overline{X} + Y\overline{Y}$, $Q = X\overline{X} - Y\overline{Y}$, $U = 2\cdot\mathrm{Re}(X\overline{Y})$, $V = 2\cdot\mathrm{Im}(X\overline{Y})$. Although the formulas are simple, the Stokes parameters are expensive to calculate. The required operations do not map well onto the FPU instruction set of the BG/P, even though the instruction set is extended with support for operations on complex numbers.
+
 \subsection{Second All-to-all Exchange}
 
-In the second all-to-all exchange, the chunks made by the beam former are again exchanged over the 3D-torus network. Due to memory constrains on the compute cores, the cores that performed the beam forming cannot be the same cores that receive the beam data after the exchange. We assign a set of cores (\emph{output cores}) to receive the chunks. The output cores are chosen before an observation, and are distinct from the \emph{input cores} which perform the earlier computations in the pipeline. Figure \ref{fig:dataflow-2} shows a (partial) second exchange, in which two output cores receive chunks from all input cores.
+Even though the beams are formed and optionally converted into Stokes parameters, they are still distributed as chunks across the BlueGene. Because the compute nodes cannot send their data directly to the I/O node which sends it to storage, a second all-to-all exchange is required to rearrange the chunks for output. Only chunks that are sent to the same pset can be sent to storage as a single data stream.
+
+Unfortunately, the output bandwidth available at each I/O node can be less than the bandwidth required by the beams. An I/O node can output 3.1~Gb/s, and only 1.1~Gb/s if the I/O node has to process station input at the same time. The bandwidth required for a complex voltages, Stokes IQUV, or (unintegrated) Stokes I beam however is 6.2~Gb/s, 6.2~Gb/s, and 1.5~Gb/s, respectively. We therefore split the beams and send the different polarisations or Stokes parameters to different psets, and thus store them in different files in our storage cluster. In some cases, it is also necessary to split the beams further, in which case we do not collect and store 248 subbands, but have to distribute the output further by storing only 124 or 83 subbands per file.
 
-An output core gathers the chunks that contain different subbands but belong to the same output stream. An output stream consists of all 248 subbands belonging to the same polarisation or Stokes parameter. If the full 248 subbands cannot be exported by the I/O node due to data rate limitations, the polarisation or Stokes parameter is split into multiple streams containing 83 or 124 subbands each.
+Due to memory constraints on the compute cores, the cores that performed the beam forming cannot be the same cores that receive the beam data after the exchange. We assign a set of cores (\emph{output cores}) to receive the chunks. The output cores are chosen before an observation, and are distinct from the \emph{input cores} which perform the earlier computations in the pipeline.
 
-Then, it rearranges the dimensions of the data into their final ordering, which is necessary, because the data order that will be written to disk is not the same order that can be produced by our computations without taking heavy cache penalties. We hide this reordering cost at the output cores by overlapping computation (the reordering of a chunk) with communication (the arrival of other chunks). Once all of the chunks are received and reordered, they are sent back to the I/O node.
+The output cores again receive the chunks asynchronously, which we overlap with computations. For each chunk, the dimensions of the data are reordered into their final ordering. Reordering is necessary, because the data order that will be written to disk is not the same order that can be produced by our computations without taking heavy cache penalties. Once all of the chunks are received and reordered, they are forwarded to the I/O node.
 
-For the distribution of the workload over the available output cores, three factors have to be considered. First, all of the data belonging to the same beam has to be processed by output cores in the same pset, to ensure that one I/O node can concatenate all of the 0.25 second chunks that belong to the beam. Second, the maximum output rate per I/O node has to be respected. Finally, the presence of the first all-to-all exchange, which uses the same network at up to 198~Gb/s. The second exchange uses up to 80~Gb/s. Even though each link sustains 3.4~Gb/s, it has to process the traffic from four cores, as well as traffic routed through it between other nodes. The network links in the BG/P become overloaded unless enough output cores are used to spread the load.
+For the distribution of the workload over the available output cores, three factors have to be considered. First, all of the data belonging to the same beam has to be processed by output cores in the same pset, to ensure that one I/O node can concatenate all of the 0.25 second chunks that belong to the beam. Second, the maximum output rate per I/O node has to be respected. Finally, the presence of the first all-to-all exchange, which uses the same network at up to 198~Gb/s, has to be taken into account. The second exchange uses up to 80~Gb/s. Even though each link sustains 3.4~Gb/s, it has to process the traffic from four cores, as well as traffic routed through it between other nodes. The network links in the BG/P become overloaded unless the output cores are scattered enough to spread the load.
 
 \subsection{Transport to Disks}
 Once an output core has received and reordered all of its data, the data are sent to the core's I/O node. The I/O node forwards the data over TCP/IP to the storage cluster. To avoid any stalling in our pipeline due to network congestion or disk issues, the I/O node uses a best-effort buffer which drops data if it cannot be sent.
 
-\comment{
-  Pulsar pipeline (include picture):
-     - 1st transpose
-     - pre-beamforming signal processing 
-     - beam forming / stokes
-     - pre-transpose reordering
-     - 2nd transpose
-     - post-transpose reordering
-     - send to storage
-
-  Notable comments:
-     - optimal split of 6 stations, 3 beams, 128 samples for beam former, due to L1 cache and #registers.
-       is a more general problem, see ppopp and ics papers.
-     - software allows fast roll-out and testing of features, flexible flow control, and only
-       optimisation where needed but maintainability elsewhere.
-}
-
 \section{Performance Analysis}
 \label{Sec:performance}
 
@@ -292,7 +234,7 @@ We will focus our performance analysis on edge cases that are of astronomical in
 \subsection{Overall Performance}
 
 % TODO: getallen kloppen niet.. 13 beams is 80.6 Gb/s, en met 70 Gb/s zouden we 11 beams aan moeten kunnen
-Figure \ref{fig:stations-beams} shows the maximum number of beams that can be created when using a various number of stations, in each of the three modes: XY polarisations, Stokes IQUV, and Stokes I. In both the XY polarisations and the Stokes IQUV modes, the pipeline is I/O bound. Each beam is 6.2~Gb/s wide. We can make at most 12 beams without exceeding the available 80~Gb/s to our storage cluster. The available bandwidth decreases down to 70~Gb/s due to the fact that an I/O node can only output 1.1~Gb/s if it also has to process station data. The granularity with which the output can be distributed over the I/O nodes, as well as scheduling details, determine the actual number of beams that can be created, but in all cases, the beam former can create at least 10 beams at LOFAR's full observational bandwidth.
+Figure \ref{fig:stations-beams} shows the maximum number of beams that can be created when using a varying number of stations, in each of the three modes: complex voltages, Stokes IQUV, and Stokes I. In both the complex voltages and the Stokes IQUV modes, the pipeline is I/O bound. Each beam is 6.2~Gb/s wide. We can make at most 12 beams without exceeding the available 80~Gb/s to our storage cluster. If 64 stations are used, the available bandwidth is 70~Gb/s due to the fact that an I/O node can only output 1.1~Gb/s if it also has to process station data. The granularity with which the output can be distributed over the I/O nodes, as well as scheduling details, determine the actual number of beams that can be created, but in all cases, the beam former can create at least 10 beams at LOFAR's full observational bandwidth.
 
 In the Stokes I mode, we applied several integration factors (1, 2, 4, 8, and 12) in order to show the trade-off between beam quality and the number of beams. Integration factors higher than 12 does not allow significantly more beams to be created, but could be used in order to further reduce the total output rate. For low integration factors, the beam former is again limited by the available output bandwidth. Once the Stokes I streams are integrated sufficiently, the system becomes bounded by the compute nodes: if only signals from a few stations have to be combined, the beam former is limited by the amount of available memory required to store the beams. If more input has to be combined, the beam former becomes limited by the CPU power available in the compute cores. For observations for which a high integration factor is acceptable, the beam former is able to create 155 up to 543 tied-array beams, depending on the number of stations used. For observations which need a high time resolution and thus a low integration factor, the beam former is still able to create at least 42 tied-array beams.
 
@@ -317,14 +259,14 @@ Case & Mode & Channel & Int. & Stations & Beams  & Input & Output & Bound & Used
 \begin{figure}[t]
 \begin{minipage}[t]{0.52\textwidth}
 \includegraphics[width=\textwidth]{stations-beams.pdf}
-\label{fig:stations-beams}
 \caption{The maximum number of beams that can be created in various configurations.}
+\label{fig:stations-beams}
 \end{minipage}
 \hfill
 \begin{minipage}[t]{0.5\textwidth}
 \includegraphics[width=\textwidth]{execution_times.pdf}
-\label{fig:execution-times}
 \caption{The time spent in the processing steps.}
+\label{fig:execution-times}
 \end{minipage}
 \end{figure}
 
@@ -332,7 +274,7 @@ Case & Mode & Channel & Int. & Stations & Beams  & Input & Output & Bound & Used
 
 We further analyse the workload of the compute cores by highlighting a set of cases, summarised in Table \ref{table:cases}. We will focus on a memory-bound case (\circlenumber{A}), which also creates the highest number of beams, on CPU-bound cases interesting for performing surveys, with either 24 stations (\circlenumber{B}) or 64 stations (\circlenumber{C}) as input. Cases \circlenumber{D} and \circlenumber{E} focus on high-resolution observations of known sources, and are I/O bound configurations with 24 and 64 stations, respectively. Case \circlenumber{F} focusses on the observations of known sources as well, using Stokes I output, which allows more beams to be created. Channel-level dedispersion is applied for all cases that observe known sources.
 
-The workload of the compute cores for each case is shown in Figure \ref{fig:execution-times}, which shows the average workload per core. For the CPU-bound cases \circlenumber{B} and \circlenumber{C}, the average load has to be lower than 100\% in order to prevent fluctuations from slowing down our real-time system. These fluctuations typically occur due to clashes within the BG/P 3D-torus network which is used for both all-to-all-exchanges, and cannot be avoided in all cases.
+The workload of the compute cores for each case is shown in Figure \ref{fig:execution-times}, which shows the average workload per core. For the CPU-bound cases \circlenumber{B} and \circlenumber{C}, the average load has to be lower than 100\% to recover from small delays in the processing, which can occur since the BG/P is not a real-time system. These delays typically occur due to clashes within the BG/P 3D-torus network which is used for both all-to-all-exchanges, and cannot be avoided in all cases.
 
 The cases which create many beams (\circlenumber{A}\circlenumber{B}\circlenumber{C}) spend most of the cycles performing beam forming and calculation the Stokes I parameters. The beam forming scales with both the number of stations and the number of beams, while the Stokes I calculation costs depends solely on the number of beams. Case \circlenumber{A} has to beam form only four stations, and thus requires most of its time calculating the Stokes I parameters. Case \circlenumber{B} and \circlenumber{C} use more stations, and thus need more time to beam form.
 
@@ -378,7 +320,9 @@ We have shown the capabilities of our beam former pipelines, running in software
 
 The use of a software solution on powerful interconnected hardware is a key aspect in the development and deployment of our pipeline. Because we use software, rapid prototyping is cheap, allowing novel features to be tested to aid the exploration of the design space of a new instrument. The resulting pipelines retain the flexibility that software allows. The control flow and bookkeeping can become complex while remaining manageable through software abstraction. We are able to run the same station data through multiple pipelines in parallel, and even multiple independent observations in parallel, as long as there are enough available resources. The science which drives LOFAR, and which is driven by it, is greatly accelerated through the use of an easily reconfigurable instrument.
 
-The BG/P supercomputer provides us with enough computing power and powerful networks to be able to implement the signal processing and all-to-all-exchanges that we require, without having to resort to a dedicated system which inevitably curbs the design freedom that the supercomputer provides. As with any system, platform-specific parameters nevertheless become important when maximal performance is desired. We tuned the distribution of the workload over the cores to avoid network collisions, and implemented our core routines in assembly in order to maximise the throughput. 
+The BG/P supercomputer provides us with enough computing power and powerful networks to be able to implement the signal processing and all-to-all-exchanges that we require, without having to resort to a dedicated system which inevitably curbs the design freedom that the supercomputer provides. As with any system, platform-specific parameters nevertheless become important when maximal performance is desired. Although a C reference implementation allowed us to quickly develop and test features, we needed handcrafted assembly to keep the double FPUs of each compute core as busy as possible.
+
+The architecture of the BG/P makes some tasks more difficult as well. The fact that an I/O node can only communicate with its own compute cores prevents us from freely scheduling the workload. Instead, we had to manually route the data using two all-to-all exchanges in order to stream the data to and from the right I/O nodes. To achieve maximum performance, we tuned the distribution of the workload over the cores to avoid network collisions.
 
 \bibliographystyle{plain}
 \bibliography{lofar}
diff --git a/doc/papers/2011/europar/stations-beams.jgr b/doc/papers/2011/europar/stations-beams.jgr
index 44f390c8064826b099af09aac73b964f52b6af2c..ed8bd8e82a6ba490377392aecbf746dcc76b502d 100644
--- a/doc/papers/2011/europar/stations-beams.jgr
+++ b/doc/papers/2011/europar/stations-beams.jgr
@@ -75,7 +75,7 @@ legend
   x 38 y 20
   linelength 5
 
-newstring : XY polarisations / Stokes IQUV
+newstring : Complex voltages / Stokes IQUV
   x 2 y 2
   hjl vjc