diff --git a/.gitattributes b/.gitattributes index 09b09fca0f0a454312b6e356fb50ee002ea05234..2b154f375b541aa848096a43e7fb705edc7b7fd6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2459,8 +2459,10 @@ RTCP/RTCPTools/src/cexec-udp-copy -text RTCP/Run/CMakeLists.txt -text RTCP/Run/src/BlueGeneCheck.sh -text RTCP/Run/src/BlueGeneControl.conf -text -RTCP/Run/src/BlueGeneControl.sh -text RTCP/Run/src/CMakeLists.txt -text +RTCP/Run/src/CNProc.sh -text +RTCP/Run/src/IONProc.sh -text +RTCP/Run/src/IdentityControl.sh -text RTCP/Run/src/LOFAR/BGcontrol.py -text RTCP/Run/src/LOFAR/CMakeLists.txt -text RTCP/Run/src/LOFAR/CommandClient.py -text @@ -2481,6 +2483,7 @@ RTCP/Run/src/RSPConnections.dat -text RTCP/Run/src/commandOLAP.py -text RTCP/Run/src/deploy/Makefile -text RTCP/Run/src/generate_OLAP.parset.pl -text +RTCP/Run/src/locations.sh -text RTCP/Run/src/multitail-olap.conf -text RTCP/Run/src/packetanalysis.c -text RTCP/Run/src/runParset.py -text diff --git a/MAC/APL/APLCommon/src/swlevel.conf b/MAC/APL/APLCommon/src/swlevel.conf index 550ece83722dc9424a5eab66061d51d96902795b..b56a7e7710f1f80720796157effe6737d02a4fe7 100644 --- a/MAC/APL/APLCommon/src/swlevel.conf +++ b/MAC/APL/APLCommon/src/swlevel.conf @@ -15,11 +15,13 @@ 2:u:d:r::_EPAStub 2:u:d:r::RSPDriver 2:u:d:r::TBBDriver -2:u:d:::BlueGeneControl +2:u:d:::BGPPartition # 3:u:d:::AMCServer 3:u:d:::CalServer 3:u:d:::BeamServer +3:u:d:::IONProcessing # IONProc makes the logdir, so start it before CNProc +3:u:d:::CNProcessing # 4:u:d:::SoftwareMonitor 4:u:d:::HardwareMonitor diff --git a/RTCP/Run/src/BlueGeneControl.conf b/RTCP/Run/src/BlueGeneControl.conf index f2ea24109d64e5348525588bc7f672e138a79a2b..6d1d1b3213deef58515904c22a0cf5ae35ab6183 100644 --- a/RTCP/Run/src/BlueGeneControl.conf +++ b/RTCP/Run/src/BlueGeneControl.conf @@ -1,13 +1,2 @@ # BG/P Partition to use for the correlator PARTITION=R00 - -# Root directory for the binaries -BINPATH=/opt/lofar/bin - -# Location of PID file -PIDFILE=/tmp/BlueGeneControl-$PARTITION.pid - -# Location of log file -LOGDIR=/opt/lofar/log -LOGFILE=$LOGDIR/BlueGeneControl.log - diff --git a/RTCP/Run/src/BlueGeneControl.sh b/RTCP/Run/src/BlueGeneControl.sh deleted file mode 100755 index d851da575523e0633623547e6949ec242b833557..0000000000000000000000000000000000000000 --- a/RTCP/Run/src/BlueGeneControl.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -COMMAND=$1 - -CONFIG=/opt/lofar/etc/BlueGeneControl.conf - -. $CONFIG - -function getpid() { - if [ -e $PIDFILE ] - then - PID=`cat $PIDFILE` - if [ ! -e /proc/$PID ] - then - PID=DOWN - fi - else - PID=DOWN - fi -} - -function start() { - $BINPATH/LOFAR/Partitions.py -kfa $PARTITION - - $BINPATH/startOLAP.py -P $PARTITION & - PID=$! - echo $PID > $PIDFILE -} - -function wait_for_graceful_exit() { - # wait for correlator to stop - for i in `seq 1 30` - do - if [ -e /proc/$PID ] - then - break - fi - done -} - -function stop() { - $BINPATH/commandOLAP.py -P $PARTITION cancel all - $BINPATH/commandOLAP.py -P $PARTITION quit - - wait_for_graceful_exit - - if [ -e /proc/$PID ] - then - # nudge startOLAP - kill -2 $PID - - wait_for_graceful_exit - - # kill startOLAP.py script and all its children (mpiruns) - pkill -P $PID - $BINPATH/LOFAR/Partitions.py -k $PARTITION - fi - - rm -f $PIDFILE -} - -getpid - -case $COMMAND in - start) if [ "$PID" = "DOWN" ] - then - ( - start - ) >> $LOGFILE 2>&1 - fi - ;; - - stop) if [ "$PID" != "DOWN" ] - then - ( - stop - ) >> $LOGFILE 2>&1 - fi - ;; - - status) - SWLEVEL=$2 - echo "$SWLEVEL : BlueGeneControl $PID" - ;; - - *) echo "usage: $0 {start|stop|status}" - ;; -esac - diff --git a/RTCP/Run/src/CMakeLists.txt b/RTCP/Run/src/CMakeLists.txt index 7c85177ba8d984b22bd48a6ffa1c923c3c6cc118..037def73e3aa3c70e96081a74dd29ddade274d15 100644 --- a/RTCP/Run/src/CMakeLists.txt +++ b/RTCP/Run/src/CMakeLists.txt @@ -3,7 +3,11 @@ lofar_add_bin_program(packetanalysis packetanalysis.c) install(PROGRAMS - BlueGeneControl.sh + BGPPartition.sh + CNProc.sh + IONProc.sh + IdentityControl.sh + locations.sh watchlogs.sh startOLAP.py commandOLAP.py diff --git a/RTCP/Run/src/CNProc.sh b/RTCP/Run/src/CNProc.sh new file mode 100755 index 0000000000000000000000000000000000000000..b37de1f97478523949c83fe8ff77f28b17125ce8 --- /dev/null +++ b/RTCP/Run/src/CNProc.sh @@ -0,0 +1,16 @@ +#!/bin/bash +PARTITION="R01-M0-N04-64" + + +function start() { + source locations.sh + + mpirun -mode VN -partition $PARTITION -env DCMF_COLLECTIVES=0 -env BG_MAPPING=XYZT -env LD_LIBRARY_PATH=/bgsys/drivers/ppcfloor/comm/lib:/bgsys/drivers/ppcfloor/runtime/SPI:/globalhome/romein/lib.bgp -cwd $LOGDIR -exe $CNPROC >& $LOGSYMLINK/CNProc.log & +} + +function stop() { + # mpikill only works when mpirun has started running the application + mpikill "$PID" || kill -9 "$PID" +} + +. IdentityControl.sh diff --git a/RTCP/Run/src/IONProc.sh b/RTCP/Run/src/IONProc.sh new file mode 100755 index 0000000000000000000000000000000000000000..7759fd412271f112ce3f9e32231fe1df1a23fdbf --- /dev/null +++ b/RTCP/Run/src/IONProc.sh @@ -0,0 +1,22 @@ +#!/bin/bash +PARTITION="R01-M0-N04-64" + + +function start() { + source locations.sh + + # list both the partition directly (small partitions) and recursively (large partitions) to get all -32 subpartitions + SUBPARTITIONS=`bghierarchy -s $PARTITION;bghierarchy -s \`bghierarchy -s $PARTITION\`` + + # xxx-32 means both xxx-J00 and xxx-J01 + PSETS=`for i in $SUBPARTITIONS; do echo $i; done|grep -- "-32$"|sort -u|sed 's/-32$/-J00/;p;s/-J00$/-J01/'|xargs -L 1 host -4|cut -d\ -f 4|tr '\n' ','` + + # create a new log dir + rm -f $LOGSYMLINK || true + mkdir -p $LOGDIR + ln -s $LOGDIR $LOGSYMLINK + + /bgsys/LOFAR/openmpi-ion/bin/mpirun -host $PSETS --pernode -wd $LOGDIR $IONPROC $ISPRODUCTION >& $LOGSYMLINK/IONProc.log & +} + +. IdentityControl.sh diff --git a/RTCP/Run/src/IdentityControl.sh b/RTCP/Run/src/IdentityControl.sh new file mode 100755 index 0000000000000000000000000000000000000000..e2c77f5cccc12a75b4745bb82cf1df478f44cf88 --- /dev/null +++ b/RTCP/Run/src/IdentityControl.sh @@ -0,0 +1,74 @@ +#!/bin/bash +COMMAND=$1 + +type getpid >&/dev/null || function getpid() { + PID=DOWN + + if [ -f "$PIDFILE" ] + then + PID=`cat -- "$PIDFILE"` + fi + + if [ ! -e /proc/$PID ] + then + PID=DOWN + fi +} + +function isstarted() { + [ "DOWN" != "$PID" ] +} + +type setpid >&/dev/null || function setpid() { + PID=$1 + + if [ "x$PID" == "x" ] + then + exit + fi + + echo "$PID" > "$PIDFILE" +} + +type delpid >&/dev/null || function delpid() { + rm -f -- "$PIDFILE" +} + +function procname() { + # the basename of this script, without its extension + basename -- "$0" | sed 's/[.][^.]*$//g' +} + +type start >&/dev/null || function start() { + tail -F / >&/dev/null & +} + +type stop >&/dev/null || function stop() { + kill -15 "$PID" +} + +PIDFILE="/tmp/`procname`-$USER.pid" +getpid + +case $COMMAND in + start) if ! isstarted + then + start && setpid $! + fi + ;; + + stop) if isstarted + then + stop && delpid + fi + ;; + + status) + SWLEVEL=$2 + printf "%d : %-25s %s\n" "$SWLEVEL" "`procname`" "$PID" + ;; + + *) echo "usage: $0 {start|stop|status}" + ;; +esac + diff --git a/RTCP/Run/src/locations.sh b/RTCP/Run/src/locations.sh new file mode 100644 index 0000000000000000000000000000000000000000..c991384ebceae5a184e5e9d062055fe1214fbc76 --- /dev/null +++ b/RTCP/Run/src/locations.sh @@ -0,0 +1,24 @@ +function isproduction() { + [ "lofarsys" == "$USER" ] +} + +TIMESTAMP=`date +%Y-%m-%d_%H%M%S` + +if isproduction +then + ISPRODUCTION=1 + + CNPROC=$HOME/production/lofar/bgp_cn/bin/CN_Processing + IONPROC=$HOME/production/lofar/bgp_ion/bin/ION_Processing + + LOGDIR=$HOME/log/L$TIMESTAMP + LOGSYMLINK=$HOME/log/latest +else + ISPRODUCTION=0 + + CNPROC=$HOME/projects/LOFAR/installed/bgp_cn/bin/CN_Processing + IONPROC=$HOME/projects/LOFAR/installed/bgp_ion/bin/ION_Processing + + LOGDIR=$HOME/projects/LOFAR/L$TIMESTAMP + LOGSYMLINK=$HOME/projects/LOFAR/log +fi