Skip to content
Snippets Groups Projects
Commit 38861cc9 authored by Jorrit Schaap's avatar Jorrit Schaap
Browse files

COB-55: added cobalt2 qpid routes

parent 9d1872b9
No related branches found
No related tags found
1 merge request!6Import cobalt2 into lofar4
...@@ -5568,6 +5568,7 @@ SubSystems/Online_Cobalt/validation/intercluster/connectivity/cobalt2locus.test ...@@ -5568,6 +5568,7 @@ SubSystems/Online_Cobalt/validation/intercluster/connectivity/cobalt2locus.test
SubSystems/Online_Cobalt/validation/intercluster/ethernet/iperf-cobalt2locus.bw-req -text SubSystems/Online_Cobalt/validation/intercluster/ethernet/iperf-cobalt2locus.bw-req -text
SubSystems/Online_Cobalt/validation/intercluster/ethernet/iperf-cobalt2locus.test -text SubSystems/Online_Cobalt/validation/intercluster/ethernet/iperf-cobalt2locus.test -text
SubSystems/Online_Cobalt/validation/intercluster/funcs.sh eol=lf SubSystems/Online_Cobalt/validation/intercluster/funcs.sh eol=lf
SubSystems/Online_Cobalt/validation/intercluster/infiniband/cobalt2-to-cep4.test -text
SubSystems/Online_Cobalt/validation/system/gpu/basic-gpu.test eol=lf SubSystems/Online_Cobalt/validation/system/gpu/basic-gpu.test eol=lf
SubSystems/Online_Cobalt/validation/system/gpu/persistence-mode.test eol=lf SubSystems/Online_Cobalt/validation/system/gpu/persistence-mode.test eol=lf
SubSystems/Online_Cobalt/validation/system/hardware/sata-ahci.test eol=lf SubSystems/Online_Cobalt/validation/system/hardware/sata-ahci.test eol=lf
......
#!/bin/bash
source $(dirname $0)/../../validation_utils.sh
check_running_on_cobalt2
#find latest osu-micro-benchmarks dir
MPITESTSDIR=$(readlink -f -n "$(dirname $(which mpirun))/../tests")
OSUMBDIR=$(find $MPITESTSDIR -type d -name 'osu-micro-benchmarks*' | sort | tail -n 1)
echo "Latest mpi osu-micro-benchmarks dir: $OSUMBDIR"
REQUIRED_BW=90 #in Gbps
EXIT_CODE=0
# check mpi bandwith over infiniband between each pair of cobalt nodes (both ways, and even on the same source/dest)
for i in {201..213} ; do
for j in {1..2} ; do
COBALT_SOURCE_IF_NAME=$(printf "cbt%03d-IPoIB%02d.cobalt.lofar" "$i" "$j")
COBALT_SOURCE_IF_IP=$(host $COBALT_SOURCE_IF_NAME | awk '{ print $4}')
for k in {1..50} ; do
CEP4_DEST_IF_NAME=$(printf "cpu%02d.cep4.infiniband.lofar" $k)
CEP4_DEST_IF_IP=$(host $CEP4_DEST_IF_NAME | awk '{ print $4}')
echo "mpirun -x UCX_SHM_DEVICES="" -H $COBALT_SOURCE_IF_IP,$CEP4_DEST_IF_IP $OSUMBDIR/osu_bw"
# measure throughput for 4MB blocks, results are in MBps
RESULT=$(mpirun -x UCX_SHM_DEVICES="" -H $COBALT_SOURCE_IF_IP,$CEP4_DEST_IF_IP $OSUMBDIR/osu_bw)
if [ $? -ne 0 ] ; then
echo "Could not run infiniband bandwith test between $COBALT_SOURCE_IF_NAME ($COBALT_SOURCE_IF_IP) and $CEP4_DEST_IF_NAME ($CEP4_DEST_IF_IP): $RESULT"
EXIT_CODE=1 ;
else
THROUGHPUT=`echo "$RESULT" | grep "^4194304s*" | awk '{ print $2 }'`
#convert to Gbps
THROUGHPUT=`echo "$THROUGHPUT*8/1000" | bc`
echo "infiniband bandwith for 4MB block between $COBALT_SOURCE_IF_NAME ($COBALT_SOURCE_IF_IP) and $CEP4_DEST_IF_NAME ($CEP4_DEST_IF_IP) is $THROUGHPUT Gbps"
# check if throughput > 90Gbps
if [ $THROUGHPUT -lt "$REQUIRED_BW" ] ; then EXIT_CODE=1 ; fi
fi
done
done
done
if [ $EXIT_CODE -ne 0 ] ; then
echo "ERROR: not all pairs of cobalt nodes reach required $REQUIRED_BW Gbps over infiniband"
fi
exit $EXIT_CODE
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment