diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 63fca94e2abe454c0658e415e6785aa33f53267a..cfcf44c35227cae7e4a7525b7b5444be7988b24a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -59,9 +59,41 @@ variables: # .mac_install_package: &mac_install_package - *set_version - - ssh $LOFAR_USER@$LOFAR_TARGET "cd ~/incoming; wget --backups=1 --user=$MACINSTALL_USER --password=$MACINSTALL_PASSWORD https://git.astron.nl/api/v4/projects/6/packages/generic/$PACKAGE/$VERSION/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar" + - ssh $LOFAR_USER@$LOFAR_TARGET "mkdir -p ~/incoming && cd ~/incoming && wget --backups=1 --user=$MACINSTALL_USER --password=$MACINSTALL_PASSWORD https://git.astron.nl/api/v4/projects/6/packages/generic/$PACKAGE/$VERSION/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar" - ssh $LOFAR_USER@$LOFAR_TARGET "MAC_install -f ${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar -v ${CI_COMMIT_REF_NAME}" +# This script deploys COBALT's GPUProc on a host. It needs LOFAR_TARGET and LOFAR_USER to be set. +.cobalt_gpuproc_install: &cobalt_gpuproc_install + - *prepare_ssh + + # deploy tarball compiled in build job + - ssh $LOFAR_USER@$LOFAR_TARGET "mkdir -p ~/incoming && cd ~/incoming && wget --backups=1 --user=$MACINSTALL_USER --password=$MACINSTALL_PASSWORD https://git.astron.nl/api/v4/projects/6/packages/generic/$PACKAGE/$VERSION/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar" + - ssh $LOFAR_USER@$LOFAR_TARGET "cd / && tar --no-overwrite-dir -zxvmf ~/incoming/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar" + + # symlink directories shared across installations + - ssh $LOFAR_USER@$LOFAR_TARGET "cd /opt/lofar-versions/${CI_COMMIT_REF_NAME} && [ -d var ] && rmdir var" # some tarballs linger an empty var dir. remove. + - ssh $LOFAR_USER@$LOFAR_TARGET "cd /opt/lofar-versions/${CI_COMMIT_REF_NAME} && ln -sfT /localdata/lofar-userdata/var var" + - ssh $LOFAR_USER@$LOFAR_TARGET "cd /opt/lofar-versions/${CI_COMMIT_REF_NAME} && ln -sfT /localdata/lofar-userdata/parset-overrides etc/parset-additions.d/override" + - ssh $LOFAR_USER@$LOFAR_TARGET "cd /opt/lofar-versions/${CI_COMMIT_REF_NAME} && ln -sfT /opt/shared/lofar-userdata nfs" + + # elevate privileges for our real-time programs + - ssh $LOFAR_USER@$LOFAR_TARGET "sudo /sbin/setcap cap_net_raw,cap_sys_nice,cap_ipc_lock=ep /opt/lofar-versions/${CI_COMMIT_REF_NAME}/bin/rtcp" + - ssh $LOFAR_USER@$LOFAR_TARGET "sudo /sbin/setcap cap_sys_nice,cap_ipc_lock=ep /opt/lofar-versions/${CI_COMMIT_REF_NAME}/bin/outputProc" + +# This script deploys COBALT's OutputProc on a host. It needs LOFAR_TARGET and LOFAR_USER to be set. +.cobalt_outputproc_install: &cobalt_outputproc_install + - *prepare_ssh + + # deploy tarball compiled in build job + - ssh $LOFAR_USER@$LOFAR_TARGET "cd ~/incoming; wget --backups=1 --user=$MACINSTALL_USER --password=$MACINSTALL_PASSWORD https://git.astron.nl/api/v4/projects/6/packages/generic/$PACKAGE/$VERSION/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar" + - ssh $LOFAR_USER@$LOFAR_TARGET "cd / && tar --no-overwrite-dir -zxvmf ~/incoming/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar" + + # create var directories + - ssh $LOFAR_USER@$LOFAR_TARGET "mkdir -p /opt/outputproc-versions/{$CI_COMMIT_REF_NAME}/var/{log,run}" + + # elevate privileges for our real-time programs + - ssh $LOFAR_USER@$LOFAR_TARGET "sudo /sbin/setcap cap_sys_nice,cap_ipc_lock=ep /opt/outputproc-versions/{$CI_COMMIT_REF_NAME}/bin/outputProc" + # # PREPARE BASE STAGE # @@ -229,6 +261,65 @@ build_ST_MAC: paths: - build/gnucxx11_opt +build_COBALT_GPUProc: + # COBALT is compiled directly on the host system, to link against the actually installed + # libraries. The 'cbm206' runner uses an ssh executor to achieve this. + # + # This also avoids having to create a huge Docker image. + stage: build + tags: + - gpus + variables: + PACKAGE: "Online_Cobalt" + script: + - echo "Building ${PACKAGE}..." + - mkdir -p build/gnucxx11_2018_optarch + - mkdir -p build/gnucxx11_2018_optarch/install + - cd build/gnucxx11_2018_optarch + - cmake -DBUILD_PACKAGES="${PACKAGE}" -DUSE_CUDA=ON -DUSE_MPI=ON -DUSE_OPENMP=ON -DCMAKE_INSTALL_PREFIX=/opt/lofar-versions/${CI_COMMIT_REF_NAME} ../.. + - make -j $(nproc) + - make DESTDIR=install install + - cd install # COBALT tarballs start at /, see RTCP/Cobalt/GPUProc/src/scripts/Cobalt_install.sh + - tar --ignore-failed-read --exclude=include --exclude="*.ztar" -czf ${PACKAGE}_${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHORT_SHA}.ztar * + - *set_version + - 'curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file ${PACKAGE}_${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHORT_SHA}.ztar "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/generic/${PACKAGE}/${VERSION}/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar"' + interruptible: true + artifacts: + expire_in: 6 hours + paths: + - build/gnucxx11_2018_optarch + +build_COBALT_OutputProc: + # COBALT is compiled directly on the host system, to link against the actually installed + # libraries. The 'cbm206' runner uses an ssh executor to achieve this. + # + # This also avoids having to create a huge Docker image. + stage: build + tags: + - cep4 + variables: + PACKAGE: "Online_OutputProc" + # These are built and installed through head.cep4.control.lofar:~mol/cep4/outputproc_deps + CASACORE_VERSION: "v3.3.0" + DAL_VERSION: "v3.3.2" + script: + - mkdir -p build/gnucxx11_optarch + - mkdir -p build/gnucxx11_optarch/install + - cd build/gnucxx11_optarch + - cmake -DBUILD_PACKAGES=${PACKAGE} -DBUILD_TESTING=OFF -DCMAKE_INSTALL_PREFIX=/opt/outputproc-versions/${CI_COMMIT_REF_NAME} -DCASACORE_ROOT_DIR=/opt/casacore-${CASACORE_VERSION}/ -DDAL_ROOT_DIR=/opt/DAL-${DAL_VERSION} -DUSE_OPENMP=True ../.. + + - make -j $(nproc) + - make DESTDIR=install install + - cd install # COBALT tarballs start at / + - tar --ignore-failed-read --exclude=include --exclude="*.ztar" -czf ${PACKAGE}_${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHORT_SHA}.ztar * + - *set_version + - 'curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file ${PACKAGE}_${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHORT_SHA}.ztar "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/generic/${PACKAGE}/${VERSION}/${PACKAGE}_${CI_COMMIT_REF_NAME}_0.x86_64.ztar"' + interruptible: true + artifacts: + expire_in: 6 hours + paths: + - build/gnucxx11_optarch + build_lofar_pulp_docker_image: stage: build script: @@ -307,6 +398,30 @@ unit_test_SCU: rules: - if: '$FASTLANE == "false"' +unit_test_COBALT_GPUProc: + stage: unit_test + tags: + - gpus + script: + - PACKAGE=Online_Cobalt + - echo "Testing $PACKAGE..." + - cd build/gnucxx11_2018_optarch + - SKIP_PYTHON_COVERAGE=true SKIP_INTEGRATION_TESTS=true SKIP_REGRESSION_TESTS=true ctest + after_script: + # cleanup lingering processes that block ports for subsequent runs + - killall outputProc || true + interruptible: true + allow_failure: true + needs: + - build_COBALT_GPUProc + artifacts: + name: unit-test-report + when: always + paths: + - build/gnucxx11_2018_optarch/Testing/Temporary/LastTest.log + rules: + - if: '$FASTLANE == "false"' + unit_test_LTAIngest: stage: unit_test image: ci_lta:$CI_COMMIT_SHORT_SHA @@ -651,6 +766,27 @@ deploy-SCU-test: allow_failure: true when: manual +deploy-COBALT_GPUProc-test: + stage: deploy-test + tags: + - cobalt + variables: + LOFAR_USER: "lofarbuild" + PACKAGE: "Online_Cobalt" + ACTIVATE: "1" + script: + # install & activate on test node + - LOFAR_TARGET="cbm206.control.lofar" + - *cobalt_gpuproc_install + - '[ "${ACTIVATE}" -eq "1" ] && ssh $LOFAR_USER@$LOFAR_TARGET "ln -sfT /opt/lofar-versions/${CI_COMMIT_REF_NAME} /opt/lofar-versions/current"' + environment: + name: test + needs: + - job: build_COBALT_GPUProc + artifacts: false + allow_failure: true + when: manual + # # deploy-prod stage # @@ -715,6 +851,94 @@ deploy-CCU_MAC-prod: only: - tags +deploy-COBALT_GPUProc-prod: + # A COBALT install involves the following paths: + # /opt/lofar-versions/RELEASE Where a specific release resides + # /opt/lofar-versions/current -> RELEASE Symlink to the current/active release, updated by lofarbuild + # /opt/lofar -> /opt/lofar-versions/current Symlink owned by root, which we cannot modify, as we would need write priviledges in /opt + # The active COBALT version used by the system + stage: deploy-prod + tags: + - cobalt + variables: + LOFAR_USER: "lofarbuild" + PACKAGE: "Online_Cobalt" + ACTIVATE: "1" # set to 0 to install the software, but not make it the current version + script: + # install on all production machines + - LOFAR_TARGET="cbm201.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm202.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm203.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm204.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm205.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm207.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm208.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm209.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm210.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm211.control.lofar" + - *cobalt_gpuproc_install + - LOFAR_TARGET="cbm212.control.lofar" + - *cobalt_gpuproc_install + # install on head node + - LOFAR_TARGET="cbm2head.control.lofar" + - *cobalt_gpuproc_install + # activate on head node, if requested + - '[ "${ACTIVATE}" -eq "1" ] && ssh $LOFAR_USER@$LOFAR_TARGET "ln -sfT /opt/lofar-versions/${CI_COMMIT_REF_NAME} /opt/lofar-versions/current"' + environment: + name: production + needs: + - job: build_COBALT_GPUProc + artifacts: false + allow_failure: true + when: manual + +deploy-COBALT_OutputProc-prod: + # A COBALT install involves the following paths: + # /opt/outputproc-versions/RELEASE Where a specific release resides + # /opt/outputproc-versions/current -> RELEASE Symlink to the current/active release, updated by lofarbuild + # /opt/outputproc-latest -> /opt/outputproc-versions/current Symlink owned by root, which we cannot modify, as we would need write priviledges in /opt + stage: deploy-prod + tags: + - cep4 + variables: + LOFAR_USER: "lofarbuild" + PACKAGE: "Online_OutputProc" + ACTIVATE: "1" # set to 0 to install the software, but not make it the current version + script: + # install on all production machines + - LOFAR_TARGET="cpu01.cep4.control.lofar" + - *cobalt_outputproc_install + - LOFAR_TARGET="cpu02.cep4.control.lofar" + - *cobalt_outputproc_install + - LOFAR_TARGET="cpu03.cep4.control.lofar" + - *cobalt_outputproc_install + - LOFAR_TARGET="cpu04.cep4.control.lofar" + - *cobalt_outputproc_install + - LOFAR_TARGET="cpu05.cep4.control.lofar" + - *cobalt_outputproc_install + # activate on all production machines, if requested + - '[ "${ACTIVATE}" -eq "1" ] && ssh $LOFAR_USER@cpu01.cep4.control.lofar "ln -sfT /opt/outputproc-versions/${CI_COMMIT_REF_NAME} /opt/outputproc-versions/current"' + - '[ "${ACTIVATE}" -eq "1" ] && ssh $LOFAR_USER@cpu02.cep4.control.lofar "ln -sfT /opt/outputproc-versions/${CI_COMMIT_REF_NAME} /opt/outputproc-versions/current"' + - '[ "${ACTIVATE}" -eq "1" ] && ssh $LOFAR_USER@cpu03.cep4.control.lofar "ln -sfT /opt/outputproc-versions/${CI_COMMIT_REF_NAME} /opt/outputproc-versions/current"' + - '[ "${ACTIVATE}" -eq "1" ] && ssh $LOFAR_USER@cpu04.cep4.control.lofar "ln -sfT /opt/outputproc-versions/${CI_COMMIT_REF_NAME} /opt/outputproc-versions/current"' + - '[ "${ACTIVATE}" -eq "1" ] && ssh $LOFAR_USER@cpu05.cep4.control.lofar "ln -sfT /opt/outputproc-versions/${CI_COMMIT_REF_NAME} /opt/outputproc-versions/current"' + environment: + name: production + needs: + - job: build_COBALT_OutputProc + artifacts: false + allow_failure: true + when: manual + deploy-PULP-prod: stage: deploy-prod before_script: diff --git a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt index 40ad6e3304fb60de4662b972b41f2cad7c482a15..56ff7a219d123db6f4761cd4c3868ef43f662012 100644 --- a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt @@ -107,8 +107,6 @@ lofar_add_bin_scripts( lofar_add_sbin_scripts( scripts/bw_monitor.sh scripts/cobaltswitch - scripts/Cobalt_install.sh - scripts/CobaltVersions.sh scripts/setloglevels.sh) # install logprop files diff --git a/RTCP/Cobalt/GPUProc/src/scripts/CobaltVersions.sh b/RTCP/Cobalt/GPUProc/src/scripts/CobaltVersions.sh deleted file mode 100755 index 3e24f5fe4f5927a21a81eb6bbc5670963bd0bbb1..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/GPUProc/src/scripts/CobaltVersions.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -GET_VERSION=0 -SET_VERSION="" -LIST_VERSIONS=0 - -function error() { - echo "$@" >&2 - exit 1 -} - -function usage() { - echo "$0 [-l] [-g] [-s VERSION]" - echo "" - echo " -l List available Cobalt versions" - echo " -g Get active Cobalt version" - echo " -s VERSION Set active Cobalt version" - exit 1 -} - -while getopts "hgls:" opt; do - case $opt in - h) usage - ;; - g) GET_VERSION=1 - ;; - l) LIST_VERSIONS=1 - ;; - s) SET_VERSION="$OPTARG" - ;; - \?) error "Invalid option: -$OPTARG" - ;; - :) error "Option requires an argument: -$OPTARG" - ;; - esac -done -[ $OPTIND -eq 1 ] && usage - -COBALT_VERSIONS_DIR=/opt/lofar-versions - -[ -d "$COBALT_VERSIONS_DIR" ] || error "Directory not found: $COBALT_VERSIONS_DIR" - -# List Cobalt versions -if [ $LIST_VERSIONS -eq 1 ]; then - ls -1 $COBALT_VERSIONS_DIR -fi - -CURRENT_VERSION=`readlink -f /opt/lofar | awk -F/ '{ print $NF; }'` - -# Get current Cobalt version -if [ $GET_VERSION -eq 1 ]; then - echo "$CURRENT_VERSION" -fi - -# Set current Cobalt version -if [ -n "$SET_VERSION" ]; then - echo "Switching Cobalt to $SET_VERSION" - - function set_version { - VERSION="$1" - - # Don't create loops - [ $VERSION == current ] && return 1 - - # Don't activate non-existing releases - [ -d "${COBALT_VERSIONS_DIR}/${VERSION}" ] || return 1 - - # Move symlink, activating selected version - clush -g all -S ln -sfT "${COBALT_VERSIONS_DIR}/${VERSION}" "${COBALT_VERSIONS_DIR}/current" || return 1 - - return 0 - } - - if ! set_version "$SET_VERSION"; then - echo "------------------------------------------------------------------------------" - echo "ERROR Switching to $SET_VERSION. Switching back to $CURRENT_VERSION" - echo "------------------------------------------------------------------------------" - - set_version "$CURRENT_VERSION" - fi -fi - diff --git a/RTCP/Cobalt/GPUProc/src/scripts/Cobalt_install.sh b/RTCP/Cobalt/GPUProc/src/scripts/Cobalt_install.sh deleted file mode 100755 index e8f313bb30a24e730c023ce7f826d13e6551fa11..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/GPUProc/src/scripts/Cobalt_install.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh -# Install {RELEASE_NAME}.ztar from the NEXUS onto cbt nodes. -# -# Note: the 'current' symlink still has to be repointed on all cbt nodes to use the new release by default! -# -# $Id$ - -# location of the file in the NEXUS -if [ "${RELEASE_NAME}" = "" ]; then - echo "ERROR: RELEASE_NAME is not set or empty. Needed to download archive to install" - exit 1 -fi - -FILENAME=Online_Cobalt_${RELEASE_NAME}.ztar - -if [[ "${RELEASE_NAME}" == *"Release"* ]]; then - NEXUS_URL=https://support.astron.nl/nexus/content/repositories/releases/nl/astron/lofar/${RELEASE_NAME}/${FILENAME} -else - NEXUS_URL=https://support.astron.nl/nexus/content/repositories/branches/nl/astron/lofar/${RELEASE_NAME}/${FILENAME} -fi - -# Download archive from NEXUS. -N: clobber existing files -wget -N --tries=3 --no-check-certificate --user=macinstall --password=macinstall "${NEXUS_URL}" -O /tmp/${FILENAME} || exit 1 - -# The full pathnames are in the tar file, so unpack from root dir. -# -m: don't warn on timestamping /localhome -cd / && tar --no-overwrite-dir -zxvmf /tmp/${FILENAME} || exit 1 - -# Remove tarball -rm -f /tmp/${FILENAME} - -# -# Post-install -# - -cd /opt/lofar-versions/${RELEASE_NAME} || exit 1 - -# Sym link installed var/ to common location. Some tarballs linger an empty var dir, so remove that first -[ -d var ] && rmdir var -ln -sfT /localdata/lofar-userdata/var var - -# Sym link installed etc/parset-additions.d/override to common location. -ln -sfT /localdata/lofar-userdata/parset-overrides etc/parset-additions.d/override - -# Sym link installed var/ to NFS location. -ln -sfT /opt/shared/lofar-userdata nfs - - -# Set capabilities so our soft real-time programs can elevate prios. -# -# cap_sys_nice: allow real-time priority for threads -# cap_ipc_lock: allow app to lock in memory (prevent swap) -# cap_net_raw: allow binding sockets to NICs -OUTPUTPROC_CAPABILITIES='cap_sys_nice,cap_ipc_lock' -sudo /sbin/setcap "${OUTPUTPROC_CAPABILITIES}"=ep bin/outputProc || true -RTCP_CAPABILITIES='cap_net_raw,cap_sys_nice,cap_ipc_lock' -sudo /sbin/setcap "${RTCP_CAPABILITIES}"=ep bin/rtcp -