L2SS-530: Document device server step-by-step debug

e0e5eb7c · Corné Lukken · 069d0954 · e0e5eb7c · e0e5eb7c · e0e5eb7c
Commit e0e5eb7c authored 11 months ago by Corné Lukken
--- a/CDB/stations/cs032.json
+++ b/CDB/stations/cs032.json
--- a/CDB/stations/rs307.json
+++ b/CDB/stations/rs307.json
--- a/README.md
+++ b/README.md
@@ -162,6 +162,7 @@ Next change the version in the following places:
 # Release Notes
+* 0.40.2 Add `ds_debug_pycharm` command to debug device servers during integration tests
 * 0.40.1 Deploy SDPTR for HBA only on RS stations
 * 0.40.0 Added CS032/RS307, and scripts to generate their CDB file and caltables
 * 0.39.12 SDPTR v1.4.0

--- a/docker/Makefile
+++ b/docker/Makefile
 TAG ?= latest
 SHELL = /bin/bash
-TANGO_STATION_CONTROL ?= 1
+DEBUG_BUILD ?= 0
 LOCAL_DOCKER_REGISTRY ?= $(shell shyaml get-value registry.astron.url < "../infra/env/common.yaml")
 TANGO_ITANGO_VERSION ?= $(shell shyaml get-value tango.itango.version < "../infra/env/common.yaml")
 TANGO_DSCONFIG_VERSION ?= $(shell shyaml get-value tango.dsconfig.version < "../infra/env/common.yaml")
@@ -19,6 +19,7 @@ context: ## Move and build the necessary files to create minimal docker context
    fi; \
 	cp ../tangostationcontrol/dist/*.whl lofar-device-base/tmp/; \
 	cp ../tangostationcontrol/requirements.txt lofar-device-base/tmp/; \
+	cp ../tangostationcontrol/debug-requirements.txt lofar-device-base/tmp/; \
 jupyter-lab ec-sim grafana landing-page:
 	docker build -f $@/Dockerfile -t $(LOCAL_DOCKER_REGISTRY)/$@:$(TAG) $@
@@ -31,7 +32,7 @@ dsconfig:
 	docker build --build-arg SOURCE_IMAGE=$(LOCAL_DOCKER_REGISTRY)/tango-dsconfig:$(TANGO_DSCONFIG_VERSION) -f dsconfig/Dockerfile -t $(LOCAL_DOCKER_REGISTRY)/dsconfig:$(TAG) dsconfig
 lofar-device-base: context
-	docker build --build-arg TANGO_STATION_CONTROL=$(TANGO_STATION_CONTROL) --build-arg SOURCE_IMAGE=$(LOCAL_DOCKER_REGISTRY)/tango-itango:$(TANGO_ITANGO_VERSION) -f lofar-device-base/Dockerfile -t $(LOCAL_DOCKER_REGISTRY)/lofar-device-base:$(TAG) lofar-device-base
+	docker build --build-arg DEBUG_BUILD=$(DEBUG_BUILD) --build-arg SOURCE_IMAGE=$(LOCAL_DOCKER_REGISTRY)/tango-itango:$(TANGO_ITANGO_VERSION) -f lofar-device-base/Dockerfile -t $(LOCAL_DOCKER_REGISTRY)/lofar-device-base:$(TAG) lofar-device-base
 ci-runner:
 	docker build --build-arg SOURCE_IMAGE=$(LOCAL_DOCKER_REGISTRY)/tango-itango:$(TANGO_ITANGO_VERSION) -f ci-runner/Dockerfile -t $(LOCAL_DOCKER_REGISTRY)/ci-build-runner:$(TAG) ci-runner

--- a/docker/lofar-device-base/Dockerfile
+++ b/docker/lofar-device-base/Dockerfile
 ARG SOURCE_IMAGE
 FROM ${SOURCE_IMAGE}
-ARG TANGO_STATION_CONTROL
-ENV TANGO_STATION_CONTROL $TANGO_STATION_CONTROL
+ARG DEBUG_BUILD
+ENV DEBUG_BUILD=$DEBUG_BUILD
 RUN --mount=type=cache,target=/var/cache/apt \
    sudo apt-get update
@@ -59,21 +60,17 @@ RUN mkdir /tmp/python-casacore && \
 # Install tangostationcontrol and its dependencies
 COPY tmp/requirements.txt /tangostationcontrol-requirements.txt
+COPY tmp/debug-requirements.txt /tangostationcontrol-debug-requirements.txt
-RUN echo "TANGO_STATION_CONTROL: ${TANGO_STATION_CONTROL}"
+RUN echo "DEBUG_BUILD: ${DEBUG_BUILD}"
-RUN if [ -z $TANGO_STATION_CONTROL ]; then \
+# Install the debug requirements only when DEBUG_BUILD is exactly "1". A bare
+# `[ $DEBUG_BUILD ]` test is true for every non-empty value, including "0".
+RUN if [ "${DEBUG_BUILD:-0}" = "1" ]; then \
-    echo "Installing requirements only"; \
+    echo "Installing debug requirements"; \
-    sudo pip3 install -r /tangostationcontrol-requirements.txt --extra-index-url=https://git.astron.nl/api/v4/projects/395/packages/pypi/simple; \
+    sudo pip3 install -r /tangostationcontrol-debug-requirements.txt --extra-index-url=https://git.astron.nl/api/v4/projects/395/packages/pypi/simple; \
  fi
-# This COPY typically triggers a new build instead of pulling from cache.
-# We thus perform it after installing the requirements.txt in case
-# $TANGO_STATION_CONTROL is not defined.
 COPY tmp/*.whl /
-RUN if [ $TANGO_STATION_CONTROL ]; then \
+RUN echo "Installing prebuild Station Control wheel"; \
-    echo "Installing prebuild Station Control wheel"; \
+    sudo pip3 install /*.whl --extra-index-url=https://git.astron.nl/api/v4/projects/395/packages/pypi/simple;
-    sudo pip3 install /*.whl --extra-index-url=https://git.astron.nl/api/v4/projects/395/packages/pypi/simple; \
-  fi
 # install and use ephemerides and geodetic ("measures") tables for casacore.
 # we install a _stub_ since the tables need to be deployed explicitly from within the software.

--- a/infra/README.md
+++ b/infra/README.md
@@ -78,16 +78,16 @@ This container can be identified programmatically as follows. Use the additional
 ``-q`` parameter to obtain just the container ID:
 ```
-$ docker ps --filter 'name=client.station.nomad.nomad-cluster.jumppad.dev'
+$ docker ps --filter 'name=client.station.nomad.nomad-cluster.local.jmpd.in'
 CONTAINER ID   IMAGE                     COMMAND                  CREATED         STATUS         PORTS     NAMES
-90f6f253fb58   shipyardrun/nomad:1.6.1   "/usr/bin/supervisor…"   3 minutes ago   Up 3 minutes             fee02e87.client.station.nomad.nomad-cluster.jumppad.dev
+90f6f253fb58   shipyardrun/nomad:1.6.1   "/usr/bin/supervisor…"   3 minutes ago   Up 3 minutes             8592d129.client.station.nomad.nomad-cluster.local.jmpd.in
 $
 ```
 You can login interactively to this container using ``sh``:
 ```
-$ CLIENT_CONTAINER_ID=$(docker ps -q --filter 'name=client.station.nomad.nomad-cluster.jumppad.dev')
+$ CLIENT_CONTAINER_ID=$(docker ps -q --filter 'name=client.station.nomad.nomad-cluster.local.jmpd.in')
-$ docker exec -it ${CLIENT_CONTAINER_ID} sh
+$ docker exec -it ${CLIENT_CONTAINER_ID} bash
 #
 ```
@@ -120,6 +120,22 @@ logger=settings t=2024-02-13T13:14:47.767713444Z level=info msg="Config loaded f
 This allows you to use the regular docker commands like ``attach``, ``logs``, and ``restart``. Note that any interactive use also requires ``-it`` on the outer command, as in ``docker exec -it "${CLIENT_CONTAINER_ID}"``.
+To debug a device server you could use `./sbin/run_integration_test.sh --interactive` combined with:
+```shell
+docker exec -it "${CLIENT_CONTAINER_ID}" docker attach device-xxx-xxx
+```
+Followed by manually starting the integration tests: `tox -e integration`.
+If your environment is already set up and you do not wish to recreate it, you could start the integration test
+container manually:
+```shell
+docker run --rm  -e "TANGO_HOST=$TANGO_HOST" -e "DEBUG_HOST=${local_ip}" --network="station" --dns="$DNS" -it \
+  -v "$LOFAR20_DIR":/opt/lofar/tango:rw -w="/opt/lofar/tango/tangostationcontrol" -e "TEST_MODULE=${test_module}" \
+  "git.astron.nl:5000/lofar2.0/tango/ci-build-runner:$TAG" tox -e integration
+```
 ## Patching a device server live
 Sometimes it is handy to modify the tangostationcontrol source code for a running device server. To do so:
@@ -147,6 +163,20 @@ b75f633c837e   shipyardrun/nomad:1.6.1   "/usr/bin/supervisor…"   2 minutes ag
 $
 ```
+## Using jumppad: live changes and adaptations
+When using `run_integration_test.sh`, the entire setup can be performed and left running for further adaptation. This
+allows you, for instance, to debug device servers step by step during an integration test!
+Steps:
+1. Setup the environment: `./sbin/run_integration_test.sh --skip-tests`
+2. Identify the test volume: `docker volume ls | grep "test_"`, use this in the run command `export TEST_VOLUME=xxx`
+3. Change the `/infra/dev/` files as required
+4. Run `DOCKER_HOST="unix:///var/run/docker.sock" ./.bin/jumppad up --var="host_volume=${TEST_VOLUME}" --var="lofar20_dir=$(pwd)" --var="image_tag=latest" infra/dev/all.hcl`
+For example, one could change the entrypoint in `device-server.nomad` for a specific device such that you can start the
+device manually and debug it during a run of the integration tests.
 ## Using nomad: Manage jobs on the client
 The server allows you to manage the jobs on the client through Nomad. Each *job* consists of one or more *tasks* that are collectively managed. The tasks are (typically) the docker containers. A job is run inside an *allocation*, which represents an execution instance of a job.

--- a/infra/dev/all.hcl
+++ b/infra/dev/all.hcl
@@ -8,6 +8,10 @@ variable "image_tag" {
  default = "latest"
 }
+variable "debug_host" {
+  default = ""
+}
 module "nomad" {
  source    = "./nomad"
  variables = {
@@ -22,6 +26,7 @@ module "tango" {
    nomad_cluster = module.nomad.output.nomad_cluster
    lofar20_dir = variable.lofar20_dir
    image_tag = variable.image_tag
+    debug_host = variable.debug_host
  }
 }

--- a/infra/dev/tango.hcl
+++ b/infra/dev/tango.hcl
@@ -8,6 +8,10 @@ variable "image_tag" {
  default = "latest"
 }
+variable "debug_host" {
+  default = ""
+}
 module "nomad" {
  source    = "./nomad"
  variables = {
@@ -22,5 +26,6 @@ module "tango" {
    nomad_cluster = module.nomad.output.nomad_cluster
    lofar20_dir = variable.lofar20_dir
    image_tag = variable.image_tag
+    debug_host = variable.debug_host
  }
 }
--- a/infra/dev/tango/variables.hcl
+++ b/infra/dev/tango/variables.hcl
@@ -9,3 +9,7 @@ variable "image_tag" {
 variable "lofar20_dir" {
  default = ""
 }
+variable "debug_host" {
+  default = ""
+}
--- a/infra/jobs/station/Makefile
+++ b/infra/jobs/station/Makefile
@@ -12,4 +12,4 @@ render: $(JOBS)
 # parse .levant.nomad files into .nomad files for jumppad
 %.nomad: %.levant.nomad
-	levant render $(addprefix -var-file=, $(realpath $(ENV))) -var image_tag="$(TAG)" -var station="$(STATION)" -out=$(realpath $(DIR_OUT))/$@ $(realpath $(DIR_SRC))/$<
+	levant render $(addprefix -var-file=, $(realpath $(ENV))) -var debug_host="$(LOCAL_IP)" -var image_tag="$(TAG)" -var station="$(STATION)" -out=$(realpath $(DIR_OUT))/$@ $(realpath $(DIR_SRC))/$<
--- a/infra/jobs/station/device-server.levant.nomad
+++ b/infra/jobs/station/device-server.levant.nomad
@@ -116,6 +116,9 @@ job "device-servers" {
                TANGO_ZMQ_HEARTBEAT_PORT = "4506"
                MINIO_ROOT_USER          = "minioadmin"
                MINIO_ROOT_PASSWORD      = "minioadmin"
+[[ if eq $.station "dev" ]]
+                DEBUG_HOST               = "[[ $.debug_host ]]"
+[[ end ]]
            }
            resources {

--- a/sbin/run_integration_test.sh
+++ b/sbin/run_integration_test.sh
@@ -5,6 +5,8 @@
 #
 export DNS=192.168.76.1
+# Determine the local source address used to reach the outside world.
+# Select the token following the "src" keyword instead of a fixed column: the
+# field position shifts when the route output has no "via <gateway>" part.
+LOCAL_IP=$(ip route get 195.169.155.206 | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
+export LOCAL_IP
 export do_cleanup=true
 # Usage function explains how parameters are parsed
@@ -213,10 +215,9 @@ function integration_test {
  if [ -n "${interactive+x}" ]; then
    echo "Preparing interactive session ..."
-    local_ip=$(ip route get 195.169.155.206 | head -1 | cut -d' ' -f7)
+    echo "Using local ip: ${LOCAL_IP}"
-    echo "Using local ip: ${local_ip}"
    export docker_args=(
-      run --rm  -e "TANGO_HOST=$TANGO_HOST" -e "DEBUG_HOST=${local_ip}" --network="station" --dns="$DNS" -it
+      run --rm  -e "TANGO_HOST=$TANGO_HOST" -e "DEBUG_HOST=${LOCAL_IP}" --network="station" --dns="$DNS" -it
      -v "$LOFAR20_DIR":/opt/lofar/tango:rw
      -w="/opt/lofar/tango/tangostationcontrol"
    )
@@ -280,10 +281,11 @@ dsconfig_image="git.astron.nl:5000/lofar2.0/tango/dsconfig:$TAG"
 docker pull -q "$dsconfig_image" || docker pull -q "git.astron.nl:5000/lofar2.0/tango/dsconfig:latest" || true
 docker image inspect "$dsconfig_image" > /dev/null || docker tag "git.astron.nl:5000/lofar2.0/tango/dsconfig:latest" "$dsconfig_image"
-jumppad_options=(
+jumppad_options=(  # these don't seem to propagate
  --var="host_volume=$tmp_volume"
  --var="lofar20_dir=$LOFAR20_DIR"
  --var="image_tag=$TAG"
+  --var="debug_host=$LOCAL_IP"
 )
 echo "Start module: $module"

--- a/tangostationcontrol/VERSION
+++ b/tangostationcontrol/VERSION
-0.40.1
+0.40.2
--- a/tangostationcontrol/debug-requirements.txt
+++ b/tangostationcontrol/debug-requirements.txt
+pydevd-pycharm~=241.14494.241
--- a/tangostationcontrol/requirements.txt
+++ b/tangostationcontrol/requirements.txt
@@ -26,7 +26,6 @@ logfmter # MIT
 psutil >= 5.4.6 # BSD3
 grpcio # Apache 2
 grpcio-tools # Apache 2
 parse # MIT
 mergedeep # MIT
 getmac # MIT
--- a/tangostationcontrol/setup.cfg
+++ b/tangostationcontrol/setup.cfg
@@ -26,6 +26,9 @@ packages = find:
 python_requires = >3.10
 install_requires = file: requirements.txt
+[options.extras_require]
+debug = file: debug-requirements.txt
 [options.packages.find]
 where = .

--- a/tangostationcontrol/tangostationcontrol/configuration/schemas/__init__.py
+++ b/tangostationcontrol/tangostationcontrol/configuration/schemas/__init__.py
--- a/tangostationcontrol/tangostationcontrol/devices/base_device_classes/lofar_device.py
+++ b/tangostationcontrol/tangostationcontrol/devices/base_device_classes/lofar_device.py
@@ -30,9 +30,10 @@ from tango import (
    DevFailed,
    DevState,
 )
+from tango._tango import AttrWriteType
 # PyTango imports
-from tango.server import attribute, command, Device, device_property
+from tango.server import attribute, command, class_property, Device, device_property
 # Additional import
 from tangostationcontrol import __version__ as version
@@ -263,10 +264,17 @@ class LOFARDevice(Device):
    # -----------------
    # Device Properties
    # -----------------
    Power_Children = device_property(dtype="DevVarStringArray", mandatory=False)
    Control_Children = device_property(dtype="DevVarStringArray", mandatory=False)
    # Property holding the debug state a device starts with; _init_device
    # copies it into self._debug and applies it through _set_log_level.
    Debug = class_property(
        dtype=bool,
        default_value=True,
        doc="The initial debug state when the device is started",
    )
    # ----------
    # Attributes
    # ----------
@@ -282,6 +290,19 @@ class LOFARDevice(Device):
        fget=lambda self: numpy.int64(time.time() - self.device_start_time),
    )
    # Read/write boolean attribute exposing the debug flag kept in self._debug.
    @attribute(
        access=AttrWriteType.READ_WRITE,
        dtype=bool,
        doc="Runtime configuration to enable / disable debug functionality",
    )
    def Debug_RW(self):
        # Read handler: report the currently stored debug flag.
        return self._debug
    # Write handler: store the new flag, then immediately apply the matching
    # logger level through _set_log_level.
    @Debug_RW.write
    def Debug_RW(self, value):
        self._debug = value
        self._set_log_level(self._debug)
    access_count_R = attribute(
        doc="How often this software device was accessed for commands or attributes",
        dtype=numpy.int64,
@@ -497,6 +518,8 @@ class LOFARDevice(Device):
        return self.attribute_poller.is_registered(attr_name)
    def __init__(self, cl, name):
+        self._debug = False
        # a proxy to ourself. can only be constructed in or after init_device
        # is called, during super().__init__().
        self.proxy = None
@@ -521,6 +544,9 @@ class LOFARDevice(Device):
    def _init_device(self):
        logger.debug("[LOFARDevice] init_device")
+        self._debug = self.Debug
+        self._set_log_level(self._debug)
        self.properties_changed()
        self.set_state(DevState.OFF)
@@ -535,6 +561,12 @@ class LOFARDevice(Device):
        self.proxy = create_device_proxy(self.get_name(), self.Device_Proxy_Timeout)
+    def _set_log_level(self, debug: bool):
+        if debug:
+            logger.setLevel(logging.DEBUG)
+        else:
+            logger.setLevel(logging.INFO)
    @command(dtype_out=str)
    def get_children(self):
        return pprint.pformat(self.control.children(-1))
@@ -888,6 +920,23 @@ class LOFARDevice(Device):
        restart_python()
        logger.error("Failed to restart Device Server")
+    @command()
+    def ds_debug_pycharm(self):
+        """Activate connection to pycharm debugger"""
+        import pydevd_pycharm
+        if os.environ.get("DEBUG_HOST") and os.environ.get("DEBUG_HOST") != "":
+            pydevd_pycharm.settrace(
+                os.environ["DEBUG_HOST"],
+                port=12345,
+                stdoutToServer=True,
+                stderrToServer=True,
+            )
+        else:
+            raise RuntimeError(
+                "Failed no ip to connect, need DEBUG_HOST environment variables"
+            )
    @only_in_states([DevState.OFF])
    def _boot(self):
        # setup connections