Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
LOFAR
Manage
Activity
Members
Labels
Plan
Issues
Wiki
Jira issues
Open Jira
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RadioObservatory
LOFAR
Commits
e7a9fa2d
Commit
e7a9fa2d
authored
8 years ago
by
Jan David Mol
Browse files
Options
Downloads
Patches
Plain Diff
Task #8887: Fixes for docker/slurm command line, and some minor tweaking
parent
b2e1c38c
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
Docker/docker-template
+2
-1
2 additions, 1 deletion
Docker/docker-template
MAC/Services/src/PipelineControl.py
+19
-8
19 additions, 8 deletions
MAC/Services/src/PipelineControl.py
with
21 additions
and
9 deletions
Docker/docker-template
+
2
−
1
View file @
e7a9fa2d
...
...
@@ -44,7 +44,8 @@ while getopts "hv:" opt; do
done
# Make sure we obtain info about the project source!
VERSION_INFO
=
`
$VERSION_DOCKER
`
# Drop stderr to prevent logger output from contaminating our output
VERSION_INFO
=
`
$VERSION_DOCKER
2>/dev/null
`
# Extract branch name w.r.t. repository root, e.g. branches/LOFAR-Task1234
export
LOFAR_BRANCH_NAME
=
`
echo
"
$VERSION_INFO
"
| perl
-ne
'print "$1" if /branch += +(.+)/;'
`
...
...
This diff is collapsed.
Click to expand it.
MAC/Services/src/PipelineControl.py
+
19
−
8
View file @
e7a9fa2d
...
...
@@ -77,7 +77,7 @@ import logging
logger
=
logging
.
getLogger
(
__name__
)
def
runCommand
(
cmdline
,
input
=
None
):
logger
.
info
(
"
R
un
n
ing
'
%s
'
"
,
cmdline
)
logger
.
info
(
"
r
un
Command start
ing
:
%s
"
,
cmdline
)
# Start command
proc
=
subprocess
.
Popen
(
...
...
@@ -90,8 +90,9 @@ def runCommand(cmdline, input=None):
)
# Feed input and wait for termination
logger
.
info
(
"
runCommand input: %s
"
,
input
)
stdout
,
_
=
proc
.
communicate
(
input
)
logger
.
debug
(
stdout
)
logger
.
info
(
"
runCommand output: %s
"
,
stdout
)
# Check exit status, bail on error
if
proc
.
returncode
!=
0
:
...
...
@@ -124,6 +125,10 @@ class Parset(dict):
def
processingCluster
(
self
):
return
self
[
PARSET_PREFIX
+
"
Observation.Cluster.ProcessingCluster.clusterName
"
]
or
"
CEP2
"
@staticmethod
def
dockerRepository
():
return
"
nexus.cep4.control.lofar:18080
"
@staticmethod
def
defaultDockerImage
():
return
runCommand
(
"
docker-template
"
,
"
lofar-pipeline:${LOFAR_TAG}
"
)
...
...
@@ -146,15 +151,19 @@ class Slurm(object):
# TODO: Derive SLURM partition name
self
.
partition
=
"
cpu
"
def
_runCommand
(
self
,
cmdline
):
def
_runCommand
(
self
,
cmdline
,
input
=
None
):
cmdline
=
"
ssh %s %s
"
%
(
self
.
headnode
,
cmdline
)
runCommand
(
cmdline
)
return
runCommand
(
cmdline
,
input
)
def
submit
(
self
,
jobName
,
cmdline
,
sbatch_params
=
None
):
if
sbatch_params
is
None
:
sbatch_params
=
[]
stdout
=
self
.
_runCommand
(
"
sbatch --partition=%s --job-name=%s %s bash -c
'
%s
'"
%
(
self
.
partition
,
jobName
,
"
"
.
join
(
sbatch_params
),
cmdline
))
script
=
"""
#!/bin/bash
{cmdline}
"""
.
format
(
cmdline
=
cmdline
)
stdout
=
self
.
_runCommand
(
"
sbatch --partition=%s --job-name=%s %s
"
%
(
self
.
partition
,
jobName
,
"
"
.
join
(
sbatch_params
)),
script
)
# Returns "Submitted batch job 3" -- extract ID
match
=
re
.
search
(
"
Submitted batch job (\d+)
"
,
stdout
)
...
...
@@ -327,7 +336,7 @@ class PipelineControl(OTDBBusListener):
"
--time=31-0
"
,
# TODO: Compute nr nodes
"
--nodes=
50
"
,
"
--nodes=
24
"
,
# Define better places to write the output
os
.
path
.
expandvars
(
"
--error=/data/log/runPipeline-%s.stderr
"
%
(
otdbId
,)),
...
...
@@ -385,11 +394,13 @@ class PipelineControl(OTDBBusListener):
),
sbatch_params
=
[
"
--cpus-per
=
task=1
"
,
"
--ntasks=1
"
"
--cpus-per
-
task=1
"
,
"
--ntasks=1
"
,
"
--dependency=afternotok:%s
"
%
slurm_job_id
,
"
--kill-on-invalid-dep=yes
"
,
"
--requeue
"
,
"
--error=/data/log/pipelineAborted-%s.stderr
"
%
(
otdbId
,),
"
--output=/data/log/pipelineAborted-%s.log
"
%
(
otdbId
,),
]
)
logger
.
info
(
"
Scheduled SLURM job %s
"
%
(
slurm_cancel_job_id
,))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment