Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
SlurmExecutorPlugin
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
EOSC
SlurmExecutorPlugin
Commits
182106d4
Commit
182106d4
authored
5 years ago
by
Mattia Mancini
Browse files
Options
Downloads
Patches
Plain Diff
implemented scheduling workflow
parent
e82bd3ed
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
lib/slurm_executor/slurm.py
+44
-16
44 additions, 16 deletions
lib/slurm_executor/slurm.py
with
44 additions
and
16 deletions
lib/slurm_executor/slurm.py
+
44
−
16
View file @
182106d4
from
airflow.plugins_manager
import
AirflowPlugin
from
airflow.plugins_manager
import
AirflowPlugin
from
airflow.executors.base_executor
import
BaseExecutor
from
airflow.executors.base_executor
import
BaseExecutor
from
airflow.utils.state
import
State
from
airflow.utils.state
import
State
from
slurm_cli.slurm_control
import
get_jobs_status
from
slurm_cli.slurm_control
import
get_jobs_status
,
run_job
import
subprocess
import
logging
import
uuid
logger
=
logging
.
getLogger
(
__name__
)
def
reindex_job_status_by_job_name
(
job_list
):
return
{
job_status
.
job_name
:
job_status
for
job_status
in
job_list
.
values
()}
# Will show up under airflow.executors.slurm.SlurmExecutor
# Will show up under airflow.executors.slurm.SlurmExecutor
class
SlurmExecutor
(
BaseExecutor
):
class
SlurmExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
self
.
commands_to_
run
=
[]
self
.
commands_to_
check
=
{}
def
execute_async
(
self
,
key
,
command
,
queue
=
None
,
executor_config
=
None
):
def
execute_async
(
self
,
key
,
command
,
queue
=
None
,
executor_config
=
None
):
print
(
"
execute async called
"
)
print
(
"
execute async called
"
)
self
.
commands_to_run
.
append
((
key
,
command
,))
unique_id
=
str
(
key
[
0
])
+
str
(
uuid
.
uuid1
())
queue
=
queue
if
queue
!=
'
default
'
else
None
logging
.
debug
(
'
submitting job %s on queue %s
'
,
key
,
queue
)
run_job
(
cmd
=
command
,
queue
=
queue
,
task_name
=
unique_id
)
self
.
commands_to_check
[
unique_id
]
=
key
def
trigger_tasks
(
self
,
open_slots
):
def
check_state
(
self
):
print
(
'
trigger tasks called
'
,
open_slots
)
ids
=
list
(
self
.
commands_to_check
.
keys
())
super
().
trigger_tasks
(
open_slots
)
statuses
=
reindex_job_status_by_job_name
(
get_jobs_status
(
job_name
=
ids
))
logger
.
debug
(
'
statuses found are %s
'
,
statuses
)
logger
.
debug
(
'
commands to check are %s
'
,
self
.
commands_to_check
)
def
sync
(
self
):
completed_jobs
=
[]
for
key
,
command
in
self
.
commands_to_run
:
for
unique_id
,
key
in
self
.
commands_to_check
.
items
():
self
.
log
.
info
(
"
Executing command with key %s: %s
"
,
key
,
command
)
status
=
statuses
[
unique_id
]
if
status
.
status_code
==
'
CD
'
:
try
:
subprocess
.
check_call
(
command
,
close_fds
=
True
)
self
.
change_state
(
key
,
State
.
SUCCESS
)
self
.
change_state
(
key
,
State
.
SUCCESS
)
except
subprocess
.
CalledProcessError
as
e
:
completed_jobs
.
append
(
unique_id
)
elif
status
.
status_code
==
'
F
'
:
self
.
change_state
(
key
,
State
.
FAILED
)
self
.
change_state
(
key
,
State
.
FAILED
)
self
.
log
.
error
(
"
Failed to execute task %s.
"
,
str
(
e
))
completed_jobs
.
append
(
unique_id
)
elif
status
.
status_code
in
(
'
CG
'
,
'
R
'
):
self
.
change_state
(
key
,
State
.
RUNNING
)
elif
status
.
status_code
==
'
PD
'
:
self
.
change_state
(
key
,
State
.
SCHEDULED
)
for
unique_id
in
completed_jobs
:
if
unique_id
in
self
.
commands_to_check
:
self
.
commands_to_check
.
pop
(
unique_id
)
else
:
logger
.
error
(
'
id %s missing in %s
'
,
unique_id
,
self
.
commands_to_check
)
def
trigger_tasks
(
self
,
open_slots
):
self
.
check_state
()
super
().
trigger_tasks
(
open_slots
)
self
.
commands_to_run
=
[]
def
sync
(
self
):
pass
def
end
(
self
):
def
end
(
self
):
self
.
heartbeat
()
self
.
heartbeat
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment