Skip to content
Snippets Groups Projects
Commit 182106d4 authored by Mattia Mancini's avatar Mattia Mancini
Browse files

implemented scheduling workflow

parent e82bd3ed
No related branches found
No related tags found
No related merge requests found
from airflow.plugins_manager import AirflowPlugin from airflow.plugins_manager import AirflowPlugin
from airflow.executors.base_executor import BaseExecutor from airflow.executors.base_executor import BaseExecutor
from airflow.utils.state import State from airflow.utils.state import State
from slurm_cli.slurm_control import get_jobs_status from slurm_cli.slurm_control import get_jobs_status, run_job
import subprocess import logging
import uuid
logger = logging.getLogger(__name__)
def reindex_job_status_by_job_name(job_list):
    """Re-key a collection of job statuses by each status' ``job_name``.

    Args:
        job_list: A mapping whose values are job-status objects exposing a
            ``job_name`` attribute. The existing keys are ignored.

    Returns:
        A new dict mapping ``job_name`` to the corresponding status object.
        If several statuses share the same ``job_name``, the one that comes
        last in ``job_list`` iteration order wins.
    """
    return {job_status.job_name: job_status for job_status in job_list.values()}
# Will show up under airflow.executors.slurm.SlurmExecutor # Will show up under airflow.executors.slurm.SlurmExecutor
class SlurmExecutor(BaseExecutor): class SlurmExecutor(BaseExecutor):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.commands_to_run = [] self.commands_to_check = {}
def execute_async(self, key, command, queue=None, executor_config=None): def execute_async(self, key, command, queue=None, executor_config=None):
print("execute async called") print("execute async called")
self.commands_to_run.append((key, command,)) unique_id = str(key[0]) + str(uuid.uuid1())
queue = queue if queue != 'default' else None
logging.debug('submitting job %s on queue %s', key, queue)
run_job(cmd=command, queue=queue, task_name=unique_id)
self.commands_to_check[unique_id] = key
def trigger_tasks(self, open_slots): def check_state(self):
print('trigger tasks called', open_slots) ids = list(self.commands_to_check.keys())
super().trigger_tasks(open_slots) statuses = reindex_job_status_by_job_name(get_jobs_status(job_name=ids))
logger.debug('statuses found are %s', statuses)
logger.debug('commands to check are %s', self.commands_to_check)
def sync(self): completed_jobs = []
for key, command in self.commands_to_run: for unique_id, key in self.commands_to_check.items():
self.log.info("Executing command with key %s: %s", key, command) status = statuses[unique_id]
if status.status_code == 'CD':
try:
subprocess.check_call(command, close_fds=True)
self.change_state(key, State.SUCCESS) self.change_state(key, State.SUCCESS)
except subprocess.CalledProcessError as e: completed_jobs.append(unique_id)
elif status.status_code == 'F':
self.change_state(key, State.FAILED) self.change_state(key, State.FAILED)
self.log.error("Failed to execute task %s.", str(e)) completed_jobs.append(unique_id)
elif status.status_code in ('CG', 'R'):
self.change_state(key, State.RUNNING)
elif status.status_code == 'PD':
self.change_state(key, State.SCHEDULED)
for unique_id in completed_jobs:
if unique_id in self.commands_to_check:
self.commands_to_check.pop(unique_id)
else:
logger.error('id %s missing in %s', unique_id, self.commands_to_check)
def trigger_tasks(self, open_slots):
self.check_state()
super().trigger_tasks(open_slots)
self.commands_to_run = [] def sync(self):
pass
def end(self): def end(self):
self.heartbeat() self.heartbeat()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment