diff --git a/.gitignore b/.gitignore index 7a1698a101695f2f6944a0683629d865a840ea72..5fd81068c38fba7c11e68a45415b78407b6d9e04 100644 --- a/.gitignore +++ b/.gitignore @@ -94,6 +94,9 @@ ipython_config.py # install all needed dependencies. #Pipfile.lock +# IDE +.vscode + # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ @@ -120,6 +123,9 @@ venv.bak/ # Rope project settings .ropeproject +# Pycharm settings +.idea/ + # mkdocs documentation /site diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..20d42b0ba4003eaf414f70c8eb8e0ee10d2e70ff --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "modules/django-uws"] + path = modules/django-uws + url = git@git.astron.nl:astron-sdc/django-uws.git diff --git a/README.md b/README.md index 1f37d4a8a16f9cee4caacba5ed4e57f21ea8c045..4790220e1193ad09c4ae0a9870901d8747981856 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,15 @@ ESAP API Gateway is a 'backend' web application written in Django. It provides a range of services that can be accessed through a REST API. -## Documentation +## Documentation * backend (API Gateway): https://git.astron.nl/astron-sdc/esap-api-gateway/-/wikis/ESAP-API-gateway-Overview * frontend (GUI): https://git.astron.nl/astron-sdc/esap-gui/-/wikis/ESAP-GUI + +## Async ESAP + +For development with Async ESAP, take a look at the [wiki](https://git.astron.nl/astron-sdc/esap-api-gateway/-/wikis/WIP/Asynchronous-ESAP). + +## Contributing + +For developer access to this repository, please send a message on the [ESAP channel on Rocket Chat](https://chat.escape2020.de/channel/esap). 
diff --git a/compose/docker-compose.async-dev.yml b/compose/docker-compose.async-dev.yml new file mode 100644 index 0000000000000000000000000000000000000000..b8a25f86e065bb729e99df50a1ef8570a2bc8922 --- /dev/null +++ b/compose/docker-compose.async-dev.yml @@ -0,0 +1,18 @@ +services: + rabbitmq: + image: rabbitmq:3.9-alpine + ports: + - "5672:5672" + postgres: + image: postgres:14-alpine + ports: + - "5432:5432" + environment: + POSTGRES_PASSWORD: "secret" + POSTGRES_USER: "postgres" + POSTGRES_DB: "uws_jobs" + volumes: + - db:/var/lib/postgresql/data + +volumes: + db: diff --git a/esap/Dockerfile b/esap/Dockerfile index 8f486f9c867cf1035b378153545b27c2f6faebfd..b046d7242c7073e1dce3cd7c2be7277ea5384d42 100644 --- a/esap/Dockerfile +++ b/esap/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.10-slim -RUN apt-get update && apt-get install --no-install-recommends -y bash nano mc libmagic1 +RUN apt-get update && apt-get install --no-install-recommends -y bash nano mc libmagic1 git RUN mkdir /src WORKDIR /src diff --git a/esap/batch/__init__.py b/esap/batch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/esap/batch/admin.py b/esap/batch/admin.py new file mode 100644 index 0000000000000000000000000000000000000000..8c38f3f3dad51e4585f3984282c2a4bec5349c1e --- /dev/null +++ b/esap/batch/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. 
diff --git a/esap/batch/api/__init__.py b/esap/batch/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/esap/batch/api/services/__init__.py b/esap/batch/api/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/esap/batch/api/urls.py b/esap/batch/api/urls.py new file mode 100644 index 0000000000000000000000000000000000000000..17c7838ecf84002c58ba9b1fdbc354fdcbcd3e22 --- /dev/null +++ b/esap/batch/api/urls.py @@ -0,0 +1,20 @@ +from django.urls import path +from django.contrib import admin +from rest_framework import routers + +from . import views + +#router = routers.DefaultRouter() +#router.register('batch', views.BatchViewSet, 'batch') +#urlpatterns = router.urls + +urlpatterns = [ + path('', views.IndexView.as_view(), name='index-view'), + # example: /esap-api/get-services?dataset=ivoa?keyword=ukidss + #path('facilities/search', views.SearchFacilities.as_view(), name='facility-search'), + #path('workflows/search', views.SearchWorkflows.as_view(), name='workflows-search'), + #path('deploy', views.Deploy.deploy, name='deploy') + +] + + diff --git a/esap/batch/api/views/__init__.py b/esap/batch/api/views/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..46ab74ea0cca7c5af6a5fed2c9d3bdf4633bc459 --- /dev/null +++ b/esap/batch/api/views/__init__.py @@ -0,0 +1 @@ +from .batch_views import * diff --git a/esap/batch/api/views/batch_views.py b/esap/batch/api/views/batch_views.py new file mode 100644 index 0000000000000000000000000000000000000000..2d7588beefb16180a8e44cb7f66d4bea7852b70f --- /dev/null +++ b/esap/batch/api/views/batch_views.py @@ -0,0 +1,39 @@ +import logging +from urllib.parse import quote_plus as quote_url +from rest_framework import generics, pagination +from rest_framework.response import Response +#from batch.api.services import batch_controller 
+from django.views.generic import ListView +from django_filters import rest_framework as filters +from rest_framework import viewsets, permissions +from batch.models import * +from rest_framework import generics +#from ..serializers import * +from django.shortcuts import redirect +from rest_framework.views import APIView +from rest_framework.response import Response + +logger = logging.getLogger(__name__) + + +class BatchViewSet(viewsets.ModelViewSet): + #serializer_class = BatchSerializer + permission_classes = [permissions.IsAuthenticated] + + def get_queryset(self): + return self.request.user.staging.all() + + #def perform_create(self, serializer): + #serializer.save(owner=self.request.user) + + +# example: /esap/batch/ +class IndexView(ListView): + queryset = Batch.objects.all() + #serializer_class = BatchSerializer + template_name = 'batch/index.html' + + # by default this returns the list in an object called object_list, so use 'object_list' in the html page. + # but if 'context_object_name' is defined, then this returned list is named and can be accessed that way in html. 
+ context_object_name = 'my_batch' + diff --git a/esap/batch/apps.py b/esap/batch/apps.py new file mode 100644 index 0000000000000000000000000000000000000000..3e099fb8c66513a8328d4a27860335e35fac2533 --- /dev/null +++ b/esap/batch/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class BatchConfig(AppConfig): + name = 'batch' diff --git a/esap/batch/batch.py b/esap/batch/batch.py new file mode 100644 index 0000000000000000000000000000000000000000..9b3cc864f9a083d1db6710458c2fefa46f60502b --- /dev/null +++ b/esap/batch/batch.py @@ -0,0 +1,17 @@ + +# definition of the Batch Interface +batch_schema = { + "name": "batch", + "title": "Batch Analysis", + "type": "object", + "properties": { + "compute": { + "type": "string", + "title": "Compute Facility", + "default": "jhub_ska", + "enum": ["jhub_ska", "jhub_uedin", "spark_uedin"], + "enumNames": ["JHub SKA", "JHub Edinburgh", "Spark Cluster Edi"], + "uniqueItems": True, + }, + }, +} diff --git a/esap/batch/database_router.py b/esap/batch/database_router.py new file mode 100644 index 0000000000000000000000000000000000000000..f409320c1239bdb4344654ca798f39ad4761d1a1 --- /dev/null +++ b/esap/batch/database_router.py @@ -0,0 +1,46 @@ +class BatchRouter: + """Database router for the models of the 'batch' app.""" + + route_app_labels = {'batch'} + + def db_for_read(self, model, **hints): + """ + Reads of batch models go to the 'default' database. + """ + if model._meta.app_label in self.route_app_labels: + # return 'batch' + return 'default' + return None + + def db_for_write(self, model, **hints): + """ + Writes of batch models go to the 'default' database; models of + other apps are left to the remaining routers. + """ + if model._meta.app_label in self.route_app_labels: + #return 'batch' + return 'default' + return None + + def allow_relation(self, obj1, obj2, **hints): + """ + Allow relations if a model in the batch apps is + involved. + """ + if ( + obj1._meta.app_label in self.route_app_labels or + obj2._meta.app_label in self.route_app_labels + ): + return True + return None + + def allow_migrate(self, db, app_label, model_name=None, **hints): + """ + Make sure the batch apps only appear in the + 'default' database.
+ """ + if app_label in self.route_app_labels: + #return db == 'batch' + return db == 'default' + + return None diff --git a/esap/batch/migrations/0001_initial.py b/esap/batch/migrations/0001_initial.py new file mode 100644 index 0000000000000000000000000000000000000000..1f8068efe0fa1ab8605e165038c741abcf450a54 --- /dev/null +++ b/esap/batch/migrations/0001_initial.py @@ -0,0 +1,23 @@ +# Generated by Django 3.1.4 on 2022-04-20 10:42 +# Creates only the Batch table, the single model defined in batch/models.py. + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Batch', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('uri', models.CharField(max_length=40)), + ('status', models.CharField(max_length=40)), + ], + ),
+ ] diff --git a/esap/batch/migrations/__init__.py b/esap/batch/migrations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/esap/batch/models.py b/esap/batch/models.py new file mode 100644 index 0000000000000000000000000000000000000000..d67adfb5991995b3459757f9bd26bbaf78dd922c --- /dev/null +++ b/esap/batch/models.py @@ -0,0 +1,11 @@ +from django.db import models +from django.db.models import Q +import django_filters + + +class Batch(models.Model): + uri = models.CharField(max_length=40, null=False) + status = models.CharField(max_length=40, null=False) + + + diff --git a/esap/batch/templates/batch/base.html b/esap/batch/templates/batch/base.html new file mode 100644 index 0000000000000000000000000000000000000000..dfc59070f02d45bfddaac7b40a57885eb1ff4113 --- /dev/null +++ b/esap/batch/templates/batch/base.html @@ -0,0 +1,52 @@ +<!DOCTYPE html> +{% load static %} +<html lang="en"> +<head> + + <!-- Required meta tags --> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> + + <title>{% block myBlockTitle %}ESAP API - Batch Analysis {% endblock %}</title> + + <!-- loads the path to static files --> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css"> + + <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script> + <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.3/umd/popper.min.js" integrity="sha384-ZMP7rVo3mIykV+2+9J3UJ46jBk0WLaUAdn689aCwoqbBJiSnjAK/l8WvCWPIPm49" 
crossorigin="anonymous"></script> + <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" integrity="sha384-ChfqqxuZUCnJSK3+MXmPNIyE6ZbWh2IMqE241rYiqJxyMiZ6OW/JmZQ5stwEULTy" crossorigin="anonymous"></script> + + <link rel="stylesheet" type="text/css" href="{% static 'query/style.css' %}"/> + <link href='https://fonts.googleapis.com/css?family=Raleway' rel='stylesheet' type='text/css'> + <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.4.2/css/all.css" integrity="sha384-/rXc/GQVaYpyDdyxK+ecHPVYJSN9bmVFBvjA/9eOB+pb3F2w2N6fc5qB9Ew5yIns" crossorigin="anonymous"> + + <link rel="icon" href="http://uilennest.net/static/esap_icon.jpg"> + + {% block extra_js %}{% endblock %} + +</head> +<body> + <nav class="navbar navbar-expand-lg navbar-light bg-light"> + <div class="container-fluid"> + + <!-- Header --> + <div class="navbar-header"> + <a class="navbar-brand mb-0 h1"> + <h2> + <img src="{% static 'query/esap_logo.png' %}" alt=""> + ESAP API Gateway - Batch Analysis + </h2> + </a> + </div> + + </div> + </nav> + + <!-- to add blocks of code --> + {% block myBlock %} + {% endblock %} + +</body> +</html> + + diff --git a/esap/batch/templates/batch/index.html b/esap/batch/templates/batch/index.html new file mode 100644 index 0000000000000000000000000000000000000000..4220cdd733604ecf11fb885f5a220d0cbe15b75c --- /dev/null +++ b/esap/batch/templates/batch/index.html @@ -0,0 +1,36 @@ +{% extends 'batch/base.html' %} +{% load static %} +{% block myBlock %} + +<div class="container-fluid details-container"> + <div class="row"> + <div class="col-sm-12 col-md-12 col-lg-12"> + <div class="panel panel-success"> + + <div class="panel-body"> + + <h4>API resources</h4> + <table class="table table-striped table-bordered table-sm"> + <thead> + <tr> + <th>Function</th> + <th>URL</th> + </tr> + </thead> + <tbody> + + </tbody> + </table> + + </div> + </div> + </div> + + </div> + +</div> + + +<p class="footer" small>version 12 aug 2021</p> + +{% 
endblock %} diff --git a/esap/batch/tests.py b/esap/batch/tests.py new file mode 100644 index 0000000000000000000000000000000000000000..7ce503c2dd97ba78597f6ff6e4393132753573f6 --- /dev/null +++ b/esap/batch/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/esap/batch/views.py b/esap/batch/views.py new file mode 100644 index 0000000000000000000000000000000000000000..91ea44a218fbd2f408430959283f0419c921093e --- /dev/null +++ b/esap/batch/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. diff --git a/esap/esap/celery.py b/esap/esap/celery.py new file mode 100644 index 0000000000000000000000000000000000000000..aa8940a3b6d18c45e104b1e3f9cb58171cef0de4 --- /dev/null +++ b/esap/esap/celery.py @@ -0,0 +1,12 @@ +import os + +from celery import Celery + +# Set the default Django settings module for the 'celery' program. +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "esap.settings.async") + +app = Celery("esap") + +app.config_from_object("django.conf:settings", namespace="CELERY") + +app.autodiscover_tasks() diff --git a/esap/esap/configuration/navbar_adex.py b/esap/esap/configuration/navbar_adex.py index b3a81284bee476f4b5c53fb568301b404361339d..d9ddacd7f439e142f50a58f020a8de15f053f25e 100644 --- a/esap/esap/configuration/navbar_adex.py +++ b/esap/esap/configuration/navbar_adex.py @@ -7,5 +7,6 @@ nav1 = {'title': 'Archives', 'route': '/archives'} nav2 = {'title': 'Query', 'route': '/adex-query'} nav3 = {'title': 'Rucio', 'route': '/rucio'} nav4 = {'title': 'Interactive Analysis', 'route': '/interactive'} -nav5 = {'title': 'SAMP', 'route': '/samp'} +nav5 = {'title': 'Batch Analysis', 'route': '/batch'} +nav6 = {'title': 'SAMP', 'route': '/samp'} navbar = [nav1, nav2] diff --git a/esap/esap/configuration/navbar_esap.py b/esap/esap/configuration/navbar_esap.py index bc221094a36bf1d97bd61f86471a4c32be63683b..aa30ef87d175d52afd3e65e20f323438548a8712 100644 --- a/esap/esap/configuration/navbar_esap.py +++ 
b/esap/esap/configuration/navbar_esap.py @@ -7,5 +7,6 @@ nav1 = {'title': 'Archives', 'route': '/archives'} nav2 = {'title': 'Query', 'route': '/vo-query'} nav3 = {'title': 'Rucio', 'route': '/rucio'} nav4 = {'title': 'Interactive Analysis', 'route': '/interactive'} -nav5 = {'title': 'SAMP', 'route': '/samp'} -navbar = [nav1, nav2, nav3, nav4, nav5] +nav5 = {'title': 'Batch Analysis', 'route': '/batch'} +nav6 = {'title': 'SAMP', 'route': '/samp'} +navbar = [nav1, nav2, nav3, nav4, nav5, nav6] diff --git a/esap/esap/esap_ida_config.yaml b/esap/esap/esap_ida_config.yaml index 6e1517aecda67739914de891b7acc59f090acadb..65f3c9cffb06d96ff64dfc6eaf01b6d4f0a21d33 100644 --- a/esap/esap/esap_ida_config.yaml +++ b/esap/esap/esap_ida_config.yaml @@ -19,6 +19,13 @@ description: SKAO BinderHub url: https://srcdev.skatelescope.org/binderhub/ facilitytype: jupyterhub +- model: ida.facility + pk: 4 + fields: + name: Rosetta @ INAF OATS + description: The Rosetta platform deployed at INAF OATS computing centre + url: https://esap-rosetta.oats.inaf.it/ + facilitytype: jupyterhub - model: ida.workflow pk: 1 fields: diff --git a/esap/esap/settings/async.py b/esap/esap/settings/async.py new file mode 100644 index 0000000000000000000000000000000000000000..6c2df6f33ad3faa125ea88126841e77bf5472f18 --- /dev/null +++ b/esap/esap/settings/async.py @@ -0,0 +1,83 @@ +""" +Django settings for ASYNC ESAP development. 
Extends the dev.py settings: + +- Adds the UWS Django App +- Use the async-urls instead of the default one +- Configures Database with a separate Postgres db for Jobs +- Celery Configuration +""" + +import os + +from esap.settings.dev import * + +DEBUG = os.getenv("DEBUG", "False").strip().lower() in ("1", "true", "yes", "on") # bool("False") would be True + +# Add the UWS APP +INSTALLED_APPS = [ + "query.apps.MyAppConfig", + "accounts", + "rucio", + "ida", + "knox", + "batch", + "uws", + "django.contrib.admin", + "django.contrib.auth", + "mozilla_django_oidc", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "rest_framework", + "corsheaders", + "django_filters", +] + +# Use the async URL configuration, which adds the UWS endpoint +ROOT_URLCONF = "esap.urls_async" + +# Database +DATABASE_ROUTERS = [ + "uws.database_router.UWSDatabaseRouter", + "query.database_router.QueryRouter", + "accounts.database_router.AccountsRouter", + "ida.database_router.IdaRouter", + 'batch.database_router.BatchRouter', + "rucio.database_router.RucioRouter", +] + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": os.path.join(BASE_DIR, "esap_config.sqlite3"), + }, + "accounts": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": os.path.join(BASE_DIR, "esap_accounts_config.sqlite3"), + }, + "ida": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": os.path.join(BASE_DIR, "esap_ida_config.sqlite3"), + }, + "rucio": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": os.path.join(BASE_DIR, "esap_rucio_config.sqlite3"), + }, + 'batch': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': os.path.join(BASE_DIR, 'esap_batch_config.sqlite3'), + }, + "uws": { + "ENGINE": "django.db.backends.postgresql_psycopg2", + "NAME": "uws_jobs", + "USER": "postgres", + "PASSWORD": "secret", + "HOST": "localhost", + "PORT": "5432", + }, +} + +CELERY_TIMEZONE = "Europe/Amsterdam" +CELERY_ENABLE_UTC = True +CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "amqp://guest@localhost:5672") diff --git
a/esap/esap/settings/base.py b/esap/esap/settings/base.py index bc3495313af52c0d2e2ffdb7afde11e3a520577a..9c3611e11b90b3954e505b624ac29419e9079a1e 100644 --- a/esap/esap/settings/base.py +++ b/esap/esap/settings/base.py @@ -31,6 +31,7 @@ INSTALLED_APPS = [ 'accounts', 'rucio', 'ida', + 'batch', 'knox', 'django.contrib.admin', 'django.contrib.auth', diff --git a/esap/esap/settings/dev.py b/esap/esap/settings/dev.py index d7d47357f709c54a0f0dd0e9eb95baedde7e06bc..a4e221bb62337c99adecfba162729677edb8ae51 100644 --- a/esap/esap/settings/dev.py +++ b/esap/esap/settings/dev.py @@ -25,6 +25,7 @@ DATABASE_ROUTERS = [ 'query.database_router.QueryRouter', 'accounts.database_router.AccountsRouter', 'ida.database_router.IdaRouter', + 'batch.database_router.BatchRouter', 'rucio.database_router.RucioRouter', ] @@ -41,6 +42,10 @@ DATABASES = { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': os.path.join(BASE_DIR, 'esap_ida_config.sqlite3'), }, + 'batch': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': os.path.join(BASE_DIR, 'esap_batch_config.sqlite3'), + }, 'rucio': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': os.path.join(BASE_DIR, 'esap_rucio_config.sqlite3'), @@ -67,4 +72,4 @@ try: OIDC_RENEW_ID_TOKEN_EXPIRY_SECONDS = float(os.environ['OIDC_RENEW_ID_TOKEN_EXPIRY_SECONDS']) except: OIDC_RENEW_ID_TOKEN_EXPIRY_SECONDS = 36000 -#OIDC_EXEMPT_URLS = ['/esap-api/accounts/user-profiles'] \ No newline at end of file +#OIDC_EXEMPT_URLS = ['/esap-api/accounts/user-profiles'] diff --git a/esap/esap/settings/docker.py b/esap/esap/settings/docker.py index 6adbf83a43c228fe68ed8cae7186f4d77fdbdae9..bf00a0ea63f939c501a9b7a82accc77534aa19f1 100644 --- a/esap/esap/settings/docker.py +++ b/esap/esap/settings/docker.py @@ -26,6 +26,7 @@ DATABASE_ROUTERS = [ 'query.database_router.QueryRouter', 'accounts.database_router.AccountsRouter', 'ida.database_router.IdaRouter', + 'batch.database_router.BatchRouter', 'rucio.database_router.RucioRouter', ] @@ -42,6 +43,10 @@ DATABASES = { 
'ENGINE': 'django.db.backends.sqlite3', 'NAME': '/shared/esap_ida_config.sqlite3', }, + 'batch': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': '/shared/esap_batch_config.sqlite3', + }, 'rucio': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': '/shared/esap_rucio_config.sqlite3', @@ -56,4 +61,4 @@ STATIC_ROOT = os.path.join(BASE_DIR, 'static') LOGIN_REDIRECT_URL = os.environ['LOGIN_REDIRECT_URL'] LOGOUT_REDIRECT_URL = os.environ['LOGOUT_REDIRECT_URL'] -LOGIN_REDIRECT_URL_FAILURE = os.environ['LOGIN_REDIRECT_URL_FAILURE'] \ No newline at end of file +LOGIN_REDIRECT_URL_FAILURE = os.environ['LOGIN_REDIRECT_URL_FAILURE'] diff --git a/esap/esap/urls.py b/esap/esap/urls.py index 7c498e3dab84234f42228674f4649311a4cfa499..3c31c556370e68f76b3a269acea8895bd8a6f785 100644 --- a/esap/esap/urls.py +++ b/esap/esap/urls.py @@ -21,6 +21,7 @@ urlpatterns = [ path('esap-api/rucio/', include('rucio.api.urls')), path('esap-api/accounts/', include('accounts.api.urls')), path('esap-api/ida/', include('ida.api.urls')), + path('esap-api/batch/', include('batch.api.urls')), path('esap-api/admin/', admin.site.urls, name='admin-view'), re_path('^esap-api/oidc/', include('mozilla_django_oidc.urls')), ] diff --git a/esap/esap/urls_async.py b/esap/esap/urls_async.py new file mode 100644 index 0000000000000000000000000000000000000000..97c2b9f767d995a6e004ee8707958dbba3bfafdb --- /dev/null +++ b/esap/esap/urls_async.py @@ -0,0 +1,12 @@ +""" Extends the base URL patterns with the UWS endpoint """ + +from django.urls import include, path + +from esap.urls import urlpatterns as base_patterns + +# Build a new list instead of appending to the imported one, so that importing +# this module does not modify esap.urls.urlpatterns as a side effect. +urlpatterns = [ + *base_patterns, + path("esap-api/uws/", include("uws.urls")), +] diff --git a/esap/ida/api/services/harvester.py b/esap/ida/api/services/harvester.py index 0d3841dd8cc9dbd56a1a844db88a3ceb4175e2b1..8afb75a3221a6dda701c38f10b41e1931ccd8b61 100644 --- a/esap/ida/api/services/harvester.py +++ b/esap/ida/api/services/harvester.py @@ -1,9 +1,6 @@ -from rest_framework import
serializers -import requests -import json +import concurrent.futures import logging -import string -import urllib + from eossr.api import get_ossr_records logger = logging.getLogger(__name__) @@ -12,47 +9,84 @@ ZENODO_HOST = "https://zenodo.org/api/communities" ZENODO_AUTH_TOKEN = "AUTH_TOKEN" - class Harvester(object): """ The Harvester class used to collect entries for existing Worfklows / Notebooks from the OSSR """ # Initializer - def __init__(self, url = ZENODO_HOST): + def __init__(self, url=ZENODO_HOST): # We may end up using this when we switch to the sandbox version self.url = url @staticmethod - def get_data_from_zenodo(query=None, keyword=None): - """ use Zenodo REST API to query the OSSR""" + def get_data_from_zenodo(query=None, keyword=None, timeout=5.0): + """Use the Zenodo REST API to query the OSSR + + Parameters + ---------- + query : `str` + Unused. + keyword : `str` + Unused. + timeout : `float` + Give up if Zenodo doesn't return within timeout seconds. + + Notes + ----- + This method uses a thread pool to submit multiple queries to Zenodo at + once, and has a timeout on each one. This prevents it from blocking + indefinitely (or even for num_records * timeout seconds) if Zenodo is + unresponsive. 
+ """ def _format_results(records): results = [] - for record in records: + def item_from_record(record, timeout): + item = {} try: - item = {} - codemeta = record.get_codemeta() + codemeta = record.get_codemeta(timeout=timeout) item["id"] = record.data["id"] - item["description"] = record.data["metadata"].get("description","") - item["name"] = record.data["metadata"].get("title","") + item["description"] = record.data["metadata"].get("description", "") + item["name"] = record.data["metadata"].get("title", "") item["workflow"] = "notebook" - item["url"] = codemeta.get("codeRepository","") - item["runtimePlatform"] = codemeta.get("runtimePlatform","") - item["keywords"] = ", ".join(codemeta.get("keywords",[])) - item["author"] = codemeta["author"][0].get("givenName","") + " " + codemeta["author"][0].get("familyName", "") + item["url"] = codemeta.get("codeRepository", "") + item["runtimePlatform"] = codemeta.get("runtimePlatform", "") + item["keywords"] = ", ".join(codemeta.get("keywords", [])) + item["author"] = ", ".join( + [ + " ".join( + author.get(fieldName, "") + for fieldName in ("givenName", "familyName") + ).strip() + for author in codemeta.get("author", []) + ] + ) item["ref"] = "HEAD" item["filepath"] = "" - except Exception as e: - item = {} logging.exception(e) - finally: - results.append(item) + return item + + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [ + executor.submit(item_from_record, record, timeout) + for record in records + ] + for future in concurrent.futures.as_completed(futures): + item = future.result() + if "url" in item and item["url"]: + results.append(item) + return results - keywords='jupyter-notebook' - # Keep for later when we implement a keyword search: escape_records = get_ossr_records(search=query, keywords=keywords) if query else get_ossr_records(keywords=keywords) + keywords = "jupyter-notebook" + # Keep for later when we implement a keyword search: + # escape_records = ( + # 
get_ossr_records(search=query, keywords=keywords) + # if query + # else get_ossr_records(keywords=keywords) + # ) escape_records = get_ossr_records(keywords=keywords) return _format_results(escape_records) diff --git a/esap/ida/api/services/ida_controller.py b/esap/ida/api/services/ida_controller.py index 3991f3ed3af4924e19a0b9c8db521c6bfcb9384d..96fb9169c31983b5a0dc58e2e6a8f8440f58c2e5 100644 --- a/esap/ida/api/services/ida_controller.py +++ b/esap/ida/api/services/ida_controller.py @@ -44,6 +44,10 @@ def search_workflows(keyword="", objectclass=""): db_workflows = serializers.serialize("python", Workflow.objects.all()) zenodo_workflows = Harvester.get_data_from_zenodo(query=keyword, keyword="jupyter-notebook") for db_entry in db_workflows: + if db_entry["fields"]["workflowtype"].lower() == "notebook": + db_entry["fields"]["keywords"] = "jupyter-notebook" + db_entry["fields"]["runtimePlatform"] = "" + db_entry["fields"]["author"] = "" response["results"].append(db_entry["fields"]) response["results"].extend(zenodo_workflows) return response @@ -56,8 +60,8 @@ def search(model, keyword="", objectclass=""): :param keyword: comma separated keywords :return: """ - - def apply_search(keyword, model, objectclass): + + def apply_search(keyword, model, objectclass): if objectclass.lower()=="workflow": results = model.objects.filter( Q(name__icontains=keyword) | Q(description__icontains=keyword) | Q(url__icontains=keyword) diff --git a/esap/ida/models.py b/esap/ida/models.py index 8b3ff9f69ff4877ea699466de5c5ccbf81d0e74a..d0f96a829bfe559f60911c1c08ad4c4759e20bfc 100644 --- a/esap/ida/models.py +++ b/esap/ida/models.py @@ -6,53 +6,39 @@ import django_filters class Ida(models.Model): uri = models.CharField(max_length=40, null=False) status = models.CharField(max_length=40, null=False) - - -""" -Facility -""" -class Facility(models.Model): - # fields + +class Facility(models.Model): name = models.CharField(max_length=30) description = models.CharField(max_length=240) url = 
models.CharField(max_length=240) facilitytype = models.CharField(max_length=240) - def __str__(self): return str(self.name) - -""" -Workflow -""" -class Workflow(models.Model): + class Meta: + ordering = ["facilitytype", "name"] - # fields + +class Workflow(models.Model): name = models.CharField(max_length=30) description = models.CharField(max_length=240) url = models.CharField(max_length=240) ref = models.CharField(max_length=240, default="HEAD") filepath = models.CharField(max_length=240, blank=True) workflowtype = models.CharField(max_length=240) - + def __str__(self): return str(self.name) -""" -ShoppingCart -""" class ShoppingCart(models.Model): - - # fields user = models.PositiveIntegerField() - workflow = models.ForeignKey(Workflow, models.CASCADE) - facility = models.ForeignKey(Facility, models.CASCADE) + workflow = models.ForeignKey(Workflow, models.CASCADE) + facility = models.ForeignKey(Facility, models.CASCADE) dataset = models.PositiveIntegerField() - datatype = models.CharField(max_length=240) + datatype = models.CharField(max_length=240) def __str__(self): return str(self.name) - diff --git a/esap/query/api/services/zenodo.py b/esap/query/api/services/zenodo.py index 5551d4e99e27fe4cc0a82a2b4ccc9f5bf26c70f2..30df3368596f6c04a67fdb17624bb832c8610f71 100644 --- a/esap/query/api/services/zenodo.py +++ b/esap/query/api/services/zenodo.py @@ -1,74 +1,52 @@ -""" - File name: zenodo.py - Date created: 2021-05-16 - Description: Zenodo Service Connector for ESAP. 
-""" -#from eossr.api import get_ossr_records +"""Zenodo Service Connector for ESAP.""" + from eossr.api import get_zenodo_records from rest_framework import serializers from .query_base import query_base -import requests -import json import logging -import string logger = logging.getLogger(__name__) -AMP_REPLACEMENT = "_and_" - -# -------------------------------------------------------------------------------------------------------------------- - class zenodo_connector(query_base): - """ - The connector to access the data lake through ZENODO - """ - - # Initializer - def __init__(self, url): - self.url = url + """A connector to query Zenodo archives""" # construct a query for the ZENODO REST API - def construct_query( - self, dataset, esap_query_params, translation_parameters - ): + def construct_query(self, dataset, esap_query_params, translation_parameters): - query = {'size': '1000'} + query = {"size": "1000"} where = {} error = {} - query['communities'] = str.lower(esap_query_params.pop('community')[0]) + query["communities"] = esap_query_params.pop("community")[0] - if 'keyword' in esap_query_params.keys(): - query['keywords'] = str(esap_query_params.pop('keyword')[0]) + if "keyword" in esap_query_params.keys(): + query["keywords"] = str(esap_query_params.pop("keyword")[0]) - desired_value = 'undefined' + desired_value = "undefined" for key, value in query.items(): - if value == desired_value: - del query[key] - break + if value == desired_value: + del query[key] + break return query, where, error def _get_data_from_zenodo(self, query, session): - """ use Zenodo REST API to query the data lake """ + """use Zenodo REST API to query the data lake""" results = [] response = [] if query != "empty": try: - response = get_zenodo_records(**query) + response = get_zenodo_records(**query) except: - logger.info("No Results Found in Zenodo Archive Search") + logger.info("No Results Found in Zenodo Archive Search") else: - logger.info("Empty search in Zenodo Archive 
Search") + logger.info("Empty search in Zenodo Archive Search") if len(response) > 0: - results = [ - element.data - for element in response - ] + results = [element.data for element in response] return results @@ -77,7 +55,7 @@ class zenodo_connector(query_base): dataset, dataset_name, query, - session, + session, override_access_url=None, override_service_type=None, ): @@ -87,43 +65,11 @@ class zenodo_connector(query_base): :return: results: an array of dicts with the following structure; """ - # create a function that reads the data from lofar zenodo_results = self._get_data_from_zenodo(query, session) - - ##logger.info("RESULTS: " + str(zenodo_results)) + logger.debug("RESULTS: " + str(zenodo_results)) return zenodo_results - # custom serializer for the 'query' endpoint - - class TypeToSerializerMap: - - map = { - type(float): serializers.FloatField(), - type(int): serializers.IntegerField(), - type(str): serializers.CharField(), - type(dict): serializers.DictField(), - type(list): serializers.ListField(), - } - - @classmethod - def getFieldForType(cls, value): - return cls.map.get(type(value), serializers.JSONField()) - class CreateAndRunQuerySerializer(serializers.Serializer): - """ - Custom serializer classes implement dynamic field definition based on - the contents of the query passed to it. 
- """ - - def __init__(self, *args, **kwargs): - - self.example_result = kwargs.get("instance", [])[0] - - super().__init__(*args, **kwargs) - - self.fields.update( - { - key: zenodo_connector.TypeToSerializerMap.getFieldForType(value) - for key, value in self.example_result.items() - } - ) + links = serializers.DictField() + metadata = serializers.DictField() + doi = serializers.CharField() diff --git a/esap/query/database_router.py b/esap/query/database_router.py index cfeded5afa32998f33e481e0eef1bfd3961eac06..608928cc94d5a40d3e85578d27316f7be5b3e89a 100644 --- a/esap/query/database_router.py +++ b/esap/query/database_router.py @@ -1,7 +1,7 @@ class QueryRouter: route_app_labels = {'query', 'auth', 'contenttypes', 'sessions', 'admin'} - custom_router_app_labels = {'ida', 'rucio', 'accounts'} + custom_router_app_labels = {'batch', 'ida', 'rucio', 'accounts'} def db_for_read(self, model, **hints): diff --git a/esap/requirements/async.txt b/esap/requirements/async.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8fca3534956ef232426926c6ea30586f7b6e6ba --- /dev/null +++ b/esap/requirements/async.txt @@ -0,0 +1,3 @@ +# Extends dev.txt with the django-uws module (installed as editable) +-r dev.txt +-e ./modules/django-uws diff --git a/esap/requirements/base.txt b/esap/requirements/base.txt index d31076c22beca273baa0192f15c0356857a06762..b9552f55882b92e39b5e3bdd3c3a07ed091e888c 100644 --- a/esap/requirements/base.txt +++ b/esap/requirements/base.txt @@ -12,7 +12,7 @@ django-cors-headers==3.11.0 django-filter==21.1 django-rest-knox==4.2.0 djangorestframework==3.13.1 -eossr==0.3.3 +git+https://gitlab.in2p3.fr/escape2020/wp3/eossr.git@f9d9d16ad267ec9a63e1a344286642fe663b1c16#egg=eossr future==0.18.2 idna==2.10 josepy==1.12.0 diff --git a/modules/django-uws b/modules/django-uws new file mode 160000 index 0000000000000000000000000000000000000000..5c6912e5caf9f6b4815eb4c11d153680e7ff288b --- /dev/null +++ b/modules/django-uws @@ -0,0 +1 @@ +Subproject 
commit 5c6912e5caf9f6b4815eb4c11d153680e7ff288b