From 12104ed3b7987ac9b4f95cf3c4dc51e661683d05 Mon Sep 17 00:00:00 2001
From: Jorrit Schaap <schaap@astron.nl>
Date: Fri, 30 Apr 2021 10:32:50 +0200
Subject: [PATCH] TMSS-717: SIP schema location and namespace. Why does xml
 make everything so much harder than json?

---
 LTA/sip/lib/siplib.py                         |  9 +++++--
 SAS/TMSS/backend/src/tmss/tmssapp/views.py    | 27 +++++++++++++++++++
 .../src/tmss/tmssapp/viewsets/scheduling.py   | 19 +++++++++++--
 SAS/TMSS/backend/src/tmss/urls.py             |  1 +
 4 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/LTA/sip/lib/siplib.py b/LTA/sip/lib/siplib.py
index e81b00ed557..71b7c184c50 100644
--- a/LTA/sip/lib/siplib.py
+++ b/LTA/sip/lib/siplib.py
@@ -1488,11 +1488,16 @@ class Sip(object):
             raise Exception("This SIP does not describe a correlated dataproduct. No subarray pointing available.")
 
     # this will also validate the document so far
-    def get_prettyxml(self):
+    def get_prettyxml(self, schema_location:str = None):
         try:
             dom = self.__sip.toDOM()
             dom.documentElement.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
-            dom.documentElement.setAttribute('xsi:schemaLocation', "http://www.astron.nl/SIP-Lofar LTA-SIP-2.7.0.xsd")
+            if schema_location is None:
+                # This is/was the default schema location, even though the xsd was never hosted on the astron server.
+                # As a result xmllint fails to validate against it (the schema cannot be found at that location).
+                schema_location = "http://www.astron.nl/SIP-Lofar LTA-SIP-2.7.2.xsd"
+            dom.documentElement.setAttribute('xsi:schemaLocation', schema_location)
+            dom.documentElement.setAttribute('xmlns:sip', schema_location.split(' ')[0])
             return dom.toprettyxml()
         except pyxb.ValidationError as err:
             logger.error(err.details())
diff --git a/SAS/TMSS/backend/src/tmss/tmssapp/views.py b/SAS/TMSS/backend/src/tmss/tmssapp/views.py
index 85bdfe0de03..f59385ae9ce 100644
--- a/SAS/TMSS/backend/src/tmss/tmssapp/views.py
+++ b/SAS/TMSS/backend/src/tmss/tmssapp/views.py
@@ -12,6 +12,7 @@ from rest_framework.authtoken.models import Token
 from rest_framework.permissions import AllowAny
 from rest_framework.decorators import authentication_classes, permission_classes
 from django.apps import apps
+import re
 
 from rest_framework.decorators import api_view
 from datetime import datetime
@@ -78,6 +79,32 @@ def get_template_json_schema(request, template:str, name:str, version:str):
     return response
 
 
+# Allow everybody to GET our publicly available LTA SIP XSD (XML Schema Definition for the LTA SIP)
+@permission_classes([AllowAny])
+@authentication_classes([AllowAny])
+@swagger_auto_schema(#method='GET',
+                     responses={200: 'Get the LTA SIP XSD',
+                                404: 'not available'},
+                     operation_description="Get the LTA SIP XSD.")
+#@api_view(['GET'])   # todo: !! decorating this as api_view somehow breaks json ref resolution !! fix this and double url issue in urls.py, then use decorator here to include in Swagger
+def get_lta_sip_xsd(request):
+
+    lta_sip_xsd_path = os.path.join(os.environ["LOFARROOT"], "share", "lta", "LTA-SIP.xsd")
+    with open(lta_sip_xsd_path, 'rt') as file:
+        xsd = file.read()
+
+        # hacky way of setting the namespace to this url
+        # can/should be done with proper xml dom setAttribute on document node.
+        # but this string manipulation is faster, and works just as well.
+        # the namespace should point to the absolute url of this request, without the document name.
+        abs_uri = "%s://%s/%s" % (request.scheme, request.get_host().rstrip('/'), request.get_full_path().lstrip('/'))
+        abs_uri = abs_uri[:abs_uri.rfind('/')]
+        for attr in ('targetNamespace', 'xmlns'):
+            xsd = xsd.replace('''%s="http://www.astron.nl/SIP-Lofar"'''%attr, '''%s="%s"'''%(attr,abs_uri))
+
+        return HttpResponse(content=xsd, content_type='application/xml')
+
+
 # Allow everybody to GET our publicly available station group lookups
 @permission_classes([AllowAny])
 @authentication_classes([AllowAny])
diff --git a/SAS/TMSS/backend/src/tmss/tmssapp/viewsets/scheduling.py b/SAS/TMSS/backend/src/tmss/tmssapp/viewsets/scheduling.py
index 28ad1dbc6a6..df92de0ce14 100644
--- a/SAS/TMSS/backend/src/tmss/tmssapp/viewsets/scheduling.py
+++ b/SAS/TMSS/backend/src/tmss/tmssapp/viewsets/scheduling.py
@@ -395,9 +395,24 @@ class DataproductViewSet(LOFARViewSet):
                          operation_description="Get the Submission Information Package (SIP) for this dataproduct")
     @action(methods=['get'], detail=True, url_name="sip")
     def sip(self, request, pk=None):
-        dataproduct = get_object_or_404(models.Dataproduct, pk=pk)
         from lofar.sas.tmss.tmss.tmssapp.adapters.sip import generate_sip_for_dataproduct
-        return HttpResponse(generate_sip_for_dataproduct(dataproduct).get_prettyxml(), content_type='application/xml')
+        from lofar.sas.tmss.tmss.tmssapp import views
+        from django.urls import reverse
+
+        # get the dataproduct...
+        dataproduct = get_object_or_404(models.Dataproduct, pk=pk)
+
+        # construct the schema location for the sip
+        lta_sip_xsd_path = reverse(views.get_lta_sip_xsd)
+        lta_sip_xsd_uri = "%s://%s/%s" % (request.scheme, request.get_host().rstrip('/'), lta_sip_xsd_path.lstrip('/'))
+        # the schema_location is a two-part value: the namespace uri (here the url up to the last '/'), a space, and then the xsd document name.
+        schema_location = lta_sip_xsd_uri[:lta_sip_xsd_uri.rfind('/')] + ' ' + lta_sip_xsd_uri[lta_sip_xsd_uri.rfind('/')+1:]
+
+        # generate the sip
+        sip = generate_sip_for_dataproduct(dataproduct).get_prettyxml(schema_location=schema_location)
+
+        # and return it
+        return HttpResponse(sip, content_type='application/xml')
 
     @swagger_auto_schema(responses={200: 'The SIP graph for this dataproduct',
                                     403: 'forbidden'},
diff --git a/SAS/TMSS/backend/src/tmss/urls.py b/SAS/TMSS/backend/src/tmss/urls.py
index 5306787cb40..74b59016346 100644
--- a/SAS/TMSS/backend/src/tmss/urls.py
+++ b/SAS/TMSS/backend/src/tmss/urls.py
@@ -67,6 +67,7 @@ urlpatterns = [
     #re_path('schemas/<str:template>/<str:name>/<str:version>', views.get_template_json_schema, name='get_template_json_schema'),  # !! use of regex here breaks reverse url lookup
     path('schemas/<str:template>/<str:name>/<str:version>', views.get_template_json_schema, name='get_template_json_schema'),   # !! two urls for same view break Swagger, one url break json ref resolution !!
     path('schemas/<str:template>/<str:name>/<str:version>/', views.get_template_json_schema, name='get_template_json_schema'),  # !! two urls for same view break Swagger, one url break json ref resolution !!
+    path('xsd/LTA-SIP.xsd', views.get_lta_sip_xsd, name='get_lta_sip_xsd'),
     #re_path('station_groups/<str:template_name>/<str:template_version>/<str:station_group>/?', views.get_stations_in_group, name='get_stations_in_group'), # !! use of regex here somehow breaks functionality (because parameters?) -> index page
     path('station_groups/<str:template_name>/<str:template_version>/<str:station_group>', views.get_stations_in_group, name='get_stations_in_group'),
     path('station_groups/<str:template_name>/<str:template_version>/<str:station_group>/', views.get_stations_in_group, name='get_stations_in_group'),
-- 
GitLab