From be2930cb32681f7fd875b197bc9f765b50a542c6 Mon Sep 17 00:00:00 2001
From: Nico Vermaas <vermaas@astron.nl>
Date: Mon, 5 Sep 2022 17:04:13 +0200
Subject: [PATCH] expand the datamodel with pid and add 'inner joins' to the
 alta select query

---
 README.md                                 |  7 +++++-
 database/models.py                        |  3 ++-
 database/schemas.py                       |  3 ++-
 dev_scripts/convert_alta_to_adex_cache.py | 29 +++++++++++++++++------
 schemas/sql_scripts.py                    | 27 +++++++++++++++++----
 5 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 8862d06..97d8298 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,12 @@ Deployment Diagram: https://app.diagrams.net/#G10-LtvKbhC-yzjVoTIg1bnr1wBKsA4s-V
   
   
 ### load esap_cache.sql into dockerized Postgres database
-manually copy `esap_cache.sql` to ~/shared directory 
+- manually copy `esap_cache.sql` to the `~/shared/sql` directory
+- manually drop the `skyviews` table
+
+then load the data like this:
+
 ```
+sudo docker exec -it adex-postgres psql -U postgres -d adex_cache -c "DROP TABLE skyviews"
   docker exec -it adex-postgres psql -U postgres -d adex_cache -f /shared/sql/adex_cache.sql
 ```
diff --git a/database/models.py b/database/models.py
index 98ad27f..dc7c9eb 100644
--- a/database/models.py
+++ b/database/models.py
@@ -6,7 +6,8 @@ from .database import Base
 class SkyView(Base):
     __tablename__ = "skyviews"
     id = Column(Integer, primary_key=True, index=True)
-    title = Column(String)
+    pid = Column(String)
+    name = Column(String)
     ra = Column(Float, index=True)
     dec = Column(Float, index=True)
     observation = Column(String)
diff --git a/database/schemas.py b/database/schemas.py
index 19893ee..076f852 100644
--- a/database/schemas.py
+++ b/database/schemas.py
@@ -7,7 +7,8 @@ from typing import Optional
 
 class SkyView(BaseModel):
     id: int
-    title: str
+    pid: str
+    name: str
     ra: float
     dec: float
     observation: str
diff --git a/dev_scripts/convert_alta_to_adex_cache.py b/dev_scripts/convert_alta_to_adex_cache.py
index b8f9467..b93e238 100644
--- a/dev_scripts/convert_alta_to_adex_cache.py
+++ b/dev_scripts/convert_alta_to_adex_cache.py
@@ -24,6 +24,23 @@ def parse_database_url(url):
     return user, password, host, database, db_port
 
 
+def calc_beam(name):
+    """Return the beam number encoded in a dataproduct name, or 0 if there is none.
+
+    The beam is the 3-digit number in the pattern "_B012." (e.g. "..._B012.MS" -> 12).
+    Every "_B" occurrence is considered, so an earlier "_B" without 3 digits and
+    a "." does not hide a valid beam further on in the name.
+    Anything that is not a string (e.g. None for a missing name) yields 0.
+    """
+    import re  # local import keeps this helper self-contained
+
+    if not isinstance(name, str):
+        return 0
+
+    match = re.search(r"_B([0-9]{3})\.", name)
+    return int(match.group(1)) if match else 0
+
+
 def do_convert(source, target):
 
     try:
@@ -48,8 +65,6 @@ def do_convert(source, target):
         )
 
         source_cursor = source_connection.cursor()
-        source_cursor.execute(sql_scripts.select_from_alta)
-
         target_cursor = target_connection.cursor()
 
         # first drop the existing table and recreate it
@@ -58,6 +73,7 @@ def do_convert(source, target):
         target_connection.commit()
 
         print('fetching records from ALTA...')
+        source_cursor.execute(sql_scripts.select_from_alta)
         rows = source_cursor.fetchall()
         count = len(rows)
         print(str(count) + ' records fetched')
@@ -65,7 +81,7 @@ def do_convert(source, target):
         print('inserting records into adex_cache...')
         insert_count = 0
         for row in rows:
-            access_url,ra,dec,dt,dst,observation = row
+            name, pid, access_url,ra,dec,dt,dst,observation = row
 
             # TODO: move this algorithm to a sane place, finish it and have scientists review it.
             # determine which dataproducts to skip
@@ -87,11 +103,10 @@ def do_convert(source, target):
             if 'cube' in dt:
                 level=2
 
-            # todo: extract beam from name, but first JOIN with api_dataentity (see sql_scripts)
-            title = "Not available yet"
-            beam = 0
+            # extract the beam number from the dataproduct name (0 if not present)
+            beam = calc_beam(name)
 
-            record_to_insert = (title, observation, beam, ra, dec, collection, level, dt,dst, access_url)
+            record_to_insert = (name, pid, observation, beam, ra, dec, collection, level, dt,dst, access_url)
             target_cursor.execute(sql_scripts.insert_into_skyviews,record_to_insert)
             target_connection.commit()
             insert_count = insert_count + 1
diff --git a/schemas/sql_scripts.py b/schemas/sql_scripts.py
index e7d9889..48d98bc 100644
--- a/schemas/sql_scripts.py
+++ b/schemas/sql_scripts.py
@@ -14,7 +14,8 @@ create_table_skyviews = """
 CREATE TABLE public.skyviews
 (
     "id" SERIAL,
-    "title" character varying(50),
+    "pid" character varying(50),
+    "name" character varying(50),
     "observation" character varying(50),
     "beam" integer,
     "ra" double precision,
@@ -32,7 +33,8 @@ CREATE TABLE public.skyviews
 insert_into_skyviews = """
 INSERT INTO public.skyviews
 (
-    title,
+    name,
+    pid,
     observation,
     beam,
     ra,
@@ -42,11 +44,11 @@ INSERT INTO public.skyviews
     dataproduct_type,
     dataproduct_subtype,
     access_url) 
-    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
 """
 
 # todo: join with api_dataentity table to retrieve the name (which also holds the beam)
-select_from_alta = """
+select_from_alta_xxx = """
 SELECT 
 "storageRef" as access_url, 
 "RA" as ra, dec, 
@@ -54,4 +56,21 @@ SELECT
 "dataProductSubType" as dst, 
 "datasetID" as observation 
 FROM api_dataproduct
+"""
+
+select_from_alta = """
+SELECT 
+"name" as name,
+"PID" as pid,
+"storageRef" as access_url, 
+"RA" as ra, dec, 
+"dataProductType" as dt, 
+"dataProductSubType" as dst, 
+"datasetID" as observation 
+
+FROM api_dataproduct as table1
+INNER JOIN api_dataentity as table2
+INNER JOIN api_entity as table3
+ON table3.altaobject_ptr_id = table2.entity_ptr_id
+ON table1.dataentity_ptr_id = table2.entity_ptr_id;
 """
\ No newline at end of file
-- 
GitLab