From be2930cb32681f7fd875b197bc9f765b50a542c6 Mon Sep 17 00:00:00 2001
From: Nico Vermaas <vermaas@astron.nl>
Date: Mon, 5 Sep 2022 17:04:13 +0200
Subject: [PATCH] expand the datamodel with pid and add 'inner joins' to the alta select query

---
 README.md                                 |  7 +++++-
 database/models.py                        |  3 ++-
 database/schemas.py                       |  3 ++-
 dev_scripts/convert_alta_to_adex_cache.py | 29 +++++++++++++++++------
 schemas/sql_scripts.py                    | 27 +++++++++++++++++----
 5 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 8862d06..97d8298 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,12 @@ Deployment Diagram: https://app.diagrams.net/#G10-LtvKbhC-yzjVoTIg1bnr1wBKsA4s-V
 
 ### load esap_cache.sql into dockerized Postgres database
 
-manually copy `esap_cache.sql` to ~/shared directory
+manually copy `esap_cache.sql` to ~/shared/sql directory
+manually drop the skyviews table
+
+then load the data like this:
+
 ```
+sudo docker exec -it Postgres14 psql -U postgres -d adex_cache -c "DROP TABLE skyviews"
 docker exec -it adex-postgres psql -U postgres -d adex_cache -f /shared/sql/adex_cache.sql
 ```
diff --git a/database/models.py b/database/models.py
index 98ad27f..dc7c9eb 100644
--- a/database/models.py
+++ b/database/models.py
@@ -6,7 +6,8 @@ from .database import Base
 class SkyView(Base):
     __tablename__ = "skyviews"
     id = Column(Integer, primary_key=True, index=True)
-    title = Column(String)
+    pid = Column(String)
+    name = Column(String)
     ra = Column(Float, index=True)
     dec = Column(Float, index=True)
     observation = Column(String)
diff --git a/database/schemas.py b/database/schemas.py
index 19893ee..076f852 100644
--- a/database/schemas.py
+++ b/database/schemas.py
@@ -7,7 +7,8 @@ from typing import Optional
 
 class SkyView(BaseModel):
     id: int
-    title: str
+    pid: str
+    name: str
     ra: float
     dec: float
     observation: str
diff --git a/dev_scripts/convert_alta_to_adex_cache.py b/dev_scripts/convert_alta_to_adex_cache.py
index b8f9467..b93e238 100644
--- a/dev_scripts/convert_alta_to_adex_cache.py
+++ b/dev_scripts/convert_alta_to_adex_cache.py
@@ -24,6 +24,23 @@ def parse_database_url(url):
 
     return user, password, host, database, db_port
 
+def calc_beam(name):
+    # the beam number can be found by parsing the number in the string pattern "_B012."
+    beam = 0
+    try:
+        position = name.find("_B")
+        if position>=0:
+            beam_string = name[position:position+6]
+            if beam_string.find(".") == 5:
+                beam = int(beam_string[2:5])
+
+        return beam
+    except:
+        pass
+
+    return 0
+
+
 def do_convert(source, target):
 
     try:
@@ -48,8 +65,6 @@ def do_convert(source, target):
         )
         source_cursor = source_connection.cursor()
-        source_cursor.execute(sql_scripts.select_from_alta)
-
         target_cursor = target_connection.cursor()
 
         # first drop the existing table and recreate it
@@ -58,6 +73,7 @@
         target_connection.commit()
 
         print('fetching records from ALTA...')
+        source_cursor.execute(sql_scripts.select_from_alta)
         rows = source_cursor.fetchall()
         count = len(rows)
         print(str(count) + ' records fetched')
@@ -65,7 +81,7 @@
         print('inserting records into adex_cache...')
         insert_count = 0
         for row in rows:
-            access_url,ra,dec,dt,dst,observation = row
+            name, pid, access_url,ra,dec,dt,dst,observation = row
 
             # TODO: move this algorithm to a sane place, finish it and have scientists review it.
             # determine which dataproducts to skip
@@ -87,11 +103,10 @@
             if 'cube' in dt:
                 level=2
 
-            # todo: extract beam from name, but first JOIN with api_dataentity (see sql_scripts)
-            title = "Not available yet"
-            beam = 0
+            # todo: calculate beam
+            beam = calc_beam(name)
 
-            record_to_insert = (title, observation, beam, ra, dec, collection, level, dt,dst, access_url)
+            record_to_insert = (name, pid, observation, beam, ra, dec, collection, level, dt,dst, access_url)
             target_cursor.execute(sql_scripts.insert_into_skyviews,record_to_insert)
             target_connection.commit()
             insert_count = insert_count + 1
diff --git a/schemas/sql_scripts.py b/schemas/sql_scripts.py
index e7d9889..48d98bc 100644
--- a/schemas/sql_scripts.py
+++ b/schemas/sql_scripts.py
@@ -14,7 +14,8 @@ create_table_skyviews = """
 CREATE TABLE public.skyviews
 (
     "id" SERIAL,
-    "title" character varying(50),
+    "pid" character varying(50),
+    "name" character varying(50),
     "observation" character varying(50),
     "beam" integer,
     "ra" double precision,
@@ -32,7 +33,8 @@ CREATE TABLE public.skyviews
 insert_into_skyviews = """
 INSERT INTO public.skyviews
     (
-    title,
+    name,
+    pid,
     observation,
     beam,
     ra,
@@ -42,11 +44,11 @@ INSERT INTO public.skyviews
     dataproduct_type,
     dataproduct_subtype,
     access_url)
-    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
 """
 
 # todo: join with api_dataentity table to retrieve the name (which also holds the beam)
-select_from_alta = """
+select_from_alta_xxx = """
 SELECT
 "storageRef" as access_url,
 "RA" as ra, dec,
@@ -54,4 +56,21 @@ SELECT
 "dataProductSubType" as dst,
 "datasetID" as observation
 FROM api_dataproduct
+"""
+
+select_from_alta = """
+SELECT
+"name" as name,
+"PID" as pid,
+"storageRef" as access_url,
+"RA" as ra, dec,
+"dataProductType" as dt,
+"dataProductSubType" as dst,
+"datasetID" as observation
+
+FROM api_dataproduct as table1
+INNER JOIN api_dataentity as table2
+INNER JOIN api_entity as table3
+ON table3.altaobject_ptr_id = table2.entity_ptr_id
+ON table1.dataentity_ptr_id = table2.entity_ptr_id;
 """
\ No newline at end of file
--
GitLab
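Review note (not part of the patch): the new `calc_beam()` relies on a `_B012.`-style pattern in the dataproduct name. Below is a minimal, regex-based sketch of the same idea, assuming ALTA names embed the beam as exactly three digits between `_B` and a dot; the function name `parse_beam` and the example file names are hypothetical and only illustrate the expected behaviour.

```python
# Illustrative sketch only -- not code from this patch.
# Assumption: ALTA dataproduct names embed the beam as "_B<three digits>." (e.g. "_B012.").
import re


def parse_beam(name):
    """Return the beam number from a '_B012.'-style pattern, or 0 when absent."""
    match = re.search(r"_B(\d{3})\.", name)
    return int(match.group(1)) if match else 0


# hypothetical names, for illustration only
assert parse_beam("WSRTA190409015_B012.MS") == 12      # beam 12 found in the name
assert parse_beam("WSRTA190409015_image.fits") == 0    # no beam pattern -> 0
```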
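Review note (not part of the patch): the new `select_from_alta` uses the nested `INNER JOIN ... INNER JOIN ... ON ... ON ...` form, where the first `ON` binds to the innermost join. For readers who find that ordering hard to follow, here is an equivalent linear sketch written as a Python string, the way `schemas/sql_scripts.py` stores its queries. The join conditions are copied from the patch; the aliases `dp`, `de` and `e` are illustrative renames of `table1`, `table2` and `table3`.

```python
# Illustrative sketch only -- an equivalent rewrite of select_from_alta with each
# ON clause placed next to the join it belongs to. Aliases dp/de/e rename
# table1/table2/table3; the join conditions themselves are taken from the patch.
select_from_alta_linear = """
SELECT
"name" as name,
"PID" as pid,
"storageRef" as access_url,
"RA" as ra, dec,
"dataProductType" as dt,
"dataProductSubType" as dst,
"datasetID" as observation
FROM api_dataproduct AS dp
INNER JOIN api_dataentity AS de
    ON dp.dataentity_ptr_id = de.entity_ptr_id
INNER JOIN api_entity AS e
    ON e.altaobject_ptr_id = de.entity_ptr_id;
"""
```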