Skip to content
Snippets Groups Projects
Commit be2930cb authored by Nico Vermaas's avatar Nico Vermaas
Browse files

expand the datamodel with pid and add 'inner joins' to the alta select query

parent c0d832c5
Branches
No related tags found
1 merge request!2Main
Pipeline #35432 passed
...@@ -23,7 +23,12 @@ Deployment Diagram: https://app.diagrams.net/#G10-LtvKbhC-yzjVoTIg1bnr1wBKsA4s-V ...@@ -23,7 +23,12 @@ Deployment Diagram: https://app.diagrams.net/#G10-LtvKbhC-yzjVoTIg1bnr1wBKsA4s-V
### load esap_cache.sql into dockerized Postgres database ### load esap_cache.sql into dockerized Postgres database
manually copy `esap_cache.sql` to ~/shared directory manually copy `esap_cache.sql` to ~/shared/sql directory
Manually drop the `skyviews` table (first command below),
then load the data like this:
``` ```
sudo docker exec -it Postgres14 psql -U postgres -d adex_cache -c "DROP TABLE skyviews"
docker exec -it adex-postgres psql -U postgres -d adex_cache -f /shared/sql/adex_cache.sql docker exec -it adex-postgres psql -U postgres -d adex_cache -f /shared/sql/adex_cache.sql
``` ```
...@@ -6,7 +6,8 @@ from .database import Base ...@@ -6,7 +6,8 @@ from .database import Base
class SkyView(Base): class SkyView(Base):
__tablename__ = "skyviews" __tablename__ = "skyviews"
id = Column(Integer, primary_key=True, index=True) id = Column(Integer, primary_key=True, index=True)
title = Column(String) pid = Column(String)
name = Column(String)
ra = Column(Float, index=True) ra = Column(Float, index=True)
dec = Column(Float, index=True) dec = Column(Float, index=True)
observation = Column(String) observation = Column(String)
......
...@@ -7,7 +7,8 @@ from typing import Optional ...@@ -7,7 +7,8 @@ from typing import Optional
class SkyView(BaseModel): class SkyView(BaseModel):
id: int id: int
title: str pid: str
name: str
ra: float ra: float
dec: float dec: float
observation: str observation: str
......
...@@ -24,6 +24,23 @@ def parse_database_url(url): ...@@ -24,6 +24,23 @@ def parse_database_url(url):
return user, password, host, database, db_port return user, password, host, database, db_port
def calc_beam(name):
    """Return the beam number encoded in a dataproduct name.

    The beam is the three-digit number in the pattern ``_B012.`` (an
    underscore, a capital ``B``, exactly three digits and a dot).

    Args:
        name: the dataproduct name to parse; non-string values (such as
            ``None``) are tolerated.

    Returns:
        The beam number as an int, or 0 when ``name`` is not a string or
        contains no ``_B###.`` pattern.
    """
    # Local import keeps this block self-contained; the module's import
    # section is not visible from here.
    import re

    # A missing or non-text name simply has no beam; report the default
    # instead of relying on a catch-all exception handler.
    if not isinstance(name, str):
        return 0

    # Search the whole name, so an earlier "_B" that is not followed by three
    # digits and a dot (e.g. "_BAD_") does not hide the real beam marker.
    match = re.search(r"_B(\d{3})\.", name)
    if match is None:
        return 0
    return int(match.group(1))
def do_convert(source, target): def do_convert(source, target):
try: try:
...@@ -48,8 +65,6 @@ def do_convert(source, target): ...@@ -48,8 +65,6 @@ def do_convert(source, target):
) )
source_cursor = source_connection.cursor() source_cursor = source_connection.cursor()
source_cursor.execute(sql_scripts.select_from_alta)
target_cursor = target_connection.cursor() target_cursor = target_connection.cursor()
# first drop the existing table and recreate it # first drop the existing table and recreate it
...@@ -58,6 +73,7 @@ def do_convert(source, target): ...@@ -58,6 +73,7 @@ def do_convert(source, target):
target_connection.commit() target_connection.commit()
print('fetching records from ALTA...') print('fetching records from ALTA...')
source_cursor.execute(sql_scripts.select_from_alta)
rows = source_cursor.fetchall() rows = source_cursor.fetchall()
count = len(rows) count = len(rows)
print(str(count) + ' records fetched') print(str(count) + ' records fetched')
...@@ -65,7 +81,7 @@ def do_convert(source, target): ...@@ -65,7 +81,7 @@ def do_convert(source, target):
print('inserting records into adex_cache...') print('inserting records into adex_cache...')
insert_count = 0 insert_count = 0
for row in rows: for row in rows:
access_url,ra,dec,dt,dst,observation = row name, pid, access_url,ra,dec,dt,dst,observation = row
# TODO: move this algorithm to a sane place, finish it and have scientists review it. # TODO: move this algorithm to a sane place, finish it and have scientists review it.
# determine which dataproducts to skip # determine which dataproducts to skip
...@@ -87,11 +103,10 @@ def do_convert(source, target): ...@@ -87,11 +103,10 @@ def do_convert(source, target):
if 'cube' in dt: if 'cube' in dt:
level=2 level=2
# todo: extract beam from name, but first JOIN with api_dataentity (see sql_scripts) # todo: calculate beam
title = "Not available yet" beam = calc_beam(name)
beam = 0
record_to_insert = (title, observation, beam, ra, dec, collection, level, dt,dst, access_url) record_to_insert = (name, pid, observation, beam, ra, dec, collection, level, dt,dst, access_url)
target_cursor.execute(sql_scripts.insert_into_skyviews,record_to_insert) target_cursor.execute(sql_scripts.insert_into_skyviews,record_to_insert)
target_connection.commit() target_connection.commit()
insert_count = insert_count + 1 insert_count = insert_count + 1
......
...@@ -14,7 +14,8 @@ create_table_skyviews = """ ...@@ -14,7 +14,8 @@ create_table_skyviews = """
CREATE TABLE public.skyviews CREATE TABLE public.skyviews
( (
"id" SERIAL, "id" SERIAL,
"title" character varying(50), "pid" character varying(50),
"name" character varying(50),
"observation" character varying(50), "observation" character varying(50),
"beam" integer, "beam" integer,
"ra" double precision, "ra" double precision,
...@@ -32,7 +33,8 @@ CREATE TABLE public.skyviews ...@@ -32,7 +33,8 @@ CREATE TABLE public.skyviews
insert_into_skyviews = """ insert_into_skyviews = """
INSERT INTO public.skyviews INSERT INTO public.skyviews
( (
title, name,
pid,
observation, observation,
beam, beam,
ra, ra,
...@@ -42,11 +44,11 @@ INSERT INTO public.skyviews ...@@ -42,11 +44,11 @@ INSERT INTO public.skyviews
dataproduct_type, dataproduct_type,
dataproduct_subtype, dataproduct_subtype,
access_url) access_url)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
""" """
# todo: join with api_dataentity table to retrieve the name (which also holds the beam) # todo: join with api_dataentity table to retrieve the name (which also holds the beam)
select_from_alta = """ select_from_alta_xxx = """
SELECT SELECT
"storageRef" as access_url, "storageRef" as access_url,
"RA" as ra, dec, "RA" as ra, dec,
...@@ -55,3 +57,20 @@ SELECT ...@@ -55,3 +57,20 @@ SELECT
"datasetID" as observation "datasetID" as observation
FROM api_dataproduct FROM api_dataproduct
""" """
# Query that fetches the dataproduct records from ALTA. Each returned row holds,
# in this order: name, pid, access_url, ra, dec, dt, dst, observation.
# The two INNER JOINs are written in nested form (JOIN ... JOIN ... ON ... ON ...):
# the first ON links api_entity (table3) to api_dataentity (table2), the second
# links api_dataproduct (table1) to api_dataentity (table2).
# NOTE(review): the selected columns are not table-qualified, so this relies on
# names such as "name" being unique across the three tables -- confirm.
select_from_alta = """
SELECT
"name" as name,
"PID" as pid,
"storageRef" as access_url,
"RA" as ra, dec,
"dataProductType" as dt,
"dataProductSubType" as dst,
"datasetID" as observation
FROM api_dataproduct as table1
INNER JOIN api_dataentity as table2
INNER JOIN api_entity as table3
ON table3.altaobject_ptr_id = table2.entity_ptr_id
ON table1.dataentity_ptr_id = table2.entity_ptr_id;
"""
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment