Skip to content
Snippets Groups Projects
Commit be2930cb authored by Nico Vermaas's avatar Nico Vermaas
Browse files

expand the datamodel with pid and add 'inner joins' to the alta select query

parent c0d832c5
No related branches found
No related tags found
1 merge request!2Main
Pipeline #35432 passed
......@@ -23,7 +23,12 @@ Deployment Diagram: https://app.diagrams.net/#G10-LtvKbhC-yzjVoTIg1bnr1wBKsA4s-V
### load esap_cache.sql into dockerized Postgres database
manually copy `esap_cache.sql` to ~/shared directory
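Manually copy `esap_cache.sql` to the `~/shared/sql` directory. Note that the load command below reads `/shared/sql/adex_cache.sql`, so make sure the copied file's name matches.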
manually drop the skyviews table
then load the data like this:
```
sudo docker exec -it Postgres14 psql -U postgres -d adex_cache -c "DROP TABLE skyviews"
docker exec -it adex-postgres psql -U postgres -d adex_cache -f /shared/sql/adex_cache.sql
```
......@@ -6,7 +6,8 @@ from .database import Base
class SkyView(Base):
__tablename__ = "skyviews"
id = Column(Integer, primary_key=True, index=True)
title = Column(String)
pid = Column(String)
name = Column(String)
ra = Column(Float, index=True)
dec = Column(Float, index=True)
observation = Column(String)
......
......@@ -7,7 +7,8 @@ from typing import Optional
class SkyView(BaseModel):
id: int
title: str
pid: str
name: str
ra: float
dec: float
observation: str
......
......@@ -24,6 +24,23 @@ def parse_database_url(url):
return user, password, host, database, db_port
def calc_beam(name):
    """Return the beam number encoded in a dataproduct name.

    The beam is stored as three digits in the pattern ``_B012.`` (an
    underscore, a capital ``B``, three digits and a dot). Every ``_B``
    occurrence in the name is considered, so an earlier ``_B`` that is not
    followed by three digits and a dot does not hide a valid beam marker
    later in the string.

    Args:
        name: the dataproduct name to parse.

    Returns:
        The beam number as an int, or 0 when ``name`` is not a string or
        contains no ``_Bddd.`` marker.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import re

    # Explicit guard instead of a catch-all ``except``: a missing or
    # non-text name simply has no beam.
    if not isinstance(name, str):
        return 0

    beam_match = re.search(r"_B(\d{3})\.", name)
    if beam_match is None:
        return 0
    return int(beam_match.group(1))
def do_convert(source, target):
try:
......@@ -48,8 +65,6 @@ def do_convert(source, target):
)
source_cursor = source_connection.cursor()
source_cursor.execute(sql_scripts.select_from_alta)
target_cursor = target_connection.cursor()
# first drop the existing table and recreate it
......@@ -58,6 +73,7 @@ def do_convert(source, target):
target_connection.commit()
print('fetching records from ALTA...')
source_cursor.execute(sql_scripts.select_from_alta)
rows = source_cursor.fetchall()
count = len(rows)
print(str(count) + ' records fetched')
......@@ -65,7 +81,7 @@ def do_convert(source, target):
print('inserting records into adex_cache...')
insert_count = 0
for row in rows:
access_url,ra,dec,dt,dst,observation = row
name, pid, access_url,ra,dec,dt,dst,observation = row
# TODO: move this algorithm to a sane place, finish it and have scientists review it.
# determine which dataproducts to skip
......@@ -87,11 +103,10 @@ def do_convert(source, target):
if 'cube' in dt:
level=2
# todo: extract beam from name, but first JOIN with api_dataentity (see sql_scripts)
title = "Not available yet"
beam = 0
# todo: calculate beam
beam = calc_beam(name)
record_to_insert = (title, observation, beam, ra, dec, collection, level, dt,dst, access_url)
record_to_insert = (name, pid, observation, beam, ra, dec, collection, level, dt,dst, access_url)
target_cursor.execute(sql_scripts.insert_into_skyviews,record_to_insert)
target_connection.commit()
insert_count = insert_count + 1
......
......@@ -14,7 +14,8 @@ create_table_skyviews = """
CREATE TABLE public.skyviews
(
"id" SERIAL,
"title" character varying(50),
"pid" character varying(50),
"name" character varying(50),
"observation" character varying(50),
"beam" integer,
"ra" double precision,
......@@ -32,7 +33,8 @@ CREATE TABLE public.skyviews
insert_into_skyviews = """
INSERT INTO public.skyviews
(
title,
name,
pid,
observation,
beam,
ra,
......@@ -42,11 +44,11 @@ INSERT INTO public.skyviews
dataproduct_type,
dataproduct_subtype,
access_url)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
# todo: join with api_dataentity table to retrieve the name (which also holds the beam)
select_from_alta = """
select_from_alta_xxx = """
SELECT
"storageRef" as access_url,
"RA" as ra, dec,
......@@ -55,3 +57,20 @@ SELECT
"datasetID" as observation
FROM api_dataproduct
"""
# Query run against the ALTA source database to fetch the dataproducts that
# are copied into the skyviews table.
#
# The order of the selected columns matters: do_convert unpacks every row
# positionally as (name, pid, access_url, ra, dec, dt, dst, observation).
#
# The joins are written in nested form (two INNER JOINs followed by two ON
# clauses). As far as standard SQL join nesting goes, the first ON pairs
# api_dataentity (table2) with api_entity (table3) and the second ON pairs
# api_dataproduct (table1) with that result.
# NOTE(review): the selected columns are not qualified with a table alias;
# confirm that none of them is ambiguous across the three joined tables.
select_from_alta = """
SELECT
"name" as name,
"PID" as pid,
"storageRef" as access_url,
"RA" as ra, dec,
"dataProductType" as dt,
"dataProductSubType" as dst,
"datasetID" as observation
FROM api_dataproduct as table1
INNER JOIN api_dataentity as table2
INNER JOIN api_entity as table3
ON table3.altaobject_ptr_id = table2.entity_ptr_id
ON table1.dataentity_ptr_id = table2.entity_ptr_id;
"""
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment