Skip to content
Snippets Groups Projects
Commit c21e52a4 authored by Roy de Goei's avatar Roy de Goei
Browse files

SDC-685: Use the connection module of ldvspec.

parent f024b925
No related branches found
No related tags found
1 merge request!4SDC-685: Script which migrates ldvadmin data to ldv-spec-db
Pipeline #33933 passed
from configparser import ConfigParser from configparser import ConfigParser
import os
def read_config(section, filename='database.cfg'): def read_config(section, filename='database.cfg'):
full_filename = os.path.join(os.path.dirname(__file__), filename)
parser = ConfigParser() parser = ConfigParser()
try: read_result = parser.read(full_filename)
parser.read(filename) # If file not found then parser returns just an empty list it does not raise an Exception!
except FileNotFoundError as exc: if len(read_result) == 0:
raise FileNotFoundError( raise Exception("Configuration file with filename {0} not found".format(full_filename))
"Configuration file with filename {0} not found".format(filename)
) from exc
db_settings = {} db_settings = {}
if parser.has_section(section): if parser.has_section(section):
......
import sys import sys
import psycopg2 import psycopg2
from config import read_config from ldvspec.connection.config import read_config
import logging import logging
import argparse import argparse
from sshtunnel import SSHTunnelForwarder from sshtunnel import SSHTunnelForwarder
......
#!/usr/bin/env python3 #!/usr/bin/env python3
import json """
This script migrates data from ldvadmin to ldv-spec-db with user arguments.
By default, the local ldv-spec-db is used and the ldvadmin on sdc-db.astron.nl
Script requires token to access the ldv-spec-db with REST API
Some examples:
- Show latest version:
python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --version
- Import only 1000 records and show more verbosity:
python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --limit 1000 --verbose
- Import 50000 records and insert in steps of 10000 (so 5 steps)
python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --limit 50000 --max_nbr_dps_to_insert_per_request 10000
- Import only 1000 records at production:
python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --limit 1000 --host prod
"""
import os import os
import time import time
import logging import logging
import argparse import argparse
import sys import sys
import psycopg2
import math import math
from ldv_specification_interface import LDVSpecInterface from ldv_specification_interface import LDVSpecInterface
import ldvspec.connection.retrieve_db_connection as connector
logger = logging.getLogger(__file__) logger = logging.getLogger(__file__)
handler = logging.StreamHandler(sys.stdout) handler = logging.StreamHandler(sys.stdout)
...@@ -25,50 +39,11 @@ def change_logging_level(level): ...@@ -25,50 +39,11 @@ def change_logging_level(level):
handler.setLevel(level) handler.setLevel(level)
def open_connection_to_postgres_database():
"""
Create a database connection to a Postgres database
The 'connector' will be implemented differently, for now some hard coding with the assumption
that tunnel to ldvadmin is already created
:return connection to the database
"""
host = "localhost"
user = "ldvrbow"
password = "Wehn5CTYj1RcbstMSGls"
database = "ldvadmin"
port = 4321
connection = None
try:
logging.info("Connection to Database")
# establishing the connection
connection = psycopg2.connect(
database=database,
user=user,
password=password,
host=host,
port=port
)
# create a cursor
cur = connection.cursor()
# display the PostgreSQL database server version
cur.execute('SELECT version()')
logging.info("PostgreSQL database version: {}".format(cur.fetchone()))
except psycopg2.Error as exp:
logger.error("Could not connect to DataBase {}, results in {}".format(database, exp))
connection.close()
logging.info("Database Connection Closed")
return connection
def execute_query(connection, sql_query, data=None): def execute_query(connection, sql_query, data=None):
""" """
Execute query of the ldvadmin Database Execute query of the ldvadmin Database
There is not commit required because we are doing only read queries There is no commit required because we are doing only read queries
The 'connector' will be implemented differently, for now some hard coding with the assumption :param: connection: Database 'connection'
that tunnel to ldvadmin is already created
:param: sql_query :param: sql_query
:param: data (optional) the data arguments of the sql_query :param: data (optional) the data arguments of the sql_query
:return result of the query :return result of the query
...@@ -79,7 +54,7 @@ def execute_query(connection, sql_query, data=None): ...@@ -79,7 +54,7 @@ def execute_query(connection, sql_query, data=None):
connection.commit() connection.commit()
return cursor.fetchall() return cursor.fetchall()
except Exception as exp: except Exception as exp:
logger.error("Could not execute query! '{}' results in {}".format(sql_query, exp)) logger.error("Could not execute query! '{}' results in -> {}".format(sql_query, exp))
def main(): def main():
...@@ -96,13 +71,14 @@ def main(): ...@@ -96,13 +71,14 @@ def main():
parser.add_argument("-t", "--token", default="ad9b37a24380948601257f9c1f889b07a00ac81e", parser.add_argument("-t", "--token", default="ad9b37a24380948601257f9c1f889b07a00ac81e",
help="Token to access the REST API of ldvspec", action="store") help="Token to access the REST API of ldvspec", action="store")
parser.add_argument("--host", nargs="?", default='dev', parser.add_argument("--host", nargs="?", default='dev',
help="Presets are 'dev', 'test', 'prod'. Otherwise give a full url like https://atdb.astron.nl/atdb") help="The ldv-spec-db host. Presets are 'dev', 'test', 'prod', otherwise give a full url like https://atdb.astron.nl/atdb")
parser.add_argument("-s", "--section", default='postgresql-ldv',
help="Add the configuration's section from the database.cfg.")
# Have payload of more millions will most likely not work # Have payload of more millions will most likely not work
# tested with 10.000 results in 90 seconds so # 11 mil. will be at least 28 hours # tested with 10.000 results in 90 seconds so # 11 mil. will be at least 28 hours
parser.add_argument("-r", "--max_nbr_dps_to_insert_per_request", default=1000, type=int, parser.add_argument("-r", "--max_nbr_dps_to_insert_per_request", default=1000, type=int,
help="The number of dataproducts to insert per REST request (0 is no limit)", action="store") help="The number of dataproducts to insert per REST request (0 is no limit)", action="store")
args = parser.parse_args() args = parser.parse_args()
if args.version: if args.version:
...@@ -128,22 +104,23 @@ def main(): ...@@ -128,22 +104,23 @@ def main():
no_limit_to_insert = False no_limit_to_insert = False
logging.debug("Limit on number of dataproducts to insert REST is set to {}".format(args.max_nbr_dps_to_insert_per_request)) logging.debug("Limit on number of dataproducts to insert REST is set to {}".format(args.max_nbr_dps_to_insert_per_request))
with open_connection_to_postgres_database() as conn: # Create connection using ssh tunnel with the ldvadmin database
count_raw_dps = execute_query(conn, "select count(*) from astrowise.raw_dataproducts")[0][0] conn, tunnel = connector.connect_postgresql(args.section)
count_pl_dps = execute_query(conn, "select count(*) from astrowise.pl_dataproducts")[0][0] count_raw_dps = execute_query(conn, "select count(*) from astrowise.raw_dataproducts")[0][0]
logging.info(f"There are {count_raw_dps} raw dataproducts and {count_pl_dps} pipeline dataproduct in the ldvadmin.astrowise table!!") count_pl_dps = execute_query(conn, "select count(*) from astrowise.pl_dataproducts")[0][0]
logging.info(f"There are {count_raw_dps} raw dataproducts and {count_pl_dps} pipeline dataproduct in the ldvadmin.astrowise table!!")
result_query_all_dps = execute_query(conn,
"select obsid, obsid_source, dp_type, project, activity, uri, size, dysco " result_query_all_dps = execute_query(conn,
"from astrowise.raw_dataproducts {}".format(limit_str)) "select obsid, obsid_source, dp_type, project, activity, uri, size, dysco "
# Are there still dataproducts left to query? "from astrowise.raw_dataproducts {}".format(limit_str))
nbr_dps_left = len(result_query_all_dps) - args.limit # Are there still dataproducts left to query?
if nbr_dps_left > 0 and args.limit > 0: nbr_dps_left = len(result_query_all_dps) - args.limit
logging.debug("Limit on number of leftover dataproducts to query is set to {}".format(nbr_dps_left)) if nbr_dps_left > 0 and args.limit > 0:
limit_str = "limit {}".format(nbr_dps_left) logging.debug("Limit on number of leftover dataproducts to query is set to {}".format(nbr_dps_left))
result_query_all_dps.extend(execute_query(conn, limit_str = "limit {}".format(nbr_dps_left)
"select obsid, obsid_source, dp_type, project, activity, uri, size, dysco " result_query_all_dps.extend(execute_query(conn,
"from astrowise.pl_dataproducts {}".format(limit_str))) "select obsid, obsid_source, dp_type, project, activity, uri, size, dysco "
"from astrowise.pl_dataproducts {}".format(limit_str)))
logging.info("{} dataproduct retrieved from ldvadmin".format(len(result_query_all_dps))) logging.info("{} dataproduct retrieved from ldvadmin".format(len(result_query_all_dps)))
# Create connection with ldv-spec-db using REST API, use temp. token created in my test-env # Create connection with ldv-spec-db using REST API, use temp. token created in my test-env
...@@ -180,4 +157,4 @@ def main(): ...@@ -180,4 +157,4 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
main() main()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment