diff --git a/ldvspec/connection/config.py b/ldvspec/connection/config.py index 947d8c96042c4254fad4177bf46b4952903fbe7d..aa517437534e4cd3898ae6d983079dc152e11e00 100644 --- a/ldvspec/connection/config.py +++ b/ldvspec/connection/config.py @@ -1,14 +1,14 @@ from configparser import ConfigParser +import os def read_config(section, filename='database.cfg'): + full_filename = os.path.join(os.path.dirname(__file__), filename) parser = ConfigParser() - try: - parser.read(filename) - except FileNotFoundError as exc: - raise FileNotFoundError( - "Configuration file with filename {0} not found".format(filename) - ) from exc + read_result = parser.read(full_filename) + # If file not found then parser returns just an empty list it does not raise an Exception! + if len(read_result) == 0: + raise Exception("Configuration file with filename {0} not found".format(full_filename)) db_settings = {} if parser.has_section(section): diff --git a/ldvspec/connection/retrieve_db_connection.py b/ldvspec/connection/retrieve_db_connection.py index af8659de67ab2ac9f68ae86aa2b84a4e5521c789..c7a65417099482a6a28c9270fd8d94fd11058d51 100644 --- a/ldvspec/connection/retrieve_db_connection.py +++ b/ldvspec/connection/retrieve_db_connection.py @@ -1,7 +1,7 @@ import sys import psycopg2 -from config import read_config +from ldvspec.connection.config import read_config import logging import argparse from sshtunnel import SSHTunnelForwarder diff --git a/ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py b/ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py index 3b15383308f63cf4ddeab6ba6b89744b38a3efc0..0cd9dd8598d0b3721f8d06c6eba5c831977dbfea 100644 --- a/ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py +++ b/ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py @@ -1,14 +1,28 @@ #!/usr/bin/env python3 -import json +""" +This script migrates data from ldvadmin to ldv-spec-db with user arguments. +By default, the local ldv-spec-db is used and the ldvadmin on sdc-db.astron.nl +Script requires token to access the ldv-spec-db with REST API + +Some examples: +- Show latest version: + python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --version +- Import only 1000 records and show more verbosity: + python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --limit 1000 --verbose +- Import 50000 records and insert in steps of 10000 (so 5 steps) + python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --limit 50000 --max_nbr_dps_to_insert_per_request 10000 +- Import only 1000 records at production: + python ./ldvspec/lofardata/scripts/migrate_ldvadmin_to_ldvspec.py --limit 1000 --host prod + +""" import os import time import logging import argparse import sys -import psycopg2 import math from ldv_specification_interface import LDVSpecInterface - +import ldvspec.connection.retrieve_db_connection as connector logger = logging.getLogger(__file__) handler = logging.StreamHandler(sys.stdout) @@ -25,50 +39,11 @@ def change_logging_level(level): handler.setLevel(level) -def open_connection_to_postgres_database(): - """ - Create a database connection to a Postgres database - The 'connector' will be implemented differently, for now some hard coding with the assumption - that tunnel to ldvadmin is already created - :return connection to the database - """ - host = "localhost" - user = "ldvrbow" - password = "Wehn5CTYj1RcbstMSGls" - database = "ldvadmin" - port = 4321 - - connection = None - try: - logging.info("Connection to Database") - # establishing the connection - connection = psycopg2.connect( - database=database, - user=user, - password=password, - host=host, - port=port - ) - # create a cursor - cur = connection.cursor() - # display the PostgreSQL database server version - cur.execute('SELECT version()') - logging.info("PostgreSQL database version: {}".format(cur.fetchone())) - - except psycopg2.Error as exp: - logger.error("Could not connect to DataBase {}, results in {}".format(database, exp)) - connection.close() - logging.info("Database Connection Closed") - - return connection - - def execute_query(connection, sql_query, data=None): """ Execute query of the ldvadmin Database - There is not commit required because we are doing only read queries - The 'connector' will be implemented differently, for now some hard coding with the assumption - that tunnel to ldvadmin is already created + There is no commit required because we are doing only read queries + :param: connection: Database 'connection' :param: sql_query :param: data (optional) the data arguments of the sql_query :return result of the query @@ -79,7 +54,7 @@ def execute_query(connection, sql_query, data=None): connection.commit() return cursor.fetchall() except Exception as exp: - logger.error("Could not execute query! '{}' results in {}".format(sql_query, exp)) + logger.error("Could not execute query! '{}' results in -> {}".format(sql_query, exp)) def main(): @@ -96,13 +71,14 @@ def main(): parser.add_argument("-t", "--token", default="ad9b37a24380948601257f9c1f889b07a00ac81e", help="Token to access the REST API of ldvspec", action="store") parser.add_argument("--host", nargs="?", default='dev', - help="Presets are 'dev', 'test', 'prod'. Otherwise give a full url like https://atdb.astron.nl/atdb") + help="The ldv-spec-db host. Presets are 'dev', 'test', 'prod', otherwise give a full url like https://atdb.astron.nl/atdb") + parser.add_argument("-s", "--section", default='postgresql-ldv', + help="Add the configuration's section from the database.cfg.") # Have payload of more millions will most likely not work # tested with 10.000 results in 90 seconds so # 11 mil. will be at least 28 hours parser.add_argument("-r", "--max_nbr_dps_to_insert_per_request", default=1000, type=int, help="The number of dataproducts to insert per REST request (0 is no limit)", action="store") - args = parser.parse_args() if args.version: @@ -128,22 +104,23 @@ def main(): no_limit_to_insert = False logging.debug("Limit on number of dataproducts to insert REST is set to {}".format(args.max_nbr_dps_to_insert_per_request)) - with open_connection_to_postgres_database() as conn: - count_raw_dps = execute_query(conn, "select count(*) from astrowise.raw_dataproducts")[0][0] - count_pl_dps = execute_query(conn, "select count(*) from astrowise.pl_dataproducts")[0][0] - logging.info(f"There are {count_raw_dps} raw dataproducts and {count_pl_dps} pipeline dataproduct in the ldvadmin.astrowise table!!") - - result_query_all_dps = execute_query(conn, - "select obsid, obsid_source, dp_type, project, activity, uri, size, dysco " - "from astrowise.raw_dataproducts {}".format(limit_str)) - # Are there still dataproducts left to query? - nbr_dps_left = len(result_query_all_dps) - args.limit - if nbr_dps_left > 0 and args.limit > 0: - logging.debug("Limit on number of leftover dataproducts to query is set to {}".format(nbr_dps_left)) - limit_str = "limit {}".format(nbr_dps_left) - result_query_all_dps.extend(execute_query(conn, - "select obsid, obsid_source, dp_type, project, activity, uri, size, dysco " - "from astrowise.pl_dataproducts {}".format(limit_str))) + # Create connection using ssh tunnel with the ldvadmin database + conn, tunnel = connector.connect_postgresql(args.section) + count_raw_dps = execute_query(conn, "select count(*) from astrowise.raw_dataproducts")[0][0] + count_pl_dps = execute_query(conn, "select count(*) from astrowise.pl_dataproducts")[0][0] + logging.info(f"There are {count_raw_dps} raw dataproducts and {count_pl_dps} pipeline dataproduct in the ldvadmin.astrowise table!!") + + result_query_all_dps = execute_query(conn, + "select obsid, obsid_source, dp_type, project, activity, uri, size, dysco " + "from astrowise.raw_dataproducts {}".format(limit_str)) + # Are there still dataproducts left to query? + nbr_dps_left = len(result_query_all_dps) - args.limit + if nbr_dps_left > 0 and args.limit > 0: + logging.debug("Limit on number of leftover dataproducts to query is set to {}".format(nbr_dps_left)) + limit_str = "limit {}".format(nbr_dps_left) + result_query_all_dps.extend(execute_query(conn, + "select obsid, obsid_source, dp_type, project, activity, uri, size, dysco " + "from astrowise.pl_dataproducts {}".format(limit_str))) logging.info("{} dataproduct retrieved from ldvadmin".format(len(result_query_all_dps))) # Create connection with ldv-spec-db using REST API, use temp. token created in my test-env @@ -180,4 +157,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main()