diff --git a/LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql b/LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql index 2c5c5e5f9f0de617b49b0e9b0f7e4acc5d6ab21d..9615750ea8773f43d2598dd2c3c329705dcb8014 100644 --- a/LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql +++ b/LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql @@ -297,6 +297,26 @@ CREATE TRIGGER trigger_on_directory_inserted_parse_project_info -------------------------------------------------------------------------------- +CREATE OR REPLACE FUNCTION lta.on_site_deleted_delete_site_directories() + RETURNS trigger AS +$BODY$ +BEGIN + DELETE FROM lta.directory WHERE id in (SELECT dir_id FROM lta.site_directory_tree WHERE site_id = OLD.ID); + + RETURN OLD; +END; +$BODY$ + LANGUAGE plpgsql VOLATILE + COST 100; + +CREATE TRIGGER trigger_on_site_deleted_delete_site_directories + BEFORE DELETE + ON lta.storage_site + FOR EACH ROW + EXECUTE PROCEDURE lta.on_site_deleted_delete_site_directories(); + +-------------------------------------------------------------------------------- + -- END TRIGGERS diff --git a/LTA/ltastorageoverview/lib/store.py b/LTA/ltastorageoverview/lib/store.py index 7c53ef89fdc43dca2d87df6d5e54a9e4283d4c59..a566a466212eef580a64f517d79fb4dcbf65b7ca 100644 --- a/LTA/ltastorageoverview/lib/store.py +++ b/LTA/ltastorageoverview/lib/store.py @@ -22,323 +22,271 @@ # TODO: add comment to methods # TODO: reuse connection in methods (take care of exceptions closing the connection) # TODO: use generators and yield for faster and more memory efficient processing of results. -# TODO: use other database? MariaDB? instead of sqlite? import os import os.path -import sqlite3 import datetime +import logging +import psycopg2 +import psycopg2.extras +from lofar.common import dbcredentials +from lofar.common.postgres import PostgresDatabaseConnection +from lofar.common.postgres import FETCH_NONE,FETCH_ONE,FETCH_ALL + +logger = logging.getLogger(__name__) class EntryNotFoundException(Exception): pass -class LTAStorageDb: - def __init__(self, db_filename, removeIfExisting = False): - self.db_filename = db_filename - - if os.path.exists(self.db_filename) and removeIfExisting: - os.remove(self.db_filename) - - if not os.path.exists(self.db_filename): - with sqlite3.connect(self.db_filename) as conn: - create_script_path = os.path.join(os.path.dirname(__file__), 'create_db_ltastorageoverview.sql') - - with open(create_script_path) as script: - conn.executescript(script.read()) - - # save created tables and triggers - conn.commit() +class LTAStorageDb(PostgresDatabaseConnection): + def __init__(self, dbcreds=None, log_queries=False): + super(LTAStorageDb, self).__init__(dbcreds=dbcreds, log_queries=log_queries) def insertSite(self, siteName, srmurl): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() - - site_row = cursor.execute('select id from storage_site where url = ?', [srmurl]).fetchone() - site_id = site_row[0] if site_row else cursor.execute('insert into storage_site (name, url) values (?, ?)', (siteName, srmurl)).lastrowid + site = self.siteByName(siteName) - conn.commit() + if site: + return site['id'] - return site_id + site_id = self.executeQuery('insert into lta.storage_site (name, url) values (%s, %s) returning id;', (siteName, srmurl), fetch=FETCH_ONE)['id'] + self.commit() + return site_id def insertRootDirectory(self, siteName, rootDirectory): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() - - site_row = cursor.execute('select id from storage_site where name = ?', [siteName]).fetchone() - - if not site_row: - raise EntryNotFoundException() - - site_id = site_row[0] - - dir_id = cursor.execute('insert into directory (name) values (?)', [rootDirectory]).lastrowid - - cursor.execute('insert into storage_site_root (storage_site_id, directory_id) values (?, ?)', (site_id, dir_id)).lastrowid - - conn.commit() + site = self.siteByName(siteName) - return dir_id + if not site: + raise EntryNotFoundException() - def insertRootLocation(self, siteName, srmurl, rootDirectory): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() + site_id = site['id'] - site_row = cursor.execute('select id from storage_site where url = ?', [srmurl]).fetchone() - site_id = site_row[0] if site_row else cursor.execute('insert into storage_site (name, url) values (?, ?)', (siteName, srmurl)).lastrowid + dir_id = self.executeQuery('insert into lta.directory (name) values (%s) returning id;', [rootDirectory], fetch=FETCH_ONE)['id'] - dir_id = cursor.execute('insert into directory (name) values (?)', [rootDirectory]).lastrowid + self.executeQuery('insert into lta.storage_site_root_dir (storage_site_id, directory_id) values (%s, %s);', (site_id, dir_id)) + self.commit() + return dir_id - cursor.execute('insert into storage_site_root (storage_site_id, directory_id) values (?, ?)', (site_id, dir_id)).lastrowid + def insertSubDirectory(self, parent_directory_id, sub_directory_path): + dir_id = self.executeQuery('insert into lta.directory (name, parent_directory_id) values (%s, %s) returning id;', (sub_directory_path, parent_directory_id), fetch=FETCH_ONE)['id'] + self.commit() + return dir_id - conn.commit() + def insertSubDirectories(self, subDirectoryPaths, parentDirId, directoryLastVisitTime = None): + insert_values = ','.join(self._cursor.mogrify('(%s, %s)', (name, parentDirId)) for name in subDirectoryPaths) - return dir_id + query = '''insert into lta.directory (name, parent_directory_id) + VALUES {values} + RETURNING id;'''.format(values=insert_values) - def insertSubDirectory(self, parent_directory_id, sub_directory): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() + subDirIds = [x['id'] for x in self.executeQuery(query, fetch=FETCH_ALL)] - dir_id = cursor.execute('insert into directory (name, parent_directory_id) values (?, ?)', (sub_directory, parent_directory_id)).lastrowid + if [x for x in subDirIds if x < 0]: + logger.error("One or more subDirectoryPaths could not be inserted. Rolling back.") + self.rollback() + return None - conn.commit() + if directoryLastVisitTime: + insert_values = ','.join(self._cursor.mogrify('(%s, %s)', (directoryLastVisitTime, id)) for id in subDirIds) - return dir_id + query = '''insert into scraper.last_directory_visit (visit_date, directory_id) + VALUES {values} + RETURNING id;'''.format(values=insert_values) - def insertSubDirectories(self, subDirectoryNames, parentDirId, directoryLastVisitTime = None): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() + ldvIds = [x['id'] for x in self.executeQuery(query, fetch=FETCH_ALL)] - cursor.executemany('insert into directory (name, parent_directory_id) values (?, ?)', - [(name, parentDirId) for name in subDirectoryNames]) + if [x for x in ldvIds if x < 0]: + logger.error("One or more scraper.last_directory_visit's could not be inserted. Rolling back.") + self.rollback() + return None - if directoryLastVisitTime: - subDirIds = cursor.execute('''select id from directory - where parent_directory_id = %s - and name in (%s)''' % (parentDirId, ', '.join(["'%s'" % x for x in subDirectoryNames]))).fetchall() - - subDirIds = [x[0] for x in subDirIds] - - for subDirId in subDirIds: - cursor.execute('''insert into scraper_last_directory_visit (visit_date, directory_id) - values (?, ?)''', (directoryLastVisitTime, subDirId)) - - conn.commit() + self.commit() + return subDirIds def insertFileInfo(self, name, size, creation_date, parent_directory_id): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() + fileinfo_id = self.executeQuery('insert into lta.fileinfo (name, size, creation_date, directory_id) values (%s, %s, %s, %s) returning id;', + (name.split('/')[-1], size, creation_date, parent_directory_id)) - fileinfo_id = cursor.execute('insert into fileinfo (name, size, creation_date, directory_id) values (?, ?, ?, ?)', - (name.split('/')[-1], size, creation_date, parent_directory_id)) - - conn.commit() - - return fileinfo_id + self.commit() + return fileinfo_id def insertFileInfos(self, file_infos): - with sqlite3.connect(self.db_filename) as conn: - conn.executemany('insert into fileinfo (name, size, creation_date, directory_id) values (?, ?, ?, ?)', - [(f[0].split('/')[-1], f[1], f[2], f[3]) for f in file_infos]) - - conn.commit() + insert_values = ','.join(self._cursor.mogrify('(%s, %s, %s, %s)', [(f[0].split('/')[-1], f[1], f[2], f[3]) for f in file_infos])) - def insertLocationResult(self, result): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() + query = '''insert into lta.fileinfo (name, size, creation_date, directory_id) + VALUES {values} + RETURNING id;'''.format(values=insert_values) - dir_row = cursor.execute('''select directory.id from storage_site - join storage_site_root on storage_site_root.storage_site_id = storage_site.id - join directory on directory.id = storage_site_root.directory_id - where storage_site.url = ? - and directory.name = ? - ''', (result.location.srmurl, result.location.directory)).fetchone() + ids = [x['id'] for x in self.executeQuery(query, fetch=FETCH_ALL)] - if dir_row: - dir_id = dir_row[0] - cursor.executemany('insert into directory (name, parent_directory_id) values (?, ?)', - [(subDir.directory, dir_id) for subDir in result.subDirectories]) + if [x for x in ids if x < 0]: + logger.error("One or more file_infos could not be inserted. Rolling back.") + self.rollback() + return None - cursor.executemany('insert into fileinfo (name, size, creation_date, directory_id) values (?, ?, ?, ?)', - [(file.filename.split('/')[-1], file.size, datetime.datetime.utcnow(), dir_id) for file in result.files]) - - conn.commit() + self.commit() + return ids def updateDirectoryLastVisitTime(self, directory_id, timestamp): - with sqlite3.connect(self.db_filename) as conn: - cursor = conn.cursor() - - updated = cursor.execute('''update or ignore scraper_last_directory_visit - set visit_date=? - where directory_id = ?''', (timestamp, directory_id)).rowcount + updated = bool(self.executeQuery('''update scraper.last_directory_visit + set visit_date=%s + where directory_id = %s + returning id;''', (timestamp, directory_id), fetch=FETCH_ONE)) - if not updated: - cursor.execute('''insert into scraper_last_directory_visit - (visit_date, directory_id) - values (?, ?)''', (timestamp, directory_id)) + if not updated: + self.executeQuery('''insert into scraper.last_directory_visit + (visit_date, directory_id) + values (%s, %s)''', (timestamp, directory_id)) - conn.commit() + self.commit() def sites(self): '''returns list of tuples (id, name, url) of all sites''' - with sqlite3.connect(self.db_filename) as conn: - return conn.execute('''SELECT id, name, url FROM storage_site''').fetchall() + return self.executeQuery('SELECT id, name, url FROM lta.storage_site;', fetch=FETCH_ALL) def site(self, site_id): '''returns tuple (id, name, url) for site with id=site_id''' - with sqlite3.connect(self.db_filename) as conn: - return conn.execute('''SELECT id, name, url FROM storage_site where id = ?''', [site_id]).fetchone() + return self.executeQuery('SELECT id, name, url FROM lta.storage_site where id = %s;', [site_id], FETCH_ONE) + + def siteByName(self, site_name): + '''returns tuple (id, name, url) for site with id=site_id''' + return self.executeQuery('SELECT id, name, url FROM lta.storage_site where name = %s;', [site_name], FETCH_ONE) def directory(self, directory_id): - '''returns directory tuple (id, name, site_id, site_name) for the given directory_id''' - with sqlite3.connect(self.db_filename) as conn: - return conn.execute('''SELECT dir.id, dir.name, site.id, site.name - FROM storage_site_root - join storage_site site on site.id = storage_site_root.storage_site_id - join directory_closure dc on dc.ancestor_id = storage_site_root.directory_id - join directory dir on dir.id = dc.descendant_id - where dc.descendant_id = ? - ''', [directory_id]).fetchone() + '''returns lta.directory (id, name, site_id, site_name) for the given directory_id''' + return self.executeQuery('''SELECT dir.id as dir_id, dir.name as dir_name, site.id as site_id, site.name as site_name + FROM lta.storage_site_root_dir + join storage_site site on site.id = storage_site_root_dir.storage_site_id + join lta.directory_closure dc on dc.ancestor_id = storage_site_root_dir.directory_id + join lta.directory dir on dir.id = dc.descendant_id + where dc.descendant_id = %s; + ''', [directory_id], fetch=FETCH_ONE) def directory_id(self, site_id, directory_name): - '''returns directory id for the given site_id, directory_name''' - with sqlite3.connect(self.db_filename) as conn: - result = conn.execute('''SELECT dir.id - FROM storage_site_root - join directory_closure dc on dc.ancestor_id = storage_site_root.directory_id - join directory dir on dir.id = dc.descendant_id - where storage_site_root.storage_site_id = ? - and dir.name = ? - ''', [site_id, directory_name]).fetchone() + '''returns lta.directory id for the given site_id, directory_name''' + result = self.executeQuery('''SELECT dir.id + FROM lta.storage_site_root_dir + join lta.directory_closure dc on dc.ancestor_id = storage_site_root_dir.directory_id + join lta.directory dir on dir.id = dc.descendant_id + where storage_site_root_dir.storage_site_id = %s + and dir.name = %s;''', [site_id, directory_name], fetch=FETCH_ONE) - if result: - return result[0] + if result['id']: + return result['id'] - return -1 + return -1 def rootDirectories(self): - '''returns list of all root directories (id, name, site_id, site_name) for all sites''' - with sqlite3.connect(self.db_filename) as conn: - return conn.execute(''' - SELECT * - FROM root_directories - ''').fetchall() + '''returns list of all root directories for all sites''' + return self.executeQuery('''SELECT * FROM lta.root_directories;''', fetch=FETCH_ALL) def rootDirectoriesForSite(self, site_id): '''returns list of all root directories (id, name) for given site_id''' - with sqlite3.connect(self.db_filename) as conn: - return conn.execute('''SELECT dir_id, dir_name - FROM root_directories - where site_id = ?''', [site_id]).fetchall() + return self.executeQuery('''SELECT * FROM lta.root_directories where site_id = %s;''', [site_id], fetch=FETCH_ALL) def subDirectories(self, directory_id, depth = 1, includeSelf=False): - '''returns list of all sub directories up to the given depth (id, name, site_id, site_name, depth) for the given directory_id''' - with sqlite3.connect(self.db_filename) as conn: - return conn.execute(''' - SELECT dir.id, dir.name, dir.parent_directory_id, directory_closure.depth FROM directory_closure - join directory dir on dir.id = directory_closure.descendant_id - where ancestor_id = ? and depth <= ? and depth > ? - order by depth asc - ''', (directory_id, depth, -1 if includeSelf else 0)).fetchall() + '''returns list of all sub directories up to the given depth (id, name, parent_dir_id, depth) for the given directory_id''' + return self.executeQuery(''' + SELECT dir.id as id, dir.name as name, dir.parent_directory_id as parent_dir_id, lta.directory_closure.depth as depth + FROM lta.directory_closure + join lta.directory dir on dir.id = lta.directory_closure.descendant_id + where ancestor_id = %s and depth <= %s and depth > %s + order by depth asc; + ''', (directory_id, depth, -1 if includeSelf else 0), fetch=FETCH_ALL) def parentDirectories(self, directory_id): - with sqlite3.connect(self.db_filename) as conn: - return conn.execute(''' - SELECT dir.* FROM directory_closure dc - join directory dir on dir.id = dc.ancestor_id - where dc.descendant_id = ? and depth > 0 - order by depth desc - ''', [directory_id]).fetchall() + return self.executeQuery(''' + SELECT dir.* FROM lta.directory_closure dc + join lta.directory dir on dir.id = dc.ancestor_id + where dc.descendant_id = %s and depth > 0 + order by depth desc; + ''', [directory_id], fetch=FETCH_ALL) def _date_bounded(self, query, args, table_column, from_date=None, to_date=None): result_query = query result_args = args if from_date: - result_query += ' and %s >= ?' % table_column + result_query += ' and %s >= %s' % table_column result_args += (from_date,) if to_date: - result_query += ' and %s <= ?' % table_column + result_query += ' and %s <= %s' % table_column result_args += (to_date,) return result_query, result_args def filesInDirectory(self, directory_id, from_date=None, to_date=None): - with sqlite3.connect(self.db_filename) as conn: - query = '''SELECT * FROM fileinfo - where directory_id = ?''' + query = '''SELECT * FROM lta.fileinfo + where directory_id = %s''' - args = (directory_id,) + args = (directory_id,) - query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date) + query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date) - return conn.execute(query, args).fetchall() + return self.executeQuery(query, args, fetch=FETCH_ALL) def numFilesInDirectory(self, directory_id, from_date=None, to_date=None): - with sqlite3.connect(self.db_filename) as conn: - query = '''SELECT count(id) FROM fileinfo - where directory_id = ?''' + query = '''SELECT count(id) FROM lta.fileinfo + where directory_id = %s''' - args = (directory_id,) + args = (directory_id,) - query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date) + query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date) - result = conn.execute(query, args).fetchone() + result = self.executeQuery(query, args, fetch=FETCH_ONE) - if result: - return result[0] + if result['count']: + return result['count'] - return 0 + return 0 def filesInTree(self, base_directory_id, from_date=None, to_date=None): - with sqlite3.connect(self.db_filename) as conn: - query = '''SELECT dir.id, dir.name, dc.depth, fileinfo.id, fileinfo.name, fileinfo.size, fileinfo.creation_date FROM directory_closure dc - join directory dir on dir.id = dc.descendant_id - join fileinfo on fileinfo.directory_id = dc.descendant_id - where dc.ancestor_id = ?''' + query = '''SELECT dir.id as dir_id, dir.name as dir_name, dc.depth as dir_depth, fi.id as file_id, fi.name as file_name, fi.size as file_size, fi.creation_date as file_creation_date + FROM lta.directory_closure dc + JOIN lta.directory dir on dir.id = dc.descendant_id + JOIN lta.fileinfo fi on fi.directory_id = dc.descendant_id + WHERE dc.ancestor_id = %s''' - args = (base_directory_id,) + args = (base_directory_id,) - query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date) + query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date) - return conn.execute(query, args).fetchall() + return self.executeQuery(query, args, fetch=FETCH_ALL) def numFilesInTree(self, base_directory_id, from_date=None, to_date=None): - with sqlite3.connect(self.db_filename) as conn: - query = ''' - SELECT sum(directory_stats.num_files) FROM directory_stats - join directory_closure dc on dc.descendant_id = directory_stats.directory_id - where ancestor_id = ? - ''' + query = ''' + SELECT sum(ds.num_files) FROM metainfo.directory_stats ds + join lta.directory_closure dc on dc.descendant_id = ds.directory_id + where ancestor_id = %s + ''' - args = (base_directory_id,) + args = (base_directory_id,) - query, args = self._date_bounded(query, args, 'directory_stats.min_file_creation_date', from_date=from_date) - query, args = self._date_bounded(query, args, 'directory_stats.max_file_creation_date', to_date=to_date) + query, args = self._date_bounded(query, args, 'ds.min_file_creation_date', from_date=from_date) + query, args = self._date_bounded(query, args, 'ds.max_file_creation_date', to_date=to_date) - result = conn.execute(query, args).fetchone() + result = self.executeQuery(query, args, fetch=FETCH_ONE) - if result[0]: - return result[0] + if result['sum']: + return result['sum'] - return 0 + return 0 def totalFileSizeInTree(self, base_directory_id, from_date=None, to_date=None): - with sqlite3.connect(self.db_filename) as conn: - query = ''' - SELECT sum(directory_stats.total_file_size) FROM directory_stats - join directory_closure dc on dc.descendant_id = directory_stats.directory_id - where ancestor_id = ? - ''' - args = (base_directory_id,) + query = ''' + SELECT sum(ds.total_file_size) FROM metainfo.directory_stats ds + join lta.directory_closure dc on dc.descendant_id = ds.directory_id + where ancestor_id = %s + ''' + args = (base_directory_id,) - query, args = self._date_bounded(query, args, 'directory_stats.min_file_creation_date', from_date=from_date) - query, args = self._date_bounded(query, args, 'directory_stats.max_file_creation_date', to_date=to_date) + query, args = self._date_bounded(query, args, 'ds.min_file_creation_date', from_date=from_date) + query, args = self._date_bounded(query, args, 'ds.max_file_creation_date', to_date=to_date) - result = conn.execute(query, args).fetchone() + result = self.executeQuery(query, args, fetch=FETCH_ONE) - if result[0]: - return result[0] - return 0 + if result['sum']: + return result['sum'] + return 0 def numFilesInSite(self, site_id, from_date=None, to_date=None): num_files = 0L @@ -346,7 +294,7 @@ class LTAStorageDb: root_dirs = self.rootDirectoriesForSite(site_id) for root_dir in root_dirs: - num_files += long(self.numFilesInTree(root_dir[0], from_date, to_date)) + num_files += long(self.numFilesInTree(root_dir['dir_id'], from_date, to_date)) return num_files @@ -356,59 +304,53 @@ class LTAStorageDb: root_dirs = self.rootDirectoriesForSite(site_id) for root_dir in root_dirs: - total_size += long(self.totalFileSizeInTree(root_dir[0], from_date, to_date)) + total_size += long(self.totalFileSizeInTree(root_dir['dir_id'], from_date, to_date)) return total_size def datetimeRangeOfFilesInTree(self, base_directory_id = None): - with sqlite3.connect(self.db_filename) as conn: - query = ''' - SELECT min(fileinfo.creation_date) as min_creation_date, - max(fileinfo.creation_date) as max_creation_date - FROM fileinfo - ''' - args = [] + query = ''' + SELECT min(fileinfo.creation_date) as min_creation_date, + max(fileinfo.creation_date) as max_creation_date + FROM lta.fileinfo + ''' + args = [] - if base_directory_id: - query += '''\njoin directory_closure dc on dc.descendant_id = fileinfo.directory_id - where ancestor_id = ?''' - args.append(base_directory_id) + if base_directory_id: + query += '''\njoin lta.directory_closure dc on dc.descendant_id = lta.fileinfo.directory_id + where ancestor_id = %s''' + args.append(base_directory_id) - result = conn.execute(query, args).fetchone() + result = self.executeQuery(query, args, fetch=FETCH_ONE) - if result[0]: - format = '%Y-%m-%d %H:%M:%S %Z' - return (datetime.datetime.strptime(result[0]+' UTC', format), - datetime.datetime.strptime(result[1]+' UTC', format)) + if result: + return (result['min_creation_date'], result['max_creation_date']) - utcnow = datetime.datetime.utcnow() - return (utcnow, utcnow) + utcnow = datetime.datetime.utcnow() + return (utcnow, utcnow) def mostRecentVisitDate(self): - with sqlite3.connect(self.db_filename) as conn: - result = conn.execute(''' - SELECT visit_date FROM scraper_last_directory_visit - order by visit_date desc - limit 1 - ''').fetchone() + result = self.executeQuery(''' + SELECT visit_date FROM scraper.last_directory_visit + order by visit_date desc + limit 1 + ''', fetch=FETCH_ONE) - if result: - format = '%Y-%m-%d %H:%M:%S.%f %Z' - return datetime.datetime.strptime(result[0]+' UTC', format) + if result: + return result['visit_date'] - return datetime.datetime(2011, 1, 1) + return datetime.datetime(2011, 1, 1) def numDirectoriesNotVisitedSince(self, timestamp): - with sqlite3.connect(self.db_filename) as conn: - result = conn.execute(''' - SELECT count(directory_id) FROM scraper_last_directory_visit - WHERE visit_date < ? - ''', [timestamp]).fetchone() + result = self.executeQuery(''' + SELECT count(directory_id) FROM scraper.last_directory_visit + WHERE visit_date < %s + ''', [timestamp], fetch=FETCH_ONE) - if result: - return result[0] + if result: + return result['count'] - return 0 + return 0 def visitStats(self, before_timestamp = None): if not before_timestamp: @@ -417,24 +359,33 @@ class LTAStorageDb: sites = self.sites() siteStats = {} - with sqlite3.connect(self.db_filename) as conn: - - for site in sites: - site_id = site[0] - site_name = site[1] - siteStats[site_name] = {'id': site_id} + for site in sites: + site_id = site['id'] + site_name = site['name'] + siteStats[site_name] = {'site_id': site_id} - visits = conn.execute(''' - select * - from site_scraper_last_directoy_visit - where site_id = ? - and last_visit < ? - order by last_visit asc - ''', [site_id, before_timestamp]).fetchall() + visits = self.executeQuery(''' + select * + from scraper.site_scraper_last_directoy_visit + where site_id = %s + and last_visit < %s + order by last_visit asc + ''', [site_id, before_timestamp], fetch=FETCH_ALL) - siteStats[site_name]['queue_length'] = len(visits) - if len(visits) > 0: - siteStats[site_name]['least_recent_visited_dir_id'] = visits[0][2] - siteStats[site_name]['least_recent_visit'] = visits[0][4] + siteStats[site_name]['queue_length'] = len(visits) + if len(visits) > 0: + siteStats[site_name]['least_recent_visited_dir_id'] = visits[0]['dir_id'] + siteStats[site_name]['least_recent_visit'] = visits[0]['last_visit'] return siteStats + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', + level=logging.INFO) + import pprint + dbcreds = dbcredentials.DBCredentials().get('LTASO') + db = LTAStorageDb(dbcreds, True) + print db.rootDirectoriesForSite(1) + print db.directory_id(1, 'rootDir_0') + pprint.pprint(db.parentDirectories(7)) + pprint.pprint(db.subDirectories(1)) diff --git a/LTA/ltastorageoverview/test/test_store.py b/LTA/ltastorageoverview/test/test_store.py index 30b08fa0325956534ad5ca7da396c61c888f55ad..2816cfeb01d2662a7992e60c410f55072f9dba4e 100755 --- a/LTA/ltastorageoverview/test/test_store.py +++ b/LTA/ltastorageoverview/test/test_store.py @@ -20,40 +20,45 @@ # $Id$ import unittest +import logging import datetime import time import os import os.path -import tempfile +from lofar.common import dbcredentials from ltastorageoverview import store +logger = logging.getLogger(__name__) class TestLTAStorageDb(unittest.TestCase): def setUp(self): - tmpfile = os.path.join(tempfile.gettempdir(), 'test.sqlite') - self.db = store.LTAStorageDb(tmpfile, True) - - self.assertTrue(os.path.exists(self.db.db_filename)) + dbcreds = dbcredentials.DBCredentials().get('LTASO') + self.db = store.LTAStorageDb(dbcreds, True) + self.db.executeQuery('delete from lta.fileinfo;') + self.db.executeQuery('delete from lta.directory;') + self.db.executeQuery('delete from lta.directory_closure;') + self.db.executeQuery('delete from lta.storage_site;') + self.db.commit() #def tearDown(self): #if os.path.exists(self.db.db_filename): #os.remove(self.db.db_filename) def testSites(self): - self.db.insertSite('siteA', 'srm://siteA.org') - self.db.insertSite('siteB', 'srm://siteB.org') + siteA_id = self.db.insertSite('siteA', 'srm://siteA.org') + siteB_id = self.db.insertSite('siteB', 'srm://siteB.org') sites = self.db.sites() - siteNames = [x[1] for x in sites] + siteNames = [x['name'] for x in sites] self.assertEquals(2, len(siteNames)) self.assertTrue('siteA' in siteNames) self.assertTrue('siteB' in siteNames) - site = self.db.site(1) - self.assertTrue('siteA' in site[1]) + site = self.db.site(siteA_id) + self.assertEqual('siteA', site['name']) - site = self.db.site(2) - self.assertTrue('siteB' in site[1]) + site = self.db.site(siteB_id) + self.assertEqual('siteB', site['name']) def testRootDirs(self): siteA_id = self.db.insertSite('siteA', 'srm://siteA.org') @@ -68,11 +73,28 @@ class TestLTAStorageDb(unittest.TestCase): rootDirs = self.db.rootDirectories() self.assertEquals(5, len(rootDirs)) - self.assertTrue((dirA1_id, 'rootDir1', siteA_id, 'siteA') in rootDirs) - self.assertTrue((dirA2_id, 'rootDir2', siteA_id, 'siteA') in rootDirs) - self.assertTrue((dirA3_id, 'path/to/rootDir3', siteA_id, 'siteA') in rootDirs) - self.assertTrue((dirB1_id, 'rootDir1', siteB_id, 'siteB') in rootDirs) - self.assertTrue((dirB2_id, 'path/to/otherRootDir', siteB_id, 'siteB') in rootDirs) + + rootDirsDict = {rd['dir_id']:rd for rd in rootDirs} + + self.assertEqual('rootDir1', rootDirsDict[dirA1_id]['dir_name']) + self.assertEqual(siteA_id, rootDirsDict[dirA1_id]['site_id']) + self.assertEqual('siteA', rootDirsDict[dirA1_id]['site_name']) + + self.assertEqual('rootDir2', rootDirsDict[dirA2_id]['dir_name']) + self.assertEqual(siteA_id, rootDirsDict[dirA2_id]['site_id']) + self.assertEqual('siteA', rootDirsDict[dirA2_id]['site_name']) + + self.assertEqual('path/to/rootDir3', rootDirsDict[dirA3_id]['dir_name']) + self.assertEqual(siteA_id, rootDirsDict[dirA3_id]['site_id']) + self.assertEqual('siteA', rootDirsDict[dirA3_id]['site_name']) + + self.assertEqual('rootDir1', rootDirsDict[dirB1_id]['dir_name']) + self.assertEqual(siteB_id, rootDirsDict[dirB1_id]['site_id']) + self.assertEqual('siteB', rootDirsDict[dirB1_id]['site_name']) + + self.assertEqual('path/to/otherRootDir', rootDirsDict[dirB2_id]['dir_name']) + self.assertEqual(siteB_id, rootDirsDict[dirB2_id]['site_id']) + self.assertEqual('siteB', rootDirsDict[dirB2_id]['site_name']) def testDirectoryTrees(self): siteA_id = self.db.insertSite('siteA', 'srm://siteA.org') @@ -92,10 +114,10 @@ class TestLTAStorageDb(unittest.TestCase): rootDirs = self.db.rootDirectories() self.assertEquals(2, len(rootDirs)) - for (id, name, site_id, site_name) in rootDirs: + for id in [x['dir_id'] for x in rootDirs]: subDirs = self.db.subDirectories(id, 1, False) for subDir in subDirs: - subDir_parent_id = subDir[2] + subDir_parent_id = subDir['parent_dir_id'] self.assertEquals(id, subDir_parent_id) print '\n'.join([str(x) for x in self.db.filesInTree(rootDir_id)]) @@ -128,4 +150,7 @@ class TestLTAStorageDb(unittest.TestCase): # run tests if main if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', + level=logging.INFO) + unittest.main() diff --git a/LTA/ltastorageoverview/test/test_store.run b/LTA/ltastorageoverview/test/test_store.run index b2e574673fcffe8730b24a19d2b3ccd8ba1a5be7..952bff78ec3045c9679e29a7b3b29d5ecefacc4c 100755 --- a/LTA/ltastorageoverview/test/test_store.run +++ b/LTA/ltastorageoverview/test/test_store.run @@ -1,3 +1,4 @@ #!/bin/bash -python test_store.py +source python-coverage.sh +python_coverage_test "ltas*" test_store.py