From f97c186fa84107d838cbd84af7cb65734ae52522 Mon Sep 17 00:00:00 2001
From: Jorrit Schaap <schaap@astron.nl>
Date: Wed, 18 Jul 2018 13:38:41 +0000
Subject: [PATCH] SW-402: merged ltastorageoverview from trunk into
 LOFAR-Release-3_1 for deployment to production as per request by SOS/SDOS

---
 .gitattributes                                |  23 +-
 CMake/LofarPackageList.cmake                  |   2 +-
 LCS/PyCommon/datetimeutils.py                 |  14 +-
 LCS/PyCommon/postgres.py                      | 178 +++-
 .../LTAIngestClient/lib/ingestbuslistener.py  |   3 +
 LTA/LTAIngest/LTAIngestCommon/CMakeLists.txt  |   1 +
 LTA/LTAIngest/LTAIngestCommon/config.py       |   2 +-
 LTA/LTAIngest/LTAIngestCommon/srm.py          | 264 +++++
 .../LTAIngestCommon/test/CMakeLists.txt       |   1 +
 LTA/LTAIngest/LTAIngestCommon/test/t_srm.py   |  61 ++
 LTA/LTAIngest/LTAIngestCommon/test/t_srm.run  |   6 +
 LTA/LTAIngest/LTAIngestCommon/test/t_srm.sh   |   3 +
 .../lib/ingestjobmanagementserver.py          |   1 -
 .../test/t_ingestjobmanagementserver.py       |  30 +-
 .../LTAIngestTransferServer/lib/ltacp.py      | 135 +--
 .../LTAIngestTransferServer/test/ltastubs.py  |   8 +-
 .../test/t_ingestpipeline.py                  | 481 ++++-----
 .../LTAIngestTransferServer/test/t_ltacp.py   | 229 +++--
 .../LTAIngestWebServer/lib/ingestwebserver.py |   2 +-
 LTA/ltastorageoverview/CMakeLists.txt         |   3 +-
 LTA/ltastorageoverview/bin/CMakeLists.txt     |  12 +-
 .../bin/ltastorageoverviewreport              |  25 +
 .../bin/ltastorageoverviewscraper             |   4 +-
 .../bin/ltastorageoverviewscraper.ini         |   8 +
 .../bin/ltastorageoverviewwebservice          |  26 +
 .../bin/ltastorageoverviewwebservice.ini      |   8 +
 .../doc/lta_storage_overview.md               | 149 ++-
 LTA/ltastorageoverview/lib/CMakeLists.txt     |   9 +-
 .../lib/create_db_ltastorageoverview.sql      | 204 ----
 .../lib/ingesteventhandler.py                 | 110 ++
 .../ltaso/create_db_ltastorageoverview.sql    | 952 ++++++++++++++++++
 LTA/ltastorageoverview/lib/report.py          |  99 +-
 LTA/ltastorageoverview/lib/scraper.py         | 331 ++++--
 LTA/ltastorageoverview/lib/store.py           | 776 ++++++++------
 .../lib/webservice/templates/index.html       | 136 ++-
 .../lib/webservice/webservice.py              | 177 +++-
 .../ltastorageoverview_build.sh               |  37 -
 LTA/ltastorageoverview/test/CMakeLists.txt    |   8 +-
 .../test/common_test_ltastoragedb.py          |  73 ++
 .../test/db_performance_test.py               | 108 ++
 .../test/integration_test_store.py            | 204 ++++
 .../test/integration_test_store.run           |   4 +
 .../test/integration_test_store.sh            |   3 +
 .../test/test_ingesteventhandler.py           | 303 ++++++
 .../test/test_ingesteventhandler.run          |   4 +
 .../test/test_ingesteventhandler.sh           |   3 +
 .../test/test_lso_webservice.py               |  90 +-
 LTA/ltastorageoverview/test/test_scraper.py   |  55 +
 LTA/ltastorageoverview/test/test_scraper.run  |   4 +
 LTA/ltastorageoverview/test/test_scraper.sh   |   3 +
 LTA/ltastorageoverview/test/test_store.py     | 282 ++++--
 LTA/ltastorageoverview/test/test_store.run    |   3 +-
 LTA/sip/lib/CMakeLists.txt                    |   1 -
 LTA/sip/lib/query.py                          |   9 +-
 LTA/sip/lib/validator.py                      |   6 +-
 55 files changed, 4282 insertions(+), 1391 deletions(-)
 create mode 100755 LTA/LTAIngest/LTAIngestCommon/srm.py
 create mode 100755 LTA/LTAIngest/LTAIngestCommon/test/t_srm.py
 create mode 100755 LTA/LTAIngest/LTAIngestCommon/test/t_srm.run
 create mode 100755 LTA/LTAIngest/LTAIngestCommon/test/t_srm.sh
 create mode 100755 LTA/ltastorageoverview/bin/ltastorageoverviewreport
 mode change 100644 => 100755 LTA/ltastorageoverview/bin/ltastorageoverviewscraper
 create mode 100644 LTA/ltastorageoverview/bin/ltastorageoverviewscraper.ini
 create mode 100755 LTA/ltastorageoverview/bin/ltastorageoverviewwebservice
 create mode 100644 LTA/ltastorageoverview/bin/ltastorageoverviewwebservice.ini
 delete mode 100644 LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql
 create mode 100755 LTA/ltastorageoverview/lib/ingesteventhandler.py
 create mode 100644 LTA/ltastorageoverview/lib/ltaso/create_db_ltastorageoverview.sql
 delete mode 100755 LTA/ltastorageoverview/ltastorageoverview_build.sh
 create mode 100755 LTA/ltastorageoverview/test/common_test_ltastoragedb.py
 create mode 100755 LTA/ltastorageoverview/test/db_performance_test.py
 create mode 100755 LTA/ltastorageoverview/test/integration_test_store.py
 create mode 100755 LTA/ltastorageoverview/test/integration_test_store.run
 create mode 100755 LTA/ltastorageoverview/test/integration_test_store.sh
 create mode 100755 LTA/ltastorageoverview/test/test_ingesteventhandler.py
 create mode 100755 LTA/ltastorageoverview/test/test_ingesteventhandler.run
 create mode 100755 LTA/ltastorageoverview/test/test_ingesteventhandler.sh
 create mode 100755 LTA/ltastorageoverview/test/test_scraper.py
 create mode 100755 LTA/ltastorageoverview/test/test_scraper.run
 create mode 100755 LTA/ltastorageoverview/test/test_scraper.sh

diff --git a/.gitattributes b/.gitattributes
index 9a485495462..f4459ccb22e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2070,10 +2070,14 @@ LTA/LTAIngest/LTAIngestClient/lib/ingestbuslistener.py -text
 LTA/LTAIngest/LTAIngestClient/lib/rpc.py -text
 LTA/LTAIngest/LTAIngestCommon/CMakeLists.txt -text
 LTA/LTAIngest/LTAIngestCommon/config.py -text
+LTA/LTAIngest/LTAIngestCommon/srm.py -text
 LTA/LTAIngest/LTAIngestCommon/test/CMakeLists.txt -text
 LTA/LTAIngest/LTAIngestCommon/test/t_job.py -text
 LTA/LTAIngest/LTAIngestCommon/test/t_job.run -text
 LTA/LTAIngest/LTAIngestCommon/test/t_job.sh -text
+LTA/LTAIngest/LTAIngestCommon/test/t_srm.py -text
+LTA/LTAIngest/LTAIngestCommon/test/t_srm.run -text
+LTA/LTAIngest/LTAIngestCommon/test/t_srm.sh -text
 LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/bin/CMakeLists.txt -text
 LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/bin/ingestjobmanagementserver -text
 LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/bin/ingestjobmanagementserver.ini -text
@@ -2120,22 +2124,37 @@ LTA/LTAIngest/test/CMakeLists.txt -text
 LTA/doc/package.dox -text
 LTA/ltastorageoverview/CMakeLists.txt -text
 LTA/ltastorageoverview/bin/CMakeLists.txt -text
+LTA/ltastorageoverview/bin/ltastorageoverviewreport -text
 LTA/ltastorageoverview/bin/ltastorageoverviewscraper -text
+LTA/ltastorageoverview/bin/ltastorageoverviewscraper.ini -text
+LTA/ltastorageoverview/bin/ltastorageoverviewwebservice -text
+LTA/ltastorageoverview/bin/ltastorageoverviewwebservice.ini -text
 LTA/ltastorageoverview/doc/lta_storage_overview.md -text
 LTA/ltastorageoverview/lib/CMakeLists.txt -text
 LTA/ltastorageoverview/lib/__init__.py -text
-LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql -text
+LTA/ltastorageoverview/lib/ingesteventhandler.py -text
+LTA/ltastorageoverview/lib/ltaso/create_db_ltastorageoverview.sql -text
 LTA/ltastorageoverview/lib/report.py -text
 LTA/ltastorageoverview/lib/scraper.py -text
 LTA/ltastorageoverview/lib/store.py -text
 LTA/ltastorageoverview/lib/webservice/__init__.py -text
 LTA/ltastorageoverview/lib/webservice/templates/index.html -text
 LTA/ltastorageoverview/lib/webservice/webservice.py -text
-LTA/ltastorageoverview/ltastorageoverview_build.sh -text
 LTA/ltastorageoverview/test/CMakeLists.txt -text
+LTA/ltastorageoverview/test/common_test_ltastoragedb.py -text
+LTA/ltastorageoverview/test/db_performance_test.py -text
+LTA/ltastorageoverview/test/integration_test_store.py -text
+LTA/ltastorageoverview/test/integration_test_store.run -text
+LTA/ltastorageoverview/test/integration_test_store.sh -text
+LTA/ltastorageoverview/test/test_ingesteventhandler.py -text
+LTA/ltastorageoverview/test/test_ingesteventhandler.run -text
+LTA/ltastorageoverview/test/test_ingesteventhandler.sh -text
 LTA/ltastorageoverview/test/test_lso_webservice.py -text
 LTA/ltastorageoverview/test/test_lso_webservice.run -text
 LTA/ltastorageoverview/test/test_lso_webservice.sh -text
+LTA/ltastorageoverview/test/test_scraper.py -text
+LTA/ltastorageoverview/test/test_scraper.run -text
+LTA/ltastorageoverview/test/test_scraper.sh -text
 LTA/ltastorageoverview/test/test_store.py -text
 LTA/ltastorageoverview/test/test_store.run -text
 LTA/ltastorageoverview/test/test_store.sh -text
diff --git a/CMake/LofarPackageList.cmake b/CMake/LofarPackageList.cmake
index 5ff72645226..33f7b2097b1 100644
--- a/CMake/LofarPackageList.cmake
+++ b/CMake/LofarPackageList.cmake
@@ -1,7 +1,7 @@
 # - Create for each LOFAR package a variable containing the absolute path to
 # its source directory. 
 #
-# Generated by gen_LofarPackageList_cmake.sh at di 26 jun 2018  9:29:29 CEST
+# Generated by gen_LofarPackageList_cmake.sh at wo 18 jul 2018 15:04:44 CEST
 #
 #                      ---- DO NOT EDIT ----
 #
diff --git a/LCS/PyCommon/datetimeutils.py b/LCS/PyCommon/datetimeutils.py
index abdc707d268..038c880ea04 100644
--- a/LCS/PyCommon/datetimeutils.py
+++ b/LCS/PyCommon/datetimeutils.py
@@ -24,24 +24,24 @@ import sys
 import os
 
 
-def monthRanges(min_date, max_date):
+def monthRanges(min_date, max_date, month_step=1):
     ranges = []
 
     min_month_start = datetime(min_date.year, min_date.month, 1, tzinfo=min_date.tzinfo)
 
     month_start = min_month_start
     while month_start < max_date:
-        if month_start.month < 12:
-            month_end = datetime(month_start.year, month_start.month+1, 1, tzinfo=month_start.tzinfo) - timedelta(milliseconds=1)
+        if month_start.month <= 12-month_step:
+            month_end = datetime(month_start.year, month_start.month+month_step, 1, tzinfo=month_start.tzinfo) - timedelta(milliseconds=1)
         else:
-            month_end = datetime(month_start.year+1, month_start.month-11, 1, tzinfo=month_start.tzinfo) - timedelta(milliseconds=1)
+            month_end = datetime(month_start.year+1, month_start.month-12+month_step, 1, tzinfo=month_start.tzinfo) - timedelta(milliseconds=1)
 
         ranges.append((month_start, month_end))
 
-        if month_start.month < 12:
-            month_start = datetime(month_start.year, month_start.month+1, 1, tzinfo=min_date.tzinfo)
+        if month_start.month <= 12-month_step:
+            month_start = datetime(month_start.year, month_start.month+month_step, 1, tzinfo=min_date.tzinfo)
         else:
-            month_start = datetime(month_start.year+1, month_start.month-11, 1, tzinfo=min_date.tzinfo)
+            month_start = datetime(month_start.year+1, month_start.month-12+month_step, 1, tzinfo=min_date.tzinfo)
 
     return ranges
 
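For reference, a quick sketch of the new month_step parameter (illustrative only, not part of the patch): monthRanges now yields consecutive windows of month_step months each, every window ending one millisecond before the next one starts.

# illustrative sketch, not part of this patch
from datetime import datetime
from lofar.common.datetimeutils import monthRanges

# two-month windows covering 2018-01-15 .. 2018-05-01:
# (2018-01-01, 2018-02-28 23:59:59.999) and (2018-03-01, 2018-04-30 23:59:59.999)
for window_start, window_end in monthRanges(datetime(2018, 1, 15), datetime(2018, 5, 1), month_step=2):
    print window_start, window_end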
diff --git a/LCS/PyCommon/postgres.py b/LCS/PyCommon/postgres.py
index 59ea46dbe63..2c60585127f 100644
--- a/LCS/PyCommon/postgres.py
+++ b/LCS/PyCommon/postgres.py
@@ -26,10 +26,15 @@ Module with nice postgres helper methods and classes.
 import logging
 from threading import Thread, Lock
 from Queue import Queue, Empty
+from datetime import datetime
+import time
+import re
 import select
 import psycopg2
 import psycopg2.extras
 import psycopg2.extensions
+from lofar.common.datetimeutils import totalSeconds
+from lofar.common import dbcredentials
 
 logger = logging.getLogger(__name__)
 
@@ -87,7 +92,149 @@ def makePostgresNotificationQueries(schema, table, action, column_name='id'):
     sql_lines = '\n'.join([s.strip() for s in sql.split('\n')]) + '\n'
     return sql_lines
 
-class PostgresListener(object):
+FETCH_NONE=0
+FETCH_ONE=1
+FETCH_ALL=2
+
+class PostgresDatabaseConnection(object):
+    def __init__(self,
+                 host='',
+                 database='',
+                 username='',
+                 password='',
+                 port=5432,
+                 log_queries=False, auto_commit_selects=True, num_connect_retries=5, connect_retry_interval=1.0):
+        self._host = host
+        self._database = database
+        self._username = username
+        self._password = password
+        self._port = port
+        self._connection = None
+        self._log_queries = log_queries
+        self.__connection_retries = 0
+        self.__auto_commit_selects = auto_commit_selects
+        self.__num_connect_retries = num_connect_retries
+        self.__connect_retry_interval = connect_retry_interval
+        self._connect()
+
+    def _connect(self):
+        for i in range(self.__num_connect_retries):
+            try:
+                self._disconnect()
+
+                logger.debug("%s connecting to db %s:*****@%s on %s:%s", type(self).__name__,
+                             self._username,
+                             self._database,
+                             self._host,
+                             self._port)
+                self._connection = psycopg2.connect(host=self._host,
+                                                    user=self._username,
+                                                    password=self._password,
+                                                    database=self._database,
+                                                    port=self._port,
+                                                    connect_timeout=5)
+
+                if self._connection:
+                    logger.debug("%s connected to db %s", type(self).__name__, self._database)
+                    return
+            except Exception as e:
+                logger.error(e)
+                if i == self.__num_connect_retries-1:
+                    raise
+
+                logger.debug('retrying to connect to %s in %s seconds', self._database, self.__connect_retry_interval)
+                time.sleep(self.__connect_retry_interval)
+
+    def _disconnect(self):
+        if self._connection:
+            logger.debug("%s disconnecting from db: %s", type(self).__name__, self._database)
+            self._connection.close()
+            self._connection = None
+
+    def __enter__(self):
+        '''connects to the database'''
+        self._connect()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        '''disconnects from the database'''
+        self._disconnect()
+
+    def _queryAsSingleLine(self, query, qargs=None):
+        line = ' '.join(query.replace('\n', ' ').split())
+        if qargs:
+            line = line % tuple(['\'%s\'' % a if isinstance(a, basestring) else a for a in qargs])
+        return line
+
+    def executeQuery(self, query, qargs=None, fetch=FETCH_NONE):
+        '''execute the query and reconnect upon OperationalError'''
+        try:
+            with self._connection.cursor(cursor_factory = psycopg2.extras.RealDictCursor) as cursor:
+                start = datetime.utcnow()
+                cursor.execute(query, qargs)
+                if self._log_queries:
+                    elapsed = datetime.utcnow() - start
+                    elapsed_ms = 1000.0 * totalSeconds(elapsed)
+                    logger.info('executed query in %.1fms%s yielding %s rows: %s', elapsed_ms,
+                                                                                   ' (SLOW!)' if elapsed_ms > 250 else '', # for easy log grep'ing
+                                                                                   cursor.rowcount,
+                                                                                   self._queryAsSingleLine(query, qargs))
+
+                try:
+                    self._log_database_notifications()
+
+                    result = []
+                    if fetch == FETCH_ONE:
+                        result = cursor.fetchone()
+
+                    if fetch == FETCH_ALL:
+                        result = cursor.fetchall()
+
+                    if self.__auto_commit_selects and re.search('select', query, re.IGNORECASE):
+                        #prevent dangling in idle transaction on server
+                        self.commit()
+
+                    return result
+                except Exception as e:
+                    logger.error("error while fetching result(s) for %s: %s", self._queryAsSingleLine(query, qargs), e)
+
+        except (psycopg2.OperationalError, AttributeError) as e:
+            logger.error(str(e))
+            while self.__connection_retries < 5:
+                logger.info("(re)trying to connect to database")
+                self.__connection_retries += 1
+                self._connect()
+                if self._connection:
+                    self.__connection_retries = 0
+                    return self.executeQuery(query, qargs, fetch)
+                time.sleep(self.__connection_retries * self.__connection_retries) # quadratic backoff between reconnect attempts
+        except (psycopg2.IntegrityError, psycopg2.ProgrammingError, psycopg2.InternalError, psycopg2.DataError) as e:
+            logger.error("Rolling back query=\'%s\' due to error: \'%s\'" % (self._queryAsSingleLine(query, qargs), e))
+            self.rollback()
+            return []
+        except Exception as e:
+            logger.error(str(e))
+
+        return []
+
+    def _log_database_notifications(self):
+        if self._log_queries and self._connection.notices:
+            for notice in self._connection.notices:
+                logger.info('database log message %s', notice.strip())
+        del self._connection.notices[:]
+
+    def commit(self):
+        if self._log_queries:
+            logger.debug('commit')
+        self._connection.commit()
+
+    def rollback(self):
+        if self._log_queries:
+            logger.info('rollback')
+        self._connection.rollback()
+
+
+class PostgresListener(PostgresDatabaseConnection):
     ''' This class lets you listen to postgres notifications.
     It executes callbacks when a notification occurs.
     Make your own subclass with your callbacks and subscribe them to the appropriate channel.
@@ -122,32 +269,34 @@ class PostgresListener(object):
                  password='',
                  port=5432):
         '''Create a new PostgresListener'''
-        self.conn = psycopg2.connect(host=host,
-                                     user=username,
-                                     password=password,
-                                     database=database,
-                                     port=port)
-        self.conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-        self.cursor = self.conn.cursor()
+        super(PostgresListener, self).__init__(host=host,
+                                               database=database,
+                                               username=username,
+                                               password=password,
+                                               port=port)
         self.__listening = False
         self.__lock = Lock()
         self.__callbacks = {}
         self.__waiting = False
         self.__queue = Queue()
 
+    def _connect(self):
+        super(PostgresListener, self)._connect()
+        self._connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
+
     def subscribe(self, notification, callback):
         '''Subscribe to a certain postgres notification.
         Call callback method in case such a notification is received.'''
         logger.info("Subscribed %sto %s" % ('and listening ' if self.isListening() else '', notification))
         with self.__lock:
-            self.cursor.execute("LISTEN %s;", (psycopg2.extensions.AsIs(notification),))
+            self.executeQuery("LISTEN %s;", (psycopg2.extensions.AsIs(notification),))
             self.__callbacks[notification] = callback
 
     def unsubscribe(self, notification):
         '''Unsubscribe from a certain postgres notification.'''
         logger.info("Unsubscribed from %s" % notification)
         with self.__lock:
-            self.cursor.execute("UNLISTEN %s;", (psycopg2.extensions.AsIs(notification),))
+            self.executeQuery("UNLISTEN %s;", (psycopg2.extensions.AsIs(notification),))
             if notification in self.__callbacks:
                 del self.__callbacks[notification]
 
@@ -173,11 +322,11 @@ class PostgresListener(object):
 
         def eventLoop():
             while self.isListening():
-                if select.select([self.conn],[],[],2) != ([],[],[]):
-                    self.conn.poll()
-                    while self.conn.notifies:
+                if select.select([self._connection],[],[],2) != ([],[],[]):
+                    self._connection.poll()
+                    while self._connection.notifies:
                         try:
-                            notification = self.conn.notifies.pop(0)
+                            notification = self._connection.notifies.pop(0)
                             logger.debug("Received notification on channel %s payload %s" % (notification.channel, notification.payload))
 
                             if self.isWaiting():
@@ -270,3 +419,4 @@ class PostgresListener(object):
                 pass
 
         self.stopWaiting()
+
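For reference, a minimal usage sketch of the new PostgresDatabaseConnection base class (illustrative only, not part of the patch; it assumes the module installs as lofar.common.postgres, and the database name, credentials and table are placeholders):

# illustrative sketch, not part of this patch; 'ltaso', 'lta_user', 'secret' and
# the lta.site table are placeholder names
from lofar.common.postgres import PostgresDatabaseConnection, FETCH_ONE, FETCH_ALL

with PostgresDatabaseConnection(host='localhost', database='ltaso',
                                username='lta_user', password='secret',
                                log_queries=True) as db:
    # selects are auto-committed by default to avoid idle-in-transaction sessions
    sites = db.executeQuery('SELECT * FROM lta.site;', fetch=FETCH_ALL)
    site  = db.executeQuery('SELECT * FROM lta.site WHERE id = %s;', (1,), fetch=FETCH_ONE)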
diff --git a/LTA/LTAIngest/LTAIngestClient/lib/ingestbuslistener.py b/LTA/LTAIngest/LTAIngestClient/lib/ingestbuslistener.py
index 6a9824fcfc2..aefdb6600d8 100644
--- a/LTA/LTAIngest/LTAIngestClient/lib/ingestbuslistener.py
+++ b/LTA/LTAIngest/LTAIngestClient/lib/ingestbuslistener.py
@@ -156,6 +156,9 @@ class IngestBusListener(AbstractBusListener):
             if job_dict.get('average_speed') != None:
                 msg += ' avg speed: %s' % humanreadablesize(job_dict.get('average_speed'), 'Bps')
 
+            if job_dict.get('srm_url'):
+                msg += ' srm_url: %s' % job_dict.get('srm_url')
+
             if job_dict.get('message'):
                 msg += ' message: %s' % job_dict.get('message')
 
diff --git a/LTA/LTAIngest/LTAIngestCommon/CMakeLists.txt b/LTA/LTAIngest/LTAIngestCommon/CMakeLists.txt
index 9d033933e68..9ef3200635e 100644
--- a/LTA/LTAIngest/LTAIngestCommon/CMakeLists.txt
+++ b/LTA/LTAIngest/LTAIngestCommon/CMakeLists.txt
@@ -2,6 +2,7 @@ lofar_package(LTAIngestCommon 2.0 DEPENDS PyMessaging PyCommon)
 
 python_install(config.py
                job.py
+               srm.py
                DESTINATION lofar/lta/ingest/common)
 
 add_subdirectory(test)
diff --git a/LTA/LTAIngest/LTAIngestCommon/config.py b/LTA/LTAIngest/LTAIngestCommon/config.py
index 3ffeb428fdb..8b741c7cf66 100644
--- a/LTA/LTAIngest/LTAIngestCommon/config.py
+++ b/LTA/LTAIngest/LTAIngestCommon/config.py
@@ -11,7 +11,7 @@ DEFAULT_INGEST_NOTIFICATION_BUSNAME = adaptNameToEnvironment('lofar.lta.ingest.n
 DEFAULT_INGEST_NOTIFICATION_PREFIX = 'LTAIngest.'
 DEFAULT_INGEST_NOTIFICATION_SUBJECTS=DEFAULT_INGEST_NOTIFICATION_PREFIX+'*'
 
-DEFAULT_BROKER = '10.178.1.3' if isProductionEnvironment() else 'localhost'
+DEFAULT_BROKER = 'lexar003' if isProductionEnvironment() else 'localhost'
 
 def hostnameToIp(hostname):
     if 'lexar001' in hostname:
diff --git a/LTA/LTAIngest/LTAIngestCommon/srm.py b/LTA/LTAIngest/LTAIngestCommon/srm.py
new file mode 100755
index 00000000000..abed17b82db
--- /dev/null
+++ b/LTA/LTAIngest/LTAIngestCommon/srm.py
@@ -0,0 +1,264 @@
+# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+# $Id:  $
+
+from subprocess import Popen, PIPE
+import socket
+import os
+import time
+import re
+from datetime import datetime, timedelta
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+"""
+This srm module provides python methods for the most used srm calls like srmls, srmrm, etc.
+Furthermore, this module provides methods for surl (srm-url) and turl (transfer-url) manipulation.
+"""
+
+
+class SrmException(Exception):
+    """ Generic exception for srm errors"""
+    pass
+
+
+def srmrm(surl, log_prefix='', timeout=-1):
+    """ remove file from srm
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
+    :param timeout: optional timeout in seconds
+    :return: (stdout, stderr, returncode) tuple with the results of the system call to srm.
+    """
+    logger.info('%s removing surl: %s', log_prefix, surl)
+    return __execute(['/bin/bash', '-c', 'srmrm %s' % (surl,)], log_prefix, timeout)
+
+
+def srmrmdir(surl, log_prefix='', timeout=-1):
+    """ remove (empty) directory from srm
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
+    :param timeout: optional timeout in seconds
+    :return: (stdout, stderr, returncode) tuple with the results of the system call to srm.
+    """
+    return __execute(['/bin/bash', '-c', 'srmrmdir %s' % (surl,)], log_prefix, timeout)
+
+
+def srmmkdir(surl, log_prefix='', timeout=-1):
+    """ create directory in srm
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
+    :param timeout: optional timeout in seconds
+    :return: (stdout, stderr, returncode) tuple with the results of the system call to srm.
+    """
+    return __execute(['/bin/bash', '-c', 'srmmkdir -retry_num=0 %s' % (surl,)], log_prefix, timeout)
+
+
+def srmls(surl, log_prefix='', timeout=-1):
+    """ get listing in directory
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
+    :param timeout: optional timeout in seconds
+    :return: (stdout, stderr, returncode) tuple with the results of the system call to srm.
+    """
+    return __execute(['/bin/bash', '-c', 'srmls %s' % (surl,)], log_prefix, timeout)
+
+
+def srmll(surl, log_prefix='', timeout=-1):
+    """ get detailed listing of a surl (directory or file)
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
+    :param timeout: optional timeout in seconds
+    :return: (stdout, stderr, returncode) tuple with the results of the system call to srm.
+    """
+    return __execute(['/bin/bash', '-c', 'srmls -l %s' % (surl,)], log_prefix, timeout)
+
+
+def __execute(cmd, log_prefix='', timeout=-1):
+    """ helper method, wrapper around subprocess.
+    execute command and return (stdout, stderr, returncode) tuple
+    :param cmd: a subprocess Popen cmd like list
+    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
+    :param timeout: optional timeout in seconds
+    :return: (stdout, stderr, returncode) tuple
+    """
+    if log_prefix:
+        if not isinstance(log_prefix, basestring):
+            log_prefix = str(log_prefix)
+        if log_prefix[-1] != ' ':
+            log_prefix += ' '
+
+    logger.info('%sexecuting: %s', log_prefix, ' '.join(cmd))
+    p_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE)
+
+    if timeout > 0:
+        timeout = timedelta(seconds=timeout)
+        logger.debug('%swaiting at most %s for command to finish...', log_prefix, timeout)
+        start_wait = datetime.now()
+        while datetime.now() - start_wait < timeout:
+            if p_cmd.poll() is not None:
+                break
+            time.sleep(1)
+
+        if p_cmd.poll() is None:
+            raise SrmException('%s%s did not finish within %s.' % (log_prefix, cmd, timeout))
+
+    stdout, stderr = p_cmd.communicate()
+    return stdout, stderr, p_cmd.returncode
+
+
+def get_srm_size_and_a32_checksum(surl, log_prefix='', timeout=-1):
+    """ get file size and checksum from srm via srmll
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
+    :param timeout: optional timeout in seconds
+    :return: (success, file_size, a32_checksum) tuple.
+    """
+    try:
+        output, errors, code = srmll(surl, log_prefix, timeout)
+        logger.debug(output)
+
+        if code != 0:
+            return False, None, None
+
+        path_line = output.strip()
+        path_line_items = [x.strip() for x in path_line.split()]
+
+        if len(path_line_items) < 2:
+            # path line shorter than expected
+            return False, None, None
+
+        file_size = int(path_line_items[0])
+
+        if 'Checksum type:' not in output:
+            return False, None, None
+
+        if 'Checksum type:' in output:
+            cstype = output.split('Checksum type:')[1].split()[0].strip()
+            if cstype.lower() != 'adler32':
+                return False, None, None
+
+        if 'Checksum value:' in output:
+            a32_value = output.split('Checksum value:')[1].lstrip().split()[0]
+            return True, file_size, a32_value
+
+    except Exception as e:
+        logger.error(e)
+
+    return False, None, None
+
+
+def create_missing_directories(surl):
+    """ recursively checks for presence of parent directory and created the missing part of a tree
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :return: exit-code of srmmkdir of final dir
+    """
+    parent, child = os.path.split(surl)
+    missing = []
+
+    # determine missing dirs
+    while parent:
+        logger.info('checking path: %s' % parent)
+        o, e, code = srmls(parent)
+        if code == 0:
+            logger.info('srmls returned successfully, so this path apparently exists: %s' % parent)
+            break
+        else:
+            parent, child = os.path.split(parent)
+            missing.append(child)
+
+    # recreate missing dirs
+    while len(missing) > 0:
+        parent = parent + '/' + missing.pop()
+        code = srmmkdir(parent)[2]
+        if code != 0:
+            logger.info('failed to create missing directory: %s' % parent)
+            return code
+
+    logger.info('successfully created parent directory: %s' % parent)
+    return 0
+
+
+def convert_surl_to_turl(surl):
+    """ converts given srm url of an LTA site into a transport url as needed by gridftp.
+    """
+    if 'grid.sara.nl' in surl:
+        # sara provides dynamic hostnames via a round-robin dns. Get a random/dynamic host as provided by them.
+        dyn_hostname = socket.getfqdn(socket.gethostbyname('gridftp.grid.sara.nl'))
+        return re.sub('srm://srm\.grid\.sara\.nl:?\d*', 'gsiftp://%s:2811' % (dyn_hostname,), surl)
+
+    if 'lta-head.lofar.psnc.pl' in surl:
+        # poznan provides dynamic hostnames via a round-robin dns. Get a random/dynamic host as provided by them.
+        dyn_hostname = socket.getfqdn(socket.gethostbyname('gridftp.lofar.psnc.pl'))
+        return re.sub('srm://lta-head\.lofar\.psnc\.pl:?\d*', 'gsiftp://%s:2811' % (dyn_hostname,), surl)
+
+    if 'lofar-srm.fz-juelich.de' in surl:
+        # juelich provides dynamic hostnames via a round-robin dns. Get a random/dynamic host as provided by them.
+        dyn_hostname = socket.getfqdn(socket.gethostbyname('lofar-gridftp.fz-juelich.de'))
+        return re.sub('srm://lofar-srm\.fz-juelich\.de:?\d*', 'gsiftp://%s:2811' % (dyn_hostname,), surl)
+
+    raise SrmException('Cannot convert surl to turl. Unknown destination in surl: \'%s\'.' % surl)
+
+
+def get_site_surl(surl):
+    """
+    extract the site surl from a given surl.
+    for example srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    becomes: srm://lofar-srm.fz-juelich.de:8443
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :return: the 'site-part' of the surl, like: srm://lofar-srm.fz-juelich.de:8443
+    """
+    if not surl.startswith('srm://'):
+        raise SrmException('invalid srm_url: %s' % surl)
+
+    return 'srm://' + surl[6:].split('/')[0]
+
+
+def get_path_in_site(surl):
+    """
+    cut the site 'prefix' of the srm url and returns the path.
+    for example srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+    becomes: /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+    :return: the 'path-part' of the surl, like: /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+    """
+    site_surl = get_site_surl(surl)
+    return surl[len(site_surl):].rstrip('/')
+
+
+def get_dir_path_in_site(surl):
+    """
+    cut the site 'prefix' of the srm url and cut an optional file 'postfix' and return the directory path.
+    It is assumed that a filename contains a '.'
+    for example (1) srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    becomes: /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    for example (2) srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+    becomes: /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :param surl: an srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    :return: the 'dir-path-part' of the surl, like: /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+    """
+    path = get_path_in_site(surl)
+    parts = path.split('/')
+    if '.' in parts[-1]:
+        # last part is a filename, because it contains a '.'
+        # return only dir-parts
+        return '/'.join(parts[:-1])
+
+    # path contains no filename, just return it
+    return path
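For reference, a short sketch of how the new srm helpers are used together (illustrative only, not part of the patch; the surl is just an example value, and the srm* wrappers need grid credentials to do real work):

# illustrative sketch, not part of this patch
from lofar.lta.ingest.common import srm

surl = 'srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar'

print srm.get_site_surl(surl)          # srm://lofar-srm.fz-juelich.de:8443
print srm.get_path_in_site(surl)       # /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
print srm.get_dir_path_in_site(surl)   # /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
print srm.convert_surl_to_turl(surl)   # gsiftp://<round-robin juelich gridftp host>:2811/pnfs/...

# the srmls/srmll/srmrm/srmmkdir wrappers all return a (stdout, stderr, returncode) tuple
dir_surl = srm.get_site_surl(surl) + srm.get_dir_path_in_site(surl)
stdout, stderr, returncode = srm.srmls(dir_surl)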
diff --git a/LTA/LTAIngest/LTAIngestCommon/test/CMakeLists.txt b/LTA/LTAIngest/LTAIngestCommon/test/CMakeLists.txt
index a028c7174cd..fbd25d400ee 100644
--- a/LTA/LTAIngest/LTAIngestCommon/test/CMakeLists.txt
+++ b/LTA/LTAIngest/LTAIngestCommon/test/CMakeLists.txt
@@ -1,5 +1,6 @@
 include(LofarCTest)
 
 lofar_add_test(t_job)
+lofar_add_test(t_srm)
 
 
diff --git a/LTA/LTAIngest/LTAIngestCommon/test/t_srm.py b/LTA/LTAIngest/LTAIngestCommon/test/t_srm.py
new file mode 100755
index 00000000000..d8b3f79beb8
--- /dev/null
+++ b/LTA/LTAIngest/LTAIngestCommon/test/t_srm.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+import unittest
+from lofar.lta.ingest.common.srm import *
+
+import logging
+logger = logging.getLogger(__name__)
+
+class TestSrm(unittest.TestCase):
+    """
+    Test various methods from the srm module.
+    Unfortunately, we cannot do unittests on actual srm calls, as we need real srm sites and certificates for that.
+    """
+
+    def test_get_site_surl(self):
+        self.assertEqual('srm://srm.grid.sara.nl:8443',
+                         get_site_surl('srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346/L658346_SB019_uv.MS_8190b749.tar'))
+
+        self.assertEqual('srm://lofar-srm.fz-juelich.de:8443',
+                         get_site_surl('srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884'))
+
+        self.assertEqual('srm://lta-head.lofar.psnc.pl:8443',
+                         get_site_surl('srm://lta-head.lofar.psnc.pl:8443/lofar/ops/projects/lt10_004/658456/L658456_SAP000_B000_P012_bf_03c23eb1.tar'))
+
+        with self.assertRaises(SrmException) as context:
+            get_site_surl('http://nu.nl')
+        self.assertTrue('invalid srm_url' in context.exception.message)
+
+    def test_path_in_site(self):
+        self.assertEqual('/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346/L658346_SB019_uv.MS_8190b749.tar',
+                         get_path_in_site('srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346/L658346_SB019_uv.MS_8190b749.tar'))
+
+        self.assertEqual('/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884',
+                         get_path_in_site('srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884'))
+
+        self.assertEqual('/lofar/ops/projects/lt10_004/658456/L658456_SAP000_B000_P012_bf_03c23eb1.tar',
+                         get_path_in_site('srm://lta-head.lofar.psnc.pl:8443/lofar/ops/projects/lt10_004/658456/L658456_SAP000_B000_P012_bf_03c23eb1.tar'))
+
+        # check if tailing '/' is removed
+        self.assertEqual('/foo/bar',
+                         get_path_in_site('srm://lta-head.lofar.psnc.pl:8443/foo/bar/'))
+
+        with self.assertRaises(SrmException) as context:
+            get_path_in_site('http://nu.nl')
+        self.assertTrue('invalid srm_url' in context.exception.message)
+
+    def test_dir_path_in_site(self):
+        self.assertEqual('/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346',
+                         get_dir_path_in_site('srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346'))
+
+        self.assertEqual('/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346',
+                         get_dir_path_in_site('srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346/'))
+
+        self.assertEqual('/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346',
+                         get_dir_path_in_site('srm://srm.grid.sara.nl:8443/pnfs/grid.sara.nl/data/lofar/ops/projects/lc10_010/658346/L658346_SB019_uv.MS_8190b749.tar'))
+
+
+if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.DEBUG)
+    unittest.main()
diff --git a/LTA/LTAIngest/LTAIngestCommon/test/t_srm.run b/LTA/LTAIngest/LTAIngestCommon/test/t_srm.run
new file mode 100755
index 00000000000..1f133d359b9
--- /dev/null
+++ b/LTA/LTAIngest/LTAIngestCommon/test/t_srm.run
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Run the unit test
+source python-coverage.sh
+python_coverage_test "*srm*" t_srm.py
+
diff --git a/LTA/LTAIngest/LTAIngestCommon/test/t_srm.sh b/LTA/LTAIngest/LTAIngestCommon/test/t_srm.sh
new file mode 100755
index 00000000000..88d8d4090d1
--- /dev/null
+++ b/LTA/LTAIngest/LTAIngestCommon/test/t_srm.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+./runctest.sh t_srm
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py
index 77b2743f010..3c9ee04fe2f 100644
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py
@@ -445,7 +445,6 @@ class IngestJobManager:
                     job_admin_dict['runs'][job_admin_dict.get('retry_attempt', 0)]['started_at'] = datetime.utcnow()
 
                 if new_status == JobProduced or new_status == JobTransferFailed:
-                    job_admin_dict['runs'][job_admin_dict.get('retry_attempt', 0)] = {}
                     job_admin_dict['runs'][job_admin_dict.get('retry_attempt', 0)]['finished_at'] = datetime.utcnow()
 
                 if lta_site:
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py
index 71837dd8fd5..c8ebbf5499f 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py
@@ -97,6 +97,8 @@ try:
                 if percentage_done:
                     content['percentage_done'] = percentage_done
                 event_msg = EventMessage(context=config.DEFAULT_INGEST_NOTIFICATION_PREFIX + event, content=content)
+                logger.info('sending test event message on %s subject=%s content=%s',
+                            test_notifier.address, event_msg.subject, event_msg.content)
                 test_notifier.send(event_msg)
 
             def receiveJobForTransfer():
@@ -145,7 +147,7 @@ try:
                 #just finish normally
                 sendNotification('JobFinished', job1['JobId'])
 
-                time.sleep(0.5)
+                time.sleep(1.0) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #check report
@@ -162,7 +164,7 @@ try:
                 assert job2['JobId'] == 'A_999999999_777777778_L888888888_SB001_uv.MS', 'unexpected job %s' % job2['JobId']
                 sendNotification('JobStarted', job2['JobId'])
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #check report
@@ -177,7 +179,7 @@ try:
                 # let job2 fail
                 sendNotification('JobTransferFailed', job2['JobId'], message='something went wrong')
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #check report
@@ -196,7 +198,7 @@ try:
                 assert job3['JobId'] == 'A_999999999_777777779_L888888888_SB002_uv.MS', 'unexpected job %s' % job3['JobId']
                 sendNotification('JobStarted', job3['JobId'])
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #check report
@@ -214,7 +216,7 @@ try:
                 #3rd job will fail all the time
                 sendNotification('JobTransferFailed', job3['JobId'], message='something went wrong')
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #check report
@@ -236,7 +238,7 @@ try:
                 assert job2['JobId'] == 'A_999999999_777777778_L888888888_SB001_uv.MS', 'unexpected job %s' % job2['JobId']
                 sendNotification('JobStarted', job2['JobId'])
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #keep job2 running while we process job3
@@ -259,7 +261,7 @@ try:
                 assert job3['JobId'] == 'A_999999999_777777779_L888888888_SB002_uv.MS', 'unexpected job %s' % job3['JobId']
                 sendNotification('JobStarted', job3['JobId'])
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #check report
@@ -279,7 +281,7 @@ try:
                 #3rd job will fail again
                 sendNotification('JobTransferFailed', job3['JobId'], message='something went wrong')
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 2, 'expected 2 jobs unfinished'
 
                 #check report
@@ -302,7 +304,7 @@ try:
                 sendNotification('JobFinished', job2['JobId'])
 
                 #one job to go
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 1, 'expected 1 job unfinished'
 
                 #check report
@@ -328,7 +330,7 @@ try:
                 assert job3['JobId'] == 'A_999999999_777777779_L888888888_SB002_uv.MS', 'unexpected job %s' % job3['JobId']
                 sendNotification('JobStarted', job3['JobId'])
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 1, 'expected 1 job unfinished'
 
                 #check report
@@ -354,14 +356,14 @@ try:
 
                 #3rd job should have failed after 3 retries
                 #no more jobs to go
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
                 assert manager.nrOfUnfinishedJobs() == 0, 'expected 0 jobs unfinished'
 
                 #there should be no more reports, cause the job group 999999999 is finished as a whole
                 #and is removed from the manager at this point
                 reports = manager.getStatusReportDict()
                 assert 0 == len(reports), 'expected 0 reports'
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
 
                 jobgroup_999999999_failed_dir = os.path.join(config.JOBS_DIR, 'failed', 'MoM_999999999')
                 failed_jobgroup_999999999_files = [os.path.join(jobgroup_999999999_failed_dir, f) for f in
@@ -385,14 +387,14 @@ try:
                 sendNotification('JobStarted', job3['JobId'])
                 sendNotification('JobFinished', job3['JobId'])
 
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
 
                 #there should be no more reports, cause the job group 999999999 is finished as a whole
                 #and is removed from the manager at this point
                 reports = manager.getStatusReportDict()
                 assert 0 == len(reports), 'expected 0 reports'
                 assert manager.nrOfUnfinishedJobs() == 0, 'expected 0 jobs unfinished'
-                time.sleep(0.5)
+                time.sleep(1.5) #TODO: should not wait fixed amount of time, but poll for expected output with a timeout
 
                 manager.quit()
                 manager_thread.join()
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ltacp.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ltacp.py
index f10960ce767..5c847da68f3 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ltacp.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ltacp.py
@@ -23,6 +23,7 @@ from lofar.common.datetimeutils import totalSeconds
 from lofar.common.subprocess import PipeReader
 from lofar.lta.ingest.common.config import hostnameToIp
 from lofar.lta.ingest.server.config import GLOBUS_TIMEOUT
+from lofar.lta.ingest.common.srm import *
 
 logger = logging.getLogger()
 
@@ -39,27 +40,6 @@ class LtacpDestinationExistsException(LtacpException):
 def getLocalIPAddress():
     return hostnameToIp(socket.gethostname())
 
-def convert_surl_to_turl(surl):
-    ''' converts given srm url of an LTA site into a transport url as needed by gridftp.
-    '''
-    if 'grid.sara.nl' in surl:
-        # sara provides dynamic hostnames via a round-robin dns. Get a random/dynamic host as provided by them.
-        dyn_hostname = socket.getfqdn(socket.gethostbyname('gridftp.grid.sara.nl'))
-        return re.sub('srm://srm\.grid\.sara\.nl:?\d*', 'gsiftp://%s:2811' % (dyn_hostname,), surl)
-
-    if 'lta-head.lofar.psnc.pl' in surl:
-        # poznan provides dynamic hostnames via a round-robin dns. Get a random/dynamic host as provided by them.
-        dyn_hostname = socket.getfqdn(socket.gethostbyname('gridftp.lofar.psnc.pl'))
-        return re.sub('srm://lta-head\.lofar\.psnc\.pl:?\d*', 'gsiftp://%s:2811' % (dyn_hostname,), surl)
-
-    if 'lofar-srm.fz-juelich.de' in surl:
-        # juelich provides dynamic hostnames via a round-robin dns. Get a random/dynamic host as provided by them.
-        dyn_hostname = socket.getfqdn(socket.gethostbyname('lofar-gridftp.fz-juelich.de'))
-        return re.sub('srm://lofar-srm\.fz-juelich\.de:?\d*', 'gsiftp://%s:2811' % (dyn_hostname,), surl)
-
-    raise LtacpException('Cannot convert surl to turl. Unknown destination in surl: \'%s\'.' % surl)
-
-
 def createNetCatCmd(listener, user=None, host=None):
     '''helper method to determine the proper call syntax for netcat on host'''
 
@@ -654,119 +634,6 @@ class LtaCp:
 
         logger.debug('ltacp %s: finished cleaning up' % (self.logId))
 
-
-
-# execute command and return (stdout, stderr, returncode) tuple
-def execute(cmd, log_prefix='', timeout=-1):
-    if log_prefix:
-        if not isinstance(log_prefix, basestring):
-            log_prefix = str(log_prefix)
-        if log_prefix[-1] != ' ':
-            log_prefix += ' '
-
-    logger.info('%sexecuting: %s', log_prefix, ' '.join(cmd))
-    p_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE)
-
-    if timeout > 0:
-        timeout = timedelta(seconds=timeout)
-        logger.debug('%swaiting at most %s for command to finish...', log_prefix, timeout)
-        start_wait = datetime.now()
-        while datetime.now() - start_wait < timeout:
-            if p_cmd.poll() is not None:
-                break;
-            time.sleep(1)
-
-        if p_cmd.poll() is None:
-            raise Exception('%s%s did not finish within %s.' % (log_prefix, cmd, timeout))
-
-    stdout, stderr = p_cmd.communicate()
-    return (stdout, stderr, p_cmd.returncode)
-
-# remove file from srm
-def srmrm(surl, log_prefix='', timeout=-1):
-    logger.info('%s removing surl: %s', log_prefix, surl)
-    return execute(['/bin/bash', '-c', 'srmrm %s' % (surl,)], log_prefix, timeout)
-
-# remove (empty) directory from srm
-def srmrmdir(surl, log_prefix='', timeout=-1):
-    return execute(['/bin/bash', '-c', 'srmrmdir %s' % (surl,)], log_prefix, timeout)
-
-# create directory in srm
-def srmmkdir(surl, log_prefix='', timeout=-1):
-    return execute(['/bin/bash', '-c', 'srmmkdir -retry_num=0 %s' % (surl,)], log_prefix, timeout)
-
-# detailed listing
-def srmls(surl, log_prefix='', timeout=-1):
-    return execute(['/bin/bash', '-c', 'srmls %s' % (surl,)], log_prefix, timeout)
-
-# detailed listing
-def srmll(surl, log_prefix='', timeout=-1):
-    return execute(['/bin/bash', '-c', 'srmls -l %s' % (surl,)], log_prefix, timeout)
-
-# get file size and checksum from srm via srmll
-def get_srm_size_and_a32_checksum(surl, log_prefix='', timeout=-1):
-    try:
-        output, errors, code = srmll(surl, log_prefix, timeout)
-        logger.debug(output)
-
-        if code != 0:
-            return (False, None, None)
-
-        pathLine = output.strip()
-        pathLineItems = [x.strip() for x in pathLine.split()]
-
-        if len(pathLineItems) < 2:
-            #path line shorter than expected
-            return (False, None, None)
-
-        file_size = int(pathLineItems[0])
-
-        if not 'Checksum type:' in output:
-            return False
-
-        if 'Checksum type:' in output:
-            cstype = output.split('Checksum type:')[1].split()[0].strip()
-            if cstype.lower() != 'adler32':
-                return (False, None, None)
-
-        if 'Checksum value:' in output:
-            a32_value = output.split('Checksum value:')[1].lstrip().split()[0]
-            return (True, file_size, a32_value)
-
-    except Exception as e:
-        logger.error(e)
-
-    return (False, None, None)
-
-#recursively checks for presence of parent directory and created the missing part of a tree
-def create_missing_directories(surl):
-
-    parent, child = os.path.split(surl)
-    missing = []
-
-    # determine missing dirs
-    while parent:
-        logger.info('checking path: %s' % parent)
-        o, e, code = srmls(parent)
-        if code == 0:
-            logger.info('srmls returned successfully, so this path apparently exists: %s' % parent)
-            break;
-        else:
-            parent, child = os.path.split(parent)
-            missing.append(child)
-
-    # recreate missing dirs
-    while len(missing) > 0:
-        parent = parent + '/' + missing.pop()
-        code = srmmkdir(parent)[2]
-        if code != 0:
-            logger.info('failed to create missing directory: %s' % parent)
-            return code
-
-    logger.info('successfully created parent directory: %s' % parent)
-    return 0
-
-
 # limited standalone mode for testing:
 # usage: ltacp.py <remote-host> <remote-path> <surl>
 def main():
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/ltastubs.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/ltastubs.py
index b8ecfcd5225..4f75e11d189 100644
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/ltastubs.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/ltastubs.py
@@ -25,7 +25,7 @@ def stub():
 
         if 'globus-url-copy' in ' '.join(args):
             dppath = [x for x in args[2].split() if 'file://' in x][0]
-            dest_path = [x for x in args[2].split() if 'srm://' in x][0]
+            dest_path = [x for x in args[2].split() if 'gsiftp://' in x][0]
             dest_filename = os.path.basename(dest_path)
             global _local_globus_file_path
             _local_globus_file_path = '/tmp/globus_output_%s/%s' % (uuid.uuid1(), dest_filename)
@@ -68,8 +68,8 @@ def stub():
 
             return '\n'.join(lines), '', 0
 
-    lofar.lta.ingest.server.ltacp.srmll_org = lofar.lta.ingest.server.ltacp.srmll
-    lofar.lta.ingest.server.ltacp.srmll = stub_srmll
+    lofar.lta.ingest.common.srm.srmll_org = lofar.lta.ingest.common.srm.srmll
+    lofar.lta.ingest.common.srm.srmll = stub_srmll
 
 def un_stub():
     global _local_globus_file_path
@@ -78,7 +78,7 @@ def un_stub():
     subprocess.Popen.__init__ = subprocess.Popen.__init__org
 
     logger.info('un-stubbing srmll command')
-    lofar.lta.ingest.server.ltacp.srmll = lofar.lta.ingest.server.ltacp.srmll_org
+    lofar.lta.ingest.common.srm.srmll = lofar.lta.ingest.common.srm.srmll_org
 
     if _local_globus_file_path and os.path.exists(_local_globus_file_path):
         logger.info('removing _local_globus_file_path: %s', _local_globus_file_path)
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ingestpipeline.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ingestpipeline.py
index c59de5adcde..3d4c972e58c 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ingestpipeline.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ingestpipeline.py
@@ -23,6 +23,12 @@ except ImportError:
     print 'Please install MagicMock: pip install mock'
     exit(3)
 
+from subprocess import call
+if call(['ssh', '-o', 'PasswordAuthentication=no', '-o', 'PubkeyAuthentication=yes', '-o', 'ConnectTimeout=1', 'localhost', 'true']) != 0:
+    print 'this test depends on key-based ssh login to localhost, which is not set up correctly. skipping test...'
+    exit(3)
+
+
 connection = None
 broker = None
 
@@ -56,244 +62,249 @@ try:
             # modify the return values of the various MoMClient methods with pre-cooked answers
             mommock.setStatus.return_value = True
 
-            from lofar.lta.ingest.common.job import createJobXml, parseJobXml
-            from lofar.lta.ingest.server.ltaclient import LTAClient # <-- thanks to magick mock, we get the mocked ltaclient
-            from lofar.lta.ingest.server.momclient import MoMClient # <-- thanks to magick mock, we get the mocked momclient
-            from lofar.lta.ingest.server.ingestpipeline import *
-            import ltastubs
-
-            logger = logging.getLogger()
-
-            class TestIngestPipeline(unittest.TestCase):
-                def setUp(self):
-                    ltastubs.stub()
-                    self.ltaclient = LTAClient()
-                    self.momclient = MoMClient()
-
-                def tearDown(self):
-                    ltastubs.un_stub()
-
-                def test_single_file(self):
-                    try:
-                        project_name = 'test-project'
-                        obs_id = 987654321
-                        dpname = 'L%s_SAP000_SB000_im.h5' % obs_id
-                        test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
-
-                        def stub_GetStorageTicket(project, filename, filesize, archive_id, job_id, obs_id, check_mom_id=True, id_source='MoM'):
-                            return { 'primary_uri_rnd': 'srm://some.site.name:8443/some/path/data/lofar/ops/projects/%s/%s/%s' % (project, obs_id, dpname),
-                                     'result': 'ok',
-                                     'error': '',
-                                     'ticket': '3E0A47ED860D6339E053B316A9C3BEE2'}
-                        ltamock.GetStorageTicket.side_effect = stub_GetStorageTicket
-
-                        def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
-                            #return unpecified sip with proper details
-                            from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
-                            return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
-                        mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
-
-                        os.makedirs(test_dir_path)
-                        test_file_path = os.path.join(test_dir_path, dpname)
-                        with open(test_file_path, 'w') as file:
-                            file.write(4096*'a')
-
-                        job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_file_path)
-                        logger.info('job xml: %s', job_xml)
-                        job = parseJobXml(job_xml)
-
-                        pl = IngestPipeline(job, self.momclient, self.ltaclient)
-                        pl.run()
-
-                    except Exception as e:
-                        self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
-                    finally:
-                        # the 'stub-transfered' file ended up in out local stub lta
-                        # with the path: ltastubs._local_globus_file_path
-                        #check extension
-                        self.assertEqual(os.path.splitext(test_file_path)[-1], os.path.splitext(ltastubs._local_globus_file_path)[-1])
-
-                        #compare with original
-                        with open(test_file_path) as input, open(ltastubs._local_globus_file_path) as output:
-                            self.assertEqual(input.read(), output.read())
-
-                        for f in os.listdir(test_dir_path):
-                            os.remove(os.path.join(test_dir_path, f))
-                        os.removedirs(test_dir_path)
-
-                def test_h5_plus_raw_file(self):
-                    #beam formed h5 files are always accompanied by a raw file
-                    #these should be tarred togheter
-                    try:
-                        project_name = 'test-project'
-                        obs_id = 987654321
-                        dpname = 'L%s_SAP000_SB000_bf.h5' % obs_id
-                        rawname = dpname.replace('.h5', '.raw')
-                        test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
-
-                        def stub_GetStorageTicket(project, filename, filesize, archive_id, job_id, obs_id, check_mom_id=True, id_source='MoM'):
-                            return { 'primary_uri_rnd': 'srm://some.site.name:8443/some/path/data/lofar/ops/projects/%s/%s/%s.tar' % (project, obs_id, dpname),
-                                     'result': 'ok',
-                                     'error': '',
-                                     'ticket': '3E0A47ED860D6339E053B316A9C3BEE2'}
-                        ltamock.GetStorageTicket.side_effect = stub_GetStorageTicket
-
-                        def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
-                            #return unpecified sip with proper details
-                            from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
-                            return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
-                        mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
-
-                        os.makedirs(test_dir_path)
-                        test_file_path = os.path.join(test_dir_path, dpname)
-                        with open(test_file_path, 'w') as file:
-                            file.write(4096*'a')
-                        raw_test_file_path = os.path.join(test_dir_path, dpname.replace('.h5', '.raw'))
-                        with open(raw_test_file_path, 'w') as file:
-                            file.write(4096*'b')
-
-                        job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_file_path)
-                        logger.info('job xml: %s', job_xml)
-                        job = parseJobXml(job_xml)
-
-                        pl = IngestPipeline(job, self.momclient, self.ltaclient)
-                        pl.run()
-
-                    except Exception as e:
-                        self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
-                    finally:
-                        # the 'stub-transfered' file ended up in out local stub lta
-                        # with the path: ltastubs._local_globus_file_path
-                        #check extension
-                        self.assertEqual('.tar', os.path.splitext(ltastubs._local_globus_file_path)[-1])
-
-                        #check tar contents
-                        tar = subprocess.Popen(['tar', '--list', '-f', ltastubs._local_globus_file_path], stdout=subprocess.PIPE)
-                        tar_file_list, err = tar.communicate()
-                        self.assertEqual(tar.returncode, 0)
-                        logger.info('file list in tar:\n%s', tar_file_list)
-
-                        self.assertTrue(os.path.basename(test_file_path) in tar_file_list)
-                        self.assertTrue(os.path.basename(raw_test_file_path) in tar_file_list)
-                        logger.info('all expected source files are in tar!')
-
-                        os.remove(test_file_path)
-                        os.remove(raw_test_file_path)
-                        os.removedirs(test_dir_path)
-
-
-                def test_directory(self):
-                    try:
-                        project_name = 'test-project'
-                        obs_id = 987654321
-                        dpname = 'L%s_SAP000_SB000_uv.MS' % obs_id
-                        test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1(), dpname)
-
-                        def stub_GetStorageTicket(project, filename, filesize, archive_id, job_id, obs_id, check_mom_id=True, id_source='MoM'):
-                            return { 'primary_uri_rnd': 'srm://some.site.name:8443/some/path/data/lofar/ops/projects/%s/%s/%s.tar' % (project, obs_id, dpname),
-                                     'result': 'ok',
-                                     'error': '',
-                                     'ticket': '3E0A47ED860D6339E053B316A9C3BEE2'}
-                        ltamock.GetStorageTicket.side_effect = stub_GetStorageTicket
-
-                        def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
-                            #return unpecified sip with proper details
-                            from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
-                            return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
-                        mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
-
-                        os.makedirs(test_dir_path)
-                        test_file_paths = []
-                        for i in range(10):
-                            test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
-                            test_file_paths.append(test_file_path)
+            # patch (mock) the convert_surl_to_turl method during these tests.
+            with patch('lofar.lta.ingest.server.ltacp.convert_surl_to_turl') as mock_convert_surl_to_turl:
+                mock_convert_surl_to_turl.side_effect = lambda surl: surl.replace('srm', 'gsiftp')
+
+                from lofar.lta.ingest.common.job import createJobXml, parseJobXml
+                from lofar.lta.ingest.server.ltaclient import LTAClient # <-- thanks to MagicMock, we get the mocked ltaclient
+                from lofar.lta.ingest.server.momclient import MoMClient # <-- thanks to MagicMock, we get the mocked momclient
+                from lofar.lta.ingest.server.ingestpipeline import *
+                import ltastubs
+
+                logger = logging.getLogger()
+
+                class TestIngestPipeline(unittest.TestCase):
+                    def setUp(self):
+                        ltastubs.stub()
+                        self.ltaclient = LTAClient()
+                        self.momclient = MoMClient()
+
+                    def tearDown(self):
+                        ltastubs.un_stub()
+
+                    def test_single_file(self):
+                        try:
+                            project_name = 'test-project'
+                            obs_id = 987654321
+                            dpname = 'L%s_SAP000_SB000_im.h5' % obs_id
+                            test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
+
+                            def stub_GetStorageTicket(project, filename, filesize, archive_id, job_id, obs_id, check_mom_id=True, id_source='MoM'):
+                                return { 'primary_uri_rnd': 'srm://some.site.name:8443/some/path/data/lofar/ops/projects/%s/%s/%s' % (project, obs_id, dpname),
+                                         'result': 'ok',
+                                         'error': '',
+                                         'ticket': '3E0A47ED860D6339E053B316A9C3BEE2'}
+                            ltamock.GetStorageTicket.side_effect = stub_GetStorageTicket
+
+                            def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
+                                #return unspecified SIP with proper details
+                                from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
+                                return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
+                            mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
+
+                            os.makedirs(test_dir_path)
+                            test_file_path = os.path.join(test_dir_path, dpname)
                             with open(test_file_path, 'w') as file:
-                                file.write(1000*'a')
-
-                        job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_dir_path)
-                        logger.info('job xml: %s', job_xml)
-                        job = parseJobXml(job_xml)
-
-                        pl = IngestPipeline(job, self.momclient, self.ltaclient)
-                        pl.run()
-                    except Exception as e:
-                        self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
-                    finally:
-                        # the 'stub-transfered' file ended up in out local stub lta
-                        # with the path: ltastubs._local_globus_file_path
-                        #check extension
-                        self.assertTrue('.tar' == os.path.splitext(ltastubs._local_globus_file_path)[-1])
-
-                        #check tar contents
-                        tar = subprocess.Popen(['tar', '--list', '-f', ltastubs._local_globus_file_path], stdout=subprocess.PIPE)
-                        tar_file_list, err = tar.communicate()
-                        self.assertEqual(tar.returncode, 0)
-                        logger.info('file list in tar:\n%s', tar_file_list)
-
-                        for test_file_path in test_file_paths:
-                            self.assertTrue(os.path.basename(test_file_path) in tar_file_list)
-                        logger.info('all expected source files are in tar!')
-
-                        for f in os.listdir(test_dir_path):
-                            os.remove(os.path.join(test_dir_path, f))
-                        os.removedirs(test_dir_path)
-
-                def test_directory_with_odd_dataproduct_name(self):
-                    #sometimes somebody has data in a odd directory
-                    #and gives the dataproduct a different name than it's directory
-                    try:
-                        project_name = 'test-project'
-                        obs_id = 987654321
-                        dpname = 'my_funky_dp_name'
-                        test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1(), 'my_data_dir')
-
-                        def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
-                            #return unpecified sip with proper details
-                            from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
-                            return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
-                        mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
-
-                        os.makedirs(test_dir_path)
-                        test_file_paths = []
-                        for i in range(10):
-                            test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
-                            test_file_paths.append(test_file_path)
+                                file.write(4096*'a')
+
+                            job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_file_path)
+                            logger.info('job xml: %s', job_xml)
+                            job = parseJobXml(job_xml)
+
+                            pl = IngestPipeline(job, self.momclient, self.ltaclient)
+                            pl.run()
+
+                        except Exception as e:
+                            self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
+                        finally:
+                            # the 'stub-transferred' file ended up in our local stub LTA
+                            # with the path: ltastubs._local_globus_file_path
+                            #check extension
+                            self.assertEqual(os.path.splitext(test_file_path)[-1],
+                                             os.path.splitext(ltastubs._local_globus_file_path)[-1])
+
+                            #compare with original
+                            with open(test_file_path) as input, open(ltastubs._local_globus_file_path) as output:
+                                self.assertEqual(input.read(), output.read())
+
+                            for f in os.listdir(test_dir_path):
+                                os.remove(os.path.join(test_dir_path, f))
+                            os.removedirs(test_dir_path)
+
+                    def test_h5_plus_raw_file(self):
+                        #beam-formed h5 files are always accompanied by a raw file
+                        #these should be tarred together
+                        try:
+                            project_name = 'test-project'
+                            obs_id = 987654321
+                            dpname = 'L%s_SAP000_SB000_bf.h5' % obs_id
+                            rawname = dpname.replace('.h5', '.raw')
+                            test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
+
+                            def stub_GetStorageTicket(project, filename, filesize, archive_id, job_id, obs_id, check_mom_id=True, id_source='MoM'):
+                                return { 'primary_uri_rnd': 'srm://some.site.name:8443/some/path/data/lofar/ops/projects/%s/%s/%s.tar' % (project, obs_id, dpname),
+                                         'result': 'ok',
+                                         'error': '',
+                                         'ticket': '3E0A47ED860D6339E053B316A9C3BEE2'}
+                            ltamock.GetStorageTicket.side_effect = stub_GetStorageTicket
+
+                            def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
+                                #return unspecified SIP with proper details
+                                from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
+                                return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
+                            mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
+
+                            os.makedirs(test_dir_path)
+                            test_file_path = os.path.join(test_dir_path, dpname)
                             with open(test_file_path, 'w') as file:
-                                file.write(1000*'a')
-
-                        job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_dir_path)
-                        logger.info('job xml: %s', job_xml)
-                        job = parseJobXml(job_xml)
-
-                        pl = IngestPipeline(job, self.momclient, self.ltaclient)
-                        pl.run()
-                    except Exception as e:
-                        self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
-                    finally:
-                        # the 'stub-transfered' file ended up in out local stub lta
-                        # with the path: ltastubs._local_globus_file_path
-                        #check extension
-                        self.assertTrue('.tar' == os.path.splitext(ltastubs._local_globus_file_path)[-1])
-
-                        #check tar contents
-                        tar = subprocess.Popen(['tar', '--list', '-f', ltastubs._local_globus_file_path], stdout=subprocess.PIPE)
-                        tar_file_list, err = tar.communicate()
-                        self.assertEqual(tar.returncode, 0)
-                        logger.info('file list in tar:\n%s', tar_file_list)
-
-                        for test_file_path in test_file_paths:
-                            self.assertTrue(os.path.basename(test_file_path) in tar_file_list)
-                        logger.info('all expected source files are in tar!')
-
-                        for f in os.listdir(test_dir_path):
-                            os.remove(os.path.join(test_dir_path, f))
-                        os.removedirs(test_dir_path)
+                                file.write(4096*'a')
+                            raw_test_file_path = os.path.join(test_dir_path, dpname.replace('.h5', '.raw'))
+                            with open(raw_test_file_path, 'w') as file:
+                                file.write(4096*'b')
+
+                            job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_file_path)
+                            logger.info('job xml: %s', job_xml)
+                            job = parseJobXml(job_xml)
+
+                            pl = IngestPipeline(job, self.momclient, self.ltaclient)
+                            pl.run()
+
+                        except Exception as e:
+                            self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
+                        finally:
+                            # the 'stub-transferred' file ended up in our local stub LTA
+                            # with the path: ltastubs._local_globus_file_path
+                            #check extension
+                            self.assertEqual('.tar', os.path.splitext(ltastubs._local_globus_file_path)[-1])
+
+                            #check tar contents
+                            tar = subprocess.Popen(['tar', '--list', '-f', ltastubs._local_globus_file_path], stdout=subprocess.PIPE)
+                            tar_file_list, err = tar.communicate()
+                            self.assertEqual(tar.returncode, 0)
+                            logger.info('file list in tar:\n%s', tar_file_list)
 
-
-            if __name__ == '__main__':
-                logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
-                                    level=logging.DEBUG)
-                unittest.main()
+                            self.assertTrue(os.path.basename(test_file_path) in tar_file_list)
+                            self.assertTrue(os.path.basename(raw_test_file_path) in tar_file_list)
+                            logger.info('all expected source files are in tar!')
+
+                            os.remove(test_file_path)
+                            os.remove(raw_test_file_path)
+                            os.removedirs(test_dir_path)
+
+
+                    def test_directory(self):
+                        try:
+                            project_name = 'test-project'
+                            obs_id = 987654321
+                            dpname = 'L%s_SAP000_SB000_uv.MS' % obs_id
+                            test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1(), dpname)
+
+                            def stub_GetStorageTicket(project, filename, filesize, archive_id, job_id, obs_id, check_mom_id=True, id_source='MoM'):
+                                return { 'primary_uri_rnd': 'srm://some.site.name:8443/some/path/data/lofar/ops/projects/%s/%s/%s.tar' % (project, obs_id, dpname),
+                                         'result': 'ok',
+                                         'error': '',
+                                         'ticket': '3E0A47ED860D6339E053B316A9C3BEE2'}
+                            ltamock.GetStorageTicket.side_effect = stub_GetStorageTicket
+
+                            def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
+                                #return unspecified SIP with proper details
+                                from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
+                                return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
+                            mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
+
+                            os.makedirs(test_dir_path)
+                            test_file_paths = []
+                            for i in range(10):
+                                test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
+                                test_file_paths.append(test_file_path)
+                                with open(test_file_path, 'w') as file:
+                                    file.write(1000*'a')
+
+                            job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_dir_path)
+                            logger.info('job xml: %s', job_xml)
+                            job = parseJobXml(job_xml)
+
+                            pl = IngestPipeline(job, self.momclient, self.ltaclient)
+                            pl.run()
+                        except Exception as e:
+                            self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
+                        finally:
+                            # the 'stub-transferred' file ended up in our local stub LTA
+                            # with the path: ltastubs._local_globus_file_path
+                            #check extension
+                            self.assertTrue('.tar' == os.path.splitext(ltastubs._local_globus_file_path)[-1])
+
+                            #check tar contents
+                            tar = subprocess.Popen(['tar', '--list', '-f', ltastubs._local_globus_file_path], stdout=subprocess.PIPE)
+                            tar_file_list, err = tar.communicate()
+                            self.assertEqual(tar.returncode, 0)
+                            logger.info('file list in tar:\n%s', tar_file_list)
+
+                            for test_file_path in test_file_paths:
+                                self.assertTrue(os.path.basename(test_file_path) in tar_file_list)
+                            logger.info('all expected source files are in tar!')
+
+                            for f in os.listdir(test_dir_path):
+                                os.remove(os.path.join(test_dir_path, f))
+                            os.removedirs(test_dir_path)
+
+                    def test_directory_with_odd_dataproduct_name(self):
+                        #sometimes somebody has data in an odd directory
+                        #and gives the dataproduct a different name than its directory
+                        try:
+                            project_name = 'test-project'
+                            obs_id = 987654321
+                            dpname = 'my_funky_dp_name'
+                            test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1(), 'my_data_dir')
+
+                            def stub_uploadDataAndGetSIP(archive_id, storage_ticket, filename, uri, filesize, md5_checksum, adler32_checksum, validate=True):
+                                #return unspecified SIP with proper details
+                                from lofar.lta.ingest.server.unspecifiedSIP import makeSIP
+                                return makeSIP(project_name, obs_id, archive_id, storage_ticket, filename, filesize, md5_checksum, adler32_checksum, 'TEST')
+                            mommock.uploadDataAndGetSIP.side_effect = stub_uploadDataAndGetSIP
+
+                            os.makedirs(test_dir_path)
+                            test_file_paths = []
+                            for i in range(10):
+                                test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
+                                test_file_paths.append(test_file_path)
+                                with open(test_file_path, 'w') as file:
+                                    file.write(1000*'a')
+
+                            job_xml = createJobXml(testname, 123456789, obs_id, dpname, 918273645, 'localhost:%s' % test_dir_path)
+                            logger.info('job xml: %s', job_xml)
+                            job = parseJobXml(job_xml)
+
+                            pl = IngestPipeline(job, self.momclient, self.ltaclient)
+                            pl.run()
+                        except Exception as e:
+                            self.assertTrue(False, 'Unexpected exception in pipeline: %s' % e)
+                        finally:
+                            # the 'stub-transferred' file ended up in our local stub LTA
+                            # with the path: ltastubs._local_globus_file_path
+                            #check extension
+                            self.assertTrue('.tar' == os.path.splitext(ltastubs._local_globus_file_path)[-1])
+
+                            #check tar contents
+                            tar = subprocess.Popen(['tar', '--list', '-f', ltastubs._local_globus_file_path], stdout=subprocess.PIPE)
+                            tar_file_list, err = tar.communicate()
+                            self.assertEqual(tar.returncode, 0)
+                            logger.info('file list in tar:\n%s', tar_file_list)
+
+                            for test_file_path in test_file_paths:
+                                self.assertTrue(os.path.basename(test_file_path) in tar_file_list)
+                            logger.info('all expected source files are in tar!')
+
+                            for f in os.listdir(test_dir_path):
+                                os.remove(os.path.join(test_dir_path, f))
+                            os.removedirs(test_dir_path)
+
+
+                if __name__ == '__main__':
+                    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                                        level=logging.DEBUG)
+                    unittest.main()
 
 except ConnectError as ce:
     logger.error(ce)
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ltacp.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ltacp.py
index 3d872db9ac0..6c239c4acdc 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ltacp.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/test/t_ltacp.py
@@ -1,134 +1,147 @@
 #!/usr/bin/env python
 
+try:
+    import mock
+except ImportError:
+    print 'Cannot run test without python MagicMock'
+    print 'Please install MagicMock: pip install mock'
+    exit(3)
+
 import logging
 import unittest
 import uuid
 import os, os.path
-import mock
-import lofar.lta.ingest.server.ltacp as ltacp
-import ltastubs
 
-logger = logging.getLogger()
+with mock.patch('lofar.lta.ingest.common.srm.convert_surl_to_turl',
+                new=lambda surl: surl.replace('srm', 'gsiftp')):
 
-class TestLtaCp(unittest.TestCase):
-    def setUp(self):
-        ltastubs.stub()
+    import lofar.lta.ingest.server.ltacp as ltacp
+    import ltastubs
 
-    def tearDown(self):
-        ltastubs.un_stub()
+    logger = logging.getLogger()
 
-    def test_path_exists(self):
-        test_file_path = os.path.join(os.getcwd(), str(uuid.uuid1()), 'testfile.txt')
-        os.makedirs(os.path.dirname(test_file_path))
-        with open(test_file_path, 'w') as file:
-            file.write(1000*'a')
+    class TestLtaCp(unittest.TestCase):
+        def setUp(self):
+            ltastubs.stub()
 
-        try:
-            cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
-            self.assertTrue(cp.source_exists())
-        except Exception as e:
-            self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
-        finally:
-            os.remove(test_file_path)
-
-    def test_path_mounted(self):
-        #first test with a valid path, the current working dir + some random dir + file
-        test_file_path = os.path.join(os.getcwd(), str(uuid.uuid1()), 'testfile.txt')
-        cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
-
-        #the path should not exists, but it should be mounted
-        self.assertFalse(cp.source_exists())
-        self.assertTrue(cp.source_mounted())
-
-        #let's try to transfer this file, should not succeed, but raise an exception
-        try:
+        def tearDown(self):
+            ltastubs.un_stub()
+
+        def test_path_exists(self):
+            test_file_path = os.path.join(os.getcwd(), str(uuid.uuid1()), 'testfile.txt')
+            os.makedirs(os.path.dirname(test_file_path))
+            with open(test_file_path, 'w') as file:
+                file.write(1000*'a')
+
+            try:
+                cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
+                self.assertTrue(cp.source_exists())
+            except Exception as e:
+                self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
+            finally:
+                os.remove(test_file_path)
+
+        def test_path_mounted(self):
+            #first test with a valid path, the current working dir + some random dir + file
+            test_file_path = os.path.join(os.getcwd(), str(uuid.uuid1()), 'testfile.txt')
             cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
-            cp.transfer()
-        except ltacp.LtacpException as e:
-            logger.info('caught expected LtacpException: %s', e.value)
-            self.assertTrue('source path' in e.value and 'does not exist' in e.value)
-        except Exception as e:
-            self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
 
+            #the path should not exist, but it should be mounted
+            self.assertFalse(cp.source_exists())
+            self.assertTrue(cp.source_mounted())
+
+            #let's try to transfer this file, should not succeed, but raise an exception
+            try:
+                cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
+                cp.transfer()
+            except ltacp.LtacpException as e:
+                logger.info('caught expected LtacpException: %s', e.value)
+                self.assertTrue('source path' in e.value and 'does not exist' in e.value)
+            except Exception as e:
+                self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
 
-        #repeat same test, but now with a non-mounted disk
-        test_file_path = '/non-existing-root-dir/dir1/dir2/file.txt'
-        cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
-        self.assertFalse(cp.source_mounted())
 
-        #let's try to transfer this file, should not succeed, but raise an exception
-        try:
+            #repeat same test, but now with a non-mounted disk
+            test_file_path = '/non-existing-root-dir/dir1/dir2/file.txt'
             cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
-            cp.transfer()
-        except ltacp.LtacpException as e:
-            logger.info('caught expected LtacpException: %s', e.value)
-            self.assertTrue('the disk of source path' in e.value and 'does not seem to be mounted' in e.value)
-        except Exception as e:
-            self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
-
-    def test_single_file(self):
-        test_file_path = os.path.join(os.getcwd(), str(uuid.uuid1()), 'testfile.txt')
-        os.makedirs(os.path.dirname(test_file_path))
-        with open(test_file_path, 'w') as file:
-            file.write(1000*'a')
-
-        try:
-            with mock.patch('lofar.lta.ingest.server.ltacp.convert_surl_to_turl', new=lambda x: x):
+            self.assertFalse(cp.source_mounted())
+
+            #let's try to transfer this file, should not succeed, but raise an exception
+            try:
+                cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
+                cp.transfer()
+            except ltacp.LtacpException as e:
+                logger.info('caught expected LtacpException: %s', e.value)
+                self.assertTrue('the disk of source path' in e.value and 'does not seem to be mounted' in e.value)
+            except Exception as e:
+                self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
+
+        def test_single_file(self):
+            test_file_path = os.path.join(os.getcwd(), str(uuid.uuid1()), 'testfile.txt')
+            os.makedirs(os.path.dirname(test_file_path))
+            with open(test_file_path, 'w') as file:
+                file.write(1000*'a')
+
+            try:
                 cp = ltacp.LtaCp('localhost', test_file_path, 'srm://fake_surl')
                 md5cs, a32cs, fs = cp.transfer()
                 #it suffices to check only the filesize as transfer result
                 #if the checksums would have been different between source, local, and/or 'lta'
                 #then an exception would have been raised, and that is asserted below
                 self.assertEqual(1000, int(fs))
-        except Exception as e:
-            self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
-        finally:
-            os.remove(test_file_path)
-
-    def test_multiple_files(self):
-        test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
-        os.makedirs(test_dir_path)
-        test_file_paths = []
-        for i in range(10):
-            test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
-            with open(test_file_path, 'w') as file:
-                file.write(1000*'a')
-
-            if i%2==0: #only transfer half the files in the directory
-                test_file_paths.append(test_file_path)
-
-        try:
-            with mock.patch('lofar.lta.ingest.server.ltacp.convert_surl_to_turl', new=lambda x: x):
+            except Exception as e:
+                logger.exception(e)
+                self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
+            finally:
+                os.remove(test_file_path)
+
+        def test_multiple_files(self):
+            test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
+            os.makedirs(test_dir_path)
+            test_file_paths = []
+            for i in range(10):
+                test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
+                with open(test_file_path, 'w') as file:
+                    file.write(1000*'a')
+
+                if i%2==0: #only transfer half the files in the directory
+                    test_file_paths.append(test_file_path)
+
+            try:
                 cp = ltacp.LtaCp('localhost', test_file_paths, 'srm://fake_surl')
                 md5cs, a32cs, fs = cp.transfer()
-        except Exception as e:
-            self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
-        finally:
-            for f in os.listdir(test_dir_path):
-                os.remove(os.path.join(test_dir_path, f))
-            os.removedirs(test_dir_path)
-
-    def test_directory(self):
-        test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
-        os.makedirs(test_dir_path)
-        for i in range(10):
-            test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
-            with open(test_file_path, 'w') as file:
-                file.write(1000*'a')
-
-        try:
-            with mock.patch('lofar.lta.ingest.server.ltacp.convert_surl_to_turl', new=lambda x: x):
+            except Exception as e:
+                self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
+            finally:
+                for f in os.listdir(test_dir_path):
+                    os.remove(os.path.join(test_dir_path, f))
+                os.removedirs(test_dir_path)
+
+        def test_directory(self):
+            test_dir_path = os.path.join(os.getcwd(), 'testdir_%s' % uuid.uuid1())
+            os.makedirs(test_dir_path)
+            for i in range(10):
+                test_file_path = os.path.join(test_dir_path, 'testfile_%s.txt' % i)
+                with open(test_file_path, 'w') as file:
+                    file.write(1000*'a')
+
+            try:
                 cp = ltacp.LtaCp('localhost', test_dir_path, 'srm://fake_surl')
                 md5cs, a32cs, fs = cp.transfer()
-        except Exception as e:
-            self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
-        finally:
-            for f in os.listdir(test_dir_path):
-                os.remove(os.path.join(test_dir_path, f))
-            os.removedirs(test_dir_path)
-
-
-if __name__ == '__main__':
-    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
-                        level=logging.DEBUG)
-    unittest.main()
+            except Exception as e:
+                self.assertTrue(False, 'Unexpected exception in transfer: %s' % e)
+            finally:
+                for f in os.listdir(test_dir_path):
+                    os.remove(os.path.join(test_dir_path, f))
+                os.removedirs(test_dir_path)
+
+
+    if __name__ == '__main__':
+        from subprocess import call
+        if call(['ssh', '-o', 'PasswordAuthentication=no', '-o', 'PubkeyAuthentication=yes', '-o', 'ConnectTimeout=1', 'localhost', 'true']) != 0:
+            print 'this test depends on key-based ssh login to localhost, which is not set up correctly. skipping test...'
+            exit(3)
+        
+        logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                            level=logging.DEBUG)
+        unittest.main()
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py
index 65458fa7e7a..3a1bea6f8f2 100644
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py
@@ -87,7 +87,7 @@ def index():
         nr_of_jobs_in_queue += status_dict['jobs']['scheduled']
         nr_of_jobs_in_queue += status_dict['jobs']['retry']
 
-    body = '''<p style="max-width: 1400px; margin: auto; margin-bottom: 12px; text-align: right;">Help and monitoring: <a href="https://www.astron.nl/lofarwiki/doku.php?id=engineering:software:ingest_services#faq_support" target=_blank>Ingest FAQ</a> / <a href="https://proxy.lofar.eu/zabbix/screens.php?sid=3ffcb45c82da9d9d&form_refresh=1&fullscreen=0&elementid=25&groupid=0&hostid=0" target=_blank>Zabbix ingest network transfer speeds</a> / <a href="https://lofar.astron.nl/birt-viewer/frameset?__report=Ingest.rptdesign&sample=my+parameter" target=_blank>MoM BIRT view of exports</a> / <a href="http://web.grid.sara.nl/cgi-bin/lofar.py" target=_blank>SARA maintenance</a></p>'''
+    body = '''<p style="max-width: 1400px; margin: auto; margin-bottom: 12px; text-align: right;">Help and monitoring: <a href="https://www.astron.nl/lofarwiki/doku.php?id=engineering:software:ingest_services#faq_support" target=_blank>Ingest FAQ</a> / <a href="https://proxy.lofar.eu/zabbix/screens.php?sid=3ffcb45c82da9d9d&form_refresh=1&fullscreen=0&elementid=25&groupid=0&hostid=0" target=_blank>Zabbix ingest network transfer speeds</a> / <a href="https://lofar.astron.nl/birt-viewer/frameset?__report=Ingest.rptdesign&sample=my+parameter" target=_blank>MoM BIRT view of exports</a> / <a href="http://web.grid.sara.nl/cgi-bin/lofar.py" target=_blank>SARA maintenance</a> / <a href="http://scu001.control.lofar:9632/" target=_blank>LTA storage overview</a></p>'''
 
     body += '''<p style="max-width: 1400px; margin: auto; margin-bottom: 8px; font-size: 16px; font-weight: bold">Total #jobs waiting in queue: %s</p>''' % nr_of_jobs_in_queue
     body += '''<table>'''
diff --git a/LTA/ltastorageoverview/CMakeLists.txt b/LTA/ltastorageoverview/CMakeLists.txt
index 1d1e0e0ad79..3ec201ad5d3 100644
--- a/LTA/ltastorageoverview/CMakeLists.txt
+++ b/LTA/ltastorageoverview/CMakeLists.txt
@@ -1,10 +1,9 @@
 # $Id$
 
 lofar_find_package(Python 2.6 REQUIRED)
-lofar_package(ltastorageoverview 0.1 DEPENDS PyCommon)
+lofar_package(ltastorageoverview 0.1 DEPENDS PyCommon LTAIngestClient)
 
 include(PythonInstall)
-set(USE_PYTHON_COMPILATION Off)
 
 add_subdirectory(lib)
 add_subdirectory(bin)
diff --git a/LTA/ltastorageoverview/bin/CMakeLists.txt b/LTA/ltastorageoverview/bin/CMakeLists.txt
index a4da3191e54..49bf80f19d7 100644
--- a/LTA/ltastorageoverview/bin/CMakeLists.txt
+++ b/LTA/ltastorageoverview/bin/CMakeLists.txt
@@ -1,5 +1,11 @@
 # $Id$
 
-install(PROGRAMS
-  ltastorageoverviewscraper
-  DESTINATION bin)
+lofar_add_bin_scripts(ltastorageoverviewscraper
+                      ltastorageoverviewreport
+                      ltastorageoverviewwebservice)
+
+# supervisord config files
+install(FILES
+  ltastorageoverviewscraper.ini
+  ltastorageoverviewwebservice.ini
+  DESTINATION etc/supervisord.d)
diff --git a/LTA/ltastorageoverview/bin/ltastorageoverviewreport b/LTA/ltastorageoverview/bin/ltastorageoverviewreport
new file mode 100755
index 00000000000..53f9a0b86aa
--- /dev/null
+++ b/LTA/ltastorageoverview/bin/ltastorageoverviewreport
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+
+# Copyright (C) 2012-2015  ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+from lofar.lta.ltastorageoverview import report
+
+''' Starts the report generator from ltastorageoverview'''
+if __name__ == "__main__":
+    report.main()
+
diff --git a/LTA/ltastorageoverview/bin/ltastorageoverviewscraper b/LTA/ltastorageoverview/bin/ltastorageoverviewscraper
old mode 100644
new mode 100755
index 94397565639..88951d48f38
--- a/LTA/ltastorageoverview/bin/ltastorageoverviewscraper
+++ b/LTA/ltastorageoverview/bin/ltastorageoverviewscraper
@@ -18,9 +18,9 @@
 # with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
 
 import sys
-from ltastorageoverview import scraper
+from lofar.lta.ltastorageoverview import scraper
 
 ''' Starts the scraper from ltastorageoverview'''
 if __name__ == "__main__":
-    scraper.main(sys.argv[1:])
+    scraper.main()
 
diff --git a/LTA/ltastorageoverview/bin/ltastorageoverviewscraper.ini b/LTA/ltastorageoverview/bin/ltastorageoverviewscraper.ini
new file mode 100644
index 00000000000..12a7e1e7c39
--- /dev/null
+++ b/LTA/ltastorageoverview/bin/ltastorageoverviewscraper.ini
@@ -0,0 +1,8 @@
+[program:ltastorageoverviewscraper]
+command=/bin/bash -c 'source $LOFARROOT/lofarinit.sh;exec ltastorageoverviewscraper --parallel 32'
+user=lofarsys
+stopsignal=INT ; KeyboardInterrupt
+stopasgroup=true ; bash does not propagate signals
+stdout_logfile=%(program_name)s.log
+redirect_stderr=true
+stderr_logfile=NONE
diff --git a/LTA/ltastorageoverview/bin/ltastorageoverviewwebservice b/LTA/ltastorageoverview/bin/ltastorageoverviewwebservice
new file mode 100755
index 00000000000..0a0d9dc4b4c
--- /dev/null
+++ b/LTA/ltastorageoverview/bin/ltastorageoverviewwebservice
@@ -0,0 +1,26 @@
+#!/usr/bin/python
+
+# Copyright (C) 2012-2015  ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+from lofar.lta.ltastorageoverview.webservice import webservice
+
+''' Starts the webservice from ltastorageoverview'''
+if __name__ == "__main__":
+    webservice.main()
+
diff --git a/LTA/ltastorageoverview/bin/ltastorageoverviewwebservice.ini b/LTA/ltastorageoverview/bin/ltastorageoverviewwebservice.ini
new file mode 100644
index 00000000000..a644588fc82
--- /dev/null
+++ b/LTA/ltastorageoverview/bin/ltastorageoverviewwebservice.ini
@@ -0,0 +1,8 @@
+[program:ltastorageoverviewwebservice]
+command=/bin/bash -c 'source $LOFARROOT/lofarinit.sh;exec ltastorageoverviewwebservice'
+user=lofarsys
+stopsignal=INT ; KeyboardInterrupt
+stopasgroup=true ; bash does not propagate signals
+stdout_logfile=%(program_name)s.log
+redirect_stderr=true
+stderr_logfile=NONE
diff --git a/LTA/ltastorageoverview/doc/lta_storage_overview.md b/LTA/ltastorageoverview/doc/lta_storage_overview.md
index 260d6f5f6ea..756e4810268 100644
--- a/LTA/ltastorageoverview/doc/lta_storage_overview.md
+++ b/LTA/ltastorageoverview/doc/lta_storage_overview.md
@@ -1,3 +1,150 @@
 # LTA Storage Overview {#lta_storage_overview}
 
-...
+## General
+
+### Description/Summary
+
+For the LOFAR LTA we have the [LTA catalogue](https://lta.lofar.eu/), which gives an overview of all described dataproducts in the LTA. There are, however, quite a lot of files in the LTA which are not described in the [LTA catalogue](https://lta.lofar.eu/).
+Apart from that, we would like to have an overview of all files/directories on tape/disk in the LTA, and relate that to the current quota which we get each year from SARA, Juelich and Poznan.
+
+So, the LTA Storage Overview services provide the following features:
+ - gather information from the LTA at *file level* for each and every file in the LTA, even those which are not in the catalogue. (via ltastorageoverviewscraper)
+ - provide RO/SOS with 'du-like' information on the available and used storage per site. (via ltastorageoverviewwebservice)
+
+
+It uses [srm](https://sdm.lbl.gov/srm-wg/documents.html) + our grid certificates from the lexars to gather this info. The file/directory tree is stored in a database ('ltaso' at ldb003.control.lofar) and exposed via a simple overview website: http://scu001.control.lofar:9632
+
+### Authors/Owners
+
+- Jorrit Schaap <mailto:schaap@astron.nl>
+
+### Overview
+
+There are 2 services which run individually on scu001 under supervisord.
+Both services use a single postgres database ('ltaso' at ldb003.control.lofar) to store and retrieve the information.
+- service ltastorageoverviewscraper:
+  - This service runs in the background and "scrapes" information from the LTA sites using srmls (via ssh calls to lexar003/lexar004, because only the lexars have grid access and certificates).
+  - The gathered information about files and directories is stored in the ltaso database.
+  - It keeps track of when each directory is visited, and plans a revisit once in a while.
+  - It listens for events from [Ingest](@ref lta_ingest) to schedule a scraper visit for each new directory that an ingest job creates.
+- service ltastorageoverviewwebservice:
+  - Very simple (and slow...) python flask webservice which generates one webpage with an overview of:
+    - amount of data stored at each site (trend, delta/month, pie chart)
+    - amount of quota used
+    - amount of free space left
+
+- - -
+
+## DEVELOPMENT
+
+### Analyses
+This project originated from the need by SOS to have an overview of:
+- what is in the LTA at *file level* (because not every file is in the [LTA catalogue](https://lta.lofar.eu/))
+- the quota set per year per LTA site.
+- summarized tape usage (in (peta)bytes) per site.
+- the free tape space left per site until the end of the quota period.
+- A future requirement might be a (REST?) API to query for certain projects/sites/quotas/timespans etc.
+
+Before this package ltastorageoverview existed, we generated similar overviews using srm to do a tree walk on the LTA sites, but nowadays with a large LTA this takes more than a week to complete. So we needed a background process which does the tree walk, and stores the information in a database. The scraper service was based on this original script/idea.
+
+### Design
+- The software needs to run in the background (standard lofar solution: service under supervisord)
+- The information needs to be instantaneously retrievable (so, use a database; standard lofar database: postgres)
+- Website can be simple (and slow) for now, so in this first phase we chose python flask.
+
+### Source Code
+- [LTA Storage Overview in SVN](https://svn.astron.nl/LOFAR/trunk/LTA/ltastorageoverview/)
+- [LTA Storage Overview Code Documentation](@ref lta_storage_overview)
+
+### Testing
+
+#### Unit Testing
+
+Unit tests are available in:
+    <source-root>/LTA/ltastorageoverview/test
+
+The tests cover:
+- the creation of the ltaso database
+- inserts of sites, files and directories
+- checks on site and directory statistics
+- a minor webservice test
+
+#### Integration Testing
+
+There are no integration tests since these services operate independently of other lofar software.
+The 2 services work on the same (shared) database, so there is some integration there, which is tested in the unit tests.
+
+#### Build & Deploy
+
+##### Build locally
+
+    svn co https://svn.astron.nl/LOFAR/<some_branch_or_trunk> <my_source_dir>
+    cd <my_source_dir>
+    mkdir -p build/gnu_debug
+    cd build/gnu_debug
+    cmake -DBUILD_PACKAGES=ltastorageoverview -DCMAKE_INSTALL_PREFIX=/opt/lofar/ ../..
+    make
+    make install
+
+##### Build using Jenkins
+
+1. Open [the generic CentOS7 LOFAR SubSystems Jenkins project](https://support.astron.nl/jenkins/view/LOFAR%20Subsystems/view/Subsystems%20builds/job/Subsystems_CentOS7/build?delay=0sec)
+2. Select buildhost (defaults to correct buildhostcentos7)
+3. Select the branch you want to build:
+  - For a release/rollout: Select the latest release tag
+  - For a (test) build of a branch: select any branch you like (for example the one you are working on)
+4. Set the MINOR_RELEASE_NR (should be equal to tag minor version number for release/rollout build)
+5. Select SubSystem: RAServices (which should really be named SCU, because it now contains more services than just the resource assigner services)
+6. Click "Build" button, wait, build should finish successfully.
+
+##### Deploy / SubSystems
+
+The lofar package 'ltastorageoverview' is part of the RAServices subsystems package. So building the standard RAServices package and deploying it on scu001 automatically gives you the ltastorageoverview services on scu001 as well.
+
+- - -
+
+## OPERATIONS
+
+### Configuration
+- There are no configuration files, except for the standard supervisord ini files.
+- Both services come with a -h or --help option which explain the available options.
+
+### Log Files
+- Log files are located in the standard location. In this specific case, you can find ltastorageoverviewscraper.log and ltastorageoverviewwebservice.log in scu001.control.lofar:/opt/lofar/var/log/
+
+### Runtime
+- the services run under supervisord on host scu001.control.lofar
+- There is no need to run these services manually from the commandline. (There is no harm in doing so either, even when the services already run under supervisord).
+- It is perfectly safe to stop/start/restart the services at any time. Really, no harm is done. All information is always stored in the database.
+
+### Interfaces (API)
+- These services run standalone and have no external API.
+- These services are not connected to the qpid messagebus.
+- There is a start for a simple REST API in the webservice, but that's only for testing/development purposes. Might be improved when needed by SOS.
+- The only user interface is the website: http://scu001.control.lofar:9632
+
+### Files/Databases
+- A single postgres 9.3+ database called 'ltaso' is used, which runs on ldb003.control.lofar
+- A database create sql script is deployed (along with the python packages) in /opt/lofar/share/ltaso
+- the ltaso database login credentials are stored in the standard lofar credentials location: ~/.lofar/dbcredentials/ltaso.ini
+- No other files and/or databases are needed.
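+
+For ad-hoc inspection you can open the database from python in the same way report.py in this package does: read the credentials via lofar.common.dbcredentials and hand them to store.LTAStorageDb. A minimal sketch (the DBCredentials().get call is an assumption; report.py itself uses the command-line options variant):
+
+    from lofar.common import dbcredentials
+    from lofar.lta.ltastorageoverview import store
+
+    # assumption: direct lookup of the 'LTASO' credentials (~/.lofar/dbcredentials/ltaso.ini)
+    dbcreds = dbcredentials.DBCredentials().get('LTASO')
+
+    with store.LTAStorageDb(dbcreds) as db:
+        for site in db.sites():
+            print site['name'], db.numFilesInSite(site['id'])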
+
+### Dependencies
+- dependencies on 3rd party Python packages
+  - python-flask
+  - psycopg2
+- dependencies on LTA software
+  - the scraper uses srmls to get file/directory information from the LTA sites. It just uses the srm tools and the grid certificates from [Ingest](@ref lta_ingest) via ssh calls to lexar003/lexar004.
+- dependencies on network:
+  - a working ssh connection with key-based login for lofarsys from scu001 to ingest@lexar003 or ingest@lexar004
+- dependencies on QPID:
+  - the scraper listens for events from [Ingest](@ref lta_ingest) via qpid.
+    - the exchange 'lofar.lta.ingest.notification' is federated from lexar003 to scu001 (so all services on scu001 can listen for ingest events)
+    - the exchange 'lofar.lta.ingest.notification' on scu001 is routed to queue 'lofar.lta.ingest.notification.for.ltastorageoverview' on which the scraper listens. We use a dedicated queue for the scraper so that no events are lost, and all ingested data is found as quickly as possible by a scraper visit.
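+
+In code, attaching to that dedicated queue boils down to instantiating the IngestEventHandler that is added in this package. The sketch below is simplified: the context-manager/sleep wiring is an assumption about the listener base class, and the real scraper service sets this up through its own entry point under supervisord.
+
+    from time import sleep
+    from lofar.common import dbcredentials
+    from lofar.lta.ltastorageoverview.ingesteventhandler import IngestEventHandler, DEFAULT_INGEST_NOTIFICATION_QUEUE
+
+    dbcreds = dbcredentials.DBCredentials().get('LTASO')  # assumption: direct credentials lookup
+    handler = IngestEventHandler(dbcreds, busname=DEFAULT_INGEST_NOTIFICATION_QUEUE)
+
+    with handler:     # assumption: the listener starts/stops via the context manager
+        while True:
+            sleep(1)  # incoming notifications are handled on the listener's own thread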
+
+### Security
+- It is assumed that the grid certificates for user 'ingest' on lexar003/lexar004 are in place and valid. If not, contact holties@astron.nl or schaap@astron.nl
+- the ltaso database login credentials are stored in the standard lofar credentials location: ~/.lofar/dbcredentials/ltaso.ini
+
+
diff --git a/LTA/ltastorageoverview/lib/CMakeLists.txt b/LTA/ltastorageoverview/lib/CMakeLists.txt
index 799e321a88c..8f6241c4ebb 100644
--- a/LTA/ltastorageoverview/lib/CMakeLists.txt
+++ b/LTA/ltastorageoverview/lib/CMakeLists.txt
@@ -4,16 +4,19 @@ python_install(
     __init__.py
     scraper.py
     store.py
-    create_db_ltastorageoverview.sql
     report.py
+    ingesteventhandler.py
     webservice/webservice.py
     webservice/__init__.py
-    DESTINATION ltastorageoverview)
+    DESTINATION lofar/lta/ltastorageoverview)
+
+set(sql_files ltaso/create_db_ltastorageoverview.sql)
+lofar_add_data_files(${sql_files})
 
 set(web_files webservice/templates/index.html)
 
 install(FILES ${web_files}
-  DESTINATION ${PYTHON_INSTALL_DIR}/ltastorageoverview/webservice/templates/)
+  DESTINATION ${PYTHON_INSTALL_DIR}/lofar/lta/ltastorageoverview/webservice/templates/)
 
 foreach(web_file ${web_files})
     get_filename_component(web_file_path ${web_file} PATH)
diff --git a/LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql b/LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql
deleted file mode 100644
index 4a87ff64211..00000000000
--- a/LTA/ltastorageoverview/lib/create_db_ltastorageoverview.sql
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
-# Copyright (C) 2012-2015  asTRON (Netherlands Institute for Radio Astronomy)
-# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
-#
-# This file is part of the LOFAR software suite.
-# The LOFAR software suite is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as published
-# by the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# The LOFAR software suite is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
-*/
-
--- $Id$
-
--- sqlite3 create script for ltastorageoverview database
-
-PRAGMA foreign_keys = ON;
-
-create table storage_site (
-    id                  integer primary key autoincrement unique not null,
-    name                text unique not null,
-    url                 text not null);
-
-create index ss_name_idx on storage_site(name);
-
-create table directory (
-    id                  integer primary key autoincrement unique not null,
-    name                text key not null COLLATE NOCASE,
-    parent_directory_id integer,
-    foreign key (parent_directory_id) references directory(id) );
-
-create table directory_closure (
-    ancestor_id         integer not null,
-    descendant_id       integer not null,
-    depth               integer not null,
-    primary key (ancestor_id, descendant_id)
-    foreign key (ancestor_id) references directory(id)
-    foreign key (descendant_id) references directory(id) );
-
-create index dc_ancestor_id_idx on directory_closure(ancestor_id);
-create index dc_descendant_id_idx on directory_closure(descendant_id);
-create index dc_depth_idx on directory_closure(depth);
-
-create trigger directory_closure_trigger
-    after insert on directory
-    begin
-        insert into directory_closure (ancestor_id, descendant_id, depth) values (new.id, new.id, 0) ;
-
-        insert into directory_closure (ancestor_id, descendant_id, depth)
-            select p.ancestor_id, c.descendant_id, p.depth+c.depth+1
-            from directory_closure p, directory_closure c
-            where p.descendant_id=new.parent_directory_id and c.ancestor_id=new.id ;
-    end;
-
-create table storage_site_root (
-    storage_site_id                 integer not null,
-    directory_id                        integer not null,
-    primary key (storage_site_id, directory_id),
-    foreign key (storage_site_id) references storage_site(id),
-    foreign key (directory_id) references directory(id) );
-
-create index ssr_storage_site_id_idx on storage_site_root(storage_site_id);
-create index ssr_directory_id_idx on storage_site_root(directory_id);
-
-create table fileinfo (
-    id                          integer primary key autoincrement not null,
-    name                        text key not null,
-    size                        integer not null,
-    creation_date               datetime not null,
-    directory_id                integer not null,
-    foreign key (directory_id)  references directory(id) );
-
-create index fi_directory_id_idx on fileinfo(directory_id);
-create index fi_creation_date_idx on fileinfo(creation_date);
-
-create table directory_stats (
-    id                      integer primary key autoincrement unique not null,
-    directory_id            integer unique not null,
-    num_files               integer,
-    total_file_size         integer,
-    min_file_size           integer,
-    max_file_size           integer,
-    min_file_creation_date  datetime,
-    max_file_creation_date  datetime,
-    foreign key (directory_id) references directory(id) );
-
-create index ds_directory_id_idx on directory_stats(directory_id);
-create index ds_min_file_creation_date_idx on directory_stats(min_file_creation_date);
-create index ds_max_file_creation_date_idx on directory_stats(max_file_creation_date);
-
-create table _temp_fileinfo_for_dir_stats (
-    size                        integer not null,
-    creation_date               datetime not null );
-
-create trigger fileinfo_to_directory_stats_trigger
-    after insert on fileinfo
-    begin
-        insert or ignore into directory_stats (directory_id)
-        values (new.directory_id) ;
-
-        delete from _temp_fileinfo_for_dir_stats ;
-
-        insert into _temp_fileinfo_for_dir_stats
-          select fileinfo.size, fileinfo.creation_date from fileinfo
-          where directory_id = new.directory_id ;
-
-         update directory_stats set
-            num_files=(select count(size) from _temp_fileinfo_for_dir_stats),
-            total_file_size=(select sum(size) from _temp_fileinfo_for_dir_stats),
-            min_file_size=(select min(size) from _temp_fileinfo_for_dir_stats),
-            max_file_size=(select max(size) from _temp_fileinfo_for_dir_stats),
-            min_file_creation_date=(select min(creation_date) from _temp_fileinfo_for_dir_stats),
-            max_file_creation_date=(select max(creation_date) from _temp_fileinfo_for_dir_stats)
-         where directory_id = new.directory_id ;
-    end;
-
-create table project (
-    id              integer primary key autoincrement unique not null,
-    name            text unique not null);
-
-create index project_name_idx on project(name);
-
-create table project_top_level_directory (
-    project_id      integer,
-    directory_id    integer,
-    primary key (project_id, directory_id)
-    foreign key (project_id) references project(id)
-    foreign key (directory_id) references directory(id) );
-
-
-
-create table scraper_last_directory_visit (
-    directory_id       integer not null,
-    visit_date         datetime not null,
-    primary key (directory_id)
-    foreign key (directory_id) references directory(id) );
-
-create view root_directories as
-    select dir.id as dir_id, dir.name as dir_name, ss.id as site_id, ss.name as site_name
-        from storage_site_root
-        join directory dir on dir.id = storage_site_root.directory_id
-        join storage_site ss on ss.id = storage_site_root.storage_site_id ;
-
-create view site_directory_tree as
-    select rootdir.site_id as site_id,
-        rootdir.site_name as site_name,
-        rootdir.dir_id as rootdir_id,
-        rootdir.dir_name as rootdir_name,
-        dir.id as dir_id,
-        dir.name as dir_name,
-        dir.parent_directory_id as parent_directory_id,
-        dc.depth as depth
-        from root_directories rootdir
-        inner join directory_closure dc on dc.ancestor_id = rootdir.dir_id
-        inner join directory dir on dc.descendant_id = dir.id ;
-
-create view site_scraper_last_directoy_visit as
-    select rootdir.site_id as site_id,
-        rootdir.site_name as site_name,
-        dir.id as dir_id,
-        dir.name as dir_name,
-        sldv.visit_date as last_visit
-        from root_directories rootdir
-        inner join directory_closure dc on dc.ancestor_id = rootdir.dir_id
-        inner join directory dir on dc.descendant_id = dir.id
-        inner join scraper_last_directory_visit sldv on sldv.directory_id = dir.id ;
-
-create view site_directory_file as
-    select site.id as site_id,
-        site.name as site_name,
-        dir.id as dir_id,
-        dir.name as dir_name,
-        fileinfo.id as file_id,
-        fileinfo.name as file_name,
-        fileinfo.size as file_size,
-        fileinfo.creation_date as file_creation_date
-        from storage_site site
-        join storage_site_root on storage_site_root.storage_site_id = site.id
-        inner join directory_closure dc on dc.ancestor_id = storage_site_root.directory_id
-        inner join directory dir on dc.descendant_id = dir.id
-        inner join fileinfo on fileinfo.directory_id = dir.id ;
-
-create view project_directory as
-    select
-        project.id as project_id,
-        project.name as project_name,
-        dir.id as dir_id,
-        dir.name as dir_name
-        from project_top_level_directory
-        inner join project on project.id = project_top_level_directory.project_id
-        inner join directory_closure dc on dc.ancestor_id = project_top_level_directory.directory_id
-        inner join directory dir on dc.descendant_id = dir.id ;
-
-create view project_directory_stats as
-    select * from project_directory
-    inner join directory_stats ds on ds.directory_id = project_directory.dir_id ;
-
diff --git a/LTA/ltastorageoverview/lib/ingesteventhandler.py b/LTA/ltastorageoverview/lib/ingesteventhandler.py
new file mode 100755
index 00000000000..5cb762ea3fb
--- /dev/null
+++ b/LTA/ltastorageoverview/lib/ingesteventhandler.py
@@ -0,0 +1,110 @@
+# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+# $Id$
+
+from lofar.lta.ltastorageoverview import store
+from lofar.lta.ingest.common.srm import *
+from lofar.lta.ingest.client.ingestbuslistener import IngestBusListener
+from lofar.lta.ingest.common.config import DEFAULT_INGEST_NOTIFICATION_SUBJECTS
+from lofar.lta.ingest.common.config import DEFAULT_BROKER
+from lofar.messaging import adaptNameToEnvironment
+from datetime import datetime  # used in _mark_directory_for_a_visit (datetime.fromtimestamp)
+
+import logging
+logger = logging.getLogger(__name__)
+
+DEFAULT_INGEST_NOTIFICATION_QUEUE = adaptNameToEnvironment('lofar.lta.ingest.notification.for.ltastorageoverview')
+
+
+class IngestEventHandler(IngestBusListener):
+    def __init__(self, dbcreds,
+                 busname=DEFAULT_INGEST_NOTIFICATION_QUEUE,
+                 subjects=DEFAULT_INGEST_NOTIFICATION_SUBJECTS,
+                 broker=DEFAULT_BROKER):
+        self._dbcreds = dbcreds
+        super(IngestEventHandler, self).__init__(busname=busname, subjects=subjects, broker=broker)
+
+    def onJobFinished(self, job_dict):
+        """onJobFinished is called upon receiving a JobFinished message.
+        In this IngestEventHandler, it calls _schedule_srmurl_for_visit to schedule the finished surl for a scraper visit.
+        :param job_dict: dictionary with the finished job"""
+        self._logJobNotification('finished', job_dict)
+        self._schedule_srmurl_for_visit(job_dict.get('srm_url'))
+
+    def onTaskFinished(self, task_dict):
+        """onTaskFinished is called upon receiving a TaskFinished message. (when all dataproducts of a observation/pipeline were ingested)
+        In this IngestEventHandler, it calls _schedule_srmurl_for_visit to schedule the finished surl for a scraper visit.
+        :param task_dict: dictionary with the finished task"""
+        self._logJobNotification('task finished', task_dict)
+        self._schedule_srmurl_for_visit(task_dict.get('srm_url'))
+
+    def _schedule_srmurl_for_visit(self, srm_url):
+        """process the given srm_url, insert it in the db if needed, and mark it as not visited,
+        so that the scraper will visit it soon.
+        :param srm_url: a valid srm url like: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+        :return: None
+        """
+        if srm_url:
+            with store.LTAStorageDb(self._dbcreds) as db:
+                site = self._get_site_from_db(srm_url)
+                dir_path = get_dir_path_in_site(srm_url)
+                directory = db.directoryByName(dir_path, site['id'])
+
+                if directory is None:
+                    dir_id = self._insert_missing_directory_tree_if_needed(srm_url).get(dir_path)
+                else:
+                    dir_id = directory.get('dir_id')
+
+                if dir_id is not None:
+                    self._mark_directory_for_a_visit(dir_id)
+
+    def _mark_directory_for_a_visit(self, dir_id):
+        """
+        update the directory's last visit time to unix-epoch (which is the lowest possible visit timestamp), so that it
+        appears in the visitStats which are used by the scraper to determine the next directory to be visited.
+        :param int dir_id: the id of the directory
+        :return: None
+        """
+        with store.LTAStorageDb(self._dbcreds) as db:
+            return db.updateDirectoryLastVisitTime(dir_id, datetime.fromtimestamp(0))
+
+    def _get_site_from_db(self, srm_url):
+        """
+        find the site entry in the database for the given srm_url.
+        raises a lookup error if not found.
+        :param string srm_url: a valid srm url
+        :return: a site entry dict from the database
+        """
+        site_url = get_site_surl(srm_url)
+
+        # find site in db
+        with store.LTAStorageDb(self._dbcreds) as db:
+            site = next((s for s in db.sites() if s['url'] == site_url), None)
+            if site is None:
+                raise LookupError('Could not find site %s in database %s' % (site_url, self._dbcreds.database))
+            return site
+
+    def _insert_missing_directory_tree_if_needed(self, srm_url):
+        # example url: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884/L652884_SAP000_B000_P001_bf_e619e5da.tar
+        # or for a dir: srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+        # site_url then becomes: srm://lofar-srm.fz-juelich.de:8443
+        # dir_path then becomes: /pnfs/fz-juelich.de/data/lofar/ops/projects/lc8_029/652884
+        site = self._get_site_from_db(srm_url)
+        dir_path = get_dir_path_in_site(srm_url)
+
+        with store.LTAStorageDb(self._dbcreds) as db:
+            return db.insert_missing_directory_tree_if_needed(dir_path, site['id'])
diff --git a/LTA/ltastorageoverview/lib/ltaso/create_db_ltastorageoverview.sql b/LTA/ltastorageoverview/lib/ltaso/create_db_ltastorageoverview.sql
new file mode 100644
index 00000000000..ed68325dce5
--- /dev/null
+++ b/LTA/ltastorageoverview/lib/ltaso/create_db_ltastorageoverview.sql
@@ -0,0 +1,952 @@
+/*
+# Copyright (C) 2012-2015  ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+-- $Id$
+
+-- postgresql create script for ltastorageoverview database
+
+-- run from command line as:
+-- psql ltaso -f create_db_ltastorageoverview.sql -W
+
+-- \set VERBOSITY terse
+
+BEGIN;
+
+DROP SCHEMA IF EXISTS lta CASCADE;
+DROP SCHEMA IF EXISTS scraper CASCADE;
+DROP SCHEMA IF EXISTS metainfo CASCADE;
+
+CREATE SCHEMA lta;
+CREATE SCHEMA scraper;
+CREATE SCHEMA metainfo;
+
+-- TABLES
+
+CREATE TABLE lta.site (
+    id                  serial,
+    name                text UNIQUE NOT NULL,
+    url                 text UNIQUE NOT NULL,
+    PRIMARY KEY (id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX ss_name_idx on lta.site(name);
+
+CREATE TABLE lta.directory (
+    id                  serial,
+    name                text NOT NULL,
+    parent_dir_id       integer REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    PRIMARY KEY (id),
+    UNIQUE (name, parent_dir_id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX d_parent_dir_id_idx on lta.directory(parent_dir_id);
+CREATE INDEX d_name_idx on lta.directory(name);
+
+CREATE TABLE lta.directory_closure (
+    ancestor_id         integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    descendant_id       integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    depth               integer NOT NULL,
+    primary key (ancestor_id, descendant_id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX dc_ancestor_id_idx on lta.directory_closure(ancestor_id);
+CREATE INDEX dc_descendant_id_idx on lta.directory_closure(descendant_id);
+CREATE INDEX dc_depth_idx on lta.directory_closure(depth);
+
+CREATE TABLE lta.fileinfo (
+    id                  serial,
+    name                text NOT NULL,
+    size                bigint NOT NULL,
+    creation_date       timestamp without time zone NOT NULL,
+    dir_id              integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    PRIMARY KEY (id),
+    UNIQUE (name, dir_id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX fi_dir_id_idx on lta.fileinfo(dir_id);
+CREATE INDEX fi_creation_date_idx on lta.fileinfo(creation_date);
+CREATE INDEX fi_name_idx on lta.fileinfo(name);
+
+CREATE TABLE lta.site_root_dir (
+    site_id       integer NOT NULL REFERENCES lta.site ON DELETE CASCADE DEFERRABLE INITIALLY IMMEDIATE,
+    root_dir_id   integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    primary key (site_id, root_dir_id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX ssr_site_id_idx on lta.site_root_dir(site_id);
+CREATE INDEX ssr_root_dir_id_idx on lta.site_root_dir(root_dir_id);
+
+CREATE TABLE lta.site_quota (
+    id                  serial,
+    site_id             integer NOT NULL REFERENCES lta.site ON DELETE CASCADE DEFERRABLE INITIALLY IMMEDIATE,
+    quota               bigint NOT NULL,
+    valid_until_date    timestamp without time zone NOT NULL,
+    primary key (id)
+) WITH (OIDS=FALSE);
+
+CREATE TABLE lta.quota_root_dirs (
+    site_id             integer NOT NULL REFERENCES lta.site ON DELETE CASCADE DEFERRABLE INITIALLY IMMEDIATE,
+    root_dir_id         integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    primary key (site_id, root_dir_id)
+);
+
+CREATE TABLE lta._directory_update_cache (
+    dir_id              integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    PRIMARY KEY (dir_id)
+) WITH (OIDS=FALSE);
+
+CREATE TABLE scraper.last_directory_visit (
+    id                  serial,
+    dir_id              integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    visit_date          timestamp without time zone NOT NULL DEFAULT '1970-01-01',
+    PRIMARY KEY (id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX ldv_dir_id_idx on scraper.last_directory_visit(dir_id);
+CREATE INDEX ldv_visit_date_idx on scraper.last_directory_visit(visit_date);
+
+CREATE TABLE metainfo.stats (
+    id serial,
+    dir_id integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    dir_num_files integer DEFAULT 0 NOT NULL,
+    dir_total_file_size bigint DEFAULT 0 NOT NULL,
+    dir_min_file_size bigint DEFAULT 0 NOT NULL,
+    dir_max_file_size bigint DEFAULT 0 NOT NULL,
+    dir_min_file_creation_date timestamp without time zone DEFAULT NULL,
+    dir_max_file_creation_date timestamp without time zone DEFAULT NULL,
+    tree_num_files integer DEFAULT 0 NOT NULL,
+    tree_total_file_size bigint DEFAULT 0 NOT NULL,
+    tree_min_file_size bigint DEFAULT NULL,
+    tree_max_file_size bigint DEFAULT NULL,
+    tree_min_file_creation_date timestamp without time zone DEFAULT NULL,
+    tree_max_file_creation_date timestamp without time zone DEFAULT NULL,
+    PRIMARY KEY (id)
+);
+
+CREATE INDEX stats_dir_id_idx on metainfo.stats(dir_id);
+CREATE INDEX stats_dir_min_file_creation_date_idx on metainfo.stats(dir_min_file_creation_date);
+CREATE INDEX stats_dir_max_file_creation_date_idx on metainfo.stats(dir_max_file_creation_date);
+CREATE INDEX stats_tree_min_file_creation_date_idx on metainfo.stats(tree_min_file_creation_date);
+CREATE INDEX stats_tree_max_file_creation_date_idx on metainfo.stats(tree_max_file_creation_date);
+
+CREATE TABLE metainfo.project (
+    id              serial,
+    name            text UNIQUE NOT NULL,
+    PRIMARY KEY (id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX project_name_idx on metainfo.project(name);
+
+CREATE TABLE metainfo.project_top_level_directory (
+    project_id      integer NOT NULL REFERENCES metainfo.project ON DELETE CASCADE DEFERRABLE INITIALLY IMMEDIATE,
+    dir_id    integer NOT NULL REFERENCES lta.directory ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    primary key (project_id, dir_id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX ptld_project_id_idx on metainfo.project_top_level_directory(project_id);
+CREATE INDEX ptld_dir_id_idx on metainfo.project_top_level_directory(dir_id);
+
+CREATE TABLE metainfo.observation (
+    id              int, -- sas id, like 'L123456', but then as integer, so 123456
+    PRIMARY KEY (id)
+) WITH (OIDS=FALSE);
+
+CREATE TABLE metainfo.project_observation (
+    project_id      integer NOT NULL REFERENCES metainfo.project ON DELETE CASCADE DEFERRABLE INITIALLY IMMEDIATE,
+    observation_id  integer NOT NULL REFERENCES metainfo.observation ON DELETE CASCADE DEFERRABLE INITIALLY IMMEDIATE,
+    PRIMARY KEY (project_id, observation_id)
+) WITH (OIDS=FALSE);
+
+CREATE TABLE metainfo.dataproduct (
+    id              serial,
+    fileinfo_id     integer NOT NULL REFERENCES lta.fileinfo ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    observation_id  integer NOT NULL REFERENCES metainfo.observation ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+    name            text NOT NULL,
+    PRIMARY KEY (id)
+) WITH (OIDS=FALSE);
+
+CREATE INDEX dp_dataproduct_name_idx on metainfo.dataproduct(name);
+CREATE INDEX dp_fileinfo_id_idx on metainfo.dataproduct(fileinfo_id);
+
+-- END TABLES
+
+
+-- TRIGGERS
+
+CREATE OR REPLACE FUNCTION lta.on_site_root_dir_deleted_do_delete_directory()
+RETURNS trigger AS
+$BODY$
+BEGIN
+    DELETE FROM lta.directory WHERE id = OLD.root_dir_id;
+    RETURN OLD;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_site_root_dir_deleted_do_delete_directory
+AFTER DELETE
+ON lta.site_root_dir
+FOR EACH ROW
+EXECUTE PROCEDURE lta.on_site_root_dir_deleted_do_delete_directory();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION lta.on_directory_inserted_add_directory_closure_entry()
+RETURNS trigger AS
+$BODY$
+BEGIN
+ INSERT INTO lta.directory_closure (ancestor_id, descendant_id, depth) values (NEW.id, NEW.id, 0) ;
+
+ INSERT INTO lta.directory_closure (ancestor_id, descendant_id, depth)
+     SELECT p.ancestor_id, c.descendant_id, p.depth+c.depth+1
+     FROM lta.directory_closure p, lta.directory_closure c
+     WHERE p.descendant_id=new.parent_dir_id AND c.ancestor_id=new.id ;
+
+ RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_directory_inserted_add_directory_closure_entry
+AFTER INSERT
+ON lta.directory
+FOR EACH ROW
+EXECUTE PROCEDURE lta.on_directory_inserted_add_directory_closure_entry();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION scraper.on_directory_inserted_add_last_directory_visit_entry()
+RETURNS trigger AS
+$BODY$
+BEGIN
+    --RAISE NOTICE 'on_directory_inserted_add_last_directory_visit_entry, NEW=%', NEW;
+    --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+    INSERT INTO scraper.last_directory_visit(dir_id)
+    (SELECT NEW.id WHERE NOT EXISTS (SELECT dir_id FROM scraper.last_directory_visit WHERE dir_id = NEW.id));
+
+    RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_directory_inserted_add_last_directory_visit_entry
+AFTER INSERT
+ON lta.directory
+FOR EACH ROW
+EXECUTE PROCEDURE scraper.on_directory_inserted_add_last_directory_visit_entry();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION scraper.on_site_root_dir_inserted_do_add_to_quota_root_dirs()
+RETURNS trigger AS
+$BODY$
+BEGIN
+    -- by default, add each root directory as 'directory under quota'
+    -- users can remove them by hand
+    INSERT INTO lta.quota_root_dirs(site_id, root_dir_id)
+    VALUES (NEW.site_id, NEW.root_dir_id);
+
+ RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_site_root_dir_inserted_do_add_to_quota_root_dirs
+AFTER INSERT
+ON lta.site_root_dir
+FOR EACH ROW
+EXECUTE PROCEDURE scraper.on_site_root_dir_inserted_do_add_to_quota_root_dirs();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION metainfo.on_directory_inserted_add_stats_entry()
+RETURNS trigger AS
+$BODY$
+BEGIN
+    --RAISE NOTICE 'on_directory_inserted_add_stats_entry, NEW=%', NEW;
+    INSERT INTO metainfo.stats(dir_id) values (NEW.id);
+
+    -- always trim trailing slashes from dirname
+    NEW.name := trim(trailing '/' from NEW.name);
+
+    RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_directory_inserted_add_stats_entry
+BEFORE INSERT
+ON lta.directory
+FOR EACH ROW
+EXECUTE PROCEDURE metainfo.on_directory_inserted_add_stats_entry();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION metainfo.on_fileinfo_insert_update_delete_store_in_cache()
+RETURNS trigger AS
+$BODY$
+BEGIN
+    IF TG_OP = 'DELETE' THEN
+        INSERT INTO lta._directory_update_cache (dir_id) VALUES (OLD.dir_id);
+        RETURN OLD;
+    END IF;
+
+    --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+    INSERT INTO lta._directory_update_cache (dir_id)
+    (SELECT NEW.dir_id WHERE NOT EXISTS (SELECT dir_id FROM lta._directory_update_cache WHERE dir_id = NEW.dir_id));
+
+    RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_fileinfo_insert_update_delete_store_in_cache
+AFTER INSERT OR UPDATE OR DELETE
+ON lta.fileinfo
+FOR EACH ROW
+EXECUTE PROCEDURE metainfo.on_fileinfo_insert_update_delete_store_in_cache();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION metainfo.on_directory_update_cache_commit_do_update_dir_stats()
+RETURNS trigger AS
+$BODY$
+DECLARE
+    fileinfo_row lta.fileinfo%ROWTYPE;
+    _dir_id integer;
+    _dir_num_files bigint;
+    _dir_total_file_size bigint;
+    _dir_min_file_size bigint;
+    _dir_max_file_size bigint;
+    _dir_min_file_creation_date timestamp without time zone;
+    _dir_max_file_creation_date timestamp without time zone;
+BEGIN
+    FOR _dir_id in (SELECT DISTINCT(c.dir_id) FROM lta._directory_update_cache c) LOOP
+        _dir_num_files := 0;
+        _dir_total_file_size := 0;
+        _dir_min_file_size := NULL;
+        _dir_max_file_size := NULL;
+
+        -- aggregate results
+        FOR fileinfo_row IN (SELECT * FROM lta.fileinfo fi where fi.dir_id = _dir_id) LOOP
+            _dir_num_files := _dir_num_files + 1;
+            _dir_total_file_size := _dir_total_file_size + fileinfo_row.size;
+            _dir_min_file_size := LEAST(_dir_min_file_size, fileinfo_row.size);
+            _dir_max_file_size := GREATEST(_dir_max_file_size, fileinfo_row.size);
+            _dir_min_file_creation_date := LEAST(_dir_min_file_creation_date, fileinfo_row.creation_date);
+            _dir_max_file_creation_date := GREATEST(_dir_max_file_creation_date, fileinfo_row.creation_date);
+        END LOOP;
+
+        UPDATE metainfo.stats
+        SET (dir_num_files, dir_total_file_size, dir_min_file_size, dir_max_file_size, dir_min_file_creation_date, dir_max_file_creation_date) =
+            (_dir_num_files, _dir_total_file_size, _dir_min_file_size, _dir_max_file_size, _dir_min_file_creation_date, _dir_max_file_creation_date)
+        WHERE dir_id = _dir_id;
+
+        DELETE FROM lta._directory_update_cache WHERE dir_id = _dir_id;
+    END LOOP;
+
+    RETURN NULL;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+-- use DEFERRABLE INITIALLY DEFERRED CONSTRAINT trigger which fires only once upon committing the file inserts
+-- then run method on_directory_update_cache_commit_do_update_dir_stats to collect all inserted fileinfo's into dir/tree stats
+CREATE CONSTRAINT TRIGGER trigger_on_directory_update_cache_commit_do_update_dir_stats
+AFTER INSERT
+ON lta._directory_update_cache
+DEFERRABLE INITIALLY DEFERRED
+FOR EACH ROW
+EXECUTE PROCEDURE metainfo.on_directory_update_cache_commit_do_update_dir_stats();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION metainfo.on_dir_stats_update_do_update_tree_stats()
+RETURNS trigger AS
+$BODY$
+DECLARE
+    stats_row metainfo.stats%ROWTYPE;
+BEGIN
+    -- initialize the NEW.tree_* variables with this dir's dir_stats...
+    NEW.tree_num_files := NEW.dir_num_files;
+    NEW.tree_total_file_size := NEW.dir_total_file_size;
+    NEW.tree_min_file_size := NEW.dir_min_file_size;
+    NEW.tree_max_file_size := NEW.dir_max_file_size;
+    NEW.tree_min_file_creation_date := NEW.dir_min_file_creation_date;
+    NEW.tree_max_file_creation_date := NEW.dir_max_file_creation_date;
+
+    -- loop over the tree stats from all filled subdirs of this directory
+    -- and aggregate them to the new_tree_* variables
+    FOR stats_row IN SELECT st.* FROM metainfo.stats st
+                     INNER JOIN lta.directory dir ON dir.id = st.dir_id
+                     WHERE dir.parent_dir_id = NEW.dir_id
+                     AND tree_max_file_creation_date IS NOT NULL    
+                     AND dir_max_file_creation_date IS NOT NULL LOOP
+
+        -- aggregate
+        NEW.tree_num_files := NEW.tree_num_files + stats_row.tree_num_files;
+        NEW.tree_total_file_size := NEW.tree_total_file_size + stats_row.tree_total_file_size;
+        NEW.tree_min_file_size := LEAST(NEW.tree_min_file_size, stats_row.tree_min_file_size);
+        NEW.tree_max_file_size := GREATEST(NEW.tree_max_file_size, stats_row.tree_max_file_size);
+        NEW.tree_min_file_creation_date := LEAST(NEW.tree_min_file_creation_date, stats_row.tree_min_file_creation_date);
+        NEW.tree_max_file_creation_date := GREATEST(NEW.tree_max_file_creation_date, stats_row.tree_max_file_creation_date);
+    END LOOP;
+
+    -- return the NEW row with the updated tree_* variables
+    RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_dir_stats_update_do_update_tree_stats
+BEFORE UPDATE OF dir_num_files, dir_total_file_size, dir_min_file_size, dir_max_file_size, dir_min_file_creation_date, dir_max_file_creation_date
+ON metainfo.stats
+FOR EACH ROW
+EXECUTE PROCEDURE metainfo.on_dir_stats_update_do_update_tree_stats();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION metainfo.on_stats_update_do_update_parents_tree_stats()
+RETURNS trigger AS
+$BODY$
+DECLARE
+    stats_row metainfo.stats%ROWTYPE;
+    parent_stats_row metainfo.stats%ROWTYPE;
+    new_tree_num_files bigint;
+    new_tree_total_file_size bigint;
+    new_tree_min_file_size bigint;
+    new_tree_max_file_size bigint;
+    new_tree_min_file_creation_date timestamp without time zone;
+    new_tree_max_file_creation_date timestamp without time zone;
+BEGIN
+    -- climb up the tree until at root, start with the first direct parent
+    SELECT st.* FROM metainfo.stats st
+    INNER JOIN lta.directory dir on dir.parent_dir_id = st.dir_id
+    WHERE dir.id = NEW.dir_id
+    LIMIT 1
+    INTO parent_stats_row;
+
+    --loop and climb further up the tree until at root
+    WHILE parent_stats_row.id IS NOT NULL LOOP
+        -- initialize all new_tree_* vars with the current parent_stats_row's values or 0/null.
+        new_tree_num_files := GREATEST(0, parent_stats_row.dir_num_files);
+        new_tree_total_file_size := GREATEST(0, parent_stats_row.dir_total_file_size);
+        new_tree_min_file_size := parent_stats_row.tree_min_file_size;
+        new_tree_max_file_size := parent_stats_row.tree_max_file_size;
+        new_tree_min_file_creation_date := parent_stats_row.tree_min_file_creation_date;
+        new_tree_max_file_creation_date := parent_stats_row.tree_max_file_creation_date;
+
+        -- loop over the tree stats from all filled subdirs of the parent's directory
+        -- and aggregate them to the new_tree_* variables
+        FOR stats_row in SELECT st.* FROM metainfo.stats st
+                         INNER JOIN lta.directory dir ON dir.id = st.dir_id
+                         WHERE dir.parent_dir_id = parent_stats_row.dir_id LOOP
+
+            -- aggregate
+            new_tree_num_files := new_tree_num_files + stats_row.tree_num_files;
+            new_tree_total_file_size := new_tree_total_file_size + stats_row.tree_total_file_size;
+            new_tree_min_file_size := LEAST(new_tree_min_file_size, stats_row.tree_min_file_size);
+            new_tree_max_file_size := GREATEST(new_tree_max_file_size, stats_row.tree_max_file_size);
+            new_tree_min_file_creation_date := LEAST(new_tree_min_file_creation_date, stats_row.tree_min_file_creation_date);
+            new_tree_max_file_creation_date := GREATEST(new_tree_max_file_creation_date, stats_row.tree_max_file_creation_date);
+        END LOOP;
+
+        -- and update the parent stats row with the aggregated results
+        UPDATE metainfo.stats stats
+        SET (tree_num_files, tree_total_file_size, tree_min_file_size, tree_max_file_size, tree_min_file_creation_date, tree_max_file_creation_date) =
+            (new_tree_num_files, new_tree_total_file_size, new_tree_min_file_size, new_tree_max_file_size, new_tree_min_file_creation_date, new_tree_max_file_creation_date)
+        WHERE stats.dir_id = parent_stats_row.dir_id;
+
+        -- climb the tree by selecting the parent's parent, and loop again.
+        SELECT st.* FROM metainfo.stats st
+        INNER JOIN lta.directory dir on dir.parent_dir_id = st.dir_id
+        WHERE dir.id = parent_stats_row.dir_id
+        LIMIT 1
+        INTO parent_stats_row;
+    END LOOP;
+
+    RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_stats_update_do_update_parents_tree_stats
+AFTER UPDATE OF dir_num_files, dir_total_file_size, dir_min_file_size, dir_max_file_size, dir_min_file_creation_date, dir_max_file_creation_date
+ON metainfo.stats
+FOR EACH ROW
+EXECUTE PROCEDURE metainfo.on_stats_update_do_update_parents_tree_stats();
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION lta.on_directory_inserted_parse_project_info()
+RETURNS trigger AS
+$BODY$
+DECLARE
+    project_pos int;
+    next_slash_pos int;
+    new_dir_name text;
+    dir_name_tail text;
+    new_project_name text;
+    new_project_id int;
+    project_dir_name text;
+    project_dir_id int;
+    obs_id int;
+    obs_dir_name text;
+BEGIN
+    new_dir_name := trim(trailing '/' from NEW.name);
+    project_pos := strpos(new_dir_name, '/projects');
+
+    IF project_pos > 0 THEN
+     dir_name_tail := substring(new_dir_name from project_pos + 10);
+     IF length(dir_name_tail) > 0 THEN
+         next_slash_pos := strpos(dir_name_tail, '/');
+         IF next_slash_pos > 0 THEN
+             new_project_name := substring(dir_name_tail from 0 for next_slash_pos);
+         ELSE
+             new_project_name := dir_name_tail;
+         END IF;
+
+         IF length(new_project_name) > 0 THEN
+             --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+             INSERT INTO metainfo.project(name)
+             (SELECT new_project_name WHERE NOT EXISTS (SELECT name FROM metainfo.project WHERE name = new_project_name));
+
+             SELECT id FROM metainfo.project WHERE name = new_project_name LIMIT 1 INTO new_project_id;
+
+             IF new_project_id IS NOT NULL THEN
+                 IF next_slash_pos > 0 THEN
+                     project_dir_name := substring(new_dir_name from 0 for project_pos + 10 + next_slash_pos - 1);
+                 ELSE
+                     project_dir_name := new_dir_name;
+                 END IF;
+
+                 IF project_dir_name = new_dir_name THEN
+                     --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+                     INSERT INTO metainfo.project_top_level_directory(project_id, dir_id)
+                     (SELECT new_project_id, NEW.id WHERE NOT EXISTS (SELECT ptld.project_id, ptld.dir_id FROM metainfo.project_top_level_directory ptld WHERE ptld.project_id = new_project_id AND ptld.dir_id = NEW.id));
+                 ELSE
+                     dir_name_tail := substring(dir_name_tail from length(new_project_name)+2);
+                     next_slash_pos := strpos(dir_name_tail, '/');
+                     IF next_slash_pos > 0 THEN
+                         obs_dir_name := substring(dir_name_tail from 0 for next_slash_pos);
+                     ELSE
+                         obs_dir_name := dir_name_tail;
+                     END IF;
+                         BEGIN
+                             obs_id := obs_dir_name::integer;
+
+                             --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+                             INSERT INTO metainfo.observation(id)
+                             (SELECT obs_id WHERE NOT EXISTS (SELECT id FROM metainfo.observation WHERE id = obs_id));
+
+                             --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+                             INSERT INTO metainfo.project_observation(project_id, observation_id)
+                             (SELECT new_project_id, obs_id WHERE NOT EXISTS (SELECT project_id, observation_id FROM metainfo.project_observation WHERE project_id = new_project_id AND observation_id = obs_id));
+                         EXCEPTION WHEN invalid_text_representation THEN
+                         END;
+                 END IF;
+             END IF;
+         END IF;
+     END IF;
+    END IF;
+
+    RETURN NEW;
+END;
+$BODY$
+LANGUAGE plpgsql VOLATILE
+COST 100;
+
+CREATE TRIGGER trigger_on_directory_inserted_parse_project_info
+AFTER INSERT
+ON lta.directory
+FOR EACH ROW
+EXECUTE PROCEDURE lta.on_directory_inserted_parse_project_info();
+
+--------------------------------------------------------------------------------
+
+ CREATE OR REPLACE FUNCTION lta.on_fileinfo_inserted_parse_observation_info()
+   RETURNS trigger AS
+ $BODY$
+ DECLARE
+ new_file_name text;
+ L_pos int;
+ first_underscore_pos int;
+ first_dot_pos int;
+ obs_id int;
+ dataproduct_name text;
+ BEGIN
+     new_file_name := trim(leading '/' from NEW.name);
+     L_pos := strpos(new_file_name, 'L');
+     first_underscore_pos := strpos(new_file_name, '_');
+     IF L_pos > 0 AND first_underscore_pos > L_pos THEN
+         BEGIN
+                 obs_id := substring(new_file_name from L_pos+1 for first_underscore_pos-2)::integer;
+
+                 --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+                 INSERT INTO metainfo.observation(id)
+                 (SELECT obs_id WHERE NOT EXISTS (SELECT id FROM metainfo.observation WHERE id = obs_id));
+
+                 first_dot_pos := strpos(new_file_name, '.');
+                 IF first_dot_pos > L_pos THEN
+                         dataproduct_name := substring(new_file_name from L_pos for first_dot_pos-1);
+
+                         --postgres < 9.5 way of doing INSERT...ON CONFLICT DO NOTHING
+                         INSERT INTO metainfo.dataproduct(fileinfo_id, observation_id, name)
+                         (SELECT NEW.id, obs_id, dataproduct_name WHERE NOT EXISTS (SELECT fileinfo_id, observation_id, name FROM metainfo.dataproduct WHERE fileinfo_id = NEW.id AND observation_id = obs_id AND name = dataproduct_name));
+                 END IF;
+
+         EXCEPTION WHEN invalid_text_representation THEN
+         END;
+     END IF;
+     RETURN NEW;
+ END;
+ $BODY$
+   LANGUAGE plpgsql VOLATILE
+   COST 100;
+
+CREATE TRIGGER trigger_on_fileinfo_inserted_parse_observation_info
+   AFTER INSERT
+   ON lta.fileinfo
+   FOR EACH ROW
+   EXECUTE PROCEDURE lta.on_fileinfo_inserted_parse_observation_info();
+
+-- END TRIGGERS
+
+
+-- BEGIN NORMAL FUNCTIONS
+
+--TODO: this method get_tree_stats is recursive (it calls itself), which is notoriously slow in sql. rewrite method to use WITH RECURSIVE statements, see https://www.postgresql.org/docs/9.3/static/queries-with.html
+CREATE OR REPLACE FUNCTION metainfo.get_tree_stats(tree_root_dir_id integer, lower_ts timestamp without time zone DEFAULT NULL, upper_ts timestamp without time zone DEFAULT NULL,
+                                                   OUT dir_id integer, OUT tree_num_files bigint, OUT tree_total_file_size bigint)
+RETURNS record AS $$
+DECLARE
+    stats_row metainfo.stats%ROWTYPE;
+    dir_num_files bigint;
+    dir_total_file_size bigint;
+    subdirs_tree_num_files bigint;
+    subdirs_tree_total_file_size bigint;
+BEGIN
+    -- we need to provide the requested tree_root_dir_id also as an output, so we can join on it
+    dir_id := tree_root_dir_id;
+
+    -- check for valid lower_ts/upper_ts
+    IF lower_ts IS NULL THEN
+        lower_ts := '-infinity';
+    END IF;
+    IF upper_ts IS NULL THEN
+        upper_ts := 'infinity';
+    END IF;
+
+    SELECT st.* FROM metainfo.stats st
+    WHERE st.dir_id = tree_root_dir_id
+    LIMIT 1
+    INTO stats_row;
+
+    -- directory has no tree stats. So return 0,0
+    IF stats_row.tree_min_file_creation_date IS NULL OR stats_row.tree_max_file_creation_date IS NULL THEN
+        tree_num_files := 0;
+        tree_total_file_size := 0;
+        RETURN;
+    END IF;
+
+
+    -- the tree stats of this directory have no overlap at all for the requested timerange
+    IF (stats_row.tree_min_file_creation_date > upper_ts) OR (stats_row.tree_max_file_creation_date < lower_ts) THEN
+        tree_num_files := 0;
+        tree_total_file_size := 0;
+        RETURN;
+    END IF;
+
+    -- the tree stats of this directory have full overlap the requested timerange
+    IF stats_row.tree_min_file_creation_date >= lower_ts AND stats_row.tree_max_file_creation_date <= upper_ts THEN
+        tree_num_files := stats_row.tree_num_files;
+        tree_total_file_size := stats_row.tree_total_file_size;
+        RETURN;
+    END IF;
+
+    -- the tree stats of this directory have partial overlap the requested timerange
+    -- recurse into subdirectories, and accumulate subdir results
+    IF stats_row.tree_min_file_creation_date <= upper_ts OR stats_row.tree_max_file_creation_date >= lower_ts THEN
+        --sum all results from the subdirs which have at least partial overlap
+        SELECT SUM(gts.tree_num_files), SUM(gts.tree_total_file_size)
+        FROM lta.directory d
+        INNER JOIN metainfo.stats s ON s.dir_id = d.id
+        INNER JOIN metainfo.get_tree_stats(d.id, lower_ts, upper_ts) gts ON gts.dir_id = d.id
+        WHERE d.parent_dir_id = tree_root_dir_id
+        AND NOT (s.tree_min_file_creation_date > upper_ts OR s.tree_max_file_creation_date < lower_ts)
+        INTO subdirs_tree_num_files, subdirs_tree_total_file_size;
+
+        IF subdirs_tree_num_files IS NULL THEN
+            subdirs_tree_num_files := 0;
+        END IF;
+
+        IF subdirs_tree_total_file_size IS NULL THEN
+            subdirs_tree_total_file_size := 0;
+        END IF;
+
+        -- and add the num_files and total_file_size in this dir...
+        IF stats_row.dir_num_files > 0 THEN
+            IF stats_row.dir_min_file_creation_date >= lower_ts AND stats_row.dir_max_file_creation_date <= upper_ts THEN
+                -- all files in this dir are in the requested time range
+                -- when 'all files'=0, that's ok, cause then dir_num_files and dir_total_file_size are 0 which is the answer we need
+                dir_num_files := stats_row.dir_num_files;
+                dir_total_file_size := stats_row.dir_total_file_size;
+            ELSE
+                -- some files in this dir are in the requested time range
+                -- make selection of files in this dir in the requested time range
+                SELECT COUNT(fi.id), SUM(fi.size) FROM lta.fileinfo fi
+                WHERE fi.dir_id = tree_root_dir_id
+                AND fi.creation_date >= lower_ts AND fi.creation_date <= upper_ts
+                INTO dir_num_files, dir_total_file_size;
+            END IF;
+
+            IF dir_num_files IS NULL OR dir_num_files = 0 THEN
+                dir_total_file_size := 0;
+            END IF;
+        ELSE
+            dir_num_files := 0;
+            dir_total_file_size := 0;
+        END IF;
+
+        tree_num_files := subdirs_tree_num_files + dir_num_files;
+        tree_total_file_size := subdirs_tree_total_file_size + dir_total_file_size;
+
+        RETURN;
+    END IF;
+
+    --this should never occur
+    RAISE EXCEPTION 'metainfo.get_tree_stats could not determine no/partial/full overlap';
+END;
+$$ LANGUAGE plpgsql;
+
+
+--TODO: this method get_site_stats calls the recursive get_tree_stats methods, which needs a rewrite. After that, it is quite likely that this method also performs way faster.
+CREATE OR REPLACE FUNCTION metainfo.get_site_stats(_site_id integer, lower_ts timestamp without time zone DEFAULT NULL::timestamp without time zone, upper_ts timestamp without time zone DEFAULT NULL::timestamp without time zone,
+                                                   OUT tree_num_files bigint, OUT tree_total_file_size bigint)
+  RETURNS record AS $$
+BEGIN
+    SELECT SUM(gts.tree_num_files), SUM(gts.tree_total_file_size)
+    FROM lta.site_root_dir srd, metainfo.get_tree_stats(srd.root_dir_id, lower_ts, upper_ts) gts
+    WHERE srd.site_id = _site_id
+    INTO tree_num_files, tree_total_file_size;
+
+    IF tree_num_files IS NULL THEN
+        tree_num_files := 0;
+    END IF;
+
+    IF tree_total_file_size IS NULL THEN
+        tree_total_file_size := 0;
+    END IF;
+END;
+$$ LANGUAGE plpgsql;
+
+
+
+--TODO: see remarks at get_site_stats and get_tree_stats for optimizations.
+CREATE OR REPLACE FUNCTION metainfo.get_site_quota_usage(_site_quota_id integer, OUT site_id integer, OUT site_name text, OUT quota bigint, OUT total_file_size bigint, OUT space_left bigint, OUT num_files bigint, OUT valid_until_date timestamp without time zone)
+  RETURNS record AS $$
+BEGIN
+    SELECT s.id, s.name, sq.quota, sq.valid_until_date
+    FROM lta.site_quota sq
+    JOIN lta.site s on s.id = sq.site_id
+    WHERE sq.id = _site_quota_id
+    LIMIT 1
+    INTO site_id, site_name, quota, valid_until_date;
+
+    SELECT gts.tree_total_file_size, gts.tree_num_files
+    FROM metainfo.get_site_stats(site_id, NULL, valid_until_date) gts
+    LIMIT 1
+    INTO total_file_size, num_files;
+
+    space_left := quota - total_file_size;
+END;
+$$ LANGUAGE plpgsql;
+
+
+
+-- END NORMAL FUNCTIONS
+
+--
+--
+-- -- VIEWS
+
+CREATE VIEW lta.site_root_directory as
+ select ss.id as site_id, ss.name as site_name, srd.root_dir_id, dir.name as dir_name
+     from lta.site_root_dir srd
+     join lta.directory dir on dir.id = srd.root_dir_id
+     join lta.site ss on ss.id = srd.site_id ;
+
+CREATE VIEW lta.site_quota_view as
+ select ss.id as site_id, ss.name as site_name, ssq.quota, ssq.valid_until_date
+     from lta.site ss
+     left join lta.site_quota ssq on ssq.site_id = ss.id;
+
+CREATE VIEW lta.site_quota_root_directory as
+    SELECT s.id AS site_id, s.name AS site_name, d.id AS dir_id, d.name AS dir_name
+    FROM lta.quota_root_dirs qrd
+    JOIN lta.site s ON s.id = qrd.site_id
+    JOIN lta.directory d ON d.id = qrd.root_dir_id;
+
+CREATE VIEW lta.site_directory_tree as
+ select rd.site_id as site_id,
+     rd.site_name as site_name,
+     rd.root_dir_id as root_dir_id,
+     rd.dir_name as root_dir_name,
+     dir.id as dir_id,
+     dir.name as dir_name,
+     dir.parent_dir_id as parent_dir_id,
+     dc.depth as depth
+     from lta.site_root_directory rd
+     inner join lta.directory_closure dc on dc.ancestor_id = rd.root_dir_id
+     inner join lta.directory dir on dc.descendant_id = dir.id;
+
+CREATE VIEW scraper.site_scraper_last_directory_visit as
+ select rd.site_id as site_id,
+     rd.site_name as site_name,
+     dir.id as dir_id,
+     dir.name as dir_name,
+     sldv.visit_date as last_visit
+     from lta.site_root_directory rd
+     inner join lta.directory_closure dc on dc.ancestor_id = rd.root_dir_id
+     inner join lta.directory dir on dc.descendant_id = dir.id
+     inner join scraper.last_directory_visit sldv on sldv.dir_id = dir.id ;
+
+CREATE VIEW lta.site_directory_file as
+ select site.id as site_id,
+     site.name as site_name,
+     dir.id as dir_id,
+     dir.name as dir_name,
+     fileinfo.id as file_id,
+     fileinfo.name as file_name,
+     fileinfo.size as file_size,
+     fileinfo.creation_date as file_creation_date
+     from lta.site site
+     join lta.site_root_dir srd on srd.site_id = site.id
+     inner join lta.directory_closure dc on dc.ancestor_id = srd.root_dir_id
+     inner join lta.directory dir on dc.descendant_id = dir.id
+     inner join lta.fileinfo on fileinfo.dir_id = dir.id ;
+
+CREATE VIEW metainfo.project_directory as
+     select
+         project.id as project_id,
+         project.name as project_name,
+         dir.id as dir_id,
+         dir.name as dir_name
+         from metainfo.project_top_level_directory ptld
+         inner join metainfo.project on project.id = ptld.project_id
+         inner join lta.directory_closure dc on dc.ancestor_id = ptld.dir_id
+         inner join lta.directory dir on dc.descendant_id = dir.id ;
+
+CREATE VIEW metainfo.site_directory_stats as
+ select sdt.site_id,
+     sdt.site_name,
+     sdt.dir_id,
+     sdt.dir_name,
+     st.dir_num_files,
+     st.dir_total_file_size,
+     st.dir_min_file_size,
+     st.dir_max_file_size,
+     st.dir_min_file_creation_date,
+     st.dir_max_file_creation_date,
+     st.tree_num_files,
+     st.tree_total_file_size,
+     st.tree_min_file_size,
+     st.tree_max_file_size,
+     st.tree_min_file_creation_date,
+     st.tree_max_file_creation_date
+     from lta.site_directory_tree sdt
+     left join metainfo.stats st on st.dir_id = sdt.dir_id;
+
+CREATE OR REPLACE VIEW metainfo.project_directory_stats AS
+    SELECT pd.project_id, pd.project_name, sds.*
+    FROM metainfo.project_directory pd
+    JOIN metainfo.site_directory_stats sds ON sds.dir_id = pd.dir_id;
+
+CREATE VIEW metainfo.observation_dataproduct_file as
+  SELECT sdf.site_id, sdf.site_name, dp.observation_id, dp.id as dataproduct_id, dp.name as dataproduct_name, sdf.dir_id, sdf.dir_name, sdf.file_id, sdf.file_name, sdf.file_size, sdf.file_creation_date
+  FROM metainfo.dataproduct dp
+    JOIN lta.site_directory_file sdf ON sdf.file_id = dp.fileinfo_id;
+
+CREATE VIEW metainfo.project_observation_dataproduct as
+  SELECT p.id AS project_id,
+     p.name AS project_name,
+     dp.observation_id,
+     dp.id AS dataproduct_id,
+     dp.name AS dataproduct_name,
+     dp.fileinfo_id AS fileinfo_id
+    FROM metainfo.dataproduct dp
+      INNER JOIN metainfo.project_observation po ON po.observation_id = dp.observation_id
+      INNER JOIN metainfo.project p ON p.id = po.project_id;
+
+CREATE VIEW metainfo.dataproduct_all as
+  SELECT pod.*, sdf.*
+    FROM metainfo.project_observation_dataproduct pod
+      INNER JOIN lta.site_directory_file sdf on sdf.file_id = pod.fileinfo_id;
+
+CREATE VIEW metainfo.site_project_stats as
+     select ptld.project_id, p.name as project_name, site_id, site_name, sds.dir_id, sds.dir_name, tree_num_files, tree_total_file_size, tree_min_file_creation_date, tree_max_file_creation_date
+     from metainfo.project_top_level_directory ptld
+     inner join metainfo.project p on p.id = ptld.project_id
+     inner join metainfo.site_directory_stats sds on sds.dir_id = ptld.dir_id
+     where tree_num_files IS NOT NULL;
+
+CREATE VIEW metainfo.project_stats AS
+	SELECT project_id, project_name, COUNT(site_id) num_sites, SUM(tree_num_files) total_num_files, SUM(tree_total_file_size) total_file_size, MIN(tree_min_file_creation_date) min_file_creation_date, MAX(tree_max_file_creation_date) max_file_creation_date
+	FROM metainfo.site_project_stats
+	group by project_id, project_name;
+
+CREATE VIEW metainfo.site_project_observation_dataproduct_dir_file AS
+    SELECT sdf.site_id, sdf.site_name, pod.project_id, pod.project_name, pod.observation_id, pod.dataproduct_id, pod.dataproduct_name, sdf.dir_id, sdf.dir_name, sdf.file_id, sdf.file_name, sdf.file_size, sdf.file_creation_date
+    FROM metainfo.project_observation_dataproduct pod
+    JOIN lta.site_directory_file sdf ON sdf.file_id = pod.fileinfo_id;
+
+CREATE VIEW metainfo.site_root_dir_tree_stats AS
+    SELECT srd.site_id, srd.site_name, srd.root_dir_id as root_dir_id, srd.dir_name as root_dir_name,
+    sds.tree_num_files, sds.tree_total_file_size, sds.tree_min_file_size, sds.tree_max_file_size, sds.tree_min_file_creation_date, sds.tree_max_file_creation_date
+    FROM lta.site_root_directory srd
+    INNER JOIN metainfo.site_directory_stats sds ON sds.dir_id = srd.root_dir_id;
+
+CREATE VIEW metainfo.site_stats as
+    SELECT site_id, site_name, SUM(tree_num_files) total_num_files, SUM(tree_total_file_size) total_file_size, MIN(tree_min_file_size) min_file_size, MAX(tree_max_file_size) max_file_size, MIN(tree_min_file_creation_date) min_file_creation_date, MAX(tree_max_file_creation_date) max_file_creation_date
+    from metainfo.site_root_dir_tree_stats
+    group by site_id, site_name;
+
+CREATE VIEW metainfo.site_quota_usage AS
+    select gsqu.*
+    from lta.site_quota sq
+    join metainfo.get_site_quota_usage(sq.id) gsqu on gsqu.site_id = sq.site_id;
+
+CREATE OR REPLACE VIEW metainfo.site_quota_root_dir_stats AS
+    SELECT sds.site_id, sds.site_name, sds.dir_id, sds.dir_name, sds.tree_num_files, sds.tree_total_file_size
+    FROM lta.quota_root_dirs qrd
+    INNER JOIN metainfo.site_directory_stats sds on sds.dir_id = qrd.root_dir_id;
+
+
+-- END VIEWS
+
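+-- Example usage (illustrative only, not part of the schema): per-project totals from the views defined above
+-- SELECT project_name, total_num_files, total_file_size FROM metainfo.project_stats ORDER BY total_file_size DESC;
+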
+COMMIT;
diff --git a/LTA/ltastorageoverview/lib/report.py b/LTA/ltastorageoverview/lib/report.py
index b77f0a764bd..168ee9833dc 100755
--- a/LTA/ltastorageoverview/lib/report.py
+++ b/LTA/ltastorageoverview/lib/report.py
@@ -25,59 +25,55 @@ from datetime import datetime, timedelta
 import sys
 import os
 import os.path
-from ltastorageoverview import store
+import logging
+from lofar.lta.ltastorageoverview import store
 from lofar.common.util import humanreadablesize
 from lofar.common.datetimeutils import monthRanges
 
-def main(argv):
-    dbpath = argv[0] if argv else 'ltastorageoverview.sqlite'
-    print 'Report for ' + dbpath
+logger = logging.getLogger()
 
-    db = store.LTAStorageDb(dbpath)
+def main():
+    from optparse import OptionParser
+    from lofar.common import dbcredentials
+
+    # Check the invocation arguments
+    parser = OptionParser("%prog [options]", description='generates a report of the LTA storage overview from the specified database.')
+    parser.add_option('-V', '--verbose', dest='verbose', action='store_true', help='verbose logging')
+    parser.add_option_group(dbcredentials.options_group(parser))
+    parser.set_defaults(dbcredentials="LTASO")
+    (options, args) = parser.parse_args()
+
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.DEBUG if options.verbose else logging.INFO)
+
+    dbcreds = dbcredentials.parse_options(options)
+
+    logger.info("Using dbcreds: %s" % dbcreds.stringWithHiddenPassword())
+
+    db = store.LTAStorageDb(dbcreds, options.verbose)
 
     sites = db.sites()
 
-    numFilesTotal = sum([db.numFilesInSite(s[0]) for s in sites])
-    totalFileSize = sum([db.totalFileSizeInSite(s[0]) for s in sites])
+    numFilesTotal = sum([db.numFilesInSite(s['id']) for s in sites])
+    totalFileSize = sum([db.totalFileSizeInSite(s['id']) for s in sites])
 
     print '\n*** TOTALS *** #files=%s total_size=%s' % (humanreadablesize(numFilesTotal, ''),
                                                         humanreadablesize(totalFileSize))
 
     for site in sites:
-        numFilesInSite = db.numFilesInSite(site[0])
-        totalFileSizeInSite = db.totalFileSizeInSite(site[0])
+        numFilesInSite = db.numFilesInSite(site['id'])
+        totalFileSizeInSite = db.totalFileSizeInSite(site['id'])
 
-        print '\n--- %s ---  #files=%s total_size=%s' % (site[1],
+        print '\n--- %s ---  #files=%s total_size=%s' % (site['name'],
                                                          humanreadablesize(numFilesInSite, ''),
                                                          humanreadablesize(totalFileSizeInSite))
 
-        root_dirs = db.rootDirectoriesForSite(site[0])
+        root_dirs = db.rootDirectoriesForSite(site['id'])
 
         for root_dir in root_dirs:
-            numFilesInTree = db.numFilesInTree(root_dir[0])
-            totalFileSizeInTree = db.totalFileSizeInTree(root_dir[0])
-
-            print "  %s #files=%d total_size=%s" % (root_dir[1], numFilesInTree, humanreadablesize(totalFileSizeInTree))
-
-            subdirs = db.subDirectories(root_dir[0], 1, False)
-            subdirs = sorted(subdirs, key=lambda x: x[1])
-
-            for subdir in subdirs:
-                numFilesInTree = db.numFilesInTree(subdir[0])
-                totalFileSizeInTree = db.totalFileSizeInTree(subdir[0])
-
-                print "    %s #files=%d total_size=%s" % (subdir[1], numFilesInTree, humanreadablesize(totalFileSizeInTree))
-
-                if subdir[1].endswith('projects/'):
-                    projectsSubDirs = db.subDirectories(subdir[0], 1, False)
-                    projectsSubDirs = sorted(projectsSubDirs, key=lambda x: x[1])
-
-                    for projectsSubDir in projectsSubDirs:
-                        numFilesInTree = db.numFilesInTree(projectsSubDir[0])
-                        totalFileSizeInTree = db.totalFileSizeInTree(projectsSubDir[0])
-
-                        print "      %s #files=%d total_size=%s" % (projectsSubDir[1], numFilesInTree, humanreadablesize(totalFileSizeInTree))
+            numFilesInTree = db.numFilesInTree(root_dir['root_dir_id'])
+            totalFileSizeInTree = db.totalFileSizeInTree(root_dir['root_dir_id'])
 
+            print "  %s #files=%d total_size=%s" % (root_dir['dir_name'], numFilesInTree, humanreadablesize(totalFileSizeInTree))
 
     utcnow = datetime.utcnow()
     monthbegin = datetime(utcnow.year, utcnow.month, 1)
@@ -85,36 +81,36 @@ def main(argv):
     print '\n\n*** CHANGES THIS MONTH %s ***' % monthbegin.strftime('%Y/%m')
 
     for site in sites:
-        root_dirs = db.rootDirectoriesForSite(site[0])
+        root_dirs = db.rootDirectoriesForSite(site['id'])
 
-        numChangedFilesInSite = db.numFilesInSite(site[0],
+        numChangedFilesInSite = db.numFilesInSite(site['id'],
                                                   monthbegin,
                                                   monthend)
 
         if numChangedFilesInSite == 0:
-            print '\n--- %s --- None' % (site[1],)
+            print '\n--- %s --- None' % (site['name'],)
             continue
 
-        totalChangedFileSizeInSite = db.totalFileSizeInSite(site[0],
+        totalChangedFileSizeInSite = db.totalFileSizeInSite(site['id'],
                                                             monthbegin,
                                                             monthend)
 
-        print '\n--- %s --- #files=%d total_size=%s' % (site[1],
+        print '\n--- %s --- #files=%d total_size=%s' % (site['name'],
                                                         numChangedFilesInSite,
                                                         humanreadablesize(totalChangedFileSizeInSite))
 
         for root_dir in root_dirs:
-            changedFiles = db.filesInTree(root_dir[0], monthbegin, monthend)
+            changedFiles = db.filesInTree(root_dir['dir_id'], monthbegin, monthend)
 
             if len(changedFiles) > 0:
-                numFilesInTree = db.numFilesInTree(root_dir[0],
+                numFilesInTree = db.numFilesInTree(root_dir['dir_id'],
                                                    monthbegin,
                                                    monthend)
-                totalFileSizeInTree = db.totalFileSizeInTree(root_dir[0],
+                totalFileSizeInTree = db.totalFileSizeInTree(root_dir['dir_id'],
                                                              monthbegin,
                                                              monthend)
 
-                print "  %s #files=%d total_size=%s" % (root_dir[1],
+                print "  %s #files=%d total_size=%s" % (root_dir['dir_name'],
                                                         numFilesInTree,
                                                         humanreadablesize(totalFileSizeInTree))
 
@@ -133,18 +129,19 @@ def main(argv):
     print '\n\n*** CHANGES PER MONTH ***'
 
     min_date, max_date = db.datetimeRangeOfFilesInTree()
-    month_ranges = monthRanges(min_date, max_date)
+    if min_date and max_date:
+        month_ranges = monthRanges(min_date, max_date)
 
-    for site in sites:
-        print '\n--- %s ---' % site[1]
+        for site in sites:
+            print '\n--- %s ---' % site['name']
 
-        for month_range in month_ranges:
-            numFilesInSite = db.numFilesInSite(site[0], month_range[0], month_range[1])
-            totalFileSizeInSite = db.totalFileSizeInSite(site[0], month_range[0], month_range[1])
+            for month_range in month_ranges:
+                numFilesInSite = db.numFilesInSite(site['id'], month_range[0], month_range[1])
+                totalFileSizeInSite = db.totalFileSizeInSite(site['id'], month_range[0], month_range[1])
 
-            print "  %s %s %s #files=%d total_size=%s" % (site[1], month_range[0], month_range[1], numFilesInSite, humanreadablesize(totalFileSizeInSite))
+                print "  %s %s %s #files=%d total_size=%s" % (site['name'], month_range[0], month_range[1], numFilesInSite, humanreadablesize(totalFileSizeInSite))
 
 
 if __name__ == "__main__":
-    main(sys.argv[1:])
+    main()
 
diff --git a/LTA/ltastorageoverview/lib/scraper.py b/LTA/ltastorageoverview/lib/scraper.py
index 5b9cbe2bdc3..96c2a39e6a4 100755
--- a/LTA/ltastorageoverview/lib/scraper.py
+++ b/LTA/ltastorageoverview/lib/scraper.py
@@ -28,18 +28,19 @@ import logging
 import time
 import datetime
 import sys
+import socket
 import os
 import os.path
 import threading
 import multiprocessing
-from ltastorageoverview import store
-from ltastorageoverview.utils import humanreadablesize
-from random import random
+from lofar.lta.ltastorageoverview import store
+from lofar.common.util import humanreadablesize
+from random import random, randint
 
-#logging.basicConfig(filename='scraper.' + time.strftime("%Y-%m-%d") + '.log', level=logging.DEBUG, format="%(asctime)-15s %(levelname)s %(message)s")
-logging.basicConfig(level=logging.DEBUG, format="%(asctime)-15s %(levelname)s %(message)s")
 logger = logging.getLogger()
 
+VISIT_INTERVAL = datetime.timedelta(days=3)
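+# srmls commands are executed remotely over ssh on this host (see Location.getResult)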
+LEXAR_HOST = 'ingest@lexar004.offline.lofar'
 
 class FileInfo:
     '''Simple struct to hold filename and size'''
@@ -58,6 +59,7 @@ class FileInfo:
         return self.filename + " " + humanreadablesize(self.size) + " " + str(self.created_at)
 
 class SrmlsException(Exception):
+    '''Exception which is raised when an srmls command fails'''
     def __init__(self, command, exitcode, stdout, stderr):
         self.command = command
         self.exitcode = exitcode
@@ -69,6 +71,7 @@ class SrmlsException(Exception):
                 (self.command, self.exitcode, self.stdout, self.stderr)
 
 class ParseException(Exception):
+    '''Exception which is raised when parsing srmls results fails'''
     def __init__(self, message):
         self.message = message
 
@@ -87,8 +90,14 @@ class Location:
         directory : int
             a directory at the storage site. for example: /pnfs/grid.sara.nl/data/lofar/storage
         '''
-        self.srmurl = srmurl
-        self.directory = directory
+        self.srmurl = srmurl.rstrip('/')
+        self.directory = directory.rstrip('/') if len(directory) > 1 else directory
+
+        if not self.srmurl.startswith('srm://'):
+            raise ValueError('malformed srm url: %s' % (self.srmurl,))
+
+        if not self.directory.startswith('/'):
+            raise ValueError('malformed directory path: "%s". should start with a /' % (self.directory,))
 
     def path(self):
         '''returns the full path srmurl + directory'''
@@ -121,12 +130,17 @@ class Location:
         foundFiles = []
         foundDirectories = []
 
-        logger.info("Scanning %s", self.path())
+        logger.info("Scanning %s with offset=%s", self.path(), offset)
 
         # the core command: do an srmls call and parse the results
         # srmls can only yield max 900 items in a result, hence we can recurse for the next 900 by using the offset
-        cmd = ["bash", "-c", "source %s;srmls -l -count=900 -offset=%d %s%s" % ('/globalhome/ingest/service/bin/init.sh', offset, self.srmurl, self.directory)]
-        # logger.debug(' '.join(cmd))
+        cmd = ['ssh', '-tt', '-n', '-x', '-q', LEXAR_HOST, "bash", "-c",
+               "\'source /globalhome/ingest/.grid/.ingest_profile; srmls -l -count=900 -offset=%d %s%s\'" % (
+                offset,
+                self.srmurl,
+                self.directory) ]
+
+        logger.debug(' '.join(cmd))
         p = subprocess.Popen(cmd, stdin=open('/dev/null'), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         logs = p.communicate()
         # logger.debug('Shell command for %s exited with code %s' % (self.path(), p.returncode))
@@ -164,7 +178,7 @@ class Location:
                         raise ParseException("Could not parse dirname from line: %s\nloglines:\n%s"
                             % (pathLineItems[1], logs[0]))
 
-                    foundDirectories.append(Location(self.srmurl, dirname))
+                    foundDirectories.append(Location(self.srmurl, dirname.rstrip('/')))
                 elif entryType.lower() == 'file':
                     try:
                         filesize = int(pathLineItems[0])
@@ -172,7 +186,7 @@ class Location:
                         timestamplines = [x for x in lines if 'ed at:' in x]
                         timestampline = None
                         for line in timestamplines:
-                            if 'created' in line:
+                            if 'created' in line and '1970' not in line:
                                 timestampline = line
                                 break
                             timestampline = line
@@ -230,133 +244,279 @@ class LocationResult:
         return sum([fileinfo.size for fileinfo in self.files])
 
 
-# our lock for safe guarding locations and results
-# which will be queried in parallel
-lock = threading.Lock()
-
 class ResultGetterThread(threading.Thread):
     '''Helper class to query Locations asynchronously for results.
     Gets the result for the first Location in the locations deque and appends it to the results deque
     Appends the subdirectory Locations at the end of the locations deque for later processing'''
-    def __init__(self, db, dir_id):
+    def __init__(self, dbcreds, dir_id, log_queries=False):
         threading.Thread.__init__(self)
         self.daemon = True
-        self.db = db
+        self.dbcreds = dbcreds
+        self.log_queries = log_queries
         self.dir_id = dir_id
 
     def run(self):
         '''A single location is pop\'ed from the locations deque and the results are queried.
         Resulting subdirectories are appended to the locations deque'''
         try:
-            with lock:
-                dir = self.db.directory(self.dir_id)
+            with store.LTAStorageDb(self.dbcreds, self.log_queries) as db:
+                dir = db.directory(self.dir_id)
 
                 if not dir:
                     return
 
-                dir_id = dir[0]
-                dir_name = dir[1]
-                self.db.updateDirectoryLastVisitTime(dir_id, datetime.datetime.utcnow())
+                dir_id = dir['dir_id']
+                dir_name = dir['dir_name']
 
-                site_id = dir[2]
-                site = self.db.site(site_id)
-                srm_url = site[2]
+                site_id = dir['site_id']
+                site = db.site(site_id)
+                srm_url = site['url']
 
             location = Location(srm_url, dir_name)
 
             try:
+                def rescheduleVisit():
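+                    '''make this directory due for a new visit within about a minute from now (retried a few times in case of temporary db errors)'''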
+                    for i in range(5):
+                        try:
+                            with store.LTAStorageDb(self.dbcreds, self.log_queries) as db:
+                                logger.info('Rescheduling %s for new visit.' % (location.path(),))
+                                db.updateDirectoryLastVisitTime(self.dir_id, datetime.datetime.utcnow() - VISIT_INTERVAL + datetime.timedelta(minutes=1))
+                                break
+                        except Exception:
+                            time.sleep(1)
+
+
                 # get results... long blocking
                 result = location.getResult()
                 logger.info(result)
 
-                with lock:
-                    self.db.insertFileInfos([(file.filename, file.size, file.created_at, dir_id) for file in result.files])
+                with store.LTAStorageDb(self.dbcreds, self.log_queries) as db:
+                    # convert the result.files list into a dict
+                    # with (filename, dir_id) as key and a tuple with all file info as value
+                    result_file_tuple_dict = {}
+                    for file in result.files:
+                        filename = file.filename.split('/')[-1]
+                        key = (filename, dir_id)
+                        file_tuple = (filename, int(file.size), file.created_at, dir_id)
+                        result_file_tuple_dict[key] = file_tuple
+
+                    # create a dict of all already known files from the db
+                    known_file_dict = {}
+                    for file in db.filesInDirectory(dir_id):
+                        key = (str(file['name']), dir_id)
+                        known_file_dict[key] = file
+
+                    # now compare the result and known (filename, dir_id) sets
+                    # and find out which are new, and which are known.
+                    # compare only by (filename, dir_id) because for a given file the size and/or date might have changed,
+                    # but that does not make it a new/unique file.
+                    result_file_key_set = set(result_file_tuple_dict.keys())
+                    known_file_key_set = set(known_file_dict.keys())
+                    new_file_key_set = result_file_key_set - known_file_key_set
+                    removed_file_key_set = known_file_key_set - result_file_key_set
+
+                    logger.info("%s %s: %d out of %d files are new, and %d are already known", site['name'],
+                                                                                                dir_name,
+                                                                                                len(new_file_key_set),
+                                                                                                len(result_file_key_set),
+                                                                                                len(known_file_key_set))
+
+                    if new_file_key_set:
+                        new_file_tuple_set = [result_file_tuple_dict[key] for key in new_file_key_set]
+                        file_ids = db.insertFileInfos(new_file_tuple_set)
+
+                        if len(file_ids) != len(new_file_tuple_set):
+                            rescheduleVisit()
+
+                    if known_file_key_set:
+                        for key, known_file in known_file_dict.items():
+                            if key in result_file_tuple_dict:
+                                result_file_tuple = result_file_tuple_dict[key]
+
+                                known_size = int(known_file['size'])
+
+                                result_size = result_file_tuple[1]
+
+                                if known_size != result_size:
+                                    logger.info("%s %s: updating %s (id=%d) size from %d to %d",
+                                                site['name'], dir_name, known_file['name'], known_file['id'],
+                                                known_size, result_size)
+                                    db.updateFileInfoSize(known_file['id'], result_size)
+
+                    if removed_file_key_set:
+                        for removed_file_key in removed_file_key_set:
+                            db.deleteFileInfoFromDirectory(removed_file_key[0], removed_file_key[1])
 
                     # skip empty nikhef dirs
                     filteredSubDirectories = [loc for loc in result.subDirectories
-                                            if not ('nikhef' in loc.srmurl and 'generated' in loc.directory) ]
+                                              if not ('nikhef' in loc.srmurl and 'generated' in loc.directory) ]
 
-                    # filteredSubDirectories = [loc for loc in filteredSubDirectories
-                    #                        if not 'lc3_007' in loc.directory ]
+                    # skip sksp spectroscopy project
+                    filteredSubDirectories = [loc for loc in filteredSubDirectories
+                                              if not ('sara' in loc.srmurl and 'sksp' in loc.directory and 'spectro' in loc.directory) ]
 
                     subDirectoryNames = [loc.directory for loc in filteredSubDirectories]
 
                     if subDirectoryNames:
-                        self.db.insertSubDirectories(subDirectoryNames, dir_id,
-                                                    datetime.datetime.utcnow() - datetime.timedelta(days=1000))
+                        # check for already known subdirectories in the db
+                        known_subDirectoryNames_set = set(subdir['name'] for subdir in db.subDirectories(dir_id))
+
+                        new_subdir_name_set = set(subDirectoryNames) - known_subDirectoryNames_set
+
+                        logger.info("%s %s: %d out of %d subdirs are new, and %d are already known", site['name'], dir_name, len(new_subdir_name_set), len(subDirectoryNames), len(known_subDirectoryNames_set))
+
+                        if new_subdir_name_set:
+                            subdir_ids = db.insertSubDirectories(new_subdir_name_set, dir_id)
+
+                            if len(subdir_ids) != len(new_subdir_name_set):
+                                rescheduleVisit()
 
             except (SrmlsException, ParseException) as e:
                 logger.error('Error while scanning %s\n%s' % (location.path(), str(e)))
 
-                logger.info('Rescheduling %s for new visit.' % (location.path(),))
-                self.db.updateDirectoryLastVisitTime(self.dir_id, datetime.datetime.utcnow() - datetime.timedelta(days=1000))
+                if 'does not exist' in str(e):
+                    with store.LTAStorageDb(self.dbcreds, self.log_queries) as db:
+                        db.deleteDirectory(self.dir_id)
+                else:
+                    rescheduleVisit()
 
         except Exception as e:
             logger.error(str(e))
 
-            logger.info('Rescheduling dir_id %d for new visit.' % (self.dir_id,))
-            self.db.updateDirectoryLastVisitTime(self.dir_id, datetime.datetime.utcnow() - datetime.timedelta(days=1000))
-
-def main(argv):
-    '''the main function scanning all locations and gathering the results'''
-
-    db = store.LTAStorageDb('/data2/ltastorageoverview.sqlite')
+            with store.LTAStorageDb(self.dbcreds, self.log_queries) as db:
+                logger.info('Rescheduling dir_id %d for new visit.' % (self.dir_id,))
+                db.updateDirectoryLastVisitTime(self.dir_id, datetime.datetime.utcnow() - VISIT_INTERVAL)
 
+def populateDbWithLTASitesAndRootDirs(db):
+    """
+    Helper method to fill empty database with (hardcoded) information about our LTA partners/sites/quotas
+    """
     if not db.sites():
-        db.insertSite('target', 'srm://srm.target.rug.nl:8444')
-        db.insertSite('nikhef', 'srm://tbn18.nikhef.nl:8446')
-        db.insertSite('sara', 'srm://srm.grid.sara.nl:8443')
-        db.insertSite('juelich', 'srm://lofar-srm.fz-juelich.de:8443')
-
-        db.insertRootDirectory('target', '/lofar/ops')
-        db.insertRootDirectory('target', '/lofar/ops/disk')
-        db.insertRootDirectory('nikhef', '/dpm/nikhef.nl/home/lofar')
-        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/ops')
-        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/user')
+        #db.insertSite('nikhef', 'srm://tbn18.nikhef.nl:8446')
+        sara_id = db.insertSiteIfNotExists('sara', 'srm://srm.grid.sara.nl:8443')
+        juelich_id = db.insertSiteIfNotExists('juelich', 'srm://lofar-srm.fz-juelich.de:8443')
+        poznan_id = db.insertSiteIfNotExists('poznan', 'srm://lta-head.lofar.psnc.pl:8443')
+
+        # insert the LTA site root dir(s)
         db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/software')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/ops')
         db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/storage')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/eor')
         db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/pulsar')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/cosmics')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/surveys')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/user')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/proc')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/trans')
+        db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/lotest')
         db.insertRootDirectory('juelich', '/pnfs/fz-juelich.de/data/lofar/ops')
+        db.insertRootDirectory('poznan', '/lofar/ops')
+        #db.insertRootDirectory('nikhef', '/dpm/nikhef.nl/home/lofar')
+
+        def end_of_year(year):
+            '''little helper function which returns a datetime timestamp for the end of the given year'''
+            return datetime.datetime(year, 12, 31, 23, 59, 59)
+
+        # insert quota as given by our LTA partners
+        db.insertSiteQuota(sara_id, 5e15, end_of_year(2012))
+        db.insertSiteQuota(sara_id, 8e15, end_of_year(2013))
+        db.insertSiteQuota(sara_id, 11e15, end_of_year(2014))
+        db.insertSiteQuota(sara_id, 14e15, end_of_year(2015))
+        db.insertSiteQuota(sara_id, 17e15, end_of_year(2016))
+        db.insertSiteQuota(sara_id, 20e15, end_of_year(2017))
+        db.insertSiteQuota(sara_id, 23e15, end_of_year(2018))
+
+        db.insertSiteQuota(juelich_id, 2.5e15, end_of_year(2013))
+        db.insertSiteQuota(juelich_id, 4.5e15, end_of_year(2014))
+        db.insertSiteQuota(juelich_id, 6.5e15, end_of_year(2015))
+        db.insertSiteQuota(juelich_id, 8.5e15, end_of_year(2016))
+        db.insertSiteQuota(juelich_id, 10.5e15, end_of_year(2017))
+        db.insertSiteQuota(juelich_id, 12.5e15, end_of_year(2018))
+
+        db.insertSiteQuota(poznan_id, 0.5e15, end_of_year(2016))
+        db.insertSiteQuota(poznan_id, 3.5e15, end_of_year(2017))
+        db.insertSiteQuota(poznan_id, 5.5e15, end_of_year(2018))
+
+
+def main():
+    '''the main function scanning all locations and gathering the results'''
 
-        for dir_id in [x[0] for x in db.rootDirectories()]:
-            db.updateDirectoryLastVisitTime(dir_id, datetime.datetime.utcnow() - datetime.timedelta(days=1000))
+    from optparse import OptionParser
+    from lofar.common import dbcredentials
+    from lofar.messaging import setQpidLogLevel
+    from lofar.lta.ltastorageoverview.ingesteventhandler import IngestEventHandler
+    from lofar.lta.ltastorageoverview.ingesteventhandler import DEFAULT_BROKER
+    from lofar.lta.ltastorageoverview.ingesteventhandler import DEFAULT_INGEST_NOTIFICATION_QUEUE
+    from lofar.lta.ltastorageoverview.ingesteventhandler import DEFAULT_INGEST_NOTIFICATION_SUBJECTS
+
+    # Check the invocation arguments
+    parser = OptionParser("%prog [options]", description='runs the lta scraper and stores results in the specified database.')
+    parser.add_option('-j', '--parallel', dest='parallel', type='int', default=8, help='number of parallel srmls jobs to run, default: %default')
+
+    parser.add_option('-q', '--broker', dest='broker', type='string', default=DEFAULT_BROKER,
+                      help='Address of the qpid broker, default: %default')
+    parser.add_option('--ingest_notification_busname', dest='ingest_notification_busname', type='string',
+                      default=DEFAULT_INGEST_NOTIFICATION_QUEUE,
+                      help='Name of the notification bus exchange on the qpid broker on which the ingest notifications are published, default: %default')
+    parser.add_option('--ingest_notification_subjects', dest='ingest_notification_subjects', type='string',
+                      default=DEFAULT_INGEST_NOTIFICATION_SUBJECTS,
+                      help='Subject(s) to listen for on the ingest notification bus exchange on the qpid broker, default: %default')
+
+    parser.add_option('-V', '--verbose', dest='verbose', action='store_true', help='verbose logging')
+    parser.add_option('-Q', '--log-queries', dest='log_queries', action='store_true', help='log all postgres queries')
+    parser.add_option_group(dbcredentials.options_group(parser))
+    parser.set_defaults(dbcredentials="LTASO")
+    (options, args) = parser.parse_args()
+
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.DEBUG if options.verbose else logging.INFO)
+    setQpidLogLevel(logging.INFO)
+    options.parallel = max(1, min(8*multiprocessing.cpu_count(), options.parallel))
+    logger.info("Using maximum number of parallel srmls jobs: %d" % options.parallel)
+
+    dbcreds = dbcredentials.parse_options(options)
+    logger.info("Using dbcreds: %s" % dbcreds.stringWithHiddenPassword())
+
+    db = store.LTAStorageDb(dbcreds, options.log_queries)
+    populateDbWithLTASitesAndRootDirs(db)
 
     # for each site we want one or more ResultGetterThreads
     # so make a dict with a list per site based on the locations
-    getters = dict([(site[1],[]) for site in db.sites()])
+    getters = dict([(site['name'],[]) for site in db.sites()])
 
     # some helper functions
     def numLocationsInQueues():
         '''returns the total number of locations in the queues'''
-        return db.numDirectoriesNotVisitedSince(datetime.datetime.utcnow() - datetime.timedelta(days=1))
+        return db.numDirectoriesNotVisitedSince(datetime.datetime.utcnow() - VISIT_INTERVAL)
 
     def totalNumGetters():
         '''returns the total number of parallel running ResultGetterThreads'''
         return sum([len(v) for v in getters.values()])
 
-    # only enter main loop if there is anything to process
-    if numLocationsInQueues() > 0:
+    def cleanupFinishedGetters():
+        # get rid of old finished ResultGetterThreads
+        finishedGetters = dict([(site_name, [getter for getter in getterList if not getter.isAlive()]) for site_name, getterList in getters.items()])
+        for site_name,finishedGetterList in finishedGetters.items():
+            for finishedGetter in finishedGetterList:
+                getters[site_name].remove(finishedGetter)
 
-        # the main loop
-        # loop over the locations and spawn ResultGetterThreads to get the results parallel
-        # use load balancing over the different sites and with respect to queue lengths
-        # do not overload this host system
-        while numLocationsInQueues() > 0 or totalNumGetters() > 0:
 
-            # get rid of old finished ResultGetterThreads
-            finishedGetters = dict([(site_name, [getter for getter in getterList if not getter.isAlive()]) for site_name, getterList in getters.items()])
-            for site_name,finishedGetterList in finishedGetters.items():
-                for finishedGetter in finishedGetterList:
-                    getters[site_name].remove(finishedGetter)
+    # the main loop
+    # loop over the locations and spawn ResultGetterThreads to get the results parallel
+    # use load balancing over the different sites and with respect to queue lengths
+    # do not overload this host system
+    with IngestEventHandler(dbcreds=dbcreds, busname=options.ingest_notification_busname,
+                            subjects=options.ingest_notification_subjects, broker=options.broker):
+        while True:
+
+            cleanupFinishedGetters()
 
             # spawn new ResultGetterThreads
             # do not overload this host system
-            while numLocationsInQueues() > 0 and (totalNumGetters() <= 4 or
-                                                  (os.getloadavg()[0] < 3*multiprocessing.cpu_count() and
-                                                  totalNumGetters() < 2.5*multiprocessing.cpu_count())):
-
-                with lock:
-                    sitesStats = db.visitStats(datetime.datetime.utcnow() - datetime.timedelta(days=1))
+            while (numLocationsInQueues() > 0 and
+                   totalNumGetters() < options.parallel and
+                   os.getloadavg()[0] < 4*multiprocessing.cpu_count()):
+                sitesStats = db.visitStats(datetime.datetime.utcnow() - VISIT_INTERVAL)
 
                 for site_name, site_stats in sitesStats.items():
                     numGetters = len(getters[site_name])
@@ -367,9 +527,9 @@ def main(argv):
                     site_stats['# get'] = numGetters
                     site_stats['weight'] = weight
 
-                totalWeight = sum([site_stats['weight'] for site_stats in sitesStats.values()])
+                totalWeight = max(1.0, sum([site_stats['weight'] for site_stats in sitesStats.values()]))
 
-                #logger.debug("siteStats:\n%s" % str('\n'.join([str((k, v)) for k, v in sitesStats.items()])))
+                logger.debug("siteStats:\n%s" % str('\n'.join([str((k, v)) for k, v in sitesStats.items()])))
 
                 # now pick a random site using the weights
                 chosen_site_name = None
@@ -387,24 +547,29 @@ def main(argv):
                     break
 
                 chosen_dir_id = sitesStats[chosen_site_name]['least_recent_visited_dir_id']
+                db.updateDirectoryLastVisitTime(chosen_dir_id, datetime.datetime.utcnow())
+
+                logger.debug("chosen_site_name: %s chosen_dir_id: %s", chosen_site_name, chosen_dir_id)
 
                 # make and start a new ResultGetterThread the location deque of the chosen site
-                newGetter = ResultGetterThread(db, chosen_dir_id)
+                newGetter = ResultGetterThread(dbcreds, chosen_dir_id, options.log_queries)
                 newGetter.start()
                 getters[chosen_site_name].append(newGetter)
 
-                logger.info('numLocationsInQueues=%d totalNumGetters=%d' % (numLocationsInQueues(), totalNumGetters()))
+                cleanupFinishedGetters()
 
-                # small sleep between starting multiple getters
-                time.sleep(0.25)
+                logger.info('numLocationsInQueues=%d totalNumGetters=%d siteQueueLengths: %s load_5min: %.1f' % (numLocationsInQueues(),
+                                                                                                                 totalNumGetters(),
+                                                                                                                 ' '.join(['%s:%d' % (name, stats['queue_length']) for name, stats in sitesStats.items()]),
+                                                                                                                 os.getloadavg()[0]))
 
             # sleep before main loop next iteration
             # to wait for some results
-            # and some getters to finis
-            time.sleep(1)
+            # and some getters to finish
+            time.sleep(5 if numLocationsInQueues() <= options.parallel else 0.25)
 
         # all locations were processed
 
 if __name__ == "__main__":
-    main(sys.argv[1:])
+    main()
 
diff --git a/LTA/ltastorageoverview/lib/store.py b/LTA/ltastorageoverview/lib/store.py
index 7c53ef89fdc..ea1c7ee88bb 100644
--- a/LTA/ltastorageoverview/lib/store.py
+++ b/LTA/ltastorageoverview/lib/store.py
@@ -22,393 +22,525 @@
 # TODO: add comment to methods
 # TODO: reuse connection in methods (take care of exceptions closing the connection)
 # TODO: use generators and yield for faster and more memory efficient processing of results.
-# TODO: use other database? MariaDB? instead of sqlite?
 
-import os
-import os.path
-import sqlite3
 import datetime
+import logging
+from lofar.common import dbcredentials
+from lofar.common.postgres import PostgresDatabaseConnection
+from lofar.common.postgres import FETCH_NONE,FETCH_ONE,FETCH_ALL
+
+logger = logging.getLogger(__name__)
 
 class EntryNotFoundException(Exception):
     pass
 
-class LTAStorageDb:
-    def __init__(self, db_filename, removeIfExisting = False):
-        self.db_filename = db_filename
-
-        if os.path.exists(self.db_filename) and removeIfExisting:
-            os.remove(self.db_filename)
 
-        if not os.path.exists(self.db_filename):
-            with sqlite3.connect(self.db_filename) as conn:
-                create_script_path = os.path.join(os.path.dirname(__file__), 'create_db_ltastorageoverview.sql')
+class LTAStorageDb(PostgresDatabaseConnection):
+    """LTAStorageDb is a python API to the ltaso postgres database."""
 
-                with open(create_script_path) as script:
-                    conn.executescript(script.read())
-
-                # save created tables and triggers
-                conn.commit()
+    def __init__(self, dbcreds=None, log_queries=True):
+        """Create an instance of a LTAStorageDb
+        :param dbcredentials.DBCredentials dbcreds: the credentials for logging into the db
+        :param bool log_queries: do or don't log all queries
+        """
+        super(LTAStorageDb, self).__init__(host=dbcreds.host,
+                                           database=dbcreds.database,
+                                           username=dbcreds.user,
+                                           password=dbcreds.password,
+                                           port=dbcreds.port,
+                                           log_queries=log_queries)
 
     def insertSite(self, siteName, srmurl):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
-
-            site_row = cursor.execute('select id from storage_site where url = ?', [srmurl]).fetchone()
-            site_id = site_row[0] if site_row else cursor.execute('insert into storage_site (name, url) values (?, ?)', (siteName, srmurl)).lastrowid
-
-            conn.commit()
-
-            return site_id
+        """insert a site into the database
+        :param string siteName: the name of the site
+        :param string srmurl: the srm url to that site
+        :return int: the new id of the inserted site
+        """
+        site_id = self.executeQuery('insert into lta.site (name, url) values (%s, %s) returning id;', (siteName, srmurl), fetch=FETCH_ONE)['id']
+        self.commit()
+        return site_id
+
+    def insertSiteIfNotExists(self, siteName, srmurl):
+        """insert a site into the database and return the id.
+        If the site already exists, then the id of that site is just returned.
+        :param string siteName: the name of the site
+        :param string srmurl: the srm url to that site
+        :return int: the id of the site (either already existing or newly inserted)
+        """
+        site = self.siteByName(siteName)
+
+        if site:
+            return site['id']
+
+        return self.insertSite(siteName, srmurl)
 
     def insertRootDirectory(self, siteName, rootDirectory):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
-
-            site_row = cursor.execute('select id from storage_site where name = ?', [siteName]).fetchone()
-
-            if not site_row:
-                raise EntryNotFoundException()
-
-            site_id = site_row[0]
-
-            dir_id = cursor.execute('insert into directory (name) values (?)', [rootDirectory]).lastrowid
-
-            cursor.execute('insert into storage_site_root (storage_site_id, directory_id) values (?, ?)', (site_id, dir_id)).lastrowid
-
-            conn.commit()
-
-            return dir_id
-
-    def insertRootLocation(self, siteName, srmurl, rootDirectory):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
-
-            site_row = cursor.execute('select id from storage_site where url = ?', [srmurl]).fetchone()
-            site_id = site_row[0] if site_row else cursor.execute('insert into storage_site (name, url) values (?, ?)', (siteName, srmurl)).lastrowid
-
-            dir_id = cursor.execute('insert into directory (name) values (?)', [rootDirectory]).lastrowid
-
-            cursor.execute('insert into storage_site_root (storage_site_id, directory_id) values (?, ?)', (site_id, dir_id)).lastrowid
-
-            conn.commit()
-
-            return dir_id
-
-    def insertSubDirectory(self, parent_directory_id, sub_directory):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
-
-            dir_id = cursor.execute('insert into directory (name, parent_directory_id) values (?, ?)', (sub_directory, parent_directory_id)).lastrowid
-
-            conn.commit()
-
-            return dir_id
-
-    def insertSubDirectories(self, subDirectoryNames, parentDirId, directoryLastVisitTime = None):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
-
-            cursor.executemany('insert into directory (name, parent_directory_id) values (?, ?)',
-                             [(name, parentDirId) for name in subDirectoryNames])
-
-            if directoryLastVisitTime:
-                subDirIds = cursor.execute('''select id from directory
-                    where parent_directory_id = %s
-                    and name in (%s)''' % (parentDirId, ', '.join(["'%s'" % x for x in subDirectoryNames]))).fetchall()
+        """
+        Insert a root directory for a site. Each site has at least one root directory (with no parent).
+        For all non-root directories, use insertSubDirectory.
+        Beware: Uniqueness of the root dir for a site is not enforced.
+        :param string siteName: the name of the site (should already be in the database)
+        :param string rootDirectory: the full path of the directory
+        :return integer: the new id of the inserted root directory
+        """
+        site = self.siteByName(siteName)
+
+        if not site:
+            raise EntryNotFoundException()
+
+        site_id = site['id']
+
+        dir_id = self.executeQuery('insert into lta.directory (name) values (%s) returning id;', [rootDirectory], fetch=FETCH_ONE)['id']
+
+        self.executeQuery('insert into lta.site_root_dir (site_id, root_dir_id) values (%s, %s);', (site_id, dir_id))
+        self.commit()
+        return dir_id
+
+    def insertSubDirectory(self, sub_directory_path, parent_dir_id):
+        """
+        Insert a sub directory which is a child of the directory with parent_dir_id
+        :param string sub_directory_path: the full path of the subdirectory
+        :param int parent_dir_id: the id of this subdirectory's parent
+        :return integer: the new id of the inserted subdirectory
+        """
+        result = self.executeQuery('insert into lta.directory (name, parent_dir_id) values (%s, %s) returning id;', (sub_directory_path, parent_dir_id), fetch=FETCH_ONE)
+
+        if result and 'id' in result:
+            self.commit()
+            return result['id']
+
+        return None
+
+    def insertSubDirectories(self, subDirectoryPaths, parentDirId, directoryLastVisitTime = None):
+        """
+        Insert multiple sub directories which are all a child of the directory with parent_dir_id
+        :param [string] subDirectoryPaths: a list of full paths of the subdirectories
+        :param int parentDirId: the id of these subdirectories' parent
+        :param datetime directoryLastVisitTime: optional; when given, also register this timestamp as the last visit time for the new subdirectories
+        :return [integer]: a list of new ids of the inserted subdirectories
+        """
+        with self._connection.cursor() as cursor:
+            insert_values = ','.join(cursor.mogrify('(%s, %s)', (name, parentDirId)) for name in subDirectoryPaths)
+
+        query = '''insert into lta.directory (name, parent_dir_id)
+        VALUES {values}
+        RETURNING id;'''.format(values=insert_values)
+
+        subDirIds = [x['id'] for x in self.executeQuery(query, fetch=FETCH_ALL)]
+
+        if [x for x in subDirIds if x < 0]:
+            logger.error("One or more subDirectoryPaths could not be inserted. Rolling back.")
+            self.rollback()
+            return None
+
+        if directoryLastVisitTime:
+            with self._connection.cursor() as cursor:
+                insert_values = ','.join(cursor.mogrify('(%s, %s)', (directoryLastVisitTime, id)) for id in subDirIds)
+
+            query = '''insert into scraper.last_directory_visit (visit_date, dir_id)
+            VALUES {values}
+            RETURNING id;'''.format(values=insert_values)
+
+            ldvIds = [x['id'] for x in self.executeQuery(query, fetch=FETCH_ALL)]
+
+            if [x for x in ldvIds if x < 0]:
+                logger.error("One or more scraper.last_directory_visit's could not be inserted. Rolling back.")
+                self.rollback()
+                return None
+
+        self.commit()
+        return subDirIds
+
+    def insert_missing_directory_tree_if_needed(self, dir_path, site_id):
+        """Insert all directories in the dir_path tree which are not in the database yet.
+        example: root_dir         = '/path/to/root'
+                 known_subdir_1   = '/path/to/root/sub1'
+                 known_subdir_2   = '/path/to/root/sub1/sub2'
+                 (input) dir_path = '/path/to/root/sub1/sub2/sub3/sub4'
+                 would insert '/path/to/root/sub1/sub2/sub3' under known_subdir_2 and
+                              '/path/to/root/sub1/sub2/sub3/sub4' under the new sub3 dir.
+        :param str dir_path: a full path to a (sub)directory
+        :param int site_id: the id of the site for which you want to insert the dir tree.
+        :return: a dict of the inserted directories with their new dir id's.
+        """
+        # for this site (which might have multiple root dirs), find the root_dir under which this dir_path belongs
+        parent_root_dir = self.get_root_dir_for_dir_path(dir_path, site_id)
+
+        if parent_root_dir is None:
+            raise LookupError("Could not find parent root dir for site_id=%d for dir_path=%s" % (site_id, dir_path))
+
+        # find the lowest known dir in the database
+        # and get the list of missing subdirs for dir_path, which are not in the database yet
+        missing_child_dirs, lowest_known_db_dir = self._get_lowest_known_directory(dir_path, parent_root_dir)
+
+        # now we should have a known parent dir from the db, and we know which child dirs are missing.
+        # append the missing children in reverse order
+        # (from just under the known parent, down to the lowest missing subdir).
+        result = {}
+        missing_childs_parent_dir_id = lowest_known_db_dir['dir_id']
+        for missing_child_dir in reversed(missing_child_dirs):
+            missing_childs_parent_dir_id = self.insertSubDirectory(missing_child_dir, missing_childs_parent_dir_id)
+            result[missing_child_dir] = missing_childs_parent_dir_id
+
+        # return the dict of inserted child dirs with their new dir id's
+        return result
+
+    def deleteDirectory(self, dir_id, commit=True):
+        """
+        delete the directory with id dir_id. Cascades and also deletes all subdirs, files and stats under this directory.
+        :param int dir_id: the id of the directory to be deleted
+        :param bool commit: optional, commit directly when True
+        """
+        self.executeQuery('DELETE FROM lta.directory where id = %s;', (dir_id,), fetch=FETCH_NONE)
+
+        if commit:
+            self.commit()
+
+
+    def insertFileInfo(self, name, size, creation_date, parent_dir_id, commit=True):
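+        """insert info for a single file (name, size, creation_date) as a child of the directory with parent_dir_id; only the basename of name is stored"""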
+        fileinfo_id = self.executeQuery('insert into lta.fileinfo (name, size, creation_date, dir_id) values (%s, %s, %s, %s) returning id;',
+                                        (name.split('/')[-1], size, creation_date, parent_dir_id))
+
+        if commit:
+            self.commit()
+        return fileinfo_id
 
-                subDirIds = [x[0] for x in subDirIds]
-
-                for subDirId in subDirIds:
-                    cursor.execute('''insert into scraper_last_directory_visit (visit_date, directory_id)
-                    values (?, ?)''', (directoryLastVisitTime, subDirId))
-
-            conn.commit()
-
-    def insertFileInfo(self, name, size, creation_date, parent_directory_id):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
-
-            fileinfo_id = cursor.execute('insert into fileinfo (name, size, creation_date, directory_id) values (?, ?, ?, ?)',
-                                         (name.split('/')[-1], size, creation_date, parent_directory_id))
-
-            conn.commit()
+    def insertFileInfos(self, file_infos):
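+        """bulk-insert a list of (name, size, creation_date, dir_id) tuples into lta.fileinfo in one statement and return the new ids"""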
+        with self._connection.cursor() as cursor:
+            insert_values = [cursor.mogrify('(%s, %s, %s, %s)', (f[0].split('/')[-1], f[1], f[2], f[3])) for f in file_infos]
 
-            return fileinfo_id
+        insert_values = ','.join([x for x in insert_values])
 
-    def insertFileInfos(self, file_infos):
-        with sqlite3.connect(self.db_filename) as conn:
-            conn.executemany('insert into fileinfo (name, size, creation_date, directory_id) values (?, ?, ?, ?)',
-                             [(f[0].split('/')[-1], f[1], f[2], f[3]) for f in file_infos])
+        query = '''insert into lta.fileinfo (name, size, creation_date, dir_id)
+        VALUES {values}
+        RETURNING id;'''.format(values=insert_values)
 
-            conn.commit()
+        ids = [x['id'] for x in self.executeQuery(query, fetch=FETCH_ALL)]
 
-    def insertLocationResult(self, result):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
+        if [x for x in ids if x < 0]:
+            logger.error("One or more file_infos could not be inserted. Rolling back.")
+            self.rollback()
+            return None
 
-            dir_row = cursor.execute('''select directory.id from storage_site
-                                                                    join storage_site_root on storage_site_root.storage_site_id = storage_site.id
-                                                                    join directory on directory.id = storage_site_root.directory_id
-                                                                    where storage_site.url = ?
-                                                                    and directory.name = ?
-                                                                    ''', (result.location.srmurl, result.location.directory)).fetchone()
+        self.commit()
+        return ids
 
-            if dir_row:
-                dir_id = dir_row[0]
-                cursor.executemany('insert into directory (name, parent_directory_id) values (?, ?)',
-                                                        [(subDir.directory, dir_id) for subDir in result.subDirectories])
+    def updateFileInfoSize(self, id, size, commit=True):
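+        """update the size of the lta.fileinfo entry with the given id"""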
+        fileinfo_id = self.executeQuery('''update lta.fileinfo set size=%s where id=%s;''', (size, id))
 
-                cursor.executemany('insert into fileinfo (name, size, creation_date, directory_id) values (?, ?, ?, ?)',
-                                                        [(file.filename.split('/')[-1], file.size, datetime.datetime.utcnow(), dir_id) for file in    result.files])
+        if commit:
+            self.commit()
 
-                conn.commit()
+    def deleteFileInfoFromDirectory(self, file_name, dir_id, commit=True):
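+        """delete the lta.fileinfo entry with the given file_name from the directory with dir_id"""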
+        self.executeQuery('DELETE FROM lta.fileinfo where dir_id = %s and name = %s;', (dir_id,file_name), fetch=FETCH_NONE)
 
-    def updateDirectoryLastVisitTime(self, directory_id, timestamp):
-        with sqlite3.connect(self.db_filename) as conn:
-            cursor = conn.cursor()
+        if commit:
+            self.commit()
 
-            updated = cursor.execute('''update or ignore scraper_last_directory_visit
-                set visit_date=?
-                where directory_id = ?''', (timestamp, directory_id)).rowcount
+    def updateDirectoryLastVisitTime(self, dir_id, timestamp, commit=True):
+        self.executeQuery('''update scraper.last_directory_visit
+                             set visit_date=%s
+                             where dir_id = %s;''', (timestamp, dir_id), fetch=FETCH_NONE)
 
-            if not updated:
-                cursor.execute('''insert into scraper_last_directory_visit
-                (visit_date, directory_id)
-                values (?, ?)''', (timestamp, directory_id))
+        if commit:
+            self.commit()
 
-            conn.commit()
+    def directoryLastVisitTime(self, dir_id):
+        """
+        get the timestamp when the directory was last visited.
+        :param int dir_id: the id of the directory
+        :return datetime: the timestamp when the directory was last visited.
+        """
+        result = self.executeQuery('''select visit_date FROM scraper.last_directory_visit
+                                      where dir_id = %s;''', (dir_id,), fetch=FETCH_ONE)
+        if result is None:
+            return None
+        return result.get('visit_date')
 
     def sites(self):
         '''returns list of tuples (id, name, url) of all sites'''
-        with sqlite3.connect(self.db_filename) as conn:
-            return conn.execute('''SELECT id, name, url FROM storage_site''').fetchall()
+        return self.executeQuery('SELECT id, name, url FROM lta.site;', fetch=FETCH_ALL)
 
     def site(self, site_id):
         '''returns tuple (id, name, url) for site with id=site_id'''
-        with sqlite3.connect(self.db_filename) as conn:
-            return conn.execute('''SELECT id, name, url FROM storage_site where id = ?''', [site_id]).fetchone()
-
-    def directory(self, directory_id):
-        '''returns directory tuple (id, name, site_id, site_name) for the given directory_id'''
-        with sqlite3.connect(self.db_filename) as conn:
-            return conn.execute('''SELECT dir.id, dir.name, site.id, site.name
-                FROM storage_site_root
-                join storage_site site on site.id = storage_site_root.storage_site_id
-                join directory_closure dc on dc.ancestor_id = storage_site_root.directory_id
-                join directory dir on dir.id = dc.descendant_id
-                where dc.descendant_id = ?
-                ''', [directory_id]).fetchone()
-
-    def directory_id(self, site_id, directory_name):
-        '''returns directory id for the given site_id, directory_name'''
-        with sqlite3.connect(self.db_filename) as conn:
-            result = conn.execute('''SELECT dir.id
-                FROM storage_site_root
-                join directory_closure dc on dc.ancestor_id = storage_site_root.directory_id
-                join directory dir on dir.id = dc.descendant_id
-                where storage_site_root.storage_site_id = ?
-                and dir.name = ?
-                ''', [site_id, directory_name]).fetchone()
-
-            if result:
-                return result[0]
-
-            return -1
+        return self.executeQuery('SELECT id, name, url FROM lta.site where id = %s;', [site_id], FETCH_ONE)
+
+    def siteByName(self, site_name):
+        '''returns tuple (id, name, url) for site with name=site_name'''
+        return self.executeQuery('SELECT id, name, url FROM lta.site where name = %s;', [site_name], FETCH_ONE)
+
+    def siteQuota(self, site_id):
+        '''returns the list of quota entries for the given site_id'''
+        return self.executeQuery('SELECT * FROM lta.site_quota where site_id = %s;', [site_id], fetch=FETCH_ALL)
+
+    def insertSiteQuota(self, site_id, quota, valid_until_date, commit=True):
+        """
+        insert the quota for a given site with a date until which this quota is valid.
+        :param int site_id: the id of the site for which you want to set the quota.
+        :param int quota: the quota in number of bytes.
+        :param datetime valid_until_date: the timestamp until which this given quota is valid.
+        :param bool commit: do/don't commit immediately.
+        :return: the id of the new quota
+        """
+        id =  self.executeQuery('INSERT INTO lta.site_quota(site_id, quota, valid_until_date) values (%s, %s, %s) RETURNING id;',
+                                (site_id, quota, valid_until_date))
+        if commit:
+            self.commit()
+        return id
+
+    def directory(self, dir_id):
+        '''returns lta.directory (id, name, site_id, site_name) for the given dir_id'''
+        return self.executeQuery('''SELECT dir.id as dir_id, dir.name as dir_name, site.id as site_id, site.name as site_name
+            FROM lta.site_root_dir
+            join lta.site site on site.id = site_root_dir.site_id
+            join lta.directory_closure dc on dc.ancestor_id = site_root_dir.root_dir_id
+            join lta.directory dir on dir.id = dc.descendant_id
+            where dc.descendant_id = %s;
+            ''', [dir_id], fetch=FETCH_ONE)
+
+    def directoryByName(self, dir_name, site_id=None):
+        """
+        returns lta.directory (id, name, site_id, site_name) for the given dir_name
+        :param string dir_name: the directory to search for
+        :param int site_id: optional site_id to limit the search for this given site.
+        :return:
+        """
+        query = '''SELECT dir.id as dir_id, dir.name as dir_name, site.id as site_id, site.name as site_name
+            FROM lta.site_root_dir
+            join lta.site site on site.id = site_root_dir.site_id
+            join lta.directory_closure dc on dc.ancestor_id = site_root_dir.root_dir_id
+            join lta.directory dir on dir.id = dc.descendant_id
+            where dir.name = %s'''
+        args = [dir_name]
+        if site_id is not None:
+            query += " and site.id = %s"
+            args.append(site_id)
+
+        return self.executeQuery(query, args, fetch=FETCH_ONE)
+
+    def dir_id(self, site_id, directory_name):
+        '''returns lta.directory id for the given site_id, directory_name'''
+        result = self.executeQuery('''SELECT dir.id
+            FROM lta.site_root_dir
+            join lta.directory_closure dc on dc.ancestor_id = site_root_dir.root_dir_id
+            join lta.directory dir on dir.id = dc.descendant_id
+            where site_root_dir.site_id = %s
+            and dir.name = %s;''', [site_id, directory_name], fetch=FETCH_ONE)
+
+        if result:
+            return result['id']
+
+        return -1
 
     def rootDirectories(self):
-        '''returns list of all root directories (id, name, site_id, site_name) for all sites'''
-        with sqlite3.connect(self.db_filename) as conn:
-            return conn.execute('''
-                SELECT *
-                FROM root_directories
-                ''').fetchall()
+        '''returns list of all root directories for all sites'''
+        return self.executeQuery('''SELECT * FROM lta.site_root_directory;''', fetch=FETCH_ALL)
 
     def rootDirectoriesForSite(self, site_id):
         '''returns list of all root directories (id, name) for given site_id'''
-        with sqlite3.connect(self.db_filename) as conn:
-            return conn.execute('''SELECT dir_id, dir_name
-                FROM root_directories
-                where site_id = ?''', [site_id]).fetchall()
-
-    def subDirectories(self, directory_id, depth = 1, includeSelf=False):
-        '''returns list of all sub directories up to the given depth (id, name, site_id, site_name, depth) for the given directory_id'''
-        with sqlite3.connect(self.db_filename) as conn:
-            return conn.execute('''
-                SELECT dir.id, dir.name, dir.parent_directory_id, directory_closure.depth FROM directory_closure
-                join directory dir on dir.id = directory_closure.descendant_id
-                where ancestor_id = ? and depth <= ? and depth > ?
-                order by depth asc
-                ''', (directory_id, depth, -1 if includeSelf else 0)).fetchall()
-
-    def parentDirectories(self, directory_id):
-        with sqlite3.connect(self.db_filename) as conn:
-            return conn.execute('''
-                SELECT dir.* FROM directory_closure dc
-                join directory dir on dir.id = dc.ancestor_id
-                where dc.descendant_id = ? and depth > 0
-                order by depth desc
-                ''', [directory_id]).fetchall()
+        return self.executeQuery('''SELECT * FROM lta.site_root_directory where site_id = %s;''', [site_id], fetch=FETCH_ALL)
+
+    def rootDirectory(self, root_dir_id):
+        '''returns the root directory for the given root_dir_id'''
+        return self.executeQuery('''SELECT * FROM lta.site_root_directory WHERE root_dir_id = %s;''',
+                                 [root_dir_id], fetch=FETCH_ONE)
+
+    def get_root_dir_for_dir_path(self, dir_path, site_id):
+        """
+        find the root_dir at the given site under which this dir_path resides
+        :param str dir_path: a full path to a (sub)directory
+        :param int site_id: the id of the site which contains the root dir under which the dir_path resides.
+        :return: the dict for the root directory under which the given dir_path resides.
+        """
+        root_dirs = self.rootDirectoriesForSite(site_id)
+        return next((rd for rd in root_dirs if dir_path.startswith(rd['dir_name'])), None)
+
+    def subDirectories(self, dir_id, depth = 1, includeSelf=False):
+        '''returns list of all sub directories up to the given depth (id, name, parent_dir_id, depth) for the given dir_id'''
+        if depth == 1 and not includeSelf:
+            return self.executeQuery('''
+                SELECT dir.id as id, dir.name as name, dir.parent_dir_id as parent_dir_id
+                FROM lta.directory dir
+                where dir.parent_dir_id = %s;
+                ''', (dir_id, ), fetch=FETCH_ALL)
+        return self.executeQuery('''
+            SELECT dir.id as id, dir.name as name, dir.parent_dir_id as parent_dir_id, lta.directory_closure.depth as depth
+            FROM lta.directory_closure
+            join lta.directory dir on dir.id = lta.directory_closure.descendant_id
+            where ancestor_id = %s and depth <= %s and depth > %s
+            order by depth asc;
+            ''', (dir_id, depth, -1 if includeSelf else 0), fetch=FETCH_ALL)
+
+    def parentDirectories(self, dir_id):
+        return self.executeQuery('''
+            SELECT dir.* FROM lta.directory_closure dc
+            join lta.directory dir on dir.id = dc.ancestor_id
+            where dc.descendant_id = %s and depth > 0
+            order by depth desc;
+            ''', [dir_id], fetch=FETCH_ALL)
+
+    def _get_lowest_known_directory(self, dir_path, parent_root_dir):
+        """
+        given the dir_path, try to find the lowest known dir which is a subdir of the given parent_root_dir
+        example: root_dir         = '/path/to/root'
+                 known_subdir_1   = '/path/to/root/sub1'
+                 known_subdir_2   = '/path/to/root/sub1/sub2'
+                 (input) dir_path = '/path/to/root/sub1/sub2/sub3/sub4'
+                 would return (['/path/to/root/sub1/sub2/sub3/sub4', '/path/to/root/sub1/sub2/sub3'], <dict_for_known_subdir_2>)
+        :param str dir_path: a full directory path (which should start with the same path as the parent root dir)
+        :param dict parent_root_dir: a self.rootDirectory() result dict for the supposed parent root dir
+        :return: a tuple (list, dict) where the list is the list of missing full subdir paths, and the dict is the
+                 lowest known subdir, or None if not found.
+        """
+        site_id = parent_root_dir['site_id']
+        missing_child_dirs = []
+
+        # search for dir_path in the database... it might already be known
+        climbing_dir_path = dir_path
+        db_dir = self.directoryByName(climbing_dir_path, site_id)
+        # if climbing_dir_path is not known, then walk up one dir, and repeat until at top.
+        while db_dir is None and parent_root_dir['dir_name'] != climbing_dir_path:
+            # climb up one dir, add lowest subdir as missing child
+            path_parts = climbing_dir_path.split('/')
+            missing_child_dirs.append(climbing_dir_path)
+            climbing_dir_path = '/'.join(path_parts[:-1])
+            db_dir = self.directoryByName(climbing_dir_path, site_id)
+
+        # return the list of missing_child_dirs (which might be empty)
+        # and the lowest found db_dir (which might be None)
+        return missing_child_dirs, db_dir
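To make the docstring example above concrete, a small sketch of the climbing behaviour, assuming the root dir and the sub1/sub2 dirs from the example are already known in the database and `db`/`root_dir_id` are hypothetical:

```python
# hypothetical walk-through of _get_lowest_known_directory (names from the docstring example)
root_dir = db.rootDirectory(root_dir_id)    # the known root dir '/path/to/root'
missing, lowest_known = db._get_lowest_known_directory('/path/to/root/sub1/sub2/sub3/sub4',
                                                       root_dir)
# missing      == ['/path/to/root/sub1/sub2/sub3/sub4', '/path/to/root/sub1/sub2/sub3']
# lowest_known is the dict for the already known dir '/path/to/root/sub1/sub2'
```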
 
     def _date_bounded(self, query, args, table_column, from_date=None, to_date=None):
         result_query = query
         result_args = args
         if from_date:
-            result_query += ' and %s >= ?' % table_column
+            result_query += ' and {column} >= %s'.format(column=table_column)
             result_args += (from_date,)
 
         if to_date:
-            result_query += ' and %s  <= ?' % table_column
+            result_query += ' and {column} <= %s'.format(column=table_column)
             result_args += (to_date,)
 
         return result_query, result_args
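A short sketch of how `_date_bounded` grows a query and its argument tuple; `db` stands for an open LTAStorageDb instance, and the dir id and dates are made up:

```python
from datetime import datetime

query = 'SELECT * FROM lta.fileinfo where dir_id = %s'
args = (42,)                                    # hypothetical dir id
query, args = db._date_bounded(query, args, 'fileinfo.creation_date',
                               from_date=datetime(2017, 1, 1),
                               to_date=datetime(2018, 1, 1))
# query now ends in '... and fileinfo.creation_date >= %s and fileinfo.creation_date <= %s'
# args is now (42, datetime(2017, 1, 1), datetime(2018, 1, 1))
```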
 
-    def filesInDirectory(self, directory_id, from_date=None, to_date=None):
-        with sqlite3.connect(self.db_filename) as conn:
-            query = '''SELECT * FROM fileinfo
-            where directory_id = ?'''
+    def filesInDirectory(self, dir_id, from_date=None, to_date=None):
+        query = '''SELECT * FROM lta.fileinfo
+        where dir_id = %s'''
 
-            args = (directory_id,)
+        args = (dir_id,)
 
-            query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date)
+        query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date)
 
-            return conn.execute(query, args).fetchall()
+        return self.executeQuery(query, args, fetch=FETCH_ALL)
 
-    def numFilesInDirectory(self, directory_id, from_date=None, to_date=None):
-        with sqlite3.connect(self.db_filename) as conn:
-            query = '''SELECT count(id) FROM fileinfo
-            where directory_id = ?'''
+    def numFilesInDirectory(self, dir_id, from_date=None, to_date=None):
+        query = '''SELECT count(id) FROM lta.fileinfo
+        where dir_id = %s'''
 
-            args = (directory_id,)
+        args = (dir_id,)
 
-            query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date)
+        query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date)
 
-            result = conn.execute(query, args).fetchone()
+        result = self.executeQuery(query, args, fetch=FETCH_ONE)
 
-            if result:
-                return result[0]
+        if result['count']:
+            return result['count']
 
-            return 0
+        return 0
 
-    def filesInTree(self, base_directory_id, from_date=None, to_date=None):
-        with sqlite3.connect(self.db_filename) as conn:
-            query = '''SELECT dir.id, dir.name, dc.depth, fileinfo.id, fileinfo.name, fileinfo.size, fileinfo.creation_date FROM directory_closure dc
-            join directory dir on dir.id = dc.descendant_id
-            join fileinfo on fileinfo.directory_id = dc.descendant_id
-            where dc.ancestor_id = ?'''
+    def directoryTreeStats(self, dir_id):
+        query = '''SELECT * FROM metainfo.stats WHERE dir_id = %s'''
+        args = (dir_id,)
 
-            args = (base_directory_id,)
+        return self.executeQuery(query, args, fetch=FETCH_ONE)
 
-            query, args = self._date_bounded(query, args, 'fileinfo.creation_date', from_date, to_date)
+    def filesInTree(self, base_dir_id, from_date=None, to_date=None):
+        query = '''SELECT dir.id as dir_id, dir.name as dir_name, dc.depth as dir_depth, fi.id as file_id, fi.name as file_name, fi.size as file_size, fi.creation_date as file_creation_date
+        FROM lta.directory_closure dc
+        JOIN lta.directory dir on dir.id = dc.descendant_id
+        JOIN lta.fileinfo fi on fi.dir_id = dc.descendant_id
+        WHERE dc.ancestor_id = %s'''
 
-            return conn.execute(query, args).fetchall()
+        args = (base_dir_id,)
 
-    def numFilesInTree(self, base_directory_id, from_date=None, to_date=None):
-        with sqlite3.connect(self.db_filename) as conn:
-            query = '''
-                SELECT sum(directory_stats.num_files) FROM directory_stats
-                join directory_closure dc on dc.descendant_id = directory_stats.directory_id
-                where ancestor_id = ?
-                '''
+        query, args = self._date_bounded(query, args, 'fi.creation_date', from_date, to_date)
 
-            args = (base_directory_id,)
+        return self.executeQuery(query, args, fetch=FETCH_ALL)
 
-            query, args = self._date_bounded(query, args, 'directory_stats.min_file_creation_date', from_date=from_date)
-            query, args = self._date_bounded(query, args, 'directory_stats.max_file_creation_date', to_date=to_date)
+    def totalFileSizeAndNumFilesInSite(self, site_id, from_date=None, to_date=None):
+        query = '''SELECT * FROM metainfo.get_site_stats(%s, %s, %s)'''
+        args = (site_id, from_date, to_date)
 
-            result = conn.execute(query, args).fetchone()
+        return self.executeQuery(query, args, fetch=FETCH_ONE)
 
-            if result[0]:
-                return result[0]
+    def totalFileSizeAndNumFilesInTree(self, base_dir_id, from_date=None, to_date=None):
+        query = '''SELECT * FROM metainfo.get_tree_stats(%s, %s, %s)'''
+        args = (base_dir_id, from_date, to_date)
 
-            return 0
+        return self.executeQuery(query, args, fetch=FETCH_ONE)
 
-    def totalFileSizeInTree(self, base_directory_id, from_date=None, to_date=None):
-        with sqlite3.connect(self.db_filename) as conn:
-            query = '''
-                SELECT sum(directory_stats.total_file_size) FROM directory_stats
-                join directory_closure dc on dc.descendant_id = directory_stats.directory_id
-                where ancestor_id = ?
-                '''
-            args = (base_directory_id,)
+    def totalFileSizeInTree(self, base_dir_id, from_date=None, to_date=None):
+        return self.totalFileSizeAndNumFilesInTree(base_dir_id, from_date, to_date)['tree_total_file_size']
 
-            query, args = self._date_bounded(query, args, 'directory_stats.min_file_creation_date', from_date=from_date)
-            query, args = self._date_bounded(query, args, 'directory_stats.max_file_creation_date', to_date=to_date)
-
-            result = conn.execute(query, args).fetchone()
-
-            if result[0]:
-                return result[0]
-            return 0
+    def numFilesInTree(self, base_dir_id, from_date=None, to_date=None):
+        return self.totalFileSizeAndNumFilesInTree(base_dir_id, from_date, to_date)['tree_num_files']
 
     def numFilesInSite(self, site_id, from_date=None, to_date=None):
-        num_files = 0L
-
-        root_dirs = self.rootDirectoriesForSite(site_id)
-
-        for root_dir in root_dirs:
-            num_files += long(self.numFilesInTree(root_dir[0], from_date, to_date))
-
-        return num_files
+        return self.totalFileSizeAndNumFilesInSite(site_id, from_date, to_date)['tree_num_files']
 
     def totalFileSizeInSite(self, site_id, from_date=None, to_date=None):
-        total_size = 0L
-
-        root_dirs = self.rootDirectoriesForSite(site_id)
-
-        for root_dir in root_dirs:
-            total_size += long(self.totalFileSizeInTree(root_dir[0], from_date, to_date))
-
-        return total_size
+        return self.totalFileSizeAndNumFilesInSite(site_id, from_date, to_date)['tree_total_file_size']
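The per-tree and per-site totals are now computed in the database (metainfo.get_tree_stats / metainfo.get_site_stats) rather than by summing per root dir in Python; a minimal usage sketch with a made-up site id:

```python
# sketch: one call returns both the total size and the number of files as a dict
totals = db.totalFileSizeAndNumFilesInSite(site_id=1)
print totals['tree_total_file_size'], totals['tree_num_files']

# the thin wrappers above just pick one of the two values
assert db.totalFileSizeInSite(1) == totals['tree_total_file_size']
```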
 
-    def datetimeRangeOfFilesInTree(self, base_directory_id = None):
-        with sqlite3.connect(self.db_filename) as conn:
-            query = '''
-                SELECT min(fileinfo.creation_date) as min_creation_date,
-                max(fileinfo.creation_date) as max_creation_date
-                FROM fileinfo
-                '''
-            args = []
+    def datetimeRangeOfFilesInTree(self, base_dir_id = None):
+        query = '''SELECT min(fileinfo.creation_date) as min_creation_date,
+                   max(fileinfo.creation_date) as max_creation_date
+                   FROM lta.fileinfo'''
+        args = None
 
-            if base_directory_id:
-                query += '''\njoin directory_closure dc on dc.descendant_id = fileinfo.directory_id
-                where ancestor_id = ?'''
-                args.append(base_directory_id)
+        if base_dir_id:
+            query += '''\njoin lta.directory_closure dc on dc.descendant_id = lta.fileinfo.dir_id
+            where ancestor_id = %s'''
+            args = [base_dir_id]
 
-            result = conn.execute(query, args).fetchone()
+        result = self.executeQuery(query, args, fetch=FETCH_ONE)
 
-            if result[0]:
-                format = '%Y-%m-%d %H:%M:%S %Z'
-                return (datetime.datetime.strptime(result[0]+' UTC', format),
-                        datetime.datetime.strptime(result[1]+' UTC', format))
+        if result:
+            return (result['min_creation_date'], result['max_creation_date'])
 
-            utcnow = datetime.datetime.utcnow()
-            return (utcnow, utcnow)
+        utcnow = datetime.datetime.utcnow()
+        return (utcnow, utcnow)
 
     def mostRecentVisitDate(self):
-        with sqlite3.connect(self.db_filename) as conn:
-            result = conn.execute('''
-                SELECT visit_date FROM scraper_last_directory_visit
-                order by visit_date desc
-                limit 1
-                ''').fetchone()
+        result = self.executeQuery('''
+            SELECT visit_date FROM scraper.last_directory_visit
+            order by visit_date desc
+            limit 1
+            ''', fetch=FETCH_ONE)
 
-            if result:
-                format = '%Y-%m-%d %H:%M:%S.%f %Z'
-                return datetime.datetime.strptime(result[0]+' UTC', format)
+        if result:
+            return result['visit_date']
 
-            return datetime.datetime(2011, 1, 1)
+        return datetime.datetime(2011, 1, 1)
 
     def numDirectoriesNotVisitedSince(self, timestamp):
-        with sqlite3.connect(self.db_filename) as conn:
-            result = conn.execute('''
-                SELECT count(directory_id) FROM scraper_last_directory_visit
-                WHERE visit_date < ?
-                ''', [timestamp]).fetchone()
+        result = self.executeQuery('''
+            SELECT count(dir_id) FROM scraper.last_directory_visit
+            WHERE visit_date < %s
+            ''', [timestamp], fetch=FETCH_ONE)
 
-            if result:
-                return result[0]
+        if result:
+            return result['count']
 
-            return 0
+        return 0
+
+    def siteQuotaUsages(self):
+        return self.executeQuery('''SELECT * FROM metainfo.site_quota_usage;''', fetch=FETCH_ALL)
+
+    def siteQuotaRootDirStats(self):
+        return self.executeQuery('''SELECT * FROM metainfo.site_quota_root_dir_stats;''', fetch=FETCH_ALL)
 
     def visitStats(self, before_timestamp = None):
         if not before_timestamp:
@@ -417,24 +549,32 @@ class LTAStorageDb:
         sites = self.sites()
         siteStats = {}
 
-        with sqlite3.connect(self.db_filename) as conn:
-
-            for site in sites:
-                site_id = site[0]
-                site_name = site[1]
-                siteStats[site_name] = {'id': site_id}
+        for site in sites:
+            site_id = site['id']
+            site_name = site['name']
+            siteStats[site_name] = {'site_id': site_id}
 
-                visits = conn.execute('''
-                    select *
-                    from site_scraper_last_directoy_visit
-                    where site_id = ?
-                    and last_visit < ?
-                    order by last_visit asc
-                    ''', [site_id, before_timestamp]).fetchall()
+            visits = self.executeQuery('''
+                select *
+                from scraper.site_scraper_last_directory_visit
+                where site_id = %s
+                and last_visit < %s
+                order by last_visit asc
+                ''', [site_id, before_timestamp], fetch=FETCH_ALL)
 
-                siteStats[site_name]['queue_length'] = len(visits)
-                if len(visits) > 0:
-                    siteStats[site_name]['least_recent_visited_dir_id'] = visits[0][2]
-                    siteStats[site_name]['least_recent_visit'] = visits[0][4]
+            siteStats[site_name]['queue_length'] = len(visits)
+            if len(visits) > 0:
+                siteStats[site_name]['least_recent_visited_dir_id'] = visits[0]['dir_id']
+                siteStats[site_name]['least_recent_visit'] = visits[0]['last_visit']
 
         return siteStats
+
+
+
+if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.INFO)
+    dbcreds = dbcredentials.DBCredentials().get('LTASO')
+    with LTAStorageDb(dbcreds, True) as db:
+        print db.rootDirectoriesForSite(1)
+        print db.dir_id(1, 'rootDir_0')
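For the write side that feeds the new quota bookkeeping, a hedged sketch using only methods shown in this patch; the site URL and quota numbers are illustrative, not real values:

```python
from datetime import datetime
from lofar.common import dbcredentials
from lofar.lta.ltastorageoverview import store

dbcreds = dbcredentials.DBCredentials().get('LTASO')
with store.LTAStorageDb(dbcreds, False) as db:
    db.insertSiteIfNotExists('sara', 'srm://srm.example.org:8443')   # hypothetical url
    site = db.siteByName('sara')
    # 20 PB quota, valid until the end of 2018 (illustrative numbers only)
    db.insertSiteQuota(site['id'], 20 * 10**15, datetime(2018, 12, 31))
```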
diff --git a/LTA/ltastorageoverview/lib/webservice/templates/index.html b/LTA/ltastorageoverview/lib/webservice/templates/index.html
index 174bac4088b..0ee88d2638d 100644
--- a/LTA/ltastorageoverview/lib/webservice/templates/index.html
+++ b/LTA/ltastorageoverview/lib/webservice/templates/index.html
@@ -5,10 +5,21 @@
 <head>
     <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
     <title>{{title}}</title>
-
+    <meta http-equiv="refresh" content="300">
     <script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
     <script type="text/javascript" src="http://code.highcharts.com/highcharts.js"></script>
     <script type="text/javascript" src="http://code.highcharts.com/modules/exporting.js"></script>
+    <style>
+        table, th, td {
+            border: 1px solid black;
+            border-collapse: collapse;
+            text-align: right;
+            font-size: 12px;
+        }
+        th, td {
+            padding: 8px;
+        }
+    </style>
 </head>
 <body>
     <script type="text/javascript">
@@ -21,10 +32,11 @@
 
             $('#usage_piechart_container').highcharts({
                 chart: {
-                plotBackgroundColor: null,
-                plotBorderWidth: null,
-                plotShadow: false,
-                type: 'pie'
+                    animation: false,
+                    plotBackgroundColor: null,
+                    plotBorderWidth: null,
+                    plotShadow: false,
+                    type: 'pie'
                 },
                 title: {
                     text: '<span style="font-size: 22px">{{storagesitetitle|safe}}</span>',
@@ -38,6 +50,7 @@
                 },
                 plotOptions: {
                     pie: {
+                        animation: false,
                         allowPointSelect: true,
                         cursor: 'pointer',
                         dataLabels: {
@@ -56,12 +69,52 @@
                 colorByPoint: true,
                 data: {{storagesitedata|safe}}
                 }]
-                });
+            });
 
+            $('#free_space_piechart_container').highcharts({
+                chart: {
+                    animation: false,
+                    plotBackgroundColor: null,
+                    plotBorderWidth: null,
+                    plotShadow: false,
+                    type: 'pie'
+                },
+                title: {
+                    text: '<span style="font-size: 22px">{{storagesite_free_space_title|safe}}</span>',
+                },
+                subtitle: {
+                    text: '<span style="font-size: 20px">{{storagesite_free_space_subtitle|safe}}</span>',
+                },
+                legend: { itemStyle: { fontSize: '22px' } },
+                tooltip: {
+                    pointFormat: '{series.name}: <b>{point.percentage:.1f}%</b>'
+                },
+                plotOptions: {
+                    pie: {
+                        animation: false,
+                        allowPointSelect: true,
+                        cursor: 'pointer',
+                        dataLabels: {
+                        enabled: true,
+                        format: '<b>{point.name}</b>: {point.percentage:.1f} %',
+                        style: {
+                            color: (Highcharts.theme && Highcharts.theme.contrastTextColor) || 'black',
+                                    fontFamily: '"Lucida Grande", "Lucida Sans Unicode", Verdana, Arial, Helvetica, sans-serif', // default font
+                                    fontSize: '16px'
+                            }
+                        }
+                    }
+                },
+                series: [{
+                name: "StorageSiteUsage",
+                colorByPoint: true,
+                data: {{storagesite_free_space|safe}}
+                }]
+            });
 
         $(function () {
             $('#usage_trend_container').highcharts({
-                chart: { type: 'area'},
+                chart: { type: 'area', animation: false},
                 title: {
                     text: '<span style="font-size: 22px">LTA Storage Site Usage Trend</span>',
                 },
@@ -96,6 +149,7 @@
                 },
                 plotOptions: {
                     area: {
+                        animation: false,
                         stacking: 'normal',
                         lineColor: '#666666',
                         lineWidth: 1,
@@ -112,13 +166,13 @@
 
         $(function () {
             $('#usage_deltas_container').highcharts({
-                chart: { type: 'column'},
+                chart: { type: 'column', animation: false},
                 title: {
-                    text: '<span style="font-size: 22px">LTA Storage Site Deltas Per Month</span>',
+                    text: '<span style="font-size: 22px">LTA Storage Site Deltas</span>',
                     },
                 xAxis: {
                     type: 'datetime',
-                    labels: { style: { fontSize: '22px'} },
+                    labels: { style: { fontSize: '22px'}, align: 'right' }
                 },
                 yAxis: {
                     title: { text: '<span style="font-size: 22px">TB</span>'},
@@ -145,6 +199,7 @@
                 },
                 plotOptions: {
                     column: {
+                        animation: false,
                         stacking:'normal',
                         pointPadding: 0.01,
                         groupPadding: 0.0,
@@ -154,13 +209,72 @@
                 series: {{deltas_per_month_series|safe}}
             });
         });
+        });
 
+        $(function () {
+            $('#quotas_chart_container').highcharts({
+                chart: { type: 'column',
+                         animation: false },
+                title: {
+                    text: '<span style="font-size: 22px">LTA Storage Site Tape Quota</span>',
+                },
+                xAxis: {
+                    type: 'datetime',
+                    labels: { style: { fontSize: '22px'}, align: 'left' },
+                },
+                yAxis: {
+                    title: { text: '<span style="font-size: 22px">PB</span>'},
+                    labels: {
+                        style: { fontSize: '22px'},
+                        formatter: function () { return this.value / 1e15;}},
+                },
+                legend: { itemStyle: { fontSize: '14px' } },
+                tooltip: {
+                    formatter: function () {
+                        var s = '<b>' + Highcharts.dateFormat('%Y/%m/%d', this.x) + '</b>';
+
+                        $.each(this.points, function () {
+                            s += '<br/><b>' + this.series.name + '</b>: ' +
+                                Highcharts.numberFormat(this.y / 1e15, 2, '.') + ' PB';
+                        });
 
+                        return s;
+                    },
+                    shared: true
+                },
+                style: {
+                color: (Highcharts.theme && Highcharts.theme.contrastTextColor) || 'black',
+                        fontFamily: '"Lucida Grande", "Lucida Sans Unicode", Verdana, Arial, Helvetica, sans-serif', // default font
+                        fontSize: '22px'
+                },
+                plotOptions: {
+                    column: {
+                        animation: false,
+                        stacking:'normal',
+                        pointPadding: 0.01,
+                        groupPadding: 0.05,
+                        pointPlacement: -0.45
+                      }
+                },
+                series: {{quota_series|safe}}
+            });
         });
+
         </script>
     <div id="usage_trend_container" style="min-width: 310px; min-height: 600px; width: 95%; height: 100%; margin: 24px; margin-left: auto; margin-right: auto; "></div>
     <div id="usage_deltas_container" style="min-width: 310px; min-height: 600px; width: 95%; height: 100%; margin: 24px; margin-left: auto; margin-right: auto; "></div>
-    <div id="usage_piechart_container" style="min-width: 310px; min-height: 600px; width: 80%; height: 100%; margin: 24px; margin-left: auto; margin-right: auto; "></div>
+    <div style="overflow: hidden; ">
+        <div id="usage_piechart_container" style="float: left; min-width: 310px; min-height: 600px; width: 48%; height: 100%; margin: 24px; margin-left: auto; "></div>
+        <div id="free_space_piechart_container" style="float: left; min-width: 310px; min-height: 600px; width: 48%; height: 100%; margin: 24px; margin-right: auto; "></div>
+    </div>
+    <div style="overflow: hidden; ">
+        <div id="quotas_chart_container" style="float: left; min-width: 310px; min-height: 600px; width: 48%; height: 100%; margin: 24px; margin-left: auto; "></div>
+        <div style="float: left; min-width: 310px; min-height: 600px; width: 48%; height: 100%; margin: 24px; margin-right: auto; ">
+            <p>Latest quota and usage per site and per tape-quotable directory</p>
+            {{site_tape_usages|safe}}
+            <p>Please note that Juelich provides us quota and usage numbers which are 700TB lower than the actual number of bytes on tape, due to a technical issue at their site. The numbers presented here are the actual number of bytes on tape.</p>
+        </div>
+    </div>
     Data last gathered at {{data_gathered_timestamp}}.
 </body>
 </html>
diff --git a/LTA/ltastorageoverview/lib/webservice/webservice.py b/LTA/ltastorageoverview/lib/webservice/webservice.py
index 7acbf8f8269..1ac8f3f1e21 100755
--- a/LTA/ltastorageoverview/lib/webservice/webservice.py
+++ b/LTA/ltastorageoverview/lib/webservice/webservice.py
@@ -27,15 +27,25 @@ import sys
 import os
 import os.path
 from datetime import datetime
+import logging
 from flask import Flask
 from flask import render_template
 from flask import json
-from ltastorageoverview import store
+from lofar.lta.ltastorageoverview import store
 from lofar.common.util import humanreadablesize
 from lofar.common.datetimeutils import monthRanges
 
-app = Flask('LTA storage overview')
-app.config.root_path = os.path.dirname(__file__)
+logger = logging.getLogger(__name__)
+
+__root_path = os.path.dirname(os.path.realpath(__file__))
+
+'''The flask webservice app'''
+app = Flask('LTA storage overview',
+            instance_path=__root_path,
+            template_folder=os.path.join(__root_path, 'templates'),
+            static_folder=os.path.join(__root_path, 'static'),
+            instance_relative_config=True)
+
 db = None
 
 @app.route('/')
@@ -44,23 +54,36 @@ def index():
     # TODO: serve html first, and let client request data via ajax
     usages = {}
 
+    colors = {'sara': {'used': '#90ed7d', 'free': '#c5f6bc'},
+              'juelich': {'used': '#494950', 'free': '#a1a1aa'},
+              'poznan': {'used': '#7cb5ec', 'free': '#bcdaf5'}}
+
     sites = db.sites()
-    sites2 = [x for x in sites if x[1] != 'nikhef']
-    sites = [sites2[0], sites2[2], sites2[1]]
+    sitesDict = { s['name']:s for s in sites }
+    sites = [sitesDict[sitename] for sitename in ['poznan', 'juelich', 'sara'] if sitename in sitesDict]
 
-    total = 0.0
-    numFiles = 0L
+    total_lta_size = 0.0
+    total_lta_num_files = 0L
     for site in sites:
-        site_usage = float(db.totalFileSizeInSite(site[0]))
-        usages[site[1]] = site_usage
-        total += site_usage
-        numFiles += db.numFilesInSite(site[0])
-
-    storagesitedata='[' + ', '.join(['''{name: "%s %s", y: %.1f}''' % (site[1], humanreadablesize(usages[site[1]]), 100.0*usages[site[1]]/total) for site in sites]) + ']'
+        totals = db.totalFileSizeAndNumFilesInSite(site['id'])
+        total_lta_size += totals['tree_total_file_size']
+        total_lta_num_files += totals['tree_num_files']
+        usages[site['name']] = totals['tree_total_file_size']
+
+    if total_lta_size > 0:
+        storagesitedata='[' + ', '.join(['''{name: "%s %s", color:'%s', y: %.2f}''' % (site['name'], humanreadablesize(usages[site['name']]),
+                                                                                       colors[site['name']]['used'],
+                                                                                       100.0*usages[site['name']]/total_lta_size) for site in sites]) + ']'
+    else:
+        storagesitedata ='[]'
 
     min_date, max_date = db.datetimeRangeOfFilesInTree()
-    min_date = datetime(2012, 1, 1)
-    month_ranges = monthRanges(min_date, max_date)
+    if min_date is None:
+        min_date = datetime(2012, 1, 1)
+    if max_date is None:
+        max_date = datetime.utcnow()
+    min_date = max(datetime(2012, 1, 1), min_date)
+    month_ranges = monthRanges(min_date, max_date, 3)
 
     # convert end-of-month timestamps to milliseconds since epoch
     epoch = datetime.utcfromtimestamp(0)
@@ -69,54 +92,109 @@ def index():
     usage_per_month_series='['
     deltas_per_month_series='['
     for site in sites:
-        cumulatives = [db.totalFileSizeInSite(site[0], to_date=mr[1]) for mr in month_ranges]
+        deltas_per_month = [db.totalFileSizeInSite(site['id'], from_date=mr[0], to_date=mr[1]) for mr in month_ranges]
+        data = ', '.join(['[%s, %s]' % (x[0], str(x[1])) for x in zip(datestamps, deltas_per_month)])
+        deltas_per_month_series += '''{name: '%s', color:'%s', data: [%s]},\n''' % (site['name'], colors[site['name']]['used'], data)
 
-        data = ', '.join(['[%s, %s]' % (x[0], str(x[1])) for x in zip(datestamps, cumulatives)])
-        usage_per_month_series += '''{name: '%s', data: [%s]},\n''' % (site[1], data)
+        cumulatives = [deltas_per_month[0]]
+        for delta in deltas_per_month[1:]:
+            cumulative = cumulatives[-1] + delta
+            cumulatives.append(cumulative)
 
-        deltas = [0]
-        for i in range(1, len(cumulatives)):
-            delta = cumulatives[i] - cumulatives[i-1]
-            deltas.append(delta)
+        data = ', '.join(['[%s, %s]' % (x[0], str(x[1])) for x in zip(datestamps, cumulatives)])
+        usage_per_month_series += '''{name: '%s', color:'%s', data: [%s]},\n''' % (site['name'], colors[site['name']]['used'], data)
 
-        data = ', '.join(['[%s, %s]' % (x[0], str(x[1])) for x in zip(datestamps, deltas)])
-        deltas_per_month_series += '''{name: '%s', data: [%s]},\n''' % (site[1], data)
 
 
     usage_per_month_series+=']'
     deltas_per_month_series+=']'
 
+    quota_dir_stats = db.siteQuotaRootDirStats()
+    quota_dir_stats = sorted(quota_dir_stats, reverse=True, key=lambda x: x['tree_total_file_size'])
+
+    site_usages_per_site = {}
+    latest_usages_per_site = {}
+    for site_usage in db.siteQuotaUsages():
+        site_name = site_usage['site_name']
+        if site_name not in site_usages_per_site:
+            site_usages_per_site[site_name] = []
+        site_usages_per_site[site_name].append(site_usage)
+        if site_name not in latest_usages_per_site:
+            latest_usages_per_site[site_name] = site_usage
+        if site_usage['valid_until_date'] > latest_usages_per_site[site_name]['valid_until_date']:
+            latest_usages_per_site[site_name] = site_usage
+
+
+    quota_series='['
+    storagesite_free_space='['
+    site_tape_usages_table = '<table>\n'
+    site_tape_usages_table += '<tr><th style="text-align: left;">site</th><th style="text-align: left;">directory</th><th>total #files</th><th>total file size</th><th>quota</th><th>free</th><th>expiration</th></tr>\n'
+    total_lta_free_space = sum(u['space_left'] for u in latest_usages_per_site.values() if u['space_left'] > 0)
+    total_lta_quota = sum(u['quota'] for u in latest_usages_per_site.values())
+
+    for site_name in ['sara','juelich', 'poznan']:
+        if site_name in latest_usages_per_site:
+            latest_usage = latest_usages_per_site[site_name]
+            site_tape_usages_table += '<tr style="font-weight: bold;"><td style="text-align: left;">%s</td><td style="text-align: left;">%s</td><td>%s</td><td>%s</td><td>%s</td><td %s>%s</td><td>%s</td></tr>\n' % (latest_usage['site_name'], '', latest_usage['num_files'], humanreadablesize(latest_usage['total_file_size']), humanreadablesize(latest_usage['quota']), 'style="color: red;"' if latest_usage['space_left'] < 0 else '', humanreadablesize(latest_usage['space_left']), latest_usage['valid_until_date'])
+
+            for qds in quota_dir_stats:
+                if qds['site_name'] == site_name:
+                    site_tape_usages_table += '<tr><td style="text-align: left;">%s</td><td style="text-align: left;">%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n' % (
+                        '', qds['dir_name'], qds['tree_num_files'], humanreadablesize(qds['tree_total_file_size']), '', '', '')
+
+            storagesite_free_space += '''{name: "%s %s", color:'%s', y: %.2f}, ''' % (site_name,
+                                                                                      humanreadablesize(latest_usage['space_left']),
+                                                                                      colors[site_name]['free'],
+                                                                                      max(0, 100.0 * latest_usage['space_left']) / total_lta_free_space)
+
+
+    site_tape_usages_table += '</table>\n'
+
+    for site_name in ['poznan','juelich', 'sara']:
+        if site_name in site_usages_per_site:
+            site_usages_for_site = site_usages_per_site[site_name]
+            site_usages_for_site = sorted(site_usages_for_site, key=lambda x: x['valid_until_date'])
+            data = ','.join('[%d, %s]' % ((su['valid_until_date'] - epoch).total_seconds()*1000, su['space_left']) for su in site_usages_for_site)
+            quota_series+='''{ name:'%s_free', stack:'%s', color:'%s', data:[%s] },''' % (site_name,site_name,colors[site_name]['free'],data)
+            data = ','.join('[%d, %s]' % ((su['valid_until_date'] - epoch).total_seconds()*1000, su['total_file_size']) for su in site_usages_for_site)
+            quota_series+='''{ name:'%s_used', stack:'%s', color:'%s', data:[%s] },''' % (site_name,site_name,colors[site_name]['used'], data)
+
+
+    quota_series+=']'
+    storagesite_free_space+=']'
+
     return render_template('index.html',
                            title='LTA storage overview',
                            storagesitetitle='LTA Storage Site Usage',
-                           storagesitesubtitle='Total: %s #dataproducts: %s' % (humanreadablesize(total, 'B', 1000), humanreadablesize(numFiles, '', 1000)),
+                           storagesitesubtitle='Total: %s #dataproducts: %s' % (humanreadablesize(total_lta_size, 'B', 1000), humanreadablesize(total_lta_num_files, '', 1000)),
+                           storagesite_free_space_title='LTA Storage Site Free Space',
+                           storagesite_free_space_subtitle='Total free space: %s Current total quota: %s' % (humanreadablesize(total_lta_free_space, 'B', 1000),humanreadablesize(total_lta_quota, 'B', 1000)),
                            storagesitedata=storagesitedata,
+                           storagesite_free_space=storagesite_free_space,
                            usage_per_month_series=usage_per_month_series,
                            deltas_per_month_series=deltas_per_month_series,
+                           quota_series=quota_series,
+                           site_tape_usages=site_tape_usages_table,
                            data_gathered_timestamp=db.mostRecentVisitDate().strftime('%Y/%m/%d %H:%M:%S'))
 
 @app.route('/rest/sites/')
 def get_sites():
-    sites = {'sites': [{'id': x[0], 'name': x[1], 'url': x[2]} for x in db.sites()]}
-    return json.jsonify(sites)
+    return json.jsonify({'sites': db.sites()})
 
 @app.route('/rest/sites/<int:site_id>')
 def get_site(site_id):
-    site = db.site(site_id)
-    site_dict = {'id': site[0], 'name': site[1], 'url': site[2]}
-    return json.jsonify(site_dict)
+    return json.jsonify(db.site(site_id))
 
 @app.route('/rest/sites/usages')
 def get_sites_usages():
-    sites = {'sites_usages': [{'id': x[0],
-                               'name': x[1]} for x in db.sites()]}
+    sites = {'sites_usages': db.sites()}
 
     for site in sites['sites_usages']:
         rootDirs = db.rootDirectoriesForSite(site['id'])
 
         site_usage = 0L
         for rootDir in rootDirs:
-            usage = long(db.totalFileSizeInTree(rootDir[0]))
+            usage = long(db.totalFileSizeInTree(rootDir['dir_id']))
             site_usage += usage
         site['usage'] = site_usage
         site['usage_hr'] = humanreadablesize(site_usage)
@@ -125,34 +203,43 @@ def get_sites_usages():
 
 @app.route('/rest/rootdirectories/',)
 def get_rootDirectories():
-    rootDirs = {'rootDirectories': [{'id': x[0], 'name': x[1], 'site_id': x[2], 'site_name': x[3]} for x in db.rootDirectories()]}
+    rootDirs = {'rootDirectories': db.rootDirectories()}
     return json.jsonify(rootDirs)
 
 @app.route('/rest/directory/<int:dir_id>/subdirectories/',)
 def get_directoryTree(dir_id):
-    subDirsList = {'subdirectories': [{'id': x[0], 'name': x[1], 'parent_dir_id': x[2]} for x in db.subDirectories(dir_id, 1, False)]}
+    subDirsList = {'subdirectories': db.subDirectories(dir_id, 1, False)}
     return json.jsonify(subDirsList)
 
 @app.route('/rest/directory/<int:dir_id>/files')
 def get_filesInDirectory(dir_id):
-    files = {'files': [{'id': x[0], 'name': x[1], 'size': x[2], 'creation_date': x[3]} for x in db.filesInDirectory(dir_id)]}
+    files = {'files': db.filesInDirectory(dir_id)}
     return json.jsonify(files)
 
 
-def main(argv):
-    dbpath = argv[0] if argv else 'ltastorageoverview.sqlite'
+def main():
+    from optparse import OptionParser
+    from lofar.common import dbcredentials
+
+    # Check the invocation arguments
+    parser = OptionParser("%prog [options]", description='runs the lta storage overview webservice, which reads from the specified database.')
+    parser.add_option('-V', '--verbose', dest='verbose', action='store_true', help='verbose logging')
+    parser.add_option_group(dbcredentials.options_group(parser))
+    parser.set_defaults(dbcredentials="LTASO")
+    (options, args) = parser.parse_args()
+
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.DEBUG if options.verbose else logging.INFO)
 
-    if not os.path.exists(dbpath):
-        print 'No database file found at \'%s\'' % (dbpath,)
-        sys.exit(-1)
+    dbcreds = dbcredentials.parse_options(options)
 
-    print 'Using database at \'%s\'' % (dbpath,)
+    logger.info("Using dbcreds: %s" % dbcreds.stringWithHiddenPassword())
 
     global db
-    db = store.LTAStorageDb(dbpath)
+    db = store.LTAStorageDb(dbcreds, options.verbose)
 
-    app.run(debug=True,host='0.0.0.0')
+    app.run(debug=False,host='0.0.0.0',port=9632)
 
 if __name__ == '__main__':
-    main(sys.argv[1:])
+    main()
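The REST endpoints above now return the store's dict rows directly; a small client-side sketch, with host/port taken from the app.run call above and the `requests` package as an assumption:

```python
import requests   # assumed to be available in the client environment

base_url = 'http://localhost:9632'      # host/port from app.run above
for site in requests.get(base_url + '/rest/sites/').json()['sites']:
    print site['name'], site['url']
for usage in requests.get(base_url + '/rest/sites/usages').json()['sites_usages']:
    print usage['name'], usage['usage_hr']
```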
 
diff --git a/LTA/ltastorageoverview/ltastorageoverview_build.sh b/LTA/ltastorageoverview/ltastorageoverview_build.sh
deleted file mode 100755
index dce9ca20c26..00000000000
--- a/LTA/ltastorageoverview/ltastorageoverview_build.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash 
-# $Id: ltastorageoverview_build.sh 32113 2015-08-03 10:07:57Z schaap $
-
-if [ "$#" -ne 1 ]; then
-    echo "Usage: ./ltastorageoverview_build.sh <tag>"
-    echo "where tag is a name or version number which is added to the tarballs."
-    echo "This script creates two build flavours (local_dev/lexar) in source_root_dir/build and builds ltastorageoverview"
-    echo "Then it performs a local install (in the each specific build dir) and creates a deployable tarball"
-    echo "Final result is a tarball in source_root_dir/build which can be copied to the ingest servers"
-    exit 1
-fi
-
-#get path of this build script and determine source root from there
-REL_PATH="`dirname \"$0\"`"
-ABS_PATH="`( cd \"$REL_PATH\" && pwd )`"
-SOURCE_ROOT="$ABS_PATH/../.."
-
-echo "Using '$SOURCE_ROOT' as source route"
-
-BUILD_TAG="$1"
-echo "Using Build tag $BUILD_TAG"
-
-LOCAL_DEV_BUILD_DIR=$SOURCE_ROOT/build/local_dev/gnu_debug
-LOCAL_DEV_INSTALL_DIR=$LOCAL_DEV_BUILD_DIR/local_install
-
-mkdir -p $LOCAL_DEV_BUILD_DIR
-
-cd $LOCAL_DEV_BUILD_DIR && cmake -DBUILD_PACKAGES=ltastorageoverview -DCMAKE_INSTALL_PREFIX=$LOCAL_DEV_INSTALL_DIR/ltastorageoverview__$BUILD_TAG $SOURCE_ROOT
-cd $LOCAL_DEV_BUILD_DIR && make && make local_dev && rm -rf $LOCAL_DEV_INSTALL_DIR && make install
-
-
-LEXAR_BUILD_DIR=$SOURCE_ROOT/build/lexar/gnu_debug
-mkdir -p $LEXAR_BUILD_DIR
-
-cd $LEXAR_BUILD_DIR && cmake -DBUILD_PACKAGES=ltastorageoverview -DCMAKE_INSTALL_PREFIX=/globalhome/ingest/ltastorageoverview_$BUILD_TAG $SOURCE_ROOT
-cd $LEXAR_BUILD_DIR && make && rm -rf ./local_install && make DESTDIR=./local_install install
-cd $LEXAR_BUILD_DIR/local_install/globalhome/ingest && tar cvzf $SOURCE_ROOT/build/ltastorageoverview_"$BUILD_TAG"_lexar.tgz ltastorageoverview_$BUILD_TAG
diff --git a/LTA/ltastorageoverview/test/CMakeLists.txt b/LTA/ltastorageoverview/test/CMakeLists.txt
index 4eef4029638..bb3b942d550 100644
--- a/LTA/ltastorageoverview/test/CMakeLists.txt
+++ b/LTA/ltastorageoverview/test/CMakeLists.txt
@@ -2,10 +2,8 @@
 include(LofarCTest)
 
 lofar_add_test(test_store)
+lofar_add_test(test_scraper)
 lofar_add_test(test_lso_webservice)
+lofar_add_test(test_ingesteventhandler)
 
-python_install(
-    test_store.py
-    test_lso_webservice.py
-    DESTINATION ltastorageoverview/test)
-
+lofar_add_test(integration_test_store)
diff --git a/LTA/ltastorageoverview/test/common_test_ltastoragedb.py b/LTA/ltastorageoverview/test/common_test_ltastoragedb.py
new file mode 100755
index 00000000000..4c216f44bd2
--- /dev/null
+++ b/LTA/ltastorageoverview/test/common_test_ltastoragedb.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2012-2015    ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+# $Id$
+
+import unittest
+import logging
+import os, os.path
+import psycopg2
+import lofar.common.dbcredentials as dbc
+
+try:
+    import testing.postgresql
+except ImportError as e:
+    print str(e)
+    print 'Please install python package testing.postgresql: sudo pip install testing.postgresql'
+    exit(3)  # special lofar test exit code: skipped test
+
+logger = logging.getLogger(__name__)
+
+class CommonLTAStorageDbTest(unittest.TestCase):
+    def setUp(self):
+        logger.info('setting up test LTASO database server...')
+
+        # create a test db
+        logger.info('  creating test postgres server')
+        self.test_psql = testing.postgresql.Postgresql()
+        dsn = self.test_psql.dsn()
+        logger.info('  created test postgres server, dsn=%s', dsn)
+
+        self.dbcreds = dbc.Credentials()
+        self.dbcreds.user = 'test_user'
+        self.dbcreds.password = 'test_password'
+
+        with psycopg2.connect(**dsn) as conn:
+            cursor = conn.cursor()
+            #use same user/pass as stored in local dbcreds
+            query = "CREATE USER %s WITH SUPERUSER PASSWORD '%s'" % (self.dbcreds.user, self.dbcreds.password)
+            cursor.execute(query)
+
+            create_script_path = os.path.normpath(os.path.join(os.environ['LOFARROOT'], 'share', 'ltaso', 'create_db_ltastorageoverview.sql'))
+            logger.info('  running ltaso create script create_script=%s', create_script_path)
+            with open(create_script_path, 'r') as script:
+                cursor.execute(script.read())
+            logger.info('  completed ltaso create script')
+
+        # copy the test postgres server settings into dbcreds
+        # we can use these dbcreds in each test method to connect to the testing ltaso database
+        self.dbcreds.host = dsn['host']
+        self.dbcreds.database = dsn['database']
+        self.dbcreds.port = dsn['port']
+
+        logger.info('finished setting up test LTASO database')
+
+    def tearDown(self):
+        logger.info('removing test LTASO database server...')
+        self.test_psql.stop()
+        logger.info('removed test LTASO database server')
+
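A minimal sketch of how a test module can reuse this common base class, mirroring the pattern of integration_test_store.py below; the class and test names are made up:

```python
import unittest
from common_test_ltastoragedb import CommonLTAStorageDbTest
from lofar.lta.ltastorageoverview import store

class ExampleStoreTest(CommonLTAStorageDbTest):
    def test_starts_empty(self):
        # setUp gives each test a freshly created ltaso database via self.dbcreds
        with store.LTAStorageDb(self.dbcreds, True) as db:
            self.assertEqual([], db.sites())

if __name__ == '__main__':
    unittest.main()
```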
diff --git a/LTA/ltastorageoverview/test/db_performance_test.py b/LTA/ltastorageoverview/test/db_performance_test.py
new file mode 100755
index 00000000000..f224b3ee9c9
--- /dev/null
+++ b/LTA/ltastorageoverview/test/db_performance_test.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+
+# Copyright (C) 2012-2015    ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+import logging
+from datetime import datetime, timedelta
+import os
+
+from lofar.lta.ltastorageoverview import store
+from lofar.common.datetimeutils import totalSeconds
+
+logger = logging.getLogger()
+
+def main():
+    from optparse import OptionParser
+    from lofar.common import dbcredentials
+    import testing.postgresql
+    import psycopg2
+
+    # Check the invocation arguments
+    parser = OptionParser("%prog [options]", description='execute a performance test by inserting many files on an empty test database.')
+    parser.add_option('-V', '--verbose', dest='verbose', action='store_true', help='verbose logging')
+    (options, args) = parser.parse_args()
+
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.DEBUG if options.verbose else logging.INFO)
+
+
+    # create a test db
+    logger.info('  creating test postgres server')
+    with testing.postgresql.Postgresql() as test_psql:
+        dsn = test_psql.dsn()
+        logger.info('  created test postgres server, dsn=%s', dsn)
+
+        dbcreds = dbcredentials.Credentials()
+        dbcreds.user = 'test_user'
+        dbcreds.password = 'test_password'
+
+        with psycopg2.connect(**dsn) as conn:
+            cursor = conn.cursor()
+            #use same user/pass as stored in local dbcreds
+            query = "CREATE USER %s WITH SUPERUSER PASSWORD '%s'" % (dbcreds.user, dbcreds.password)
+            cursor.execute(query)
+
+            create_script_path = os.path.normpath(os.path.join(os.environ['LOFARROOT'], 'share', 'ltaso', 'create_db_ltastorageoverview.sql'))
+            logger.info('  running ltaso create script create_script=%s', create_script_path)
+            with open(create_script_path, 'r') as script:
+                cursor.execute(script.read())
+            logger.info('  completed ltaso create script')
+
+        # copy the test postgres server settings into dbcreds
+        # we can use these dbcreds in each test method to connect to the testing ltaso database
+        dbcreds.host = dsn['host']
+        dbcreds.database = dsn['database']
+        dbcreds.port = dsn['port']
+
+        logger.info('finished setting up test LTASO database')
+
+        base_date = datetime.utcnow()
+
+        db = store.LTAStorageDb(dbcreds, options.verbose)
+
+        db.insertSiteIfNotExists('sara', 'srm://srm.siteA.nl:8444')
+        rootdir_id = db.insertRootDirectory('sara', '/pnfs/grid.siteA.nl/data/lofar/ops')
+        projects_dir_id = db.insertSubDirectory('/pnfs/grid.siteA.nl/data/lofar/ops/projects', rootdir_id)
+
+        total_num_files_inserted = 0
+
+        with open('db_perf.csv', 'w') as file:
+            for cycle_nr in range(1, 10):
+                for project_nr in range(1, 10):
+                    # project_name = 'lc%d_%03d/%d' % (cycle_nr, project_nr, os.getpid())
+                    project_name = 'lc%d_%03d' % (cycle_nr, project_nr)
+                    projectdir_id = db.insertSubDirectory('/pnfs/grid.siteA.nl/data/lofar/ops/projects/%s' % (project_name,), projects_dir_id)
+
+                    obs_base_id = cycle_nr*100000+project_nr*1000
+                    for obs_nr, obsId in enumerate(range(obs_base_id, obs_base_id+20)):
+                        obsName = 'L%s' % obsId
+
+                        obsdir_id = db.insertSubDirectory('/pnfs/grid.siteA.nl/data/lofar/ops/projects/%s/%s' % (project_name, obsName), projectdir_id)
+
+                        fileinfos = [('%s_SB%3d' % (obsName, sbNr), 1000+sbNr+project_nr*cycle_nr, base_date + timedelta(days=10*cycle_nr+project_nr, minutes=obs_nr, seconds=sbNr), obsdir_id) for sbNr in range(0, 2)]
+                        now = datetime.utcnow()
+                        file_ids = db.insertFileInfos(fileinfos)
+                        total_num_files_inserted += len(file_ids)
+                        elapsed = totalSeconds(datetime.utcnow() - now)
+                        line = '%s,%s' % (total_num_files_inserted, elapsed)
+                        print line
+                        file.write(line + '\n')
+
+if __name__ == "__main__":
+    main()
+
diff --git a/LTA/ltastorageoverview/test/integration_test_store.py b/LTA/ltastorageoverview/test/integration_test_store.py
new file mode 100755
index 00000000000..b141011a2d2
--- /dev/null
+++ b/LTA/ltastorageoverview/test/integration_test_store.py
@@ -0,0 +1,204 @@
+#!/usr/bin/python
+
+# Copyright (C) 2012-2015    ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+# $Id$
+
+import logging
+from datetime import datetime, timedelta
+import time
+from common_test_ltastoragedb import *
+from lofar.lta.ltastorageoverview import store
+
+logger = logging.getLogger(__name__)
+
+class IntegrationTestLTAStorageDb(CommonLTAStorageDbTest):
+    """
+    Bigger tests for the lofar.lta.ltastorageoverview.store.LTAStorageDb
+    which test more complex behaviour with bigger amounts of data.
+    """
+
+    def testDirectoryTreesAndStats(self):
+        """Quite a big test, almost an integration test.
+        It consists of two stages/phases:
+        1) inserts a tree of directories and files in various sites and projects,
+        2) checks whether the automatically computed tree- and dirstats are correct.
+        """
+
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            base_time = datetime.utcnow()
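+            # strip seconds and microseconds from base_time, presumably so the generated
+            # creation_dates have no sub-second parts that could round differently in the database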
+            base_time -= timedelta(seconds=base_time.second, microseconds=base_time.microsecond)
+
+            ###########################################################
+            # STAGE 1: insertion and check phase.
+            # insert the sites, directories, and files
+            # and check the dir- and tree stats directly after insertion
+            ###########################################################
+            NUM_SITES = 2
+            NUM_PROJECTS = 3
+            NUM_PROJECT_SUBDIRS = 4
+            NUM_SUB_SUBDIRS = 5
+
+            # helper dict to store all subdir ids for each dir.
+            dir2subdir = {}
+
+            for site_nr in range(NUM_SITES):
+                site_name = 'site%d' % site_nr
+                site_url = 'srm://%s.org' % site_name
+                db.insertSiteIfNotExists(site_name, site_url)
+
+                for project_nr in range(NUM_PROJECTS):
+                    rootDir_id = db.insertRootDirectory(site_name, 'rootDir_%d' % project_nr)
+                    dir2subdir[rootDir_id] = []
+
+                    for subdir_nr in range(NUM_PROJECT_SUBDIRS):
+                        subDir_id = db.insertSubDirectory('subDir_%d' % subdir_nr, rootDir_id)
+                        dir2subdir[subDir_id] = []
+                        dir2subdir[rootDir_id].append(subDir_id)
+                        for file_nr in range(project_nr*subdir_nr):
+                            db.insertFileInfo('file_%d' % file_nr, 271*(file_nr+1), base_time + timedelta(days=10*site_nr+project_nr, hours=subdir_nr, seconds=file_nr), subDir_id)
+
+                            dir_files = db.filesInDirectory(subDir_id)
+                            dir_stats = db.directoryTreeStats(subDir_id)
+
+                            self.assertEqual(sum(f['size'] for f in dir_files), dir_stats['dir_total_file_size'])
+                            self.assertEqual(len(dir_files), dir_stats['dir_num_files'])
+                            if dir_files:
+                                self.assertEqual(min(f['size'] for f in dir_files), dir_stats['dir_min_file_size'])
+                                self.assertEqual(max(f['size'] for f in dir_files), dir_stats['dir_max_file_size'])
+                                self.assertEqual(min(f['creation_date'] for f in dir_files), dir_stats['dir_min_file_creation_date'])
+                                self.assertEqual(max(f['creation_date'] for f in dir_files), dir_stats['dir_max_file_creation_date'])
+
+                        for subsubdir_nr in range(NUM_SUB_SUBDIRS):
+                            subsubDir_id = db.insertSubDirectory('subsubDir_%d' % subsubdir_nr, subDir_id)
+                            dir2subdir[subsubDir_id] = []
+                            dir2subdir[subDir_id].append(subsubDir_id)
+                            for kk in range(project_nr*subdir_nr*subsubdir_nr):
+                                db.insertFileInfo('file_%d_%d' % (subdir_nr,kk), 314*(kk+1), base_time + timedelta(days=10*site_nr+project_nr, hours=10*subdir_nr+subsubdir_nr+2, seconds=kk), subsubDir_id)
+
+                                dir_files = db.filesInDirectory(subsubDir_id)
+                                dir_stats = db.directoryTreeStats(subsubDir_id)
+
+                                self.assertEqual(sum(f['size'] for f in dir_files), dir_stats['dir_total_file_size'])
+                                self.assertEqual(len(dir_files), dir_stats['dir_num_files'])
+                                if dir_files:
+                                    self.assertEqual(min(f['size'] for f in dir_files), dir_stats['dir_min_file_size'])
+                                    self.assertEqual(max(f['size'] for f in dir_files), dir_stats['dir_max_file_size'])
+                                    self.assertEqual(min(f['creation_date'] for f in dir_files), dir_stats['dir_min_file_creation_date'])
+                                    self.assertEqual(max(f['creation_date'] for f in dir_files), dir_stats['dir_max_file_creation_date'])
+
+                                    tree_totals = db.totalFileSizeAndNumFilesInTree(subDir_id, dir_stats['dir_min_file_creation_date'], dir_stats['dir_max_file_creation_date'])
+                                    self.assertEqual(tree_totals['tree_num_files'], dir_stats['dir_num_files'])
+                                    self.assertEqual(tree_totals['tree_total_file_size'], dir_stats['dir_total_file_size'])
+
+                        # test 1st level subdir again, and also check inclusion of 2nd level subdirs in tree stats
+                        dir_files = db.filesInDirectory(subDir_id)
+                        dir_stats = db.directoryTreeStats(subDir_id)
+                        # this dir only...
+                        self.assertEqual(sum(f['size'] for f in dir_files), dir_stats['dir_total_file_size'])
+                        self.assertEqual(len(dir_files), dir_stats['dir_num_files'])
+                        if dir_files:
+                            self.assertEqual(min(f['size'] for f in dir_files), dir_stats['dir_min_file_size'])
+                            self.assertEqual(max(f['size'] for f in dir_files), dir_stats['dir_max_file_size'])
+                            self.assertEqual(min(f['creation_date'] for f in dir_files), dir_stats['dir_min_file_creation_date'])
+                            self.assertEqual(max(f['creation_date'] for f in dir_files), dir_stats['dir_max_file_creation_date'])
+
+                        # including subdirs in tree...
+                        self.assertEqual(sum(f['file_size'] for f in db.filesInTree(subDir_id)), dir_stats['tree_total_file_size'])
+                        self.assertEqual(len(db.filesInTree(subDir_id)), dir_stats['tree_num_files'])
+
+            ####################################################################################
+            # STAGE 2: reporting phase.
+            # loop over the sites, directories, and files now that the database has been filled.
+            # and check the dir- and tree stats totals
+            ####################################################################################
+            for site in db.sites():
+                site_id = site['id']
+
+                rootDirs = db.rootDirectoriesForSite(site_id)
+                self.assertEquals(NUM_PROJECTS, len(rootDirs))
+
+                for root_dir_id in [x['root_dir_id'] for x in rootDirs]:
+                    subDirs = db.subDirectories(root_dir_id, 1, False)
+                    self.assertEquals(NUM_PROJECT_SUBDIRS, len(subDirs))
+
+                    for subDir in subDirs:
+                        subDir_parent_id = subDir['parent_dir_id']
+                        self.assertEquals(root_dir_id, subDir_parent_id)
+                        self.assertTrue(subDir['id'] in dir2subdir[root_dir_id])
+
+                        subsubDirs = db.subDirectories(subDir['id'], 1, False)
+                        self.assertEquals(NUM_SUB_SUBDIRS, len(subsubDirs))
+
+                        for subsubDir in subsubDirs:
+                            subsubDir_parent_id = subsubDir['parent_dir_id']
+                            self.assertEquals(subDir['id'], subsubDir_parent_id)
+                            self.assertTrue(subsubDir['id'] in dir2subdir[subDir['id']])
+
+                    # check various selects of files in the tree, for each file
+                    tree_files = sorted(db.filesInTree(root_dir_id), key=lambda f: f['file_creation_date'])
+                    for file in tree_files:
+                        # check that filesInTree returns just this one file when delimited to this specific file_creation_date
+                        file_creation_date = file['file_creation_date']
+                        selected_tree_files = db.filesInTree(root_dir_id, file_creation_date, file_creation_date)
+                        self.assertEqual(1, len(selected_tree_files))
+                        self.assertEqual(file['file_creation_date'], selected_tree_files[0]['file_creation_date'])
+                        self.assertEqual(file['file_size'], selected_tree_files[0]['file_size'])
+
+                        # get the 'totals' for this root_dir, but select only this file by date.
+                        # should return 1 file.
+                        tree_totals = db.totalFileSizeAndNumFilesInTree(root_dir_id, file_creation_date, file_creation_date)
+                        self.assertEqual(1, tree_totals['tree_num_files'])
+                        self.assertEqual(file['file_size'], tree_totals['tree_total_file_size'])
+
+                    # check selections over some ranges of files/times
+                    for idx, file in enumerate(tree_files):
+                        file_creation_date = file['file_creation_date']
+
+                        # select all files with creation_date >= file_creation_date
+                        expected_selected_tree_files = tree_files[idx:]
+                        selected_tree_files = db.filesInTree(root_dir_id, file_creation_date, None)
+                        self.assertEqual(len(expected_selected_tree_files), len(selected_tree_files))
+                        selected_tree_files_ids = set([f['file_id'] for f in selected_tree_files])
+                        for expected_file in expected_selected_tree_files:
+                            self.assertTrue(expected_file['file_id'] in selected_tree_files_ids)
+
+                        # and check the totals as well
+                        tree_totals = db.totalFileSizeAndNumFilesInTree(root_dir_id, file_creation_date, None)
+                        self.assertEqual(len(expected_selected_tree_files), tree_totals['tree_num_files'])
+                        self.assertEqual(sum(f['file_size'] for f in expected_selected_tree_files), tree_totals['tree_total_file_size'])
+
+                        # select all files with creation_date <= file_creation_date
+                        expected_selected_tree_files = tree_files[:idx+1]
+                        selected_tree_files = db.filesInTree(root_dir_id, None, file_creation_date)
+                        self.assertEqual(len(expected_selected_tree_files), len(selected_tree_files))
+                        selected_tree_files_ids = set([f['file_id'] for f in selected_tree_files])
+                        for expected_file in expected_selected_tree_files:
+                            self.assertTrue(expected_file['file_id'] in selected_tree_files_ids)
+
+                        # and check the totals as well
+                        tree_totals = db.totalFileSizeAndNumFilesInTree(root_dir_id, None, file_creation_date)
+                        self.assertEqual(len(expected_selected_tree_files), tree_totals['tree_num_files'])
+                        self.assertEqual(sum(f['file_size'] for f in expected_selected_tree_files), tree_totals['tree_total_file_size'])
+
+# run tests if main
+if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.INFO)
+
+    unittest.main()
diff --git a/LTA/ltastorageoverview/test/integration_test_store.run b/LTA/ltastorageoverview/test/integration_test_store.run
new file mode 100755
index 00000000000..ae46b39f467
--- /dev/null
+++ b/LTA/ltastorageoverview/test/integration_test_store.run
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+source python-coverage.sh
+python_coverage_test "ltas*" integration_test_store.py
diff --git a/LTA/ltastorageoverview/test/integration_test_store.sh b/LTA/ltastorageoverview/test/integration_test_store.sh
new file mode 100755
index 00000000000..de706b2bcf9
--- /dev/null
+++ b/LTA/ltastorageoverview/test/integration_test_store.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+./runctest.sh integration_test_store
diff --git a/LTA/ltastorageoverview/test/test_ingesteventhandler.py b/LTA/ltastorageoverview/test/test_ingesteventhandler.py
new file mode 100755
index 00000000000..39adb104ce2
--- /dev/null
+++ b/LTA/ltastorageoverview/test/test_ingesteventhandler.py
@@ -0,0 +1,303 @@
+#!/usr/bin/python
+
+# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+# $Id$
+
+from datetime import datetime
+
+from common_test_ltastoragedb import *
+from lofar.lta.ltastorageoverview import store
+from lofar.lta.ltastorageoverview.ingesteventhandler import IngestEventHandler
+
+import logging
+logger = logging.getLogger(__name__)
+
+class TestIngestEventHandler(CommonLTAStorageDbTest):
+    def setUp(self):
+        # allow superclass to setup empty database
+        super(TestIngestEventHandler, self).setUp()
+
+        # fill empty database with simple sites and root dirs
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            db.insertSiteIfNotExists('siteA', 'srm://siteA.foo.bar:8443')
+            db.insertSiteIfNotExists('siteB', 'srm://siteB.foo.bar:8443')
+
+            db.insertRootDirectory('siteA', '/root_dir_1')
+            db.insertRootDirectory('siteA', '/root_dir_2')
+            db.insertRootDirectory('siteA', '/long/path/to/root_dir_3')
+            db.insertRootDirectory('siteB', '/root_dir_1')
+
+        self._markAllDirectoriesRecentlyVisited()
+
+    def _markAllDirectoriesRecentlyVisited(self):
+        """pretend that all dirs were recently visited
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            db.executeQuery('''update scraper.last_directory_visit
+                               set visit_date=%s;''', (datetime.utcnow(), ))
+            db.commit()
+
+    def test_01_schedule_srmurl_for_visit_unknown_site(self):
+        """ try to schedule some unknown site's surl. Should raise.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            handler = IngestEventHandler(dbcreds=self.dbcreds)
+
+            with self.assertRaises(LookupError) as context:
+                surl = 'srm://foo.bar:1234/fdjsalfja5h43535h3oiu/5u905u3f'
+                handler._schedule_srmurl_for_visit(surl)
+            self.assertTrue('Could not find site' in context.exception.message)
+
+    def test_02_mark_directory_for_a_visit(self):
+        """ Test core method _mark_directory_for_a_visit for all known root dirs.
+        Should set the last visit time for each dir way in the past.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            handler = IngestEventHandler(dbcreds=self.dbcreds)
+            now = datetime.utcnow()
+
+            for site in db.sites():
+                for root_dir in db.rootDirectoriesForSite(site['id']):
+                    dir_id = root_dir['root_dir_id']
+                    # make sure the dir's last visit time is recent
+                    db.updateDirectoryLastVisitTime(dir_id, now)
+                    timestamp_before_mark = db.directoryLastVisitTime(dir_id)
+                    self.assertEqual(now, timestamp_before_mark)
+
+                    # let the handler mark the dir for a next visit...
+                    handler._mark_directory_for_a_visit(dir_id)
+
+                    # by marking the dir for a next visit, the dir's last visit time is set way in the past.
+                    timestamp_after_mark = db.directoryLastVisitTime(dir_id)
+                    self.assertLess(timestamp_after_mark, timestamp_before_mark)
+
+    def test_03_insert_missing_directory_tree_if_needed(self):
+        """ Test core method _insert_missing_directory_tree_if_needed for all known root dirs.
+        Should result in new directory entries in the database for the new subdirectories only.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            handler = IngestEventHandler(dbcreds=self.dbcreds)
+
+            for site in db.sites():
+                site_surl = site['url']
+                site_id = site['id']
+                for root_dir in db.rootDirectoriesForSite(site_id):
+                    dir_path = root_dir['dir_name']
+                    surl = site_surl + dir_path
+
+                    # root dir should already exist
+                    dir = db.directoryByName(dir_path, site_id)
+                    self.assertIsNotNone(dir)
+
+                    # let the handler insert the not-so-missing dirs.
+                    # nothing should happen, because the root dir already exists
+                    new_dir_ids = handler._insert_missing_directory_tree_if_needed(surl)
+                    self.assertEqual(0, len(new_dir_ids))
+
+                    # now insert some new subdirs, with multiple levels.
+                    for subdir_path in ['/foo', '/bar/xyz']:
+                        dir_path = root_dir['dir_name'] + subdir_path
+                        surl = site_surl + dir_path
+                        # dir should not exist yet
+                        self.assertIsNone(db.directoryByName(dir_path, site_id))
+
+                        # let the handler insert the missing dirs.
+                        handler._insert_missing_directory_tree_if_needed(surl)
+
+                        # dir should exist now
+                        dir = db.directoryByName(dir_path, site_id)
+                        self.assertIsNotNone(dir)
+
+                        # check if new dir has expected root dir
+                        parents = db.parentDirectories(dir['dir_id'])
+                        self.assertEqual(root_dir['root_dir_id'], parents[0]['id'])
+
+    def test_04_insert_missing_directory_tree_if_needed_for_path_with_unknown_rootdir(self):
+        """ Test core method _insert_missing_directory_tree_if_needed for a path with an unknown root dir
+        Should raise LookupError.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            handler = IngestEventHandler(dbcreds=self.dbcreds)
+
+            for site in db.sites():
+                with self.assertRaises(LookupError) as context:
+                    surl = site['url'] + '/fdjsalfja5h43535h3oiu/5u905u3f'
+                    handler._insert_missing_directory_tree_if_needed(surl)
+                self.assertTrue('Could not find parent root dir' in context.exception.message)
+
+    def test_05_schedule_srmurl_for_visit_for_root_dir(self):
+        """ Test higher level method _schedule_srmurl_for_visit for all known root dirs.
+        Should result in the dir matching the surl being marked as the next dir to be visited.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            handler = IngestEventHandler(dbcreds=self.dbcreds)
+
+            for site in db.sites():
+                for root_dir in db.rootDirectoriesForSite(site['id']):
+                    self._markAllDirectoriesRecentlyVisited()
+                    now = datetime.utcnow()
+
+                    dir_id = root_dir['root_dir_id']
+                    surl = site['url'] + root_dir['dir_name']
+                    handler._schedule_srmurl_for_visit(surl)
+
+                    # surl was scheduled for a visit, so this dir should be the least_recent_visited_dir
+                    site_visit_stats = db.visitStats(datetime.utcnow())[site['name']]
+                    self.assertEqual(dir_id, site_visit_stats['least_recent_visited_dir_id'])
+
+                    # mimic a directory visit by the scraper by setting the last visit time to now.
+                    db.updateDirectoryLastVisitTime(dir_id, now)
+
+                    # we faked a visit, so this dir should not be the least_recent_visited_dir anymore
+                    site_visit_stats = db.visitStats(now)[site['name']]
+                    self.assertNotEqual(dir_id, site_visit_stats.get('least_recent_visited_dir_id'))
+
+    def test_06_schedule_srmurl_for_visit_for_new_root_sub_dir(self):
+        """ Test higher level method _schedule_srmurl_for_visit for all new unknown subdirs of the known root dirs.
+        Should result in the dir matching the surl being marked as the next dir to be visited.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            handler = IngestEventHandler(dbcreds=self.dbcreds)
+
+            for site in db.sites():
+                for root_dir in db.rootDirectoriesForSite(site['id']):
+                    self._markAllDirectoriesRecentlyVisited()
+                    now = datetime.utcnow()
+
+                    # create the subdir surl
+                    sub_dir_name = '/foo'
+                    sub_dir_path = root_dir['dir_name'] + sub_dir_name
+                    surl = site['url'] + sub_dir_path
+
+                    # call the method under test
+                    handler._schedule_srmurl_for_visit(surl)
+
+                    # surl was scheduled for a visit; all other dirs were already marked as visited...
+                    # so there should be a new dir for this surl, and it should be the least_recent_visited_dir
+                    site_visit_stats = db.visitStats(datetime.utcnow())[site['name']]
+
+                    least_recent_visited_dir_id = site_visit_stats.get('least_recent_visited_dir_id')
+                    self.assertIsNotNone(least_recent_visited_dir_id)
+
+                    least_recent_visited_dir = db.directory(least_recent_visited_dir_id)
+                    self.assertEqual(sub_dir_path, least_recent_visited_dir['dir_name'])
+
+                    # mimic a directory visit by the scraper by setting the last visit time to now.
+                    db.updateDirectoryLastVisitTime(least_recent_visited_dir_id, now)
+
+                    # we faked a visit, so this dir should not be the least_recent_visited_dir anymore
+                    site_visit_stats = db.visitStats(now)[site['name']]
+                    self.assertNotEqual(least_recent_visited_dir_id, site_visit_stats.get('least_recent_visited_dir_id'))
+
+    def test_07_schedule_srmurl_for_visit_for_path_with_unknown_rootdir(self):
+        """ Test higher level method _schedule_srmurl_for_visit for a path with an unknown root dir
+        Should raise LookupError.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            handler = IngestEventHandler(dbcreds=self.dbcreds)
+
+            for site in db.sites():
+                with self.assertRaises(LookupError) as context:
+                    surl = site['url'] + '/fdjsalfja5h43535h3oiu/5u905u3f'
+                    handler._schedule_srmurl_for_visit(surl)
+                self.assertTrue('Could not find parent root dir' in context.exception.message)
+
+    def test_08_integration_test_with_messagebus(self):
+        """ Full blown integration test listening for notifications on the bus,
+        and checking which dir is up for a visit next.
+        Needs a working local qpid broker. Test is skipped if qpid not available.
+        """
+        try:
+            broker = None
+            connection = None
+
+            import uuid
+            from threading import Event
+            from qpid.messaging import Connection, ConnectError
+            from qpidtoollibs import BrokerAgent
+            from lofar.messaging.messagebus import ToBus
+            from lofar.messaging.messages import EventMessage
+            from lofar.lta.ingest.common.config import DEFAULT_INGEST_NOTIFICATION_PREFIX
+
+            # setup broker connection
+            connection = Connection.establish('127.0.0.1')
+            broker = BrokerAgent(connection)
+
+            # add test service bus
+            busname = 'test-ingesteventhandler-%s' % (uuid.uuid1())
+            broker.addExchange('topic', busname)
+
+            sync_event = Event()
+
+            class SyncedIngestEventHandler(IngestEventHandler):
+                """This derived IngestEventHandler behaves exactly like the normal
+                object under test IngestEventHandler, but it also sets a sync_event
+                to sync between the listener thread and this main test thread"""
+                def _handleMessage(self, msg):
+                    super(SyncedIngestEventHandler, self)._handleMessage(msg)
+                    sync_event.set()
+
+            with SyncedIngestEventHandler(self.dbcreds, busname=busname):
+                with store.LTAStorageDb(self.dbcreds, True) as db:
+                    for site in db.sites():
+                        for root_dir in db.rootDirectoriesForSite(site['id']):
+                            self._markAllDirectoriesRecentlyVisited()
+
+                            # create the subdir surl
+                            sub_dir_name = '/foo'
+                            sub_dir_path = root_dir['dir_name'] + sub_dir_name
+                            surl = site['url'] + sub_dir_path
+
+                            with ToBus(busname) as sender:
+                                msg = EventMessage(context=DEFAULT_INGEST_NOTIFICATION_PREFIX+"TaskFinished",
+                                                   content={'srm_url': surl})
+                                sender.send(msg)
+
+                            # wait for the handler to have processed the message
+                            self.assertTrue(sync_event.wait(2))
+                            sync_event.clear()
+
+                            # surl should have been scheduled for a visit; all other dirs were already marked as visited...
+                            # so there should be a new dir for this surl, and it should be the least_recent_visited_dir
+                            site_visit_stats = db.visitStats(datetime.utcnow())[site['name']]
+
+                            least_recent_visited_dir_id = site_visit_stats.get('least_recent_visited_dir_id')
+                            self.assertIsNotNone(least_recent_visited_dir_id)
+
+                            least_recent_visited_dir = db.directory(least_recent_visited_dir_id)
+                            self.assertEqual(sub_dir_path, least_recent_visited_dir['dir_name'])
+
+        except ImportError as e:
+            logger.warning("skipping test due to: %s", e)
+        except ConnectError as e:
+            logger.warning("skipping test due to: %s", e)
+        finally:
+            # cleanup test bus and exit
+            if broker:
+                broker.delExchange(busname)
+            if connection:
+                connection.close()
+
+
+# run tests if main
+if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.INFO)
+
+    unittest.main()
diff --git a/LTA/ltastorageoverview/test/test_ingesteventhandler.run b/LTA/ltastorageoverview/test/test_ingesteventhandler.run
new file mode 100755
index 00000000000..8b7d318ffff
--- /dev/null
+++ b/LTA/ltastorageoverview/test/test_ingesteventhandler.run
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+source python-coverage.sh
+python_coverage_test "*ingesteventhandler*" test_ingesteventhandler.py
diff --git a/LTA/ltastorageoverview/test/test_ingesteventhandler.sh b/LTA/ltastorageoverview/test/test_ingesteventhandler.sh
new file mode 100755
index 00000000000..4f5d35a3038
--- /dev/null
+++ b/LTA/ltastorageoverview/test/test_ingesteventhandler.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+./runctest.sh test_ingesteventhandler
diff --git a/LTA/ltastorageoverview/test/test_lso_webservice.py b/LTA/ltastorageoverview/test/test_lso_webservice.py
index 4831f3bec65..c81e140777b 100755
--- a/LTA/ltastorageoverview/test/test_lso_webservice.py
+++ b/LTA/ltastorageoverview/test/test_lso_webservice.py
@@ -28,21 +28,70 @@ import tempfile
 import urllib2
 import json
 import datetime
+import psycopg2
 from StringIO import StringIO
-from flask.ext.testing import LiveServerTestCase as FlaskLiveTestCase
-from ltastorageoverview import store
-from ltastorageoverview.webservice import webservice as webservice
+import lofar.common.dbcredentials as dbc
+from lofar.lta.ltastorageoverview import store
+from lofar.lta.ltastorageoverview.webservice import webservice as webservice
+
+import logging
+logger = logging.getLogger(__name__)
+
+try:
+    from flask.ext.testing import LiveServerTestCase as FlaskLiveTestCase
+except ImportError as e:
+    print str(e)
+    print 'Please install python-flask-testing: sudo apt-get install python-flask-testing'
+    exit(3) #special lofar skip test return code
+
+try:
+    import testing.postgresql
+except ImportError as e:
+    print str(e)
+    print 'Please install python package testing.postgresql: sudo pip install testing.postgresql'
+    exit(3)  # special lofar test exit code: skipped test
+
+test_psql = None
 
 def setUpModule():
-    tmpfile = os.path.join(tempfile.gettempdir(), 'test.sqlite')
+    logger.info('setting up test LTASO database server...')
+
+    # create a test webservice.db
+    logger.info('  creating test postgres server')
+    global test_psql
+    test_psql = testing.postgresql.Postgresql()
+    dsn = test_psql.dsn()
+    logger.info('  created test postgres server, dsn=%s', dsn)
+
+    dbcreds = dbc.Credentials()
+    dbcreds.user = 'test_user'
+    dbcreds.password = 'test_password'
 
-    if os.path.exists(tmpfile):
-        os.remove(tmpfile)
+    with psycopg2.connect(**dsn) as conn:
+        cursor = conn.cursor()
+        # create a db user with the same user/pass as in the test dbcreds
+        query = "CREATE USER %s WITH SUPERUSER PASSWORD '%s'" % (dbcreds.user, dbcreds.password)
+        cursor.execute(query)
 
-    webservice.db = store.LTAStorageDb(tmpfile)
+        create_script_path = os.path.normpath(os.path.join(os.environ['LOFARROOT'], 'share', 'ltaso', 'create_db_ltastorageoverview.sql'))
+        logger.info('  running ltaso create script create_script=%s', create_script_path)
+        with open(create_script_path, 'r') as script:
+            cursor.execute(script.read())
+        logger.info('  completed ltaso create script')
 
-    webservice.db.insertSite('siteA', 'srm://siteA.org')
-    webservice.db.insertSite('siteB', 'srm://siteB.org')
+    # copy the test postgres server settings into dbcreds;
+    # the webservice db connection below uses these dbcreds to reach the test ltaso database
+    dbcreds.host = dsn['host']
+    dbcreds.database = dsn['database']
+    dbcreds.port = dsn['port']
+
+    logger.info('finished setting up test LTASO database')
+
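+    # inject the test database into the webservice module, so the flask app under test uses it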
+    webservice.db = store.LTAStorageDb(dbcreds, True)
+
+    logger.info('filling test LTASO database with test data')
+    webservice.db.insertSiteIfNotExists('siteA', 'srm://siteA.org')
+    webservice.db.insertSiteIfNotExists('siteB', 'srm://siteB.org')
 
     rootDir_ids = []
     rootDir_ids.append(webservice.db.insertRootDirectory('siteA', 'rootDir1'))
@@ -51,20 +100,24 @@ def setUpModule():
 
     for rootDir_id in rootDir_ids:
         for j in range(2):
-            subDir_id = webservice.db.insertSubDirectory(rootDir_id, 'subDir_%d' % j)
+            subDir_id = webservice.db.insertSubDirectory('subDir_%d' % j, rootDir_id)
 
             if j == 0:
-                webservice.db.insertFileInfo('file_%d' % j, 271*(j+1), datetime.datetime.utcnow(), subDir_id)
+                webservice.db.insertFileInfo('file_%d' % j, 271 * (j + 1), datetime.datetime.utcnow(), subDir_id)
 
             for k in range(2):
-                subsubDir_id = webservice.db.insertSubDirectory(subDir_id, 'subsubDir_%d' % k)
+                subsubDir_id = webservice.db.insertSubDirectory('subsubDir_%d' % k, subDir_id)
 
-                for l in range((j+1)*(k+1)):
-                    webservice.db.insertFileInfo('file_%d' % l, 314*(l+1), datetime.datetime.utcnow(), subsubDir_id)
+                for l in range((j + 1) * (k + 1)):
+                    webservice.db.insertFileInfo('file_%d' % l, 314 * (l + 1), datetime.datetime.utcnow(), subsubDir_id)
+
+    logger.info('finished filling test LTASO database with test data')
 
 def tearDownModule():
-    if os.path.exists(webservice.db.db_filename):
-        os.remove(webservice.db.db_filename)
+    logger.info('removing test LTASO database server...')
+    test_psql.stop()
+    logger.info('removed test LTASO database server')
+
 
 class TestLTAStorageWebService(FlaskLiveTestCase):
     def create_app(self):
@@ -108,15 +161,16 @@ class TestLTAStorageWebService(FlaskLiveTestCase):
         rootDirectories = content['rootDirectories']
         self.assertEqual(3, len(rootDirectories))
 
-        rootDirsDict = dict([(x['name'], x) for x in rootDirectories])
+        rootDirsDict = dict([(x['dir_name'], x) for x in rootDirectories])
 
         self.assertEqual('siteA', rootDirsDict['rootDir1']['site_name'])
         self.assertEqual('siteA', rootDirsDict['rootDir2']['site_name'])
         self.assertEqual('siteB', rootDirsDict['path/to/rootDir3']['site_name'])
 
 
-
 def main(argv):
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.INFO)
     unittest.main()
 
 # run tests if main
diff --git a/LTA/ltastorageoverview/test/test_scraper.py b/LTA/ltastorageoverview/test/test_scraper.py
new file mode 100755
index 00000000000..ce7d1ff1889
--- /dev/null
+++ b/LTA/ltastorageoverview/test/test_scraper.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+
+# Copyright (C) 2012-2015    ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+
+# $Id$
+
+import logging
+
+from common_test_ltastoragedb import *
+from lofar.lta.ltastorageoverview import scraper
+
+logger = logging.getLogger(__name__)
+
+class TestLocation(unittest.TestCase):
+    def test_isRoot(self):
+        loc = scraper.Location('srm://srm.grid.sara.nl:8443', '/foo/bar')
+        self.assertFalse(loc.isRoot())
+
+        loc = scraper.Location('srm://srm.grid.sara.nl:8443', '/')
+        self.assertTrue(loc.isRoot())
+
+    def test_malformed_location(self):
+        with self.assertRaises(ValueError) as context:
+            scraper.Location('http://astron.nl', '/foo/bar')
+        self.assertTrue('malformed srm url' in str(context.exception))
+
+        with self.assertRaises(ValueError) as context:
+            scraper.Location('srm://srm.grid.sara.nl:8443', 'some_dir_name')
+        self.assertTrue('malformed directory' in str(context.exception))
+
+
+class TestScraper(CommonLTAStorageDbTest):
+    pass
+
+# run tests if main
+if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.INFO)
+
+    unittest.main()
diff --git a/LTA/ltastorageoverview/test/test_scraper.run b/LTA/ltastorageoverview/test/test_scraper.run
new file mode 100755
index 00000000000..b47e4dcc959
--- /dev/null
+++ b/LTA/ltastorageoverview/test/test_scraper.run
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+source python-coverage.sh
+python_coverage_test "ltas*" test_scraper.py
diff --git a/LTA/ltastorageoverview/test/test_scraper.sh b/LTA/ltastorageoverview/test/test_scraper.sh
new file mode 100755
index 00000000000..66ce2e9f7a3
--- /dev/null
+++ b/LTA/ltastorageoverview/test/test_scraper.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+./runctest.sh test_scraper
diff --git a/LTA/ltastorageoverview/test/test_store.py b/LTA/ltastorageoverview/test/test_store.py
index 30b08fa0325..358bef808d7 100755
--- a/LTA/ltastorageoverview/test/test_store.py
+++ b/LTA/ltastorageoverview/test/test_store.py
@@ -19,113 +19,233 @@
 
 # $Id$
 
-import unittest
-import datetime
+from datetime import datetime
 import time
-import os
-import os.path
-import tempfile
-from ltastorageoverview import store
+from pprint import pformat
 
+from common_test_ltastoragedb import *
+from lofar.lta.ltastorageoverview import store
+from lofar.common.postgres import FETCH_ALL
 
-class TestLTAStorageDb(unittest.TestCase):
-    def setUp(self):
-        tmpfile = os.path.join(tempfile.gettempdir(), 'test.sqlite')
-        self.db = store.LTAStorageDb(tmpfile, True)
+import logging
+logger = logging.getLogger(__name__)
 
-        self.assertTrue(os.path.exists(self.db.db_filename))
-
-    #def tearDown(self):
-        #if os.path.exists(self.db.db_filename):
-            #os.remove(self.db.db_filename)
 
+class TestLTAStorageDb(CommonLTAStorageDbTest):
     def testSites(self):
-        self.db.insertSite('siteA', 'srm://siteA.org')
-        self.db.insertSite('siteB', 'srm://siteB.org')
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            siteA_id = db.insertSiteIfNotExists('siteA', 'srm://siteA.org')
+            siteB_id = db.insertSiteIfNotExists('siteB', 'srm://siteB.org')
 
-        sites = self.db.sites()
-        siteNames = [x[1] for x in sites]
-        self.assertEquals(2, len(siteNames))
-        self.assertTrue('siteA' in siteNames)
-        self.assertTrue('siteB' in siteNames)
+            sites = db.sites()
+            siteNames = [x['name'] for x in sites]
+            self.assertEquals(2, len(siteNames))
+            self.assertTrue('siteA' in siteNames)
+            self.assertTrue('siteB' in siteNames)
 
-        site = self.db.site(1)
-        self.assertTrue('siteA' in site[1])
+            site = db.site(siteA_id)
+            self.assertEqual('siteA', site['name'])
 
-        site = self.db.site(2)
-        self.assertTrue('siteB' in site[1])
+            site = db.site(siteB_id)
+            self.assertEqual('siteB', site['name'])
 
     def testRootDirs(self):
-        siteA_id = self.db.insertSite('siteA', 'srm://siteA.org')
-        siteB_id = self.db.insertSite('siteB', 'srm://siteB.org')
-
-        dirA1_id = self.db.insertRootDirectory('siteA', 'rootDir1')
-        dirA2_id = self.db.insertRootDirectory('siteA', 'rootDir2')
-        dirA3_id = self.db.insertRootDirectory('siteA', 'path/to/rootDir3')
-
-        dirB1_id = self.db.insertRootDirectory('siteB', 'rootDir1')
-        dirB2_id = self.db.insertRootDirectory('siteB', 'path/to/otherRootDir')
-
-        rootDirs = self.db.rootDirectories()
-        self.assertEquals(5, len(rootDirs))
-        self.assertTrue((dirA1_id, 'rootDir1', siteA_id, 'siteA') in rootDirs)
-        self.assertTrue((dirA2_id, 'rootDir2', siteA_id, 'siteA') in rootDirs)
-        self.assertTrue((dirA3_id, 'path/to/rootDir3', siteA_id, 'siteA') in rootDirs)
-        self.assertTrue((dirB1_id, 'rootDir1', siteB_id, 'siteB') in rootDirs)
-        self.assertTrue((dirB2_id, 'path/to/otherRootDir', siteB_id, 'siteB') in rootDirs)
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            siteA_id = db.insertSiteIfNotExists('siteA', 'srm://siteA.org')
+            siteB_id = db.insertSiteIfNotExists('siteB', 'srm://siteB.org')
 
-    def testDirectoryTrees(self):
-        siteA_id = self.db.insertSite('siteA', 'srm://siteA.org')
-        siteB_id = self.db.insertSite('siteB', 'srm://siteB.org')
+            dirA1_id = db.insertRootDirectory('siteA', 'rootDir1')
+            dirA2_id = db.insertRootDirectory('siteA', 'rootDir2')
+            dirA3_id = db.insertRootDirectory('siteA', 'path/to/rootDir3')
 
-        for i in range(2):
-            rootDir_id = self.db.insertRootDirectory('siteA', 'rootDir_%d' % i)
+            dirB1_id = db.insertRootDirectory('siteB', 'rootDir1')
+            dirB2_id = db.insertRootDirectory('siteB', 'path/to/otherRootDir')
 
-            for j in range(2):
-                subDir_id = self.db.insertSubDirectory(rootDir_id, 'subDir_%d' % j)
-                self.db.insertFileInfo('file_%d' % j, 271*(j+1), datetime.datetime.utcnow(), subDir_id)
+            rootDirs = db.rootDirectories()
+            self.assertEquals(5, len(rootDirs))
 
-                for k in range(2):
-                    subsubDir_id = self.db.insertSubDirectory(subDir_id, 'subsubDir_%d' % k)
-                    self.db.insertFileInfo('file_%d_%d' % (j,k), 314*(k+1), datetime.datetime.utcnow(), subsubDir_id)
+            rootDirsDict = {rd['root_dir_id']:rd for rd in rootDirs}
 
-        rootDirs = self.db.rootDirectories()
-        self.assertEquals(2, len(rootDirs))
+            self.assertEqual('rootDir1', rootDirsDict[dirA1_id]['dir_name'])
+            self.assertEqual(siteA_id, rootDirsDict[dirA1_id]['site_id'])
+            self.assertEqual('siteA', rootDirsDict[dirA1_id]['site_name'])
 
-        for (id, name, site_id, site_name) in rootDirs:
-            subDirs = self.db.subDirectories(id, 1, False)
-            for subDir in subDirs:
-                subDir_parent_id = subDir[2]
-                self.assertEquals(id, subDir_parent_id)
+            self.assertEqual('rootDir2', rootDirsDict[dirA2_id]['dir_name'])
+            self.assertEqual(siteA_id, rootDirsDict[dirA2_id]['site_id'])
+            self.assertEqual('siteA', rootDirsDict[dirA2_id]['site_name'])
 
-        print '\n'.join([str(x) for x in self.db.filesInTree(rootDir_id)])
+            self.assertEqual('path/to/rootDir3', rootDirsDict[dirA3_id]['dir_name'])
+            self.assertEqual(siteA_id, rootDirsDict[dirA3_id]['site_id'])
+            self.assertEqual('siteA', rootDirsDict[dirA3_id]['site_name'])
 
-    def testLeastRecentlyVisitedDirectory(self):
-        siteA_id = self.db.insertSite('siteA', 'srm://siteA.org')
-
-        dir_ids = []
-        for i in range(3):
-            dir_id = self.db.insertRootDirectory('siteA', 'rootDir_%d' % i)
-            dir_ids.append(dir_id)
+            self.assertEqual('rootDir1', rootDirsDict[dirB1_id]['dir_name'])
+            self.assertEqual(siteB_id, rootDirsDict[dirB1_id]['site_id'])
+            self.assertEqual('siteB', rootDirsDict[dirB1_id]['site_name'])
 
-            self.db.updateDirectoryLastVisitTime(dir_id, datetime.datetime.utcnow())
-            time.sleep(0.002)
+            self.assertEqual('path/to/otherRootDir', rootDirsDict[dirB2_id]['dir_name'])
+            self.assertEqual(siteB_id, rootDirsDict[dirB2_id]['site_id'])
+            self.assertEqual('siteB', rootDirsDict[dirB2_id]['site_name'])
 
-        visitStats = self.db.visitStats()
-        self.assertTrue('siteA' in visitStats)
-        self.assertTrue('least_recent_visited_dir_id' in visitStats['siteA'])
+            root_dir_ids_siteA = set(d['root_dir_id'] for d in db.rootDirectoriesForSite(siteA_id))
+            self.assertEqual(set([dirA1_id, dirA2_id, dirA3_id]), root_dir_ids_siteA)
 
-        lvr_dir_id = visitStats['siteA']['least_recent_visited_dir_id']
-        self.assertEquals(dir_ids[0], lvr_dir_id)
+            root_dir_ids_siteB = set(d['root_dir_id'] for d in db.rootDirectoriesForSite(siteB_id))
+            self.assertEqual(set([dirB1_id, dirB2_id]), root_dir_ids_siteB)
 
-        self.db.updateDirectoryLastVisitTime(dir_ids[0], datetime.datetime.utcnow())
-        self.db.updateDirectoryLastVisitTime(dir_ids[1], datetime.datetime.utcnow())
+            root_dirs_non_existing_site = db.rootDirectoriesForSite(999)
+            self.assertEqual([], root_dirs_non_existing_site)
 
-        visitStats = self.db.visitStats()
-        lvr_dir_id = visitStats['siteA']['least_recent_visited_dir_id']
-        self.assertEquals(dir_ids[2], lvr_dir_id)
+    def testNonExistingDir(self):
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            dir = db.directoryByName('fjsdka;58432aek5843rfsjd8-sa')
+            self.assertEqual(None, dir)
 
+    def testLeastRecentlyVisitedDirectory(self):
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            db.insertSiteIfNotExists('siteA', 'srm://siteA.org')
+
+            dir_ids = []
+            for i in range(3):
+                dir_id = db.insertRootDirectory('siteA', 'rootDir_%d' % i)
+                dir_ids.append(dir_id)
+
+                db.updateDirectoryLastVisitTime(dir_id, datetime.utcnow())
+                time.sleep(0.002)
+
+            visitStats = db.visitStats()
+            self.assertTrue('siteA' in visitStats)
+            self.assertTrue('least_recent_visited_dir_id' in visitStats['siteA'])
+
+            lvr_dir_id = visitStats['siteA']['least_recent_visited_dir_id']
+            self.assertEquals(dir_ids[0], lvr_dir_id)
+
+            db.updateDirectoryLastVisitTime(dir_ids[0], datetime.utcnow())
+            db.updateDirectoryLastVisitTime(dir_ids[1], datetime.utcnow())
+
+            visitStats = db.visitStats()
+            lvr_dir_id = visitStats['siteA']['least_recent_visited_dir_id']
+            self.assertEquals(dir_ids[2], lvr_dir_id)
+
+    def testDuplicateSubDirs(self):
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            db.insertSiteIfNotExists('siteA', 'srm://siteA.org')
+            db.insertSiteIfNotExists('siteB', 'srm://siteB.org')
+
+            dirA_id = db.insertRootDirectory('siteA', 'rootDir1')
+            dirB_id = db.insertRootDirectory('siteB', 'rootDir1')
+
+            subDirA1_id = db.insertSubDirectory('foo', dirA_id)
+            subDirA2_id = db.insertSubDirectory('bar', dirA_id)
+            subDirB1_id = db.insertSubDirectory('foo', dirB_id)
+
+            self.assertNotEquals(None, subDirA1_id)
+            self.assertNotEquals(None, subDirA2_id)
+            self.assertNotEquals(None, subDirB1_id)
+
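+            # inserting a subdir with the same name under the same parent again should be rejected and yield None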
+            subDirA1a_id = db.insertSubDirectory('foo', dirA_id)
+            self.assertEquals(None, subDirA1a_id)
+
+    def _fill_test_db_with_sites_and_root_dirs(self, db):
+        """
+        helper method to fill empty database with simple sites and root dirs
+        """
+        db.insertSiteIfNotExists('siteA', 'srm://siteA.foo.bar:8443')
+        db.insertSiteIfNotExists('siteB', 'srm://siteB.foo.bar:8443')
+
+        db.insertRootDirectory('siteA', '/root_dir_1')
+        db.insertRootDirectory('siteA', '/root_dir_2')
+        db.insertRootDirectory('siteA', '/long/path/to/root_dir_3')
+        db.insertRootDirectory('siteB', '/root_dir_1')
+
+
+    def test_insert_missing_directory_tree_if_needed(self):
+        """ Test core method _insertMissingDirectoryTreeIfNeeded for all known root dirs.
+        Should result in new directory entries in the database for the new sub directories only.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            self._fill_test_db_with_sites_and_root_dirs(db)
+
+            for site in db.sites():
+                site_id = site['id']
+                for root_dir in db.rootDirectoriesForSite(site_id):
+                    root_dir_path = root_dir['dir_name']
+
+                    # root dir should already exist
+                    dir = db.directoryByName(root_dir_path, site_id)
+                    self.assertIsNotNone(dir)
+
+                    # insert the not-so-missing root dir.
+                    # nothing should happen, because the root dir already exists
+                    new_dir_ids = db.insert_missing_directory_tree_if_needed(root_dir_path, site_id)
+                    self.assertEqual(0, len(new_dir_ids))
+
+                    # now insert some new subdirs, with multiple levels.
+                    for subdir_path in ['/foo', '/bar/xyz']:
+                        dir_path = root_dir_path + subdir_path
+                        # dir should not exist yet
+                        self.assertIsNone(db.directoryByName(dir_path, site_id))
+
+                        # let the handler insert the missing dirs.
+                        db.insert_missing_directory_tree_if_needed(dir_path, site_id)
+
+                        # dir should exist now
+                        dir = db.directoryByName(dir_path, site_id)
+                        self.assertIsNotNone(dir)
+
+                        # check if new dir has expected root dir
+                        parents = db.parentDirectories(dir['dir_id'])
+                        self.assertEqual(root_dir['root_dir_id'], parents[0]['id'])
+
+    def test_insert_missing_directory_tree_if_needed_for_path_with_unknown_rootdir(self):
+        """ Test core method _insertMissingDirectoryTreeIfNeeded for a path with an unknown root dir
+        Should raise LookupError.
+        """
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            self._fill_test_db_with_sites_and_root_dirs(db)
+
+            for site in db.sites():
+                site_id = site['id']
+                with self.assertRaises(LookupError) as context:
+                    incorrect_dir_path = '/fdjsalfja5h43535h3oiu/5u905u3f'
+                    db.insert_missing_directory_tree_if_needed(incorrect_dir_path, site_id)
+                self.assertTrue('Could not find parent root dir' in context.exception.message)
+
+    def testProjectsAndObservations(self):
+        with store.LTAStorageDb(self.dbcreds, True) as db:
+            #first insert a lot of data...
+            db.insertSiteIfNotExists('juelich', 'srm://lofar-srm.fz-juelich.de:8443')
+            db.insertSiteIfNotExists('sara', 'srm://srm.grid.sara.nl:8443')
+
+            juelich_root_dir_id = db.insertRootDirectory('juelich', '/pnfs/fz-juelich.de/data/lofar/ops/')
+            sara_root_dir_id = db.insertRootDirectory('sara', '/pnfs/grid.sara.nl/data/lofar/ops')
+
+            juelich_projects_dir_id = db.insertSubDirectory('/pnfs/fz-juelich.de/data/lofar/ops/projects', juelich_root_dir_id)
+            sara_projects_dir_id = db.insertSubDirectory('/pnfs/grid.sara.nl/data/lofar/ops/projects', sara_root_dir_id)
+
+            for project_nr, project_name in enumerate(['lc8_001', '2017lofarobs', 'ddt5_001']):
+                # projects are sometimes stored at multiple sites
+                for projects_dir_id in [juelich_projects_dir_id, sara_projects_dir_id]:
+                    project_dir_id = db.insertSubDirectory('/pnfs/fz-juelich.de/data/lofar/ops/projects/' + project_name,
+                                                           projects_dir_id)
+                    for obs_nr in range(3):
+                        obs_name = 'L%06d' % ((project_nr+1)*1000 + obs_nr)
+                        obs_dir_id = db.insertSubDirectory('/pnfs/fz-juelich.de/data/lofar/ops/projects/' + project_name + '/' + obs_name,
+                                                           project_dir_id)
+
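+                        # insert 244 subband files per observation; the trailing False argument
+                        # presumably suppresses the per-insert commit, with one explicit commit per observation below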
+                        for sb_nr in range(244):
+                            file_name = '%s_SB%03d.MS.tar' % (obs_name, sb_nr)
+                            db.insertFileInfo(file_name, 1, datetime.utcnow(), obs_dir_id, False)
+                        db.commit()
+
+            # then check the results
+            # TODO: add real assertions; for now only log the contents of the metainfo queries below
+            logger.info(pformat(db.executeQuery('select * from metainfo.project_directory', fetch=FETCH_ALL)))
+            logger.info(pformat(db.executeQuery('select * from metainfo.project_stats', fetch=FETCH_ALL)))
+            logger.info(pformat(db.executeQuery('select * from metainfo.project_observation_dataproduct', fetch=FETCH_ALL)))
 
 # run tests if main
 if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                        level=logging.INFO)
+
     unittest.main()
diff --git a/LTA/ltastorageoverview/test/test_store.run b/LTA/ltastorageoverview/test/test_store.run
index b2e574673fc..952bff78ec3 100755
--- a/LTA/ltastorageoverview/test/test_store.run
+++ b/LTA/ltastorageoverview/test/test_store.run
@@ -1,3 +1,4 @@
 #!/bin/bash
 
-python test_store.py
+source python-coverage.sh
+python_coverage_test "ltas*" test_store.py
diff --git a/LTA/sip/lib/CMakeLists.txt b/LTA/sip/lib/CMakeLists.txt
index f0c9f79189c..c6b1cdbe38e 100644
--- a/LTA/sip/lib/CMakeLists.txt
+++ b/LTA/sip/lib/CMakeLists.txt
@@ -17,7 +17,6 @@ set(_py_files
 
 set(resource_files
   station_coordinates.conf
-  LTA-SIP.xsd
 )
 
 
diff --git a/LTA/sip/lib/query.py b/LTA/sip/lib/query.py
index c00b253c1d0..2c10f9b9f54 100644
--- a/LTA/sip/lib/query.py
+++ b/LTA/sip/lib/query.py
@@ -3,7 +3,7 @@
 
 import urllib
 import requests
-from os.path import expanduser
+from os.path import expanduser, exists
 import xml.etree.ElementTree as ET
 import xmlrpclib
 import uuid
@@ -15,6 +15,13 @@ passw = None
 #host = "lta-ingest-test.lofar.eu:19443"
 host = "lofar-ingest.target.rug.nl:9443"
 
+if not exists(path):
+    # write a default (empty) credentials template so the parsing below does not fail on a missing file
+    with open(path, 'w') as file:
+        file.write("user=\n")
+        file.write("password=\n")
+        file.write("host=\n")
+
 with open(path,'r') as file:
         print "Parsing user credentials from",path
         for line in file:
diff --git a/LTA/sip/lib/validator.py b/LTA/sip/lib/validator.py
index 095063e8ddc..6a60919f6a9 100644
--- a/LTA/sip/lib/validator.py
+++ b/LTA/sip/lib/validator.py
@@ -6,7 +6,9 @@ import ltasip
 d = os.path.dirname(os.path.realpath(__file__))
 XSDPATH = d+"/LTA-SIP.xsd"
 
-def validate(xmlpath, xsdpath=XSDPATH):
+DEFAULT_SIP_XSD_PATH = os.path.join(os.environ.get('LOFARROOT', '/opt/lofar'), 'etc', 'lta', 'LTA-SIP.xsd')
+
+def validate(xmlpath, xsdpath=DEFAULT_SIP_XSD_PATH):
     '''validates given xml file against given xsd file'''
 
     print "validating", xmlpath, "against", xsdpath
@@ -103,7 +105,7 @@ def main(xml):
 
     try:
         xml = xml
-        xsd = XSDPATH
+        xsd = DEFAULT_SIP_XSD_PATH
         valid = validate(xml, xsd)
         consistent = check_consistency(xml)
         return valid and consistent
-- 
GitLab