Skip to content
Snippets Groups Projects
Commit 8ecea43c authored by Jorrit Schaap's avatar Jorrit Schaap
Browse files

Task #9607: do dirstats computation once per commit (statement), use cache...

Task #9607: do dirstats computation once per commit (statement), use cache table for changed fileinfo's to get touched directories
parent 45fa183f
Branches
Tags
No related merge requests found
......@@ -87,6 +87,11 @@ CREATE TABLE lta.fileinfo (
CREATE INDEX fi_directory_id_idx on lta.fileinfo(directory_id);
CREATE INDEX fi_creation_date_idx on lta.fileinfo(creation_date);
-- Cache of fileinfo rows recorded by the fileinfo insert trigger.
-- One row per fileinfo (fileinfo_id is the primary key), together with the
-- directory that fileinfo belongs to. Rows are removed automatically when the
-- referenced fileinfo or directory is deleted (ON DELETE CASCADE).
CREATE TABLE lta._changed_fileinfo_cache (
    fileinfo_id  integer NOT NULL PRIMARY KEY
                 REFERENCES lta.fileinfo
                 ON DELETE CASCADE
                 DEFERRABLE INITIALLY IMMEDIATE,
    directory_id integer NOT NULL
                 REFERENCES lta.directory
                 ON DELETE CASCADE
                 DEFERRABLE INITIALLY IMMEDIATE
) WITH (OIDS = FALSE);
CREATE TABLE scraper.last_directory_visit (
id serial,
......@@ -193,14 +198,39 @@ CREATE TRIGGER trigger_on_directory_inserted_add_directory_closure_entry
--------------------------------------------------------------------------------
-- Trigger function: record the inserted fileinfo row (its id and its
-- directory_id) in lta._changed_fileinfo_cache.
-- An insert that would conflict with an existing cache row is silently
-- skipped, so the same fileinfo is never cached twice.
-- Returns NEW unchanged.
CREATE OR REPLACE FUNCTION lta.on_fileinfo_inserted_add_to_cache()
    RETURNS trigger
    LANGUAGE plpgsql VOLATILE
    COST 100
AS
$BODY$
BEGIN
    INSERT INTO lta._changed_fileinfo_cache (fileinfo_id, directory_id)
    SELECT NEW.id, NEW.directory_id
    ON CONFLICT DO NOTHING;

    RETURN NEW;
END;
$BODY$;
-- Row-level trigger: after each row inserted into lta.fileinfo, call
-- lta.on_fileinfo_inserted_add_to_cache() for that row.
CREATE TRIGGER trigger_on_fileinfo_inserted_add_to_cache
    AFTER INSERT ON lta.fileinfo
    FOR EACH ROW
    EXECUTE PROCEDURE lta.on_fileinfo_inserted_add_to_cache();
--------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION lta.on_fileinfo_inserted_add_directory_stats()
RETURNS trigger AS
$BODY$
DECLARE
rec record;
touched_dir_rec record;
dc_rec record;
BEGIN
FOR touched_dir_rec IN (SELECT DISTINCT ON (directory_id) directory_id FROM lta._changed_fileinfo_cache) LOOP
INSERT INTO metainfo.directory_stats (directory_id)
VALUES (new.directory_id)
VALUES (touched_dir_rec.directory_id)
ON CONFLICT DO NOTHING ;
DROP TABLE IF EXISTS temp_fileinfo_for_dirstats;
......@@ -209,7 +239,7 @@ BEGIN
ON COMMIT DROP
AS (SELECT fi.size, fi.creation_date
FROM lta.fileinfo fi
WHERE fi.directory_id = NEW.directory_id) ;
WHERE fi.directory_id = touched_dir_rec.directory_id) ;
UPDATE metainfo.directory_stats SET
num_files=(SELECT count(size) FROM temp_fileinfo_for_dirstats),
......@@ -218,11 +248,11 @@ BEGIN
max_file_size=(SELECT max(size) FROM temp_fileinfo_for_dirstats),
min_file_creation_date=(SELECT min(creation_date) FROM temp_fileinfo_for_dirstats),
max_file_creation_date=(SELECT max(creation_date) FROM temp_fileinfo_for_dirstats)
WHERE directory_id = NEW.directory_id ;
WHERE directory_id = touched_dir_rec.directory_id ;
FOR rec IN (SELECT dc.ancestor_id as dir_id FROM lta.directory_closure dc WHERE dc.descendant_id = NEW.directory_id) LOOP
FOR dc_rec IN (SELECT dc.ancestor_id as dir_id FROM lta.directory_closure dc WHERE dc.descendant_id = touched_dir_rec.directory_id) LOOP
INSERT INTO metainfo.tree_stats (tree_root_directory_id)
VALUES (rec.dir_id)
VALUES (dc_rec.dir_id)
ON CONFLICT DO NOTHING ;
DROP TABLE IF EXISTS temp_fileinfo_for_treestats;
......@@ -232,7 +262,7 @@ BEGIN
AS (SELECT fi.size, fi.creation_date
FROM lta.directory_closure dc
INNER JOIN lta.fileinfo fi ON fi.directory_id = dc.descendant_id
WHERE dc.ancestor_id = rec.dir_id) ;
WHERE dc.ancestor_id = dc_rec.dir_id) ;
UPDATE metainfo.tree_stats SET
num_files=(SELECT count(size) FROM temp_fileinfo_for_treestats),
......@@ -241,8 +271,11 @@ BEGIN
max_file_size=(SELECT max(size) FROM temp_fileinfo_for_treestats),
min_file_creation_date=(SELECT min(creation_date) FROM temp_fileinfo_for_treestats),
max_file_creation_date=(SELECT max(creation_date) FROM temp_fileinfo_for_treestats)
WHERE tree_root_directory_id = rec.dir_id ;
WHERE tree_root_directory_id = dc_rec.dir_id ;
END LOOP;
END LOOP;
TRUNCATE lta._changed_fileinfo_cache;
RETURN NEW;
END;
......@@ -253,7 +286,7 @@ $BODY$
-- Statement-level trigger: after an INSERT statement on lta.fileinfo, call
-- lta.on_fileinfo_inserted_add_directory_stats() once for the whole statement
-- (not once per inserted row). The function works through the directories
-- listed in lta._changed_fileinfo_cache, so a single call per statement is
-- sufficient.
-- Only one FOR EACH clause is allowed; the stale FOR EACH ROW clause that
-- preceded FOR EACH STATEMENT has been removed.
CREATE TRIGGER trigger_on_fileinfo_inserted_add_directory_stats
    AFTER INSERT ON lta.fileinfo
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lta.on_fileinfo_inserted_add_directory_stats();
--------------------------------------------------------------------------------
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment