From a8e644d5a7407c1b248092029d12a2c158e5ea39 Mon Sep 17 00:00:00 2001 From: jsmith Date: Fri, 7 Oct 2022 19:06:04 +0000 Subject: [PATCH] Update 'PostgreSQL/uuid-index-maintenance.sql' --- PostgreSQL/uuid-index-maintenance.sql | 355 ++++++++++++++------------ 1 file changed, 188 insertions(+), 167 deletions(-) diff --git a/PostgreSQL/uuid-index-maintenance.sql b/PostgreSQL/uuid-index-maintenance.sql index a19b88f..5794d92 100644 --- a/PostgreSQL/uuid-index-maintenance.sql +++ b/PostgreSQL/uuid-index-maintenance.sql @@ -1,205 +1,226 @@ /* * Author: DBRE - Joe Smith - * Date: Sep 2022 - * Purpose: sets up tables, functions and extensions for index maintenance when using uuids. + * Date: October 2022 + * Package: index_maintenance + * Purpose: sets up tables, functions and extensions for index maintenance when deleting on a regular basis. */ --- required to schedule job +-- pre-requisite extensions. create extension pg_cron; - --- required to calculate bloat create extension pgstattuple; --- table defines indexes to maintain, target bloat percentage which triggers maintenance, and an acceptable window to --- do maintenance if the bloat is met during hours we dont want to run the maintenance CREATE TABLE index_maintenance_configuration ( - id int primary key, - index_name varchar(512), - maintenance_threshold_percent float, - man_window_start timestamp, - man_window_end timestamp + id SERIAL primary key, + index_name varchar(63) UNIQUE not null, + maintenance_threshold_percent float not null, + man_window_start time not null, + man_window_end time not null, + weekday_override boolean default false, + weekdays integer[] default array[1, 2, 3, 4, 5, 6, 7] not null ); --- table tracks number of times maintenance is done. --- when maintenance was done CREATE TABLE index_maintenance ( - index_name varchar(512), - index_iteration bigint, - maintenance_done timestamp, - scheduled boolean + index_name varchar(63), + index_iteration bigint default 0, + maintenance_started timestamp default null, + maintenance_done timestamp default null, + scheduled boolean default false, + threshold_breach_value float, PRIMARY KEY(index_name, index_iteration, scheduled) ); --- no maintenance if you don't have a config +CREATE TABLE index_maintenance_lock ( + is_locked boolean, + lock_change_time timestamp default now(), + index_holding_lock text +); + ALTER TABLE index_maintenance ADD FOREIGN KEY (index_name) REFERENCES index_maintenance_configuration(index_name); --- returns bloat as float for an index. -create or replace function return_idx_bloat (idx varchar(512)) - language plpgsql - as - $$ - RETURNS float AS $$ + +/* + * Author: DBRE - Joe Smith + * Date: October 2022 + * Function: idx_maintenance + * Purpose: function called by pg_cron at a regular interval. +*/ + +Create or replace function idx_maintenance() returns void as $$ declare - current_bloat float; + rec record; + current_iteration record; + lock_rec record; + done boolean; + today int; + day_allowed boolean; + Begin + today = EXTRACT(isodow FROM now()); - begin - SELECT 100-(pgstatindex(idx)).avg_leaf_density into current_bloat; - return current_bloat; - end; -$$; + SELECT * INTO lock_rec FROM index_maintenance_lock; + -- if the lock is held already, we do nothing. + if lock_rec.is_locked == true then + raise notice 'index_man_lock_held(lock held already, skipping maintenance until lock is released)', + exit; + end if; --- calls the re-index function, concurrently. --- raises a notice in logs which can be parsed from cloud watch --- guarantees date is between dates, casts then to timestamps. -create or replace function call_reindex(ind varchar(512), man_window_start timestamp, - man_window_end timestamp) - language plpgsql - as - $$ - RETURNS boolean AS $$ - declare - today timestamp; - new_index bigint; - begin - today = now(); - - if not man_window_start then; - -- see call_reindex function - RAISE NOTICE 'call_reindex(%, no_man_window_specified)', idx; - REINDEX CONCURRENTLY idx; - return true; - end if; - - -- if the time of the run is within the right window, we re-index. - -- we return the same iteration to close out that record. - if today::timestamp between man_window_start::timestamp and man_window_end::timestamp; - RAISE NOTICE 'call_reindex(%, %)', idx, today; - REINDEX CONCURRENTLY idx; - return true; - else; - return false; - end if; - end; -$$; - - -create or replace function do_idx_maintenance(idx varchar(512)) - language plpgsql - as - $$ - RETURNS RECORD AS $$ - declare - current_iteration bigint; - new_iteration bigint; - is_scheduled boolean; - man_window_start timestamp; - man_window_end timestamp; - maintenance_threshold_percent float; - current_bloat float; - maintenance_ran boolean; - - begin - - -- look for configuration for current ind - maintenance_threshold_percent, man_window_start, man_window_end = select maintenance_threshold_percent, - man_window_start. man_window_end - from index_maintenance_configuration - where index_name = idx; - - -- check for a current iteration to increment, if none, then we start one at 0. - SELECT max(index_iteration), scheduled INTO current_iteration, is_scheduled FROM index_maintenance WHERE index_name = idx; - - -- set iteration of current record if one does not exist. - -- other wise, we use the current_iteration from the table. - if not current_iteration then; - current_iteration = 0; - end if; - - -- no maintenance window is allowed and will reindex each time. - if not man_window_start then; - -- see call_reindex function - maintenance_ran = call_reindex(idx, man_window_start, man_window_end); - - end if; - - -- if the row we get back is scheduled, we try to run it. - if is_scheduled; - - maintenance_ran = call_reindex(idx, man_window_start, man_window_end); - - - else; - -- here we have no row that is scheduled. - -- we have a maintenance window to check. - -- we check the current bloat, and try to re-index. - current_bloat = return_idx_bloat(idx); - - if current_bloat > maintenance_threshold_percent then; - maintenance_ran = call_reindex(idx, man_window_start, man_window_end); + -- if a lock is not held, we check the indexes we know about. + for rec in select * from index_maintenance_configuration + loop + day_allowed = true; + + if rec.weekday_override == true then + day_allowed = today=ANY(rec.weekdays); end if; + -- check for existing maintenance records. + SELECT * INTO current_iteration FROM index_maintenance + WHERE index_name = rec.index_name; + + -- set current iteration if no records are found for this index + -- insert record into index_maintenance table with scheduled set to false + if current_iteration.index_iteration is null then + insert into index_maintenance(index_name, scheduled) VALUES (rec.index_name, false) on conflict do nothing; + -- return record back into cursor so we can be lazy and reusing the current_iteration pointer. + SELECT * INTO current_iteration FROM index_maintenance + WHERE index_name = rec.index_name; + end if; + + end if; + + done = do_idx_maintenance(current_iteration); + raise notice 'idx_maintenance(index_name: %, maintenance_done: %)', current_iteration.index_name, done; + + end loop; + End; +$$ +Language 'plpgsql'; + +/* + * Author: DBRE - Joe Smith + * Date: October 2022 + * Function: do_idx_maintenance + * Purpose: calls lock_and_reindex function if we are within the maintenance window of a given index +*/ + +Create or replace function do_idx_maintenance(r record, maintenance_percent float) returns boolean as $$ + declare + lf float; + is_between_man_window boolean; + Begin + + -- we assume we are not allowed to do maintenance + is_between_man_window = false; + -- this avoids calculating this multiple times. + if now()::time between r.man_window_start::time and r.man_window_end::time then + is_between_man_window = true + end if; + + -- scheduled, no re-index running, fragmentation determined to be greater than threshold in prior run + if r.scheduled == true then + if is_between_man_window == true then + lock_and_reindex(r, r.threshold_breach_value) + return true; + else + return false; end if; - -- using simple logic, we will either increment the maintenance row, or create a new one. - return maintenance_ran, current_iteration; + else + -- not scheduled, so we check fragmentation on the index. + select leaf_fragmentation into lf from pgstatindex(r.index_name); - end; -$$; + -- not scheduled, no re-index running, fragmentation is greater than our maintenance threshold. + -- lf is passed to track the amount of fragmentation that triggered the run. + if lf > maintenance_percent then + if is_between_man_window == true then + lock_and_reindex(r, lf); + return true; + else + -- leaf fragmentation is above threshold + update index_maintenance set scheduled = true where id = r.id; + return false; + end if; + else + -- leaf fragmentation is under threshold, nothing to do + return false; + end if; + end if; + End; +$$ +Language 'plpgsql'; -create or replace function uuid_idx_maintenance() - language plpgsql - as - $$ +/* + * Author: DBRE - Joe Smith + * Date: October 2022 + * Function: lock_and_reindex + * Purpose: sets up tables, functions and extensions for index maintenance when deleting on a regular basis. +*/ + +Create or replace function lock_and_reindex(r record, lf float) returns void as $$ declare - idx_array varchar[]; + is_between_man_window boolean; + Begin - begin - -- return all indexes we are to maintain, if none, we do nothing. - select array_agg( distinct idx_name ) - into idx_array - from index_maintenance_configuration; + raise notice 'lock_and_reindex(index_name: %, time: %)', r.index_name, now()::text; + -- lock the lock table to prevent duplicate operations + update index_maintenance_lock set is_locked = true, index_holding_lock = rec.index_name where index_holding_lock is null; - -- if there are no indexes, its a no op. - if array_length(idx_array) > 0 then; + -- set the maintenance started value in the record we have + update index_maintenance set maintenance_started = now(), threshold_breach_value = lf where id = r.id; - -- iterate over each name in array - foreach idx in array idx_array loop + -- reindex + REINDEX INDEX CONCURRENTLY rec.index_name; - -- define variable for current iteration - maintenance_done boolean; - current_iteration bigint; - - rec record; + -- set the maintenance done value in the record we have. + update index_maintenance set maintenance_done = now() where id = r.id; - -- call function to check idx, return record with multiple results. - -- parse out information into variables. + -- add next record to table, incrementing the iteration value by 1. + insert into index_maintenance (index_name, index_iteration) VALUES (r.index_name, index_iteration + 1); - rec = do_idx_maintenance(idx); - maintenance_done = rec[0] - current_iteration = rec[1] + End; +$$ +Language 'plpgsql'; - -- maintenance_done coming back true means the maintenance was done. - -- maintenance_done coming back false means that there was no maintenance done. - if maintenance_done then; - UPDATE set maintenance_done = now(), scheduled = false - WHERE index_name = idx (idx, current_iteration, now()) - AND index_iteration = current_iteration; - else; - -- this record will violate a primary key if a row already exists. - -- otherwise, it schedules a row for the next run to review. - INSERT into - index_maintenance(index_name, index_iteration, maintenance_done, scheduled ) - VALUES (idx, current_iteration,null, true) on conflict do nothing; - end if; +/* + * Author: DBRE - Joe Smith + * Date: October 2022 + * Function: add_index_configuration + * Purpose: inserts a configuration into the configuration table, can be called by internal users. +*/ - continue; +Create or replace function add_index_configuration(index_name varchar(63), man_start time, man_end time, threshold float) returns void as $$ + Begin + insert into index_maintenance_configuration(index_name, maintenance_threshold_percent, man_window_start, man_window_end) VALUES (index_name, threshold, man_start, man_end) on conflict do nothing; + raise notice 'attempted to add config for %, manually set weekday override if needed.', index_name; + End; +$$ +Language 'plpgsql'; - end loop; - else; - return - end if; - end; -$$; +/* + * Author: DBRE - Joe Smith + * Date: October 2022 + * Function: kill_reindex + * Purpose: end current reindex operation safely +*/ + +Create or replace function kill_reindex() returns void as $$ + declare + is_locked boolean; + idx_name varchar(63); + proc_id int; + Begin + -- not scheduled, so we check fragmentation on the index. + select index_name into idx_name from index_maintenance_lock; + raise notice 'killing pid running reindex operation for %', index_name; + + -- this should only ever return 1 row. + select pid into proc_id from pg_stat_activity where query like 'REINDEX INDEX %' LIMIT 1; + raise notice 'found pid % proc_id running reindex operation for %', proc_id::text, index_name; + + SELECT pg_cancel_backend(proc_id); + raise notice 'killed pid % proc_id running reindex operation for %',proc_id::text, index_name; + End; +$$ +Language 'plpgsql';