From 0122f020ee92bec1422f01cea6b38af07eabf0d7 Mon Sep 17 00:00:00 2001
From: Nikolay Samokhvalov <nik@postgres.ai>
Date: Mon, 11 Dec 2023 02:33:55 +0000
Subject: [PATCH 1/6] Cleanup formatting for H002

---
 resources/checks/H002_unused_indexes.sh | 90 +++++++++++++------------
 1 file changed, 48 insertions(+), 42 deletions(-)

diff --git a/resources/checks/H002_unused_indexes.sh b/resources/checks/H002_unused_indexes.sh
index 7891145..f67e43b 100755
--- a/resources/checks/H002_unused_indexes.sh
+++ b/resources/checks/H002_unused_indexes.sh
@@ -25,13 +25,14 @@ with fk_indexes as (
      and ci.relpages > ${MIN_RELPAGES}
      and si.idx_scan < 10
 ), table_scans as (
-  select relid,
-      tables.idx_scan + tables.seq_scan as all_scans,
-      ( tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del ) as writes,
+  select
+    relid,
+    tables.idx_scan + tables.seq_scan as all_scans,
+    (tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del) as writes,
     pg_relation_size(relid) as table_size
-      from pg_stat_user_tables as tables
-      join pg_class c on c.oid = relid
-      where c.relpages > ${MIN_RELPAGES}
+  from pg_stat_user_tables as tables
+  join pg_class c on c.oid = relid
+  where c.relpages > ${MIN_RELPAGES}
 ), all_writes as (
   select sum(writes) as total_writes
   from table_scans
@@ -53,11 +54,11 @@ with fk_indexes as (
     pg_get_indexdef(i.indexrelid) as index_def,
     array_to_string(i.indclass, ', ') as opclasses
   from pg_index i
-     join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i'
-     join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r'
-     join pg_namespace n on n.oid = ci.relnamespace
-     join pg_am a ON ci.relam = a.oid
-     left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid
+  join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i'
+  join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r'
+  join pg_namespace n on n.oid = ci.relnamespace
+  join pg_am a ON ci.relam = a.oid
+  left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid
   where
     i.indisunique = false
     and i.indisvalid = true
@@ -70,11 +71,21 @@ with fk_indexes as (
     i.index_name,
     idx_scan,
     all_scans,
-        round(( case when all_scans = 0 then 0.0::numeric
-          else idx_scan::numeric/all_scans * 100 end), 2) as index_scan_pct,
+    round(
+      case
+        when all_scans = 0 then 0.0::numeric
+        else idx_scan::numeric/all_scans * 100
+      end,
+      2
+    ) as index_scan_pct,
     writes,
-    round((case when writes = 0 then idx_scan::numeric else idx_scan::numeric/writes end), 2)
-      as scans_per_write,
+    round(
+      case
+        when writes = 0 then idx_scan::numeric
+        else idx_scan::numeric/writes
+      end,
+      2
+    ) as scans_per_write,
     index_bytes as index_size_bytes,
     table_size as table_size_bytes,
     i.relpages,
@@ -88,15 +99,14 @@ with fk_indexes as (
     (
       select count(1)
       from fk_indexes fi
-      where fi.fk_table_ref = i.table_name
+      where
+        fi.fk_table_ref = i.table_name
         and fi.schema_name = i.schema_name
         and fi.opclasses like (i.opclasses || '%')
     ) > 0 as supports_fk
   from indexes i
   join table_scans ts on ts.relid = i.indrelid
-),
--- Never used indexes
-never_used_indexes as (
+), never_used_indexes as ( -- Never used indexes
   select
     'Never Used Indexes' as reason,
     ir.*
@@ -106,32 +116,31 @@ never_used_indexes as (
     and idx_is_btree
   order by index_size_bytes desc
 ), never_used_indexes_num as (
-  select row_number() over () num, nui.*
+  select
+    row_number() over () num,
+    nui.*
   from never_used_indexes nui
 ), never_used_indexes_total as (
   select
     sum(index_size_bytes) as index_size_bytes_sum,
     sum(table_size_bytes) as table_size_bytes_sum
   from never_used_indexes
-
 ), never_used_indexes_json as (
   select
     json_object_agg(coalesce(nuin.schema_name, 'public') || '.' || nuin.index_name, nuin) as json
   from never_used_indexes_num nuin
-),
--- Rarely used indexes
-rarely_used_indexes as (
+), rarely_used_indexes as ( -- Rarely used indexes
   select
     'Low Scans, High Writes' as reason,
     *,
     1 as grp
   from index_ratios
   where
-      scans_per_write <= 1
-      and index_scan_pct < 10
-      and idx_scan > 0
-      and writes > 100
-      and idx_is_btree
+    scans_per_write <= 1
+    and index_scan_pct < 10
+    and idx_scan > 0
+    and writes > 100
+    and idx_is_btree
   union all
   select
     'Seldom Used Large Indexes' as reason,
@@ -139,11 +148,11 @@ rarely_used_indexes as (
     2 as grp
   from index_ratios
   where
-      index_scan_pct < 5
-      and scans_per_write > 1
-      and idx_scan > 0
-      and idx_is_btree
-      and index_size_bytes > 100000000
+    index_scan_pct < 5
+    and scans_per_write > 1
+    and idx_scan > 0
+    and idx_is_btree
+    and index_size_bytes > 100000000
   union all
   select
     'High-Write Large Non-Btree' as reason,
@@ -151,9 +160,9 @@ rarely_used_indexes as (
     3 as grp
   from index_ratios, all_writes
   where
-      ( writes::numeric / ( total_writes + 1 ) ) > 0.02
-      and not idx_is_btree
-      and index_size_bytes > 100000000
+    (writes::numeric / (total_writes + 1)) > 0.02 -- table takes over 2% of total_writes; +1 avoids division by zero
+    and not idx_is_btree
+    and index_size_bytes > 100000000
   order by grp, index_size_bytes desc
 ), rarely_used_indexes_num as (
   select row_number() over () num, rui.*
@@ -167,8 +176,7 @@ rarely_used_indexes as (
   select
     json_object_agg(coalesce(ruin.schema_name, 'public') || '.' || ruin.index_name, ruin) as json
   from rarely_used_indexes_num ruin
-), 
-do_lines as (
+), do_lines as (
   select 
     format(
       'DROP INDEX CONCURRENTLY %s; -- %s, %s, table %s',
@@ -188,8 +196,7 @@ do_lines as (
     ) as line
   from never_used_indexes nui
   order by table_name, index_name
-),
-database_stat as (
+), database_stat as (
   select
     row_to_json(dbstat)
   from (
@@ -205,7 +212,6 @@ database_stat as (
     where datname = current_database()
   ) dbstat
 )
--- summarize data
 select
   json_build_object(
     'never_used_indexes',
-- 
GitLab


From 6e48d0ddb777cffefccf970823839c6572d965d6 Mon Sep 17 00:00:00 2001
From: Nikolay Samokhvalov <nik@postgres.ai>
Date: Mon, 11 Dec 2023 02:37:51 +0000
Subject: [PATCH 2/6] Polish H002: move MIN_RELPAGES into a const CTE, use block_size setting

---
 resources/checks/H002_unused_indexes.sh | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/resources/checks/H002_unused_indexes.sh b/resources/checks/H002_unused_indexes.sh
index f67e43b..f41e80b 100755
--- a/resources/checks/H002_unused_indexes.sh
+++ b/resources/checks/H002_unused_indexes.sh
@@ -5,7 +5,9 @@ else
 fi
 
 ${CHECK_HOST_CMD} "${_PSQL} -f -" <<SQL
-with fk_indexes as (
+with const as (
+  select ${MIN_RELPAGES} as min_relpages -- on very large DBs, increase it to, say, 100
+), fk_indexes as (
   select
     n.nspname as schema_name,
     ci.relname as index_name,
@@ -22,7 +24,7 @@ with fk_indexes as (
      contype = 'f'
      and i.indisunique is false
      and conkey is not null
-     and ci.relpages > ${MIN_RELPAGES}
+     and ci.relpages > (select min_relpages from const)
      and si.idx_scan < 10
 ), table_scans as (
   select
@@ -32,7 +34,7 @@ with fk_indexes as (
     pg_relation_size(relid) as table_size
   from pg_stat_user_tables as tables
   join pg_class c on c.oid = relid
-  where c.relpages > ${MIN_RELPAGES}
+  where c.relpages > (select min_relpages from const)
 ), all_writes as (
   select sum(writes) as total_writes
   from table_scans
@@ -62,7 +64,7 @@ with fk_indexes as (
   where
     i.indisunique = false
     and i.indisvalid = true
-    and ci.relpages > ${MIN_RELPAGES}
+    and ci.relpages > (select min_relpages from const)
 ), index_ratios as (
   select
     i.indexrelid as index_id,
@@ -206,7 +208,7 @@ with fk_indexes as (
         date_trunc('minute',now()),
         date_trunc('minute',sd.stats_reset)
       ) as stats_age,
-      ((extract(epoch from now()) - extract(epoch from sd.stats_reset))/86400)::int as days,
+      ((extract(epoch from now()) - extract(epoch from sd.stats_reset)) / 86400)::int as days,
       (select pg_database_size(current_database())) as database_size_bytes
     from pg_stat_database sd
     where datname = current_database()
@@ -229,6 +231,6 @@ select
     'database_stat',
     (select * from database_stat),
     'min_index_size_bytes',
-    (select ${MIN_RELPAGES} * 8192)
+    (select min_relpages * current_setting('block_size')::numeric from const)
   );
 SQL
-- 
GitLab


From 3a04db570ed916ba143821ab7102cf3cc4030785 Mon Sep 17 00:00:00 2001
From: Nikolay Samokhvalov <nik@postgres.ai>
Date: Mon, 11 Dec 2023 02:40:39 +0000
Subject: [PATCH 3/6] Apply 2 suggestion(s) to 1 file(s)

---
 resources/checks/H002_unused_indexes.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/checks/H002_unused_indexes.sh b/resources/checks/H002_unused_indexes.sh
index f41e80b..86e982d 100755
--- a/resources/checks/H002_unused_indexes.sh
+++ b/resources/checks/H002_unused_indexes.sh
@@ -76,7 +76,7 @@ with const as (
     round(
       case
         when all_scans = 0 then 0.0::numeric
-        else idx_scan::numeric/all_scans * 100
+        else idx_scan::numeric / all_scans * 100
       end,
       2
     ) as index_scan_pct,
@@ -84,7 +84,7 @@ with const as (
     round(
       case
         when writes = 0 then idx_scan::numeric
-        else idx_scan::numeric/writes
+        else idx_scan::numeric / writes
       end,
       2
     ) as scans_per_write,
-- 
GitLab


From 7f5c45b18b04c665d11f744486db047a1dbc680e Mon Sep 17 00:00:00 2001
From: Nikolay Samokhvalov <nik@postgres.ai>
Date: Mon, 11 Dec 2023 02:46:06 +0000
Subject: [PATCH 4/6] Missing spaces

---
 resources/checks/H002_unused_indexes.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/checks/H002_unused_indexes.sh b/resources/checks/H002_unused_indexes.sh
index 86e982d..d3a4612 100755
--- a/resources/checks/H002_unused_indexes.sh
+++ b/resources/checks/H002_unused_indexes.sh
@@ -205,8 +205,8 @@ with const as (
     select
       sd.stats_reset::timestamptz(0),
       age(
-        date_trunc('minute',now()),
-        date_trunc('minute',sd.stats_reset)
+        date_trunc('minute', now()),
+        date_trunc('minute', sd.stats_reset)
       ) as stats_age,
       ((extract(epoch from now()) - extract(epoch from sd.stats_reset)) / 86400)::int as days,
       (select pg_database_size(current_database())) as database_size_bytes
-- 
GitLab


From a6c8b6ae0772d5f7afb0a7004a740f6643a31aa2 Mon Sep 17 00:00:00 2001
From: Nikolay Samokhvalov <nik@postgres.ai>
Date: Tue, 12 Dec 2023 06:47:14 +0000
Subject: [PATCH 5/6] polish H004

---
 resources/checks/H004_redundant_indexes.sh | 102 ++++++++++-----------
 1 file changed, 46 insertions(+), 56 deletions(-)

diff --git a/resources/checks/H004_redundant_indexes.sh b/resources/checks/H004_redundant_indexes.sh
index 85f4830..e1bcb7a 100755
--- a/resources/checks/H004_redundant_indexes.sh
+++ b/resources/checks/H004_redundant_indexes.sh
@@ -5,7 +5,9 @@ else
 fi
 
 ${CHECK_HOST_CMD} "${_PSQL} -f -" <<SQL
-with fk_indexes as (
+with const as (
+  select ${MIN_RELPAGES} as min_relpages -- on very large DBs, increase it to, say, 100
+), fk_indexes as (
   select
     n.nspname as schema_name,
     ci.relname as index_name,
@@ -19,21 +21,19 @@ with fk_indexes as (
   join pg_constraint cn on cn.conrelid = cr.oid
   left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid
   where
-     contype = 'f'
-     and i.indisunique is false
-     and conkey is not null
-     and ci.relpages > ${MIN_RELPAGES}
-     and si.idx_scan < 10
-),
--- Redundant indexes
-index_data as (
+    contype = 'f'
+    and not i.indisunique
+    and conkey is not null
+    and ci.relpages > (select min_relpages from const)
+    and si.idx_scan < 10
+), index_data as ( -- Redundant indexes
   select
     *,
     indkey::text as columns,
     array_to_string(indclass, ', ') as opclasses
   from pg_index i
   join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i'
-  where indisvalid = true and ci.relpages > ${MIN_RELPAGES}
+  where indisvalid = true and ci.relpages > (select min_relpages from const)
 ), redundant_indexes as (
   select
     i2.indexrelid as index_id,
@@ -54,29 +54,24 @@ index_data as (
     quote_ident(trel.relname) AS formated_table_name,
     coalesce(nullif(quote_ident(tnsp.nspname), 'public') || '.', '') || quote_ident(trel.relname) as formated_relation_name,
     i2.opclasses
-  from (
-    select indrelid, indexrelid, opclasses, indclass, indexprs, indpred, indisprimary, indisunique, columns
-      from index_data
-      order by indexrelid
-    ) as i1
-    join index_data as i2 on (
-        i1.indrelid = i2.indrelid -- same table
-        and i1.indexrelid <> i2.indexrelid -- NOT same index
-    )
-    inner join pg_opclass op1 on i1.indclass[0] = op1.oid
-    inner join pg_opclass op2 on i2.indclass[0] = op2.oid
-    inner join pg_am am1 on op1.opcmethod = am1.oid
-    inner join pg_am am2 on op2.opcmethod = am2.oid
-    join pg_stat_user_indexes as s on s.indexrelid = i2.indexrelid
-    join pg_class as trel on trel.oid = i2.indrelid
-    join pg_namespace as tnsp on trel.relnamespace = tnsp.oid
-    join pg_class as irel on irel.oid = i2.indexrelid
+  from index_data as i1
+  join index_data as i2 on
+    i1.indrelid = i2.indrelid -- the same table
+    and i1.indexrelid <> i2.indexrelid -- NOT the same index
+  join pg_opclass op1 on i1.indclass[0] = op1.oid
+  join pg_opclass op2 on i2.indclass[0] = op2.oid
+  join pg_am am1 on op1.opcmethod = am1.oid
+  join pg_am am2 on op2.opcmethod = am2.oid
+  join pg_stat_user_indexes as s on s.indexrelid = i2.indexrelid
+  join pg_class as trel on trel.oid = i2.indrelid
+  join pg_namespace as tnsp on trel.relnamespace = tnsp.oid
+  join pg_class as irel on irel.oid = i2.indexrelid
   where
-    not i2.indisprimary -- index 1 is not primary
+    not i2.indisprimary -- index 1 is not PK
     and not ( -- skip if index1 is (primary or uniq) and is NOT (primary and uniq)
-        i2.indisunique and not i1.indisprimary
+      i2.indisunique and not i1.indisprimary
     )
-    and  am1.amname = am2.amname -- same access type
+    and am1.amname = am2.amname -- same access type
     and i1.columns like (i2.columns || '%') -- index 2 includes all columns from index 1
     and i1.opclasses like (i2.opclasses || '%')
     -- index expressions is same
@@ -94,22 +89,21 @@ index_data as (
         and fi.opclasses like (ri.opclasses || '%')
      ) > 0 as supports_fk
   from redundant_indexes ri
-),
--- Cut recursive links
-redundant_indexes_tmp_num as (
+), redundant_indexes_tmp_num as ( -- Cut recursive links
   select row_number() over () num, rig.*
   from redundant_indexes_fk rig
 ), redundant_indexes_tmp_links as (
-    select
-     ri1.*,
-     ri2.num as r_num
-    from redundant_indexes_tmp_num ri1
-    left join redundant_indexes_tmp_num ri2 on ri2.reason_index_id = ri1.index_id and ri1.reason_index_id = ri2.index_id
+  select
+    ri1.*,
+    ri2.num as r_num
+  from redundant_indexes_tmp_num ri1
+  left join redundant_indexes_tmp_num ri2 on
+    ri2.reason_index_id = ri1.index_id
+    and ri1.reason_index_id = ri2.index_id
 ), redundant_indexes_tmp_cut as (
-    select
-     *
-    from redundant_indexes_tmp_links
-    where num < r_num or r_num is null
+  select *
+  from redundant_indexes_tmp_links
+  where num < r_num or r_num is null
 ), redundant_indexes_cut_grouped as (
   select
     distinct(num),
@@ -160,12 +154,11 @@ redundant_indexes_tmp_num as (
     json_object_agg(coalesce(rin.schema_name, 'public') || '.' || rin.index_name, rin) as json
   from redundant_indexes_num rin
 ), redundant_indexes_total as (
-    select
-      sum(index_size_bytes) as index_size_bytes_sum,
-      sum(table_size_bytes) as table_size_bytes_sum
-    from redundant_indexes_grouped
-),
-do_lines as (
+  select
+    sum(index_size_bytes) as index_size_bytes_sum,
+    sum(table_size_bytes) as table_size_bytes_sum
+  from redundant_indexes_grouped
+), do_lines as (
   select 
     format(
       'DROP INDEX CONCURRENTLY %s; -- %s, %s, table %s',
@@ -185,10 +178,8 @@ do_lines as (
     ) as line
   from redundant_indexes_grouped
   order by table_name, index_name
-),
-database_stat as (
-  select
-    row_to_json(dbstat)
+), database_stat as (
+  select row_to_json(dbstat)
   from (
     select
       sd.stats_reset::timestamptz(0),
@@ -200,9 +191,8 @@ database_stat as (
       (select pg_database_size(current_database())) as database_size_bytes
     from pg_stat_database sd
     where datname = current_database()
-  ) dbstat
-)
--- summarize data
+  ) as dbstat
+) -- final result
 select
   json_build_object(
     'redundant_indexes',
@@ -216,6 +206,6 @@ select
     'database_stat',
     (select * from database_stat),
     'min_index_size_bytes',
-    (select ${MIN_RELPAGES} * 8192)
+    (select min_relpages * current_setting('block_size')::numeric from const)
   );
 SQL
-- 
GitLab


From 208738b5cfc1cfbde568f7dd63c65857c2e5e4da Mon Sep 17 00:00:00 2001
From: Nikolay Samokhvalov <nik@postgres.ai>
Date: Sat, 16 Dec 2023 07:26:26 +0000
Subject: [PATCH 6/6] Improve code style in L003

---
 resources/checks/L003_integer_in_pk.sh | 53 +++++++++++++-------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/resources/checks/L003_integer_in_pk.sh b/resources/checks/L003_integer_in_pk.sh
index 692ce9d..0c7a880 100644
--- a/resources/checks/L003_integer_in_pk.sh
+++ b/resources/checks/L003_integer_in_pk.sh
@@ -10,28 +10,27 @@ f_stderr=$(mktemp)
 (${CHECK_HOST_CMD} "${_PSQL} -f - " <<SQL
 do \$$
 declare
-  MIN_RELPAGES int8 = ${MIN_RELPAGES}; -- skip tables with small number of pages
+  min_relpages int8 = ${MIN_RELPAGES}; -- skip tables with small number of pages
   rec record;
-  out text;
+  out text := '';
   out1 json;
-  i numeric;
+  i numeric := 0;
   val int8;
   ratio numeric;
   sql text;
 begin
-  out := '';
-  i := 0;
   for rec in
     select
       c.oid,
-      (select spcname from pg_tablespace where oid = reltablespace) as tblspace,
+      spcname as tblspace,
       nspname as schema_name,
       relname as table_name,
       t.typname,
-      (select pg_get_serial_sequence(quote_ident(nspname) || '.' || quote_ident(relname), attname)) as seq,
+      pg_get_serial_sequence(format('%I.%I', nspname, relname), attname) as seq,
       min(attname) as attname
     from pg_index i
     join pg_class c on c.oid = i.indrelid
+    left join pg_tablespace tsp on tsp.oid = reltablespace
     left join pg_namespace n on n.oid = c.relnamespace
     join pg_attribute a on
       a.attrelid = i.indrelid
@@ -39,18 +38,20 @@ begin
     join pg_type t on t.oid = atttypid
     where
       i.indisprimary
-      and (c.relpages > MIN_RELPAGES or (select pg_get_serial_sequence(quote_ident(nspname) || '.' || quote_ident(relname), attname)) is not null)
-      and t.typname in ('int2', 'int4')
+      and (
+        c.relpages >= min_relpages
+        or pg_get_serial_sequence(format('%I.%I', nspname, relname), attname) is not null
+      ) and t.typname in ('int2', 'int4')
       and nspname <> 'pg_toast'
       group by 1, 2, 3, 4, 5, 6
-      having count(*) = 1 -- skip PKs with 2+ columns
+      having count(*) = 1 -- skip PKs with 2+ columns (TODO: analyze them too)
   loop
     raise debug 'table: %', rec.table_name;
 
     if rec.seq is null then
-        sql := format('select max(%I) from %I.%I;', rec.attname, rec.schema_name, rec.table_name);
+      sql := format('select max(%I) from %I.%I;', rec.attname, rec.schema_name, rec.table_name);
     else
-        sql := format('select last_value from %s;', rec.seq);
+      sql := format('select last_value from %s;', rec.seq);
     end if;
 
     raise debug 'sql: %', sql;
@@ -68,29 +69,27 @@ begin
       i := i + 1;
 
       out1 := json_build_object(
-          'table',
-          coalesce(nullif(quote_ident(rec.schema_name), 'public') || '.', '') || quote_ident(rec.table_name),
-          'pk',
-          rec.attname,
-          'type',
-          rec.typname,
-          'current_max_value',
-          val,
-          'capacity_used_percent',
-          round(100 * ratio, 2)
+        'table', (
+          coalesce(nullif(quote_ident(rec.schema_name), 'public') || '.', '')
+          || quote_ident(rec.table_name)
+        ),
+        'pk', rec.attname,
+        'type', rec.typname,
+        'current_max_value', val,
+        'capacity_used_percent', round(100 * ratio, 2)
       );
 
       raise debug 'cur: %', out1;
 
-      if out <> '' then out := out || ', '; end if;
+      if out <> '' then
+        out := out || e',\n';
+      end if;
 
-      out := out || '"' || rec.table_name || '":' || out1 || '';
+      out := out || format('  %s: %s', to_json(rec.table_name::text), out1); -- to_json() quotes and escapes the key
     end if;
   end loop;
 
-  out := '{' || out || '}';
-
-  raise info '%', out;
+  raise info e'{\n%\n}', out;
 end;
 \$$ language plpgsql;
 SQL
-- 
GitLab