PostgreSQL Source Code git master
pg_stat_statements.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_stat_statements.c
4 * Track statement planning and execution times as well as resource
5 * usage across a whole database cluster.
6 *
7 * Execution costs are totaled for each distinct source query, and kept in
8 * a shared hashtable. (We track only as many distinct queries as will fit
9 * in the designated amount of shared memory.)
10 *
11 * Starting in Postgres 9.2, this module normalized query entries. As of
12 * Postgres 14, the normalization is done by the core if compute_query_id is
13 * enabled, or optionally by third-party modules.
14 *
15 * To facilitate presenting entries to users, we create "representative" query
16 * strings in which constants are replaced with parameter symbols ($n), to
17 * make it clearer what a normalized entry can represent. To save on shared
18 * memory, and to avoid having to truncate oversized query strings, we store
19 * these strings in a temporary external query-texts file. Offsets into this
20 * file are kept in shared memory.
21 *
22 * Note about locking issues: to create or delete an entry in the shared
23 * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 * in an entry except the counters requires the same. To look up an entry,
25 * one must hold the lock shared. To read or update the counters within
26 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 * disappear!) and also take the entry's mutex spinlock.
28 * The shared state variable pgss->extent (the next free spot in the external
29 * query-text file) should be accessed only while holding either the
30 * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 * allow reserving file space while holding only shared lock on pgss->lock.
32 * Rewriting the entire external query-text file, eg for garbage collection,
33 * requires holding pgss->lock exclusively; this allows individual entries
34 * in the file to be read or written while holding only shared lock.
35 *
36 *
37 * Copyright (c) 2008-2025, PostgreSQL Global Development Group
38 *
39 * IDENTIFICATION
40 * contrib/pg_stat_statements/pg_stat_statements.c
41 *
42 *-------------------------------------------------------------------------
43 */
44#include "postgres.h"
45
46#include <math.h>
47#include <sys/stat.h>
48#include <unistd.h>
49
50#include "access/parallel.h"
51#include "catalog/pg_authid.h"
52#include "common/int.h"
53#include "executor/instrument.h"
54#include "funcapi.h"
55#include "jit/jit.h"
56#include "mb/pg_wchar.h"
57#include "miscadmin.h"
58#include "nodes/queryjumble.h"
59#include "optimizer/planner.h"
60#include "parser/analyze.h"
61#include "parser/scanner.h"
62#include "pgstat.h"
63#include "storage/fd.h"
64#include "storage/ipc.h"
65#include "storage/lwlock.h"
66#include "storage/shmem.h"
67#include "storage/spin.h"
68#include "tcop/utility.h"
69#include "utils/acl.h"
70#include "utils/builtins.h"
71#include "utils/memutils.h"
72#include "utils/timestamp.h"
73
75 .name = "pg_stat_statements",
76 .version = PG_VERSION
77);
78
79/*
 * Location of permanent stats file (valid when database is shut down).
 * The shutdown hook writes it by way of a ".tmp" sibling; the startup hook
 * unlinks it again once it has been loaded (or found to be unusable).
 */
80#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
81
82/*
 * Location of external query text file.
 * Any leftover copy is unlinked at startup, and the file is unlinked again
 * by the shutdown hook.
 */
85#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
86
87/*
 * Magic number identifying the stats file format.  It is the first word
 * written to the dump file and is compared against the stored value on load.
 */
88static const uint32 PGSS_FILE_HEADER = 0x20220408;
89
90/*
 * PostgreSQL major version number (PG_VERSION_NUM / 100).  A dump file whose
 * stored value differs is treated as invalid data and ignored, which
 * invalidates all entries saved by a different major version.
 */
91static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
92
/*
 * Usage-accounting tunables for hashtable entries.
 *
 * USAGE_EXEC currently ignores its argument and always yields 1.0.
 */
/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)	(1.0)
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * IS_STICKY(c): true when the counters value "c" has recorded neither a
 * planning call nor an execution call.
 *
 * "c" must be an expression yielding a struct with a calls[] array indexed by
 * PGSS_PLAN and PGSS_EXEC.  The argument is parenthesized in the expansion so
 * that any such expression works, e.g. IS_STICKY(*ptr); without the
 * parentheses "*ptr.calls" would bind as "*(ptr.calls)".  The argument is
 * evaluated twice, so it must not have side effects.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
102
103/*
104 * Extension version number, for supporting older extension versions' objects
105 */
106typedef enum pgssVersion
107{
118
119typedef enum pgssStoreKind
120{
122
123 /*
124 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
125 * reference the underlying values in the arrays in the Counters struct,
126 * and this order is required in pg_stat_statements_internal().
127 */
131
132#define PGSS_NUMKIND (PGSS_EXEC + 1)
133
134/*
135 * Hashtable key that defines the identity of a hashtable entry. We separate
136 * queries by user and by database even if they are otherwise identical.
137 *
138 * If you add a new key to this struct, make sure to teach pgss_store() to
139 * zero the padding bytes. Otherwise, things will break, because pgss_hash is
140 * created using HASH_BLOBS, and thus tag_hash is used to hash this.
141
142 */
143typedef struct pgssHashKey
144{
145 Oid userid; /* user OID */
146 Oid dbid; /* database OID */
147 uint64 queryid; /* query identifier */
148 bool toplevel; /* query executed at top level */
150
151/*
152 * The actual stats counters kept within pgssEntry.
153 */
154typedef struct Counters
155{
156 int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
157 double total_time[PGSS_NUMKIND]; /* total planning/execution time,
158 * in msec */
159 double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
160 * msec since min/max reset */
161 double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
162 * msec since min/max reset */
163 double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
164 * msec */
165 double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
166 * planning/execution time in msec */
167 int64 rows; /* total # of retrieved or affected rows */
168 int64 shared_blks_hit; /* # of shared buffer hits */
169 int64 shared_blks_read; /* # of shared disk blocks read */
170 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
171 int64 shared_blks_written; /* # of shared disk blocks written */
172 int64 local_blks_hit; /* # of local buffer hits */
173 int64 local_blks_read; /* # of local disk blocks read */
174 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
175 int64 local_blks_written; /* # of local disk blocks written */
176 int64 temp_blks_read; /* # of temp blocks read */
177 int64 temp_blks_written; /* # of temp blocks written */
178 double shared_blk_read_time; /* time spent reading shared blocks,
179 * in msec */
180 double shared_blk_write_time; /* time spent writing shared blocks,
181 * in msec */
182 double local_blk_read_time; /* time spent reading local blocks, in
183 * msec */
184 double local_blk_write_time; /* time spent writing local blocks, in
185 * msec */
186 double temp_blk_read_time; /* time spent reading temp blocks, in msec */
187 double temp_blk_write_time; /* time spent writing temp blocks, in
188 * msec */
189 double usage; /* usage factor */
190 int64 wal_records; /* # of WAL records generated */
191 int64 wal_fpi; /* # of WAL full page images generated */
192 uint64 wal_bytes; /* total amount of WAL generated in bytes */
193 int64 wal_buffers_full; /* # of times the WAL buffers became full */
194 int64 jit_functions; /* total number of JIT functions emitted */
195 double jit_generation_time; /* total time to generate jit code */
196 int64 jit_inlining_count; /* number of times inlining time has been
197 * > 0 */
198 double jit_deform_time; /* total time to deform tuples in jit code */
199 int64 jit_deform_count; /* number of times deform time has been >
200 * 0 */
201
202 double jit_inlining_time; /* total time to inline jit code */
203 int64 jit_optimization_count; /* number of times optimization time
204 * has been > 0 */
205 double jit_optimization_time; /* total time to optimize jit code */
206 int64 jit_emission_count; /* number of times emission time has been
207 * > 0 */
208 double jit_emission_time; /* total time to emit jit code */
209 int64 parallel_workers_to_launch; /* # of parallel workers planned
210 * to be launched */
211 int64 parallel_workers_launched; /* # of parallel workers actually
212 * launched */
214
215/*
216 * Global statistics for pg_stat_statements
217 */
218typedef struct pgssGlobalStats
219{
220 int64 dealloc; /* # of times entries were deallocated */
221 TimestampTz stats_reset; /* timestamp with all stats reset */
223
224/*
225 * Statistics per statement
226 *
227 * Note: in event of a failure in garbage collection of the query text file,
228 * we reset query_offset to zero and query_len to -1. This will be seen as
229 * an invalid state by qtext_fetch().
230 */
231typedef struct pgssEntry
232{
233 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
234 Counters counters; /* the statistics for this query */
235 Size query_offset; /* query text offset in external file */
236 int query_len; /* # of valid bytes in query string, or -1 */
237 int encoding; /* query text encoding */
238 TimestampTz stats_since; /* timestamp of entry allocation */
239 TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
240 slock_t mutex; /* protects the counters only */
242
243/*
244 * Global shared state
245 */
246typedef struct pgssSharedState
247{
248 LWLock *lock; /* protects hashtable search/modification */
249 double cur_median_usage; /* current median usage in hashtable */
250 Size mean_query_len; /* current mean entry text length */
251 slock_t mutex; /* protects following fields only: */
252 Size extent; /* current extent of query file */
253 int n_writers; /* number of active writers to query file */
254 int gc_count; /* query file garbage collection cycle count */
255 pgssGlobalStats stats; /* global statistics for pgss */
257
258/*---- Local variables ----*/
259
260/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
261static int nesting_level = 0;
262
263/* Saved hook values */
273
274/* Links to shared memory state */
275static pgssSharedState *pgss = NULL;
276static HTAB *pgss_hash = NULL;
277
278/*---- GUC variables ----*/
279
280typedef enum
281{
282 PGSS_TRACK_NONE, /* track no statements */
283 PGSS_TRACK_TOP, /* only top level statements */
284 PGSS_TRACK_ALL, /* all statements, including nested ones */
286
/*
 * Table pairing each accepted tracking-level name ("none", "top", "all")
 * with its PGSS_TRACK_* value.  The list ends with an entry whose name is
 * NULL.
 *
 * NOTE(review): presumably this is the options table handed to
 * DefineCustomEnumVariable() for pg_stat_statements.track; the argument
 * lines of that call are not visible in this listing.  The meaning of the
 * third field is defined by struct config_enum_entry, which is declared
 * elsewhere.
 */
287static const struct config_enum_entry track_options[] =
288{
289 {"none", PGSS_TRACK_NONE, false},
290 {"top", PGSS_TRACK_TOP, false},
291 {"all", PGSS_TRACK_ALL, false},
292 {NULL, 0, false}
293};
294
/*
 * Backing variables for the module's custom settings.  pgss_max, pgss_track
 * and pgss_save are visibly registered under pg_stat_statements.max,
 * pg_stat_statements.track and pg_stat_statements.save by the
 * DefineCustom*Variable() calls further down; the remaining two are
 * presumably bound to pg_stat_statements.track_utility and
 * pg_stat_statements.track_planning (the binding lines are not visible in
 * this listing).
 */
295static int pgss_max = 5000; /* max # statements to track */
296static int pgss_track = PGSS_TRACK_TOP; /* tracking level, one of the
 * PGSS_TRACK_* values */
297static bool pgss_track_utility = true; /* whether to track utility commands */
298static bool pgss_track_planning = false; /* whether to track planning
 * duration */
300static bool pgss_save = true; /* whether to save stats across shutdown */
301
/*
 * pgss_enabled(level): should a statement at the given nesting level be
 * tracked?
 *
 * Always false in a parallel worker.  Otherwise true when pgss_track is
 * PGSS_TRACK_ALL, or when it is PGSS_TRACK_TOP and "level" is zero.
 */
302#define pgss_enabled(level) \
303 (!IsParallelWorker() && \
304 (pgss_track == PGSS_TRACK_ALL || \
305 (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
306
/*
 * record_gc_qtexts(): increment pgss->gc_count while holding the
 * pgss->mutex spinlock.  The do { } while(0) wrapper lets the macro be used
 * as a single statement.
 */
307#define record_gc_qtexts() \
308 do { \
309 SpinLockAcquire(&pgss->mutex); \
310 pgss->gc_count++; \
311 SpinLockRelease(&pgss->mutex); \
312 } while(0)
313
314/*---- Function declarations ----*/
315
328
329static void pgss_shmem_request(void);
330static void pgss_shmem_startup(void);
331static void pgss_shmem_shutdown(int code, Datum arg);
332static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
333 JumbleState *jstate);
335 const char *query_string,
336 int cursorOptions,
337 ParamListInfo boundParams);
338static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
339static void pgss_ExecutorRun(QueryDesc *queryDesc,
340 ScanDirection direction,
341 uint64 count);
342static void pgss_ExecutorFinish(QueryDesc *queryDesc);
343static void pgss_ExecutorEnd(QueryDesc *queryDesc);
344static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
345 bool readOnlyTree,
346 ProcessUtilityContext context, ParamListInfo params,
347 QueryEnvironment *queryEnv,
349static void pgss_store(const char *query, uint64 queryId,
350 int query_location, int query_len,
351 pgssStoreKind kind,
352 double total_time, uint64 rows,
353 const BufferUsage *bufusage,
354 const WalUsage *walusage,
355 const struct JitInstrumentation *jitusage,
356 JumbleState *jstate,
357 int parallel_workers_to_launch,
358 int parallel_workers_launched);
360 pgssVersion api_version,
361 bool showtext);
362static Size pgss_memsize(void);
363static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
364 int encoding, bool sticky);
365static void entry_dealloc(void);
366static bool qtext_store(const char *query, int query_len,
367 Size *query_offset, int *gc_count);
368static char *qtext_load_file(Size *buffer_size);
369static char *qtext_fetch(Size query_offset, int query_len,
370 char *buffer, Size buffer_size);
371static bool need_gc_qtexts(void);
372static void gc_qtexts(void);
373static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only);
374static char *generate_normalized_query(JumbleState *jstate, const char *query,
375 int query_loc, int *query_len_p);
376static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
377 int query_loc);
378static int comp_location(const void *a, const void *b);
379
380
381/*
382 * Module load callback
383 */
384void
386{
387 /*
388 * In order to create our shared memory area, we have to be loaded via
389 * shared_preload_libraries. If not, fall out without hooking into any of
390 * the main system. (We don't throw error here because it seems useful to
391 * allow the pg_stat_statements functions to be created even when the
392 * module isn't active. The functions must protect themselves against
393 * being called then, however.)
394 */
396 return;
397
398 /*
399 * Inform the postmaster that we want to enable query_id calculation if
400 * compute_query_id is set to auto.
401 */
403
404 /*
405 * Define (or redefine) custom GUC variables.
406 */
407 DefineCustomIntVariable("pg_stat_statements.max",
408 "Sets the maximum number of statements tracked by pg_stat_statements.",
409 NULL,
410 &pgss_max,
411 5000,
412 100,
413 INT_MAX / 2,
415 0,
416 NULL,
417 NULL,
418 NULL);
419
420 DefineCustomEnumVariable("pg_stat_statements.track",
421 "Selects which statements are tracked by pg_stat_statements.",
422 NULL,
423 &pgss_track,
426 PGC_SUSET,
427 0,
428 NULL,
429 NULL,
430 NULL);
431
432 DefineCustomBoolVariable("pg_stat_statements.track_utility",
433 "Selects whether utility commands are tracked by pg_stat_statements.",
434 NULL,
436 true,
437 PGC_SUSET,
438 0,
439 NULL,
440 NULL,
441 NULL);
442
443 DefineCustomBoolVariable("pg_stat_statements.track_planning",
444 "Selects whether planning duration is tracked by pg_stat_statements.",
445 NULL,
447 false,
448 PGC_SUSET,
449 0,
450 NULL,
451 NULL,
452 NULL);
453
454 DefineCustomBoolVariable("pg_stat_statements.save",
455 "Save pg_stat_statements statistics across server shutdowns.",
456 NULL,
457 &pgss_save,
458 true,
460 0,
461 NULL,
462 NULL,
463 NULL);
464
465 MarkGUCPrefixReserved("pg_stat_statements");
466
467 /*
468 * Install hooks.
469 */
488}
489
490/*
491 * shmem_request hook: request additional shared resources. We'll allocate or
492 * attach to the shared resources in pgss_shmem_startup().
493 */
494static void
496{
499
501 RequestNamedLWLockTranche("pg_stat_statements", 1);
502}
503
504/*
505 * shmem_startup hook: allocate or attach to shared memory,
506 * then load any pre-existing statistics from file.
507 * Also create and load the query-texts file, which is expected to exist
508 * (even if empty) while the module is enabled.
509 */
510static void
512{
513 bool found;
514 HASHCTL info;
515 FILE *file = NULL;
516 FILE *qfile = NULL;
517 uint32 header;
518 int32 num;
519 int32 pgver;
520 int32 i;
521 int buffer_size;
522 char *buffer = NULL;
523
526
527 /* reset in case this is a restart within the postmaster */
528 pgss = NULL;
529 pgss_hash = NULL;
530
531 /*
532 * Create or attach to the shared memory state, including hash table
533 */
534 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
535
536 pgss = ShmemInitStruct("pg_stat_statements",
537 sizeof(pgssSharedState),
538 &found);
539
540 if (!found)
541 {
542 /* First time through ... */
543 pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
547 pgss->extent = 0;
548 pgss->n_writers = 0;
549 pgss->gc_count = 0;
550 pgss->stats.dealloc = 0;
552 }
553
554 info.keysize = sizeof(pgssHashKey);
555 info.entrysize = sizeof(pgssEntry);
556 pgss_hash = ShmemInitHash("pg_stat_statements hash",
558 &info,
560
561 LWLockRelease(AddinShmemInitLock);
562
563 /*
564 * If we're in the postmaster (or a standalone backend...), set up a shmem
565 * exit hook to dump the statistics to disk.
566 */
569
570 /*
571 * Done if some other process already completed our initialization.
572 */
573 if (found)
574 return;
575
576 /*
577 * Note: we don't bother with locks here, because there should be no other
578 * processes running when this code is reached.
579 */
580
581 /* Unlink query text file possibly left over from crash */
582 unlink(PGSS_TEXT_FILE);
583
584 /* Allocate new query text temp file */
586 if (qfile == NULL)
587 goto write_error;
588
589 /*
590 * If we were told not to load old statistics, we're done. (Note we do
591 * not try to unlink any old dump file in this case. This seems a bit
592 * questionable but it's the historical behavior.)
593 */
594 if (!pgss_save)
595 {
596 FreeFile(qfile);
597 return;
598 }
599
600 /*
601 * Attempt to load old statistics from the dump file.
602 */
604 if (file == NULL)
605 {
606 if (errno != ENOENT)
607 goto read_error;
608 /* No existing persisted stats file, so we're done */
609 FreeFile(qfile);
610 return;
611 }
612
613 buffer_size = 2048;
614 buffer = (char *) palloc(buffer_size);
615
616 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
617 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
618 fread(&num, sizeof(int32), 1, file) != 1)
619 goto read_error;
620
621 if (header != PGSS_FILE_HEADER ||
622 pgver != PGSS_PG_MAJOR_VERSION)
623 goto data_error;
624
625 for (i = 0; i < num; i++)
626 {
627 pgssEntry temp;
628 pgssEntry *entry;
629 Size query_offset;
630
631 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
632 goto read_error;
633
634 /* Encoding is the only field we can easily sanity-check */
636 goto data_error;
637
638 /* Resize buffer as needed */
639 if (temp.query_len >= buffer_size)
640 {
641 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
642 buffer = repalloc(buffer, buffer_size);
643 }
644
645 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
646 goto read_error;
647
648 /* Should have a trailing null, but let's make sure */
649 buffer[temp.query_len] = '\0';
650
651 /* Skip loading "sticky" entries */
652 if (IS_STICKY(temp.counters))
653 continue;
654
655 /* Store the query text */
656 query_offset = pgss->extent;
657 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
658 goto write_error;
659 pgss->extent += temp.query_len + 1;
660
661 /* make the hashtable entry (discards old entries if too many) */
662 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
663 temp.encoding,
664 false);
665
666 /* copy in the actual stats */
667 entry->counters = temp.counters;
668 entry->stats_since = temp.stats_since;
670 }
671
672 /* Read global statistics for pg_stat_statements */
673 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
674 goto read_error;
675
676 pfree(buffer);
677 FreeFile(file);
678 FreeFile(qfile);
679
680 /*
681 * Remove the persisted stats file so it's not included in
682 * backups/replication standbys, etc. A new file will be written on next
683 * shutdown.
684 *
685 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
686 * because we remove that file on startup; it acts inversely to
687 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
688 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
689 * when the server is not running. Leaving the file creates no danger of
690 * a newly restored database having a spurious record of execution costs,
691 * which is what we're really concerned about here.
692 */
693 unlink(PGSS_DUMP_FILE);
694
695 return;
696
697read_error:
698 ereport(LOG,
700 errmsg("could not read file \"%s\": %m",
702 goto fail;
703data_error:
704 ereport(LOG,
705 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
706 errmsg("ignoring invalid data in file \"%s\"",
708 goto fail;
709write_error:
710 ereport(LOG,
712 errmsg("could not write file \"%s\": %m",
714fail:
715 if (buffer)
716 pfree(buffer);
717 if (file)
718 FreeFile(file);
719 if (qfile)
720 FreeFile(qfile);
721 /* If possible, throw away the bogus file; ignore any error */
722 unlink(PGSS_DUMP_FILE);
723
724 /*
725 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
726 * server is running with pg_stat_statements enabled
727 */
728}
729
730/*
731 * shmem_shutdown hook: Dump statistics into file.
732 *
733 * Note: we don't bother with acquiring lock, because there should be no
734 * other processes running when this is called.
735 */
736static void
738{
739 FILE *file;
740 char *qbuffer = NULL;
741 Size qbuffer_size = 0;
742 HASH_SEQ_STATUS hash_seq;
743 int32 num_entries;
744 pgssEntry *entry;
745
746 /* Don't try to dump during a crash. */
747 if (code)
748 return;
749
750 /* Safety check ... shouldn't get here unless shmem is set up. */
751 if (!pgss || !pgss_hash)
752 return;
753
754 /* Don't dump if told not to. */
755 if (!pgss_save)
756 return;
757
759 if (file == NULL)
760 goto error;
761
762 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
763 goto error;
764 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
765 goto error;
766 num_entries = hash_get_num_entries(pgss_hash);
767 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
768 goto error;
769
770 qbuffer = qtext_load_file(&qbuffer_size);
771 if (qbuffer == NULL)
772 goto error;
773
774 /*
775 * When serializing to disk, we store query texts immediately after their
776 * entry data. Any orphaned query texts are thereby excluded.
777 */
778 hash_seq_init(&hash_seq, pgss_hash);
779 while ((entry = hash_seq_search(&hash_seq)) != NULL)
780 {
781 int len = entry->query_len;
782 char *qstr = qtext_fetch(entry->query_offset, len,
783 qbuffer, qbuffer_size);
784
785 if (qstr == NULL)
786 continue; /* Ignore any entries with bogus texts */
787
788 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
789 fwrite(qstr, 1, len + 1, file) != len + 1)
790 {
791 /* note: we assume hash_seq_term won't change errno */
792 hash_seq_term(&hash_seq);
793 goto error;
794 }
795 }
796
797 /* Dump global statistics for pg_stat_statements */
798 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
799 goto error;
800
801 free(qbuffer);
802 qbuffer = NULL;
803
804 if (FreeFile(file))
805 {
806 file = NULL;
807 goto error;
808 }
809
810 /*
811 * Rename file into place, so we atomically replace any old one.
812 */
814
815 /* Unlink query-texts file; it's not needed once the server has shut down */
816 unlink(PGSS_TEXT_FILE);
817
818 return;
819
820error:
821 ereport(LOG,
823 errmsg("could not write file \"%s\": %m",
824 PGSS_DUMP_FILE ".tmp")));
825 free(qbuffer);
826 if (file)
827 FreeFile(file);
828 unlink(PGSS_DUMP_FILE ".tmp");
829 unlink(PGSS_TEXT_FILE);
830}
831
832/*
833 * Post-parse-analysis hook: mark query with a queryId
834 */
835static void
837{
839 prev_post_parse_analyze_hook(pstate, query, jstate);
840
841 /* Safety check... */
843 return;
844
845 /*
846 * If it's EXECUTE, clear the queryId so that stats will accumulate for
847 * the underlying PREPARE. But don't do this if we're not tracking
848 * utility statements, to avoid messing up another extension that might be
849 * tracking them.
850 */
851 if (query->utilityStmt)
852 {
854 {
855 query->queryId = UINT64CONST(0);
856 return;
857 }
858 }
859
860 /*
861 * If query jumbling was able to identify any ignorable constants, we
862 * immediately create a hash table entry for the query, so that we can
863 * record the normalized form of the query string. If there were no such
864 * constants, the normalized string would be the same as the query text
865 * anyway, so there's no need for an early entry.
866 */
867 if (jstate && jstate->clocations_count > 0)
868 pgss_store(pstate->p_sourcetext,
869 query->queryId,
870 query->stmt_location,
871 query->stmt_len,
873 0,
874 0,
875 NULL,
876 NULL,
877 NULL,
878 jstate,
879 0,
880 0);
881}
882
883/*
884 * Planner hook: forward to regular planner, but measure planning time
885 * if needed.
886 */
887static PlannedStmt *
889 const char *query_string,
890 int cursorOptions,
891 ParamListInfo boundParams)
892{
893 PlannedStmt *result;
894
895 /*
896 * We can't process the query if no query_string is provided, as
897 * pgss_store needs it. We also ignore query without queryid, as it would
898 * be treated as a utility statement, which may not be the case.
899 */
901 && pgss_track_planning && query_string
902 && parse->queryId != UINT64CONST(0))
903 {
906 BufferUsage bufusage_start,
907 bufusage;
908 WalUsage walusage_start,
909 walusage;
910
911 /* We need to track buffer usage as the planner can access them. */
912 bufusage_start = pgBufferUsage;
913
914 /*
915 * Similarly the planner could write some WAL records in some cases
916 * (e.g. setting a hint bit with those being WAL-logged)
917 */
918 walusage_start = pgWalUsage;
920
922 PG_TRY();
923 {
925 result = prev_planner_hook(parse, query_string, cursorOptions,
926 boundParams);
927 else
928 result = standard_planner(parse, query_string, cursorOptions,
929 boundParams);
930 }
931 PG_FINALLY();
932 {
934 }
935 PG_END_TRY();
936
939
940 /* calc differences of buffer counters. */
941 memset(&bufusage, 0, sizeof(BufferUsage));
942 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
943
944 /* calc differences of WAL counters. */
945 memset(&walusage, 0, sizeof(WalUsage));
946 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
947
948 pgss_store(query_string,
949 parse->queryId,
950 parse->stmt_location,
951 parse->stmt_len,
952 PGSS_PLAN,
954 0,
955 &bufusage,
956 &walusage,
957 NULL,
958 NULL,
959 0,
960 0);
961 }
962 else
963 {
964 /*
965 * Even though we're not tracking plan time for this statement, we
966 * must still increment the nesting level, to ensure that functions
967 * evaluated during planning are not seen as top-level calls.
968 */
970 PG_TRY();
971 {
973 result = prev_planner_hook(parse, query_string, cursorOptions,
974 boundParams);
975 else
976 result = standard_planner(parse, query_string, cursorOptions,
977 boundParams);
978 }
979 PG_FINALLY();
980 {
982 }
983 PG_END_TRY();
984 }
985
986 return result;
987}
988
989/*
990 * ExecutorStart hook: start up tracking if needed
991 */
992static void
993pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
994{
996 prev_ExecutorStart(queryDesc, eflags);
997 else
998 standard_ExecutorStart(queryDesc, eflags);
999
1000 /*
1001 * If query has queryId zero, don't track it. This prevents double
1002 * counting of optimizable statements that are directly contained in
1003 * utility statements.
1004 */
1005 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
1006 {
1007 /*
1008 * Set up to track total elapsed time in ExecutorRun. Make sure the
1009 * space is allocated in the per-query context so it will go away at
1010 * ExecutorEnd.
1011 */
1012 if (queryDesc->totaltime == NULL)
1013 {
1014 MemoryContext oldcxt;
1015
1016 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1017 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1018 MemoryContextSwitchTo(oldcxt);
1019 }
1020 }
1021}
1022
1023/*
 * ExecutorRun hook: all we need do is track nesting depth.
 *
 * nesting_level is incremented before the executor runs and decremented
 * again in the PG_FINALLY block, so anything executed while this call is in
 * progress sees a depth one greater than the caller's.  The real work is
 * delegated to prev_ExecutorRun when that pointer is set, otherwise to
 * standard_ExecutorRun(); queryDesc, direction and count are passed through
 * unchanged and nothing is returned.
 */
1026static void
1027pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1028{
1029 nesting_level++;
1030 PG_TRY();
1031 {
 /*
  * NOTE(review): prev_ExecutorRun is presumably the hook value saved when
  * this module installed itself; its declaration is not visible in this
  * listing.
  */
1032 if (prev_ExecutorRun)
1033 prev_ExecutorRun(queryDesc, direction, count);
1034 else
1035 standard_ExecutorRun(queryDesc, direction, count);
1036 }
1037 PG_FINALLY();
1038 {
 /*
  * NOTE(review): PG_FINALLY is expected to run on the error path as well
  * as on normal completion (see utils/elog.h), which is what keeps
  * nesting_level balanced if the executor throws.
  */
1039 nesting_level--;
1040 }
1041 PG_END_TRY();
1042}
1043
1044/*
1045 * ExecutorFinish hook: all we need do is track nesting depth
1046 */
1047static void
1049{
1050 nesting_level++;
1051 PG_TRY();
1052 {
1054 prev_ExecutorFinish(queryDesc);
1055 else
1056 standard_ExecutorFinish(queryDesc);
1057 }
1058 PG_FINALLY();
1059 {
1060 nesting_level--;
1061 }
1062 PG_END_TRY();
1063}
1064
1065/*
1066 * ExecutorEnd hook: store results if needed
1067 */
1068static void
1070{
1071 uint64 queryId = queryDesc->plannedstmt->queryId;
1072
1073 if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
1075 {
1076 /*
1077 * Make sure stats accumulation is done. (Note: it's okay if several
1078 * levels of hook all do this.)
1079 */
1080 InstrEndLoop(queryDesc->totaltime);
1081
1082 pgss_store(queryDesc->sourceText,
1083 queryId,
1084 queryDesc->plannedstmt->stmt_location,
1085 queryDesc->plannedstmt->stmt_len,
1086 PGSS_EXEC,
1087 queryDesc->totaltime->total * 1000.0, /* convert to msec */
1088 queryDesc->estate->es_total_processed,
1089 &queryDesc->totaltime->bufusage,
1090 &queryDesc->totaltime->walusage,
1091 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1092 NULL,
1095 }
1096
1097 if (prev_ExecutorEnd)
1098 prev_ExecutorEnd(queryDesc);
1099 else
1100 standard_ExecutorEnd(queryDesc);
1101}
1102
1103/*
1104 * ProcessUtility hook
1105 */
1106static void
1107pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1108 bool readOnlyTree,
1109 ProcessUtilityContext context,
1110 ParamListInfo params, QueryEnvironment *queryEnv,
1112{
1113 Node *parsetree = pstmt->utilityStmt;
1114 uint64 saved_queryId = pstmt->queryId;
1115 int saved_stmt_location = pstmt->stmt_location;
1116 int saved_stmt_len = pstmt->stmt_len;
1118
1119 /*
1120 * Force utility statements to get queryId zero. We do this even in cases
1121 * where the statement contains an optimizable statement for which a
1122 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1123 * cases, runtime control will first go through ProcessUtility and then
1124 * the executor, and we don't want the executor hooks to do anything,
1125 * since we are already measuring the statement's costs at the utility
1126 * level.
1127 *
1128 * Note that this is only done if pg_stat_statements is enabled and
1129 * configured to track utility statements, in the unlikely possibility
1130 * that user configured another extension to handle utility statements
1131 * only.
1132 */
1133 if (enabled)
1134 pstmt->queryId = UINT64CONST(0);
1135
1136 /*
1137 * If it's an EXECUTE statement, we don't track it and don't increment the
1138 * nesting level. This allows the cycles to be charged to the underlying
1139 * PREPARE instead (by the Executor hooks), which is much more useful.
1140 *
1141 * We also don't track execution of PREPARE. If we did, we would get one
1142 * hash table entry for the PREPARE (with hash calculated from the query
1143 * string), and then a different one with the same query string (but hash
1144 * calculated from the query tree) would be used to accumulate costs of
1145 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1146 * actually run the planner (only parse+rewrite), its costs are generally
1147 * pretty negligible and it seems okay to just ignore it.
1148 */
1149 if (enabled &&
1150 !IsA(parsetree, ExecuteStmt) &&
1151 !IsA(parsetree, PrepareStmt))
1152 {
1155 uint64 rows;
1156 BufferUsage bufusage_start,
1157 bufusage;
1158 WalUsage walusage_start,
1159 walusage;
1160
1161 bufusage_start = pgBufferUsage;
1162 walusage_start = pgWalUsage;
1164
1165 nesting_level++;
1166 PG_TRY();
1167 {
1169 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1170 context, params, queryEnv,
1171 dest, qc);
1172 else
1173 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1174 context, params, queryEnv,
1175 dest, qc);
1176 }
1177 PG_FINALLY();
1178 {
1179 nesting_level--;
1180 }
1181 PG_END_TRY();
1182
1183 /*
1184 * CAUTION: do not access the *pstmt data structure again below here.
1185 * If it was a ROLLBACK or similar, that data structure may have been
1186 * freed. We must copy everything we still need into local variables,
1187 * which we did above.
1188 *
1189 * For the same reason, we can't risk restoring pstmt->queryId to its
1190 * former value, which'd otherwise be a good idea.
1191 */
1192
1195
1196 /*
1197 * Track the total number of rows retrieved or affected by the utility
1198 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1199 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1200 */
1201 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1202 qc->commandTag == CMDTAG_FETCH ||
1203 qc->commandTag == CMDTAG_SELECT ||
1204 qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1205 qc->nprocessed : 0;
1206
1207 /* calc differences of buffer counters. */
1208 memset(&bufusage, 0, sizeof(BufferUsage));
1209 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1210
1211 /* calc differences of WAL counters. */
1212 memset(&walusage, 0, sizeof(WalUsage));
1213 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1214
1215 pgss_store(queryString,
1216 saved_queryId,
1217 saved_stmt_location,
1218 saved_stmt_len,
1219 PGSS_EXEC,
1221 rows,
1222 &bufusage,
1223 &walusage,
1224 NULL,
1225 NULL,
1226 0,
1227 0);
1228 }
1229 else
1230 {
1231 /*
1232 * Even though we're not tracking execution time for this statement,
1233 * we must still increment the nesting level, to ensure that functions
1234 * evaluated within it are not seen as top-level calls. But don't do
1235 * so for EXECUTE; that way, when control reaches pgss_planner or
1236 * pgss_ExecutorStart, we will treat the costs as top-level if
1237 * appropriate. Likewise, don't bump for PREPARE, so that parse
1238 * analysis will treat the statement as top-level if appropriate.
1239 *
1240 * To be absolutely certain we don't mess up the nesting level,
1241 * evaluate the bump_level condition just once.
1242 */
1243 bool bump_level =
1244 !IsA(parsetree, ExecuteStmt) &&
1245 !IsA(parsetree, PrepareStmt);
1246
1247 if (bump_level)
1248 nesting_level++;
1249 PG_TRY();
1250 {
1252 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1253 context, params, queryEnv,
1254 dest, qc);
1255 else
1256 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1257 context, params, queryEnv,
1258 dest, qc);
1259 }
1260 PG_FINALLY();
1261 {
1262 if (bump_level)
1263 nesting_level--;
1264 }
1265 PG_END_TRY();
1266 }
1267}
1268
1269/*
1270 * Store some statistics for a statement.
1271 *
1272 * If jstate is not NULL then we're trying to create an entry for which
1273 * we have no statistics as yet; we just want to record the normalized
1274 * query string. total_time, rows, bufusage and walusage are ignored in this
1275 * case.
1276 *
1277 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1278 * for the arrays in the Counters field.
1279 */
1280static void
1281pgss_store(const char *query, uint64 queryId,
1282 int query_location, int query_len,
1283 pgssStoreKind kind,
1284 double total_time, uint64 rows,
1285 const BufferUsage *bufusage,
1286 const WalUsage *walusage,
1287 const struct JitInstrumentation *jitusage,
1288 JumbleState *jstate,
1289 int parallel_workers_to_launch,
1290 int parallel_workers_launched)
1291{
1293 pgssEntry *entry;
1294 char *norm_query = NULL;
1296
1297 Assert(query != NULL);
1298
1299 /* Safety check... */
1300 if (!pgss || !pgss_hash)
1301 return;
1302
1303 /*
1304 * Nothing to do if compute_query_id isn't enabled and no other module
1305 * computed a query identifier.
1306 */
1307 if (queryId == UINT64CONST(0))
1308 return;
1309
1310 /*
1311 * Confine our attention to the relevant part of the string, if the query
1312 * is a portion of a multi-statement source string, and update query
1313 * location and length if needed.
1314 */
1315 query = CleanQuerytext(query, &query_location, &query_len);
1316
1317 /* Set up key for hashtable search */
1318
1319 /* clear padding */
1320 memset(&key, 0, sizeof(pgssHashKey));
1321
1322 key.userid = GetUserId();
1323 key.dbid = MyDatabaseId;
1324 key.queryid = queryId;
1325 key.toplevel = (nesting_level == 0);
1326
1327 /* Lookup the hash table entry with shared lock. */
1329
1330 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1331
1332 /* Create new entry, if not present */
1333 if (!entry)
1334 {
1335 Size query_offset;
1336 int gc_count;
1337 bool stored;
1338 bool do_gc;
1339
1340 /*
1341 * Create a new, normalized query string if caller asked. We don't
1342 * need to hold the lock while doing this work. (Note: in any case,
1343 * it's possible that someone else creates a duplicate hashtable entry
1344 * in the interval where we don't hold the lock below. That case is
1345 * handled by entry_alloc.)
1346 */
1347 if (jstate)
1348 {
1350 norm_query = generate_normalized_query(jstate, query,
1351 query_location,
1352 &query_len);
1354 }
1355
1356 /* Append new query text to file with only shared lock held */
1357 stored = qtext_store(norm_query ? norm_query : query, query_len,
1358 &query_offset, &gc_count);
1359
1360 /*
1361 * Determine whether we need to garbage collect external query texts
1362 * while the shared lock is still held. This micro-optimization
1363 * avoids taking the time to decide this while holding exclusive lock.
1364 */
1365 do_gc = need_gc_qtexts();
1366
1367 /* Need exclusive lock to make a new hashtable entry - promote */
1370
1371 /*
1372 * A garbage collection may have occurred while we weren't holding the
1373 * lock. In the unlikely event that this happens, the query text we
1374 * stored above will have been garbage collected, so write it again.
1375 * This should be infrequent enough that doing it while holding
1376 * exclusive lock isn't a performance problem.
1377 */
1378 if (!stored || pgss->gc_count != gc_count)
1379 stored = qtext_store(norm_query ? norm_query : query, query_len,
1380 &query_offset, NULL);
1381
1382 /* If we failed to write to the text file, give up */
1383 if (!stored)
1384 goto done;
1385
1386 /* OK to create a new hashtable entry */
1387 entry = entry_alloc(&key, query_offset, query_len, encoding,
1388 jstate != NULL);
1389
1390 /* If needed, perform garbage collection while exclusive lock held */
1391 if (do_gc)
1392 gc_qtexts();
1393 }
1394
1395 /* Increment the counts, except when jstate is not NULL */
1396 if (!jstate)
1397 {
1398 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1399
1400 /*
1401 * Grab the spinlock while updating the counters (see comment about
1402 * locking rules at the head of the file)
1403 */
1404 SpinLockAcquire(&entry->mutex);
1405
1406 /* "Unstick" entry if it was previously sticky */
1407 if (IS_STICKY(entry->counters))
1408 entry->counters.usage = USAGE_INIT;
1409
1410 entry->counters.calls[kind] += 1;
1411 entry->counters.total_time[kind] += total_time;
1412
1413 if (entry->counters.calls[kind] == 1)
1414 {
1415 entry->counters.min_time[kind] = total_time;
1416 entry->counters.max_time[kind] = total_time;
1417 entry->counters.mean_time[kind] = total_time;
1418 }
1419 else
1420 {
1421 /*
1422 * Welford's method for accurately computing variance. See
1423 * <http://www.johndcook.com/blog/standard_deviation/>
1424 */
1425 double old_mean = entry->counters.mean_time[kind];
1426
1427 entry->counters.mean_time[kind] +=
1428 (total_time - old_mean) / entry->counters.calls[kind];
1429 entry->counters.sum_var_time[kind] +=
1430 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1431
1432 /*
1433 * Calculate min and max time. min = 0 and max = 0 means that the
1434 * min/max statistics were reset
1435 */
1436 if (entry->counters.min_time[kind] == 0
1437 && entry->counters.max_time[kind] == 0)
1438 {
1439 entry->counters.min_time[kind] = total_time;
1440 entry->counters.max_time[kind] = total_time;
1441 }
1442 else
1443 {
1444 if (entry->counters.min_time[kind] > total_time)
1445 entry->counters.min_time[kind] = total_time;
1446 if (entry->counters.max_time[kind] < total_time)
1447 entry->counters.max_time[kind] = total_time;
1448 }
1449 }
1450 entry->counters.rows += rows;
1451 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1452 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1455 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1456 entry->counters.local_blks_read += bufusage->local_blks_read;
1459 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1460 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1467 entry->counters.usage += USAGE_EXEC(total_time);
1468 entry->counters.wal_records += walusage->wal_records;
1469 entry->counters.wal_fpi += walusage->wal_fpi;
1470 entry->counters.wal_bytes += walusage->wal_bytes;
1471 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1472 if (jitusage)
1473 {
1474 entry->counters.jit_functions += jitusage->created_functions;
1476
1478 entry->counters.jit_deform_count++;
1480
1484
1488
1492 }
1493
1494 /* parallel worker counters */
1495 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1496 entry->counters.parallel_workers_launched += parallel_workers_launched;
1497
1498 SpinLockRelease(&entry->mutex);
1499 }
1500
1501done:
1503
1504 /* We postpone this clean-up until we're out of the lock */
1505 if (norm_query)
1506 pfree(norm_query);
1507}
1508
1509/*
1510 * Reset statement statistics corresponding to userid, dbid, and queryid.
1511 */
1512Datum
1514{
1515 Oid userid;
1516 Oid dbid;
1517 uint64 queryid;
1518
1519 userid = PG_GETARG_OID(0);
1520 dbid = PG_GETARG_OID(1);
1521 queryid = (uint64) PG_GETARG_INT64(2);
1522
1523 entry_reset(userid, dbid, queryid, false);
1524
1526}
1527
1528Datum
1530{
1531 Oid userid;
1532 Oid dbid;
1533 uint64 queryid;
1534 bool minmax_only;
1535
1536 userid = PG_GETARG_OID(0);
1537 dbid = PG_GETARG_OID(1);
1538 queryid = (uint64) PG_GETARG_INT64(2);
1539 minmax_only = PG_GETARG_BOOL(3);
1540
1541 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1542}
1543
1544/*
1545 * Reset statement statistics.
1546 */
1547Datum
1549{
1550 entry_reset(0, 0, 0, false);
1551
1553}
1554
1555/* Number of output arguments (columns) for various API versions */
1556#define PG_STAT_STATEMENTS_COLS_V1_0 14
1557#define PG_STAT_STATEMENTS_COLS_V1_1 18
1558#define PG_STAT_STATEMENTS_COLS_V1_2 19
1559#define PG_STAT_STATEMENTS_COLS_V1_3 23
1560#define PG_STAT_STATEMENTS_COLS_V1_8 32
1561#define PG_STAT_STATEMENTS_COLS_V1_9 33
1562#define PG_STAT_STATEMENTS_COLS_V1_10 43
1563#define PG_STAT_STATEMENTS_COLS_V1_11 49
1564#define PG_STAT_STATEMENTS_COLS_V1_12 52
1565#define PG_STAT_STATEMENTS_COLS 52 /* maximum of above */
1566
1567/*
1568 * Retrieve statement statistics.
1569 *
1570 * The SQL API of this function has changed multiple times, and will likely
1571 * do so again in future. To support the case where a newer version of this
1572 * loadable module is being used with an old SQL declaration of the function,
1573 * we continue to support the older API versions. For 1.2 and later, the
1574 * expected API version is identified by embedding it in the C name of the
1575 * function. Unfortunately we weren't bright enough to do that for 1.1.
1576 */
1577Datum
1579{
1580 bool showtext = PG_GETARG_BOOL(0);
1581
1582 pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1583
1584 return (Datum) 0;
1585}
1586
1587Datum
1589{
1590 bool showtext = PG_GETARG_BOOL(0);
1591
1592 pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1593
1594 return (Datum) 0;
1595}
1596
1597Datum
1599{
1600 bool showtext = PG_GETARG_BOOL(0);
1601
1602 pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1603
1604 return (Datum) 0;
1605}
1606
1607Datum
1609{
1610 bool showtext = PG_GETARG_BOOL(0);
1611
1612 pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1613
1614 return (Datum) 0;
1615}
1616
1617Datum
1619{
1620 bool showtext = PG_GETARG_BOOL(0);
1621
1622 pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1623
1624 return (Datum) 0;
1625}
1626
1627Datum
1629{
1630 bool showtext = PG_GETARG_BOOL(0);
1631
1632 pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1633
1634 return (Datum) 0;
1635}
1636
1637Datum
1639{
1640 bool showtext = PG_GETARG_BOOL(0);
1641
1642 pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1643
1644 return (Datum) 0;
1645}
1646
1647/*
1648 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1649 * This can be removed someday, perhaps.
1650 */
1651Datum
1653{
1654 /* If it's really API 1.1, we'll figure that out below */
1656
1657 return (Datum) 0;
1658}
1659
1660/* Common code for all versions of pg_stat_statements() */
1661static void
1663 pgssVersion api_version,
1664 bool showtext)
1665{
1666 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1667 Oid userid = GetUserId();
1668 bool is_allowed_role = false;
1669 char *qbuffer = NULL;
1670 Size qbuffer_size = 0;
1671 Size extent = 0;
1672 int gc_count = 0;
1673 HASH_SEQ_STATUS hash_seq;
1674 pgssEntry *entry;
1675
1676 /*
1677 * Superusers or roles with the privileges of pg_read_all_stats members
1678 * are allowed
1679 */
1680 is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1681
1682 /* hash table must exist already */
1683 if (!pgss || !pgss_hash)
1684 ereport(ERROR,
1685 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1686 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1687
1688 InitMaterializedSRF(fcinfo, 0);
1689
1690 /*
1691 * Check we have the expected number of output arguments. Aside from
1692 * being a good safety check, we need a kluge here to detect API version
1693 * 1.1, which was wedged into the code in an ill-considered way.
1694 */
1695 switch (rsinfo->setDesc->natts)
1696 {
1698 if (api_version != PGSS_V1_0)
1699 elog(ERROR, "incorrect number of output arguments");
1700 break;
1702 /* pg_stat_statements() should have told us 1.0 */
1703 if (api_version != PGSS_V1_0)
1704 elog(ERROR, "incorrect number of output arguments");
1705 api_version = PGSS_V1_1;
1706 break;
1708 if (api_version != PGSS_V1_2)
1709 elog(ERROR, "incorrect number of output arguments");
1710 break;
1712 if (api_version != PGSS_V1_3)
1713 elog(ERROR, "incorrect number of output arguments");
1714 break;
1716 if (api_version != PGSS_V1_8)
1717 elog(ERROR, "incorrect number of output arguments");
1718 break;
1720 if (api_version != PGSS_V1_9)
1721 elog(ERROR, "incorrect number of output arguments");
1722 break;
1724 if (api_version != PGSS_V1_10)
1725 elog(ERROR, "incorrect number of output arguments");
1726 break;
1728 if (api_version != PGSS_V1_11)
1729 elog(ERROR, "incorrect number of output arguments");
1730 break;
1732 if (api_version != PGSS_V1_12)
1733 elog(ERROR, "incorrect number of output arguments");
1734 break;
1735 default:
1736 elog(ERROR, "incorrect number of output arguments");
1737 }
1738
1739 /*
1740 * We'd like to load the query text file (if needed) while not holding any
1741 * lock on pgss->lock. In the worst case we'll have to do this again
1742 * after we have the lock, but it's unlikely enough to make this a win
1743 * despite occasional duplicated work. We need to reload if anybody
1744 * writes to the file (either a retail qtext_store(), or a garbage
1745 * collection) between this point and where we've gotten shared lock. If
1746 * a qtext_store is actually in progress when we look, we might as well
1747 * skip the speculative load entirely.
1748 */
1749 if (showtext)
1750 {
1751 int n_writers;
1752
1753 /* Take the mutex so we can examine variables */
1755 extent = pgss->extent;
1756 n_writers = pgss->n_writers;
1757 gc_count = pgss->gc_count;
1759
1760 /* No point in loading file now if there are active writers */
1761 if (n_writers == 0)
1762 qbuffer = qtext_load_file(&qbuffer_size);
1763 }
1764
1765 /*
1766 * Get shared lock, load or reload the query text file if we must, and
1767 * iterate over the hashtable entries.
1768 *
1769 * With a large hash table, we might be holding the lock rather longer
1770 * than one could wish. However, this only blocks creation of new hash
1771 * table entries, and the larger the hash table the less likely that is to
1772 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1773 * we need to partition the hash table to limit the time spent holding any
1774 * one lock.
1775 */
1777
1778 if (showtext)
1779 {
1780 /*
1781 * Here it is safe to examine extent and gc_count without taking the
1782 * mutex. Note that although other processes might change
1783 * pgss->extent just after we look at it, the strings they then write
1784 * into the file cannot yet be referenced in the hashtable, so we
1785 * don't care whether we see them or not.
1786 *
1787 * If qtext_load_file fails, we just press on; we'll return NULL for
1788 * every query text.
1789 */
1790 if (qbuffer == NULL ||
1791 pgss->extent != extent ||
1792 pgss->gc_count != gc_count)
1793 {
1794 free(qbuffer);
1795 qbuffer = qtext_load_file(&qbuffer_size);
1796 }
1797 }
1798
1799 hash_seq_init(&hash_seq, pgss_hash);
1800 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1801 {
1803 bool nulls[PG_STAT_STATEMENTS_COLS];
1804 int i = 0;
1805 Counters tmp;
1806 double stddev;
1807 int64 queryid = entry->key.queryid;
1808 TimestampTz stats_since;
1809 TimestampTz minmax_stats_since;
1810
1811 memset(values, 0, sizeof(values));
1812 memset(nulls, 0, sizeof(nulls));
1813
1814 values[i++] = ObjectIdGetDatum(entry->key.userid);
1815 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1816 if (api_version >= PGSS_V1_9)
1817 values[i++] = BoolGetDatum(entry->key.toplevel);
1818
1819 if (is_allowed_role || entry->key.userid == userid)
1820 {
1821 if (api_version >= PGSS_V1_2)
1822 values[i++] = Int64GetDatumFast(queryid);
1823
1824 if (showtext)
1825 {
1826 char *qstr = qtext_fetch(entry->query_offset,
1827 entry->query_len,
1828 qbuffer,
1829 qbuffer_size);
1830
1831 if (qstr)
1832 {
1833 char *enc;
1834
1835 enc = pg_any_to_server(qstr,
1836 entry->query_len,
1837 entry->encoding);
1838
1840
1841 if (enc != qstr)
1842 pfree(enc);
1843 }
1844 else
1845 {
1846 /* Just return a null if we fail to find the text */
1847 nulls[i++] = true;
1848 }
1849 }
1850 else
1851 {
1852 /* Query text not requested */
1853 nulls[i++] = true;
1854 }
1855 }
1856 else
1857 {
1858 /* Don't show queryid */
1859 if (api_version >= PGSS_V1_2)
1860 nulls[i++] = true;
1861
1862 /*
1863 * Don't show query text, but hint as to the reason for not doing
1864 * so if it was requested
1865 */
1866 if (showtext)
1867 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1868 else
1869 nulls[i++] = true;
1870 }
1871
1872 /* copy counters to a local variable to keep locking time short */
1873 SpinLockAcquire(&entry->mutex);
1874 tmp = entry->counters;
1875 SpinLockRelease(&entry->mutex);
1876
1877 /*
1878 * The spinlock is not required when reading these two as they are
1879 * always updated when holding pgss->lock exclusively.
1880 */
1881 stats_since = entry->stats_since;
1882 minmax_stats_since = entry->minmax_stats_since;
1883
1884 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1885 if (IS_STICKY(tmp))
1886 continue;
1887
1888 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1889 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1890 {
1891 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1892 {
1893 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1894 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1895 }
1896
1897 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1898 api_version >= PGSS_V1_8)
1899 {
1900 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1901 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1902 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1903
1904 /*
1905 * Note we are calculating the population variance here, not
1906 * the sample variance, as we have data for the whole
1907 * population, so Bessel's correction is not used, and we
1908 * don't divide by tmp.calls - 1.
1909 */
1910 if (tmp.calls[kind] > 1)
1911 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1912 else
1913 stddev = 0.0;
1914 values[i++] = Float8GetDatumFast(stddev);
1915 }
1916 }
1917 values[i++] = Int64GetDatumFast(tmp.rows);
1920 if (api_version >= PGSS_V1_1)
1925 if (api_version >= PGSS_V1_1)
1930 if (api_version >= PGSS_V1_1)
1931 {
1934 }
1935 if (api_version >= PGSS_V1_11)
1936 {
1939 }
1940 if (api_version >= PGSS_V1_10)
1941 {
1944 }
1945 if (api_version >= PGSS_V1_8)
1946 {
1947 char buf[256];
1948 Datum wal_bytes;
1949
1952
1953 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1954
1955 /* Convert to numeric. */
1956 wal_bytes = DirectFunctionCall3(numeric_in,
1959 Int32GetDatum(-1));
1960 values[i++] = wal_bytes;
1961 }
1962 if (api_version >= PGSS_V1_12)
1963 {
1965 }
1966 if (api_version >= PGSS_V1_10)
1967 {
1976 }
1977 if (api_version >= PGSS_V1_11)
1978 {
1981 }
1982 if (api_version >= PGSS_V1_12)
1983 {
1986 }
1987 if (api_version >= PGSS_V1_11)
1988 {
1989 values[i++] = TimestampTzGetDatum(stats_since);
1990 values[i++] = TimestampTzGetDatum(minmax_stats_since);
1991 }
1992
1993 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1994 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1995 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1996 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
1997 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
1998 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
1999 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2000 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2001 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2002 -1 /* fail if you forget to update this assert */ ));
2003
2004 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2005 }
2006
2008
2009 free(qbuffer);
2010}
2011
2012/* Number of output arguments (columns) for pg_stat_statements_info */
2013#define PG_STAT_STATEMENTS_INFO_COLS 2
2014
2015/*
2016 * Return statistics of pg_stat_statements.
2017 */
2018Datum
2020{
2021 pgssGlobalStats stats;
2022 TupleDesc tupdesc;
2024 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2025
2026 if (!pgss || !pgss_hash)
2027 ereport(ERROR,
2028 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2029 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2030
2031 /* Build a tuple descriptor for our result type */
2032 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2033 elog(ERROR, "return type must be a row type");
2034
2035 /* Read global statistics for pg_stat_statements */
2037 stats = pgss->stats;
2039
2040 values[0] = Int64GetDatum(stats.dealloc);
2042
2044}
2045
2046/*
2047 * Estimate shared memory space needed.
2048 */
2049static Size
2051{
2052 Size size;
2053
2054 size = MAXALIGN(sizeof(pgssSharedState));
2055 size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2056
2057 return size;
2058}
2059
2060/*
2061 * Allocate a new hashtable entry.
2062 * caller must hold an exclusive lock on pgss->lock
2063 *
2064 * "query" need not be null-terminated; we rely on query_len instead
2065 *
2066 * If "sticky" is true, make the new entry artificially sticky so that it will
2067 * probably still be there when the query finishes execution. We do this by
2068 * giving it a median usage value rather than the normal value. (Strictly
2069 * speaking, query strings are normalized on a best effort basis, though it
2070 * would be difficult to demonstrate this even under artificial conditions.)
2071 *
2072 * Note: despite needing exclusive lock, it's not an error for the target
2073 * entry to already exist. This is because pgss_store releases and
2074 * reacquires lock after failing to find a match; so someone else could
2075 * have made the entry while we waited to get exclusive lock.
2076 */
2077static pgssEntry *
2078entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2079 bool sticky)
2080{
2081 pgssEntry *entry;
2082 bool found;
2083
2084 /* Make space if needed */
2086 entry_dealloc();
2087
2088 /* Find or create an entry with desired hash code */
2089 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2090
2091 if (!found)
2092 {
2093 /* New entry, initialize it */
2094
2095 /* reset the statistics */
2096 memset(&entry->counters, 0, sizeof(Counters));
2097 /* set the appropriate initial usage count */
2098 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2099 /* re-initialize the mutex each time ... we assume no one using it */
2100 SpinLockInit(&entry->mutex);
2101 /* ... and don't forget the query text metadata */
2102 Assert(query_len >= 0);
2103 entry->query_offset = query_offset;
2104 entry->query_len = query_len;
2105 entry->encoding = encoding;
2107 entry->minmax_stats_since = entry->stats_since;
2108 }
2109
2110 return entry;
2111}
2112
2113/*
2114 * qsort comparator for sorting into increasing usage order
2115 */
2116static int
2117entry_cmp(const void *lhs, const void *rhs)
2118{
2119 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2120 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2121
2122 if (l_usage < r_usage)
2123 return -1;
2124 else if (l_usage > r_usage)
2125 return +1;
2126 else
2127 return 0;
2128}
2129
2130/*
2131 * Deallocate least-used entries.
2132 *
2133 * Caller must hold an exclusive lock on pgss->lock.
2134 */
2135static void
2137{
2138 HASH_SEQ_STATUS hash_seq;
2139 pgssEntry **entries;
2140 pgssEntry *entry;
2141 int nvictims;
2142 int i;
2143 Size tottextlen;
2144 int nvalidtexts;
2145
2146 /*
2147 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2148 * While we're scanning the table, apply the decay factor to the usage
2149 * values, and update the mean query length.
2150 *
2151 * Note that the mean query length is almost immediately obsolete, since
2152 * we compute it before not after discarding the least-used entries.
2153 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2154 * making two passes to get a more current result. Likewise, the new
2155 * cur_median_usage includes the entries we're about to zap.
2156 */
2157
2158 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2159
2160 i = 0;
2161 tottextlen = 0;
2162 nvalidtexts = 0;
2163
2164 hash_seq_init(&hash_seq, pgss_hash);
2165 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2166 {
2167 entries[i++] = entry;
2168 /* "Sticky" entries get a different usage decay rate. */
2169 if (IS_STICKY(entry->counters))
2171 else
2173 /* In the mean length computation, ignore dropped texts. */
2174 if (entry->query_len >= 0)
2175 {
2176 tottextlen += entry->query_len + 1;
2177 nvalidtexts++;
2178 }
2179 }
2180
2181 /* Sort into increasing order by usage */
2182 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2183
2184 /* Record the (approximate) median usage */
2185 if (i > 0)
2186 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2187 /* Record the mean query length */
2188 if (nvalidtexts > 0)
2189 pgss->mean_query_len = tottextlen / nvalidtexts;
2190 else
2192
2193 /* Now zap an appropriate fraction of lowest-usage entries */
2194 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2195 nvictims = Min(nvictims, i);
2196
2197 for (i = 0; i < nvictims; i++)
2198 {
2199 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2200 }
2201
2202 pfree(entries);
2203
2204 /* Increment the number of times entries are deallocated */
2206 pgss->stats.dealloc += 1;
2208}
2209
2210/*
2211 * Given a query string (not necessarily null-terminated), allocate a new
2212 * entry in the external query text file and store the string there.
2213 *
2214 * If successful, returns true, and stores the new entry's offset in the file
2215 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2216 * number of garbage collections that have occurred so far.
2217 *
2218 * On failure, returns false.
2219 *
2220 * At least a shared lock on pgss->lock must be held by the caller, so as
2221 * to prevent a concurrent garbage collection. Share-lock-holding callers
2222 * should pass a gc_count pointer to obtain the number of garbage collections,
2223 * so that they can recheck the count after obtaining exclusive lock to
2224 * detect whether a garbage collection occurred (and removed this entry).
2225 */
2226static bool
2227qtext_store(const char *query, int query_len,
2228 Size *query_offset, int *gc_count)
2229{
2230 Size off;
2231 int fd;
2232
2233 /*
2234 * We use a spinlock to protect extent/n_writers/gc_count, so that
2235 * multiple processes may execute this function concurrently.
2236 */
2238 off = pgss->extent;
2239 pgss->extent += query_len + 1;
2240 pgss->n_writers++;
2241 if (gc_count)
2242 *gc_count = pgss->gc_count;
2244
2245 *query_offset = off;
2246
2247 /*
2248 * Don't allow the file to grow larger than what qtext_load_file can
2249 * (theoretically) handle. This has been seen to be reachable on 32-bit
2250 * platforms.
2251 */
2252 if (unlikely(query_len >= MaxAllocHugeSize - off))
2253 {
2254 errno = EFBIG; /* not quite right, but it'll do */
2255 fd = -1;
2256 goto error;
2257 }
2258
2259 /* Now write the data into the successfully-reserved part of the file */
2260 fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2261 if (fd < 0)
2262 goto error;
2263
2264 if (pg_pwrite(fd, query, query_len, off) != query_len)
2265 goto error;
2266 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2267 goto error;
2268
2270
2271 /* Mark our write complete */
2273 pgss->n_writers--;
2275
2276 return true;
2277
2278error:
2279 ereport(LOG,
2281 errmsg("could not write file \"%s\": %m",
2282 PGSS_TEXT_FILE)));
2283
2284 if (fd >= 0)
2286
2287 /* Mark our write complete */
2289 pgss->n_writers--;
2291
2292 return false;
2293}
2294
2295/*
2296 * Read the external query text file into a malloc'd buffer.
2297 *
2298 * Returns NULL (without throwing an error) if unable to read, eg
2299 * file not there or insufficient memory.
2300 *
2301 * On success, the buffer size is also returned into *buffer_size.
2302 *
2303 * This can be called without any lock on pgss->lock, but in that case
2304 * the caller is responsible for verifying that the result is sane.
2305 */
2306static char *
2308{
2309 char *buf;
2310 int fd;
2311 struct stat stat;
2312 Size nread;
2313
2315 if (fd < 0)
2316 {
2317 if (errno != ENOENT)
2318 ereport(LOG,
2320 errmsg("could not read file \"%s\": %m",
2321 PGSS_TEXT_FILE)));
2322 return NULL;
2323 }
2324
2325 /* Get file length */
2326 if (fstat(fd, &stat))
2327 {
2328 ereport(LOG,
2330 errmsg("could not stat file \"%s\": %m",
2331 PGSS_TEXT_FILE)));
2333 return NULL;
2334 }
2335
2336 /* Allocate buffer; beware that off_t might be wider than size_t */
2338 buf = (char *) malloc(stat.st_size);
2339 else
2340 buf = NULL;
2341 if (buf == NULL)
2342 {
2343 ereport(LOG,
2344 (errcode(ERRCODE_OUT_OF_MEMORY),
2345 errmsg("out of memory"),
2346 errdetail("Could not allocate enough memory to read file \"%s\".",
2347 PGSS_TEXT_FILE)));
2349 return NULL;
2350 }
2351
2352 /*
2353 * OK, slurp in the file. Windows fails if we try to read more than
2354 * INT_MAX bytes at once, and other platforms might not like that either,
2355 * so read a very large file in 1GB segments.
2356 */
2357 nread = 0;
2358 while (nread < stat.st_size)
2359 {
2360 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2361
2362 /*
2363 * If we get a short read and errno doesn't get set, the reason is
2364 * probably that garbage collection truncated the file since we did
2365 * the fstat(), so we don't log a complaint --- but we don't return
2366 * the data, either, since it's most likely corrupt due to concurrent
2367 * writes from garbage collection.
2368 */
2369 errno = 0;
2370 if (read(fd, buf + nread, toread) != toread)
2371 {
2372 if (errno)
2373 ereport(LOG,
2375 errmsg("could not read file \"%s\": %m",
2376 PGSS_TEXT_FILE)));
2377 free(buf);
2379 return NULL;
2380 }
2381 nread += toread;
2382 }
2383
2384 if (CloseTransientFile(fd) != 0)
2385 ereport(LOG,
2387 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2388
2389 *buffer_size = nread;
2390 return buf;
2391}
2392
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * query_offset/query_len describe where the text is expected to live inside
 * buffer (of buffer_size bytes).  We validate the given offset/length, and
 * return NULL if bogus.  Otherwise, the result points to a null-terminated
 * string within the buffer.
 *
 * A NULL buffer (i.e. the file could not be loaded) also yields NULL.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text plus its terminator must fit, i.e.
	 * query_offset + query_len < buffer_size.  Spell that out without
	 * actually forming the sum, so that an absurdly large offset cannot wrap
	 * around in Size arithmetic and slip past the check.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;

	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;

	/* Looks OK */
	return buffer + query_offset;
}
2416
2417/*
2418 * Do we need to garbage-collect the external query text file?
2419 *
2420 * Caller should hold at least a shared lock on pgss->lock.
2421 */
2422static bool
2424{
2425 Size extent;
2426
2427 /* Read shared extent pointer */
2429 extent = pgss->extent;
2431
2432 /*
2433 * Don't proceed if file does not exceed 512 bytes per possible entry.
2434 *
2435 * Here and in the next test, 32-bit machines have overflow hazards if
2436 * pgss_max and/or mean_query_len are large. Force the multiplications
2437 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2438 */
2439 if ((uint64) extent < (uint64) 512 * pgss_max)
2440 return false;
2441
2442 /*
2443 * Don't proceed if file is less than about 50% bloat. Nothing can or
2444 * should be done in the event of unusually large query texts accounting
2445 * for file's large size. We go to the trouble of maintaining the mean
2446 * query length in order to prevent garbage collection from thrashing
2447 * uselessly.
2448 */
2449 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2450 return false;
2451
2452 return true;
2453}
2454
2455/*
2456 * Garbage-collect orphaned query texts in external file.
2457 *
2458 * This won't be called often in the typical case, since it's likely that
2459 * there won't be too much churn, and besides, a similar compaction process
2460 * occurs when serializing to disk at shutdown or as part of resetting.
2461 * Despite this, it seems prudent to plan for the edge case where the file
2462 * becomes unreasonably large, with no other method of compaction likely to
2463 * occur in the foreseeable future.
2464 *
2465 * The caller must hold an exclusive lock on pgss->lock.
2466 *
2467 * At the first sign of trouble we unlink the query text file to get a clean
2468 * slate (although existing statistics are retained), rather than risk
2469 * thrashing by allowing the same problem case to recur indefinitely.
2470 */
2471static void
2473{
2474 char *qbuffer;
2475 Size qbuffer_size;
2476 FILE *qfile = NULL;
2477 HASH_SEQ_STATUS hash_seq;
2478 pgssEntry *entry;
2479 Size extent;
2480 int nentries;
2481
2482 /*
2483 * When called from pgss_store, some other session might have proceeded
2484 * with garbage collection in the no-lock-held interim of lock strength
2485 * escalation. Check once more that this is actually necessary.
2486 */
2487 if (!need_gc_qtexts())
2488 return;
2489
2490 /*
2491 * Load the old texts file. If we fail (out of memory, for instance),
2492 * invalidate query texts. Hopefully this is rare. It might seem better
2493 * to leave things alone on an OOM failure, but the problem is that the
2494 * file is only going to get bigger; hoping for a future non-OOM result is
2495 * risky and can easily lead to complete denial of service.
2496 */
2497 qbuffer = qtext_load_file(&qbuffer_size);
2498 if (qbuffer == NULL)
2499 goto gc_fail;
2500
2501 /*
2502 * We overwrite the query texts file in place, so as to reduce the risk of
2503 * an out-of-disk-space failure. Since the file is guaranteed not to get
2504 * larger, this should always work on traditional filesystems; though we
2505 * could still lose on copy-on-write filesystems.
2506 */
2508 if (qfile == NULL)
2509 {
2510 ereport(LOG,
2512 errmsg("could not write file \"%s\": %m",
2513 PGSS_TEXT_FILE)));
2514 goto gc_fail;
2515 }
2516
2517 extent = 0;
2518 nentries = 0;
2519
2520 hash_seq_init(&hash_seq, pgss_hash);
2521 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2522 {
2523 int query_len = entry->query_len;
2524 char *qry = qtext_fetch(entry->query_offset,
2525 query_len,
2526 qbuffer,
2527 qbuffer_size);
2528
2529 if (qry == NULL)
2530 {
2531 /* Trouble ... drop the text */
2532 entry->query_offset = 0;
2533 entry->query_len = -1;
2534 /* entry will not be counted in mean query length computation */
2535 continue;
2536 }
2537
2538 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2539 {
2540 ereport(LOG,
2542 errmsg("could not write file \"%s\": %m",
2543 PGSS_TEXT_FILE)));
2544 hash_seq_term(&hash_seq);
2545 goto gc_fail;
2546 }
2547
2548 entry->query_offset = extent;
2549 extent += query_len + 1;
2550 nentries++;
2551 }
2552
2553 /*
2554 * Truncate away any now-unused space. If this fails for some odd reason,
2555 * we log it, but there's no need to fail.
2556 */
2557 if (ftruncate(fileno(qfile), extent) != 0)
2558 ereport(LOG,
2560 errmsg("could not truncate file \"%s\": %m",
2561 PGSS_TEXT_FILE)));
2562
2563 if (FreeFile(qfile))
2564 {
2565 ereport(LOG,
2567 errmsg("could not write file \"%s\": %m",
2568 PGSS_TEXT_FILE)));
2569 qfile = NULL;
2570 goto gc_fail;
2571 }
2572
2573 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2574 pgss->extent, extent);
2575
2576 /* Reset the shared extent pointer */
2577 pgss->extent = extent;
2578
2579 /*
2580 * Also update the mean query length, to be sure that need_gc_qtexts()
2581 * won't still think we have a problem.
2582 */
2583 if (nentries > 0)
2584 pgss->mean_query_len = extent / nentries;
2585 else
2587
2588 free(qbuffer);
2589
2590 /*
2591 * OK, count a garbage collection cycle. (Note: even though we have
2592 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2593 * other processes may examine gc_count while holding only the mutex.
2594 * Also, we have to advance the count *after* we've rewritten the file,
2595 * else other processes might not realize they read a stale file.)
2596 */
2598
2599 return;
2600
2601gc_fail:
2602 /* clean up resources */
2603 if (qfile)
2604 FreeFile(qfile);
2605 free(qbuffer);
2606
2607 /*
2608 * Since the contents of the external file are now uncertain, mark all
2609 * hashtable entries as having invalid texts.
2610 */
2611 hash_seq_init(&hash_seq, pgss_hash);
2612 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2613 {
2614 entry->query_offset = 0;
2615 entry->query_len = -1;
2616 }
2617
2618 /*
2619 * Destroy the query text file and create a new, empty one
2620 */
2621 (void) unlink(PGSS_TEXT_FILE);
2623 if (qfile == NULL)
2624 ereport(LOG,
2626 errmsg("could not recreate file \"%s\": %m",
2627 PGSS_TEXT_FILE)));
2628 else
2629 FreeFile(qfile);
2630
2631 /* Reset the shared extent pointer */
2632 pgss->extent = 0;
2633
2634 /* Reset mean_query_len to match the new state */
2636
2637 /*
2638 * Bump the GC count even though we failed.
2639 *
2640 * This is needed to make concurrent readers of file without any lock on
2641 * pgss->lock notice existence of new version of file. Once readers
2642 * subsequently observe a change in GC count with pgss->lock held, that
2643 * forces a safe reopen of file. Writers also require that we bump here,
2644 * of course. (As required by locking protocol, readers and writers don't
2645 * trust earlier file contents until gc_count is found unchanged after
2646 * pgss->lock acquired in shared or exclusive mode respectively.)
2647 */
2649}
2650
/*
 * SINGLE_ENTRY_RESET
 *		Reset one hashtable entry on behalf of entry_reset().
 *
 * Does nothing if "e" is NULL.  Otherwise, when the caller's "minmax_only"
 * is true, only the entry's min/max times are zeroed and its
 * minmax_stats_since is set to the caller's "stats_reset"; when it is false,
 * the entry is removed from pgss_hash and the caller's "num_remove" counter
 * is incremented.
 *
 * The macro relies on the local variables minmax_only, stats_reset and
 * num_remove of the function it is expanded in.  It is wrapped in
 * do { ... } while (0) so that "SINGLE_ENTRY_RESET(x);" behaves as a single
 * statement in any context, including an unbraced if/else.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2669
2670/*
2671 * Reset entries corresponding to parameters passed.
2672 */
2673static TimestampTz
2674entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
2675{
2676 HASH_SEQ_STATUS hash_seq;
2677 pgssEntry *entry;
2678 FILE *qfile;
2679 long num_entries;
2680 long num_remove = 0;
2682 TimestampTz stats_reset;
2683
2684 if (!pgss || !pgss_hash)
2685 ereport(ERROR,
2686 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2687 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2688
2690 num_entries = hash_get_num_entries(pgss_hash);
2691
2692 stats_reset = GetCurrentTimestamp();
2693
2694 if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
2695 {
2696 /* If all the parameters are available, use the fast path. */
2697 memset(&key, 0, sizeof(pgssHashKey));
2698 key.userid = userid;
2699 key.dbid = dbid;
2700 key.queryid = queryid;
2701
2702 /*
2703 * Reset the entry if it exists, starting with the non-top-level
2704 * entry.
2705 */
2706 key.toplevel = false;
2707 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2708
2709 SINGLE_ENTRY_RESET(entry);
2710
2711 /* Also reset the top-level entry if it exists. */
2712 key.toplevel = true;
2713 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2714
2715 SINGLE_ENTRY_RESET(entry);
2716 }
2717 else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
2718 {
2719 /* Reset entries corresponding to valid parameters. */
2720 hash_seq_init(&hash_seq, pgss_hash);
2721 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2722 {
2723 if ((!userid || entry->key.userid == userid) &&
2724 (!dbid || entry->key.dbid == dbid) &&
2725 (!queryid || entry->key.queryid == queryid))
2726 {
2727 SINGLE_ENTRY_RESET(entry);
2728 }
2729 }
2730 }
2731 else
2732 {
2733 /* Reset all entries. */
2734 hash_seq_init(&hash_seq, pgss_hash);
2735 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2736 {
2737 SINGLE_ENTRY_RESET(entry);
2738 }
2739 }
2740
2741 /* All entries are removed? */
2742 if (num_entries != num_remove)
2743 goto release_lock;
2744
2745 /*
2746 * Reset global statistics for pg_stat_statements since all entries are
2747 * removed.
2748 */
2750 pgss->stats.dealloc = 0;
2751 pgss->stats.stats_reset = stats_reset;
2753
2754 /*
2755 * Write new empty query file, perhaps even creating a new one to recover
2756 * if the file was missing.
2757 */
2759 if (qfile == NULL)
2760 {
2761 ereport(LOG,
2763 errmsg("could not create file \"%s\": %m",
2764 PGSS_TEXT_FILE)));
2765 goto done;
2766 }
2767
2768 /* If ftruncate fails, log it, but it's not a fatal problem */
2769 if (ftruncate(fileno(qfile), 0) != 0)
2770 ereport(LOG,
2772 errmsg("could not truncate file \"%s\": %m",
2773 PGSS_TEXT_FILE)));
2774
2775 FreeFile(qfile);
2776
2777done:
2778 pgss->extent = 0;
2779 /* This counts as a query text garbage collection for our purposes */
2781
2782release_lock:
2784
2785 return stats_reset;
2786}
2787
2788/*
2789 * Generate a normalized version of the query string that will be used to
2790 * represent all similar queries.
2791 *
2792 * Note that the normalized representation may well vary depending on
2793 * just which "equivalent" query is used to create the hashtable entry.
2794 * We assume this is OK.
2795 *
2796 * If query_loc > 0, then "query" has been advanced by that much compared to
2797 * the original string start, so we need to translate the provided locations
2798 * to compensate. (This lets us avoid re-scanning statements before the one
2799 * of interest, so it's worth doing.)
2800 *
2801 * *query_len_p contains the input string length, and is updated with
2802 * the result string length on exit. The resulting string might be longer
2803 * or shorter depending on what happens with replacement of constants.
2804 *
2805 * Returns a palloc'd string.
2806 */
2807static char *
2808generate_normalized_query(JumbleState *jstate, const char *query,
2809 int query_loc, int *query_len_p)
2810{
2811 char *norm_query;
2812 int query_len = *query_len_p;
2813 int i,
2814 norm_query_buflen, /* Space allowed for norm_query */
2815 len_to_wrt, /* Length (in bytes) to write */
2816 quer_loc = 0, /* Source query byte location */
2817 n_quer_loc = 0, /* Normalized query byte location */
2818 last_off = 0, /* Offset from start for previous tok */
2819 last_tok_len = 0; /* Length (in bytes) of that tok */
2820 bool in_squashed = false; /* in a run of squashed consts? */
2821 int skipped_constants = 0; /* Position adjustment of later
2822 * constants after squashed ones */
2823
2824
2825 /*
2826 * Get constants' lengths (core system only gives us locations). Note
2827 * this also ensures the items are sorted by location.
2828 */
2829 fill_in_constant_lengths(jstate, query, query_loc);
2830
2831 /*
2832 * Allow for $n symbols to be longer than the constants they replace.
2833 * Constants must take at least one byte in text form, while a $n symbol
2834 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2835 * could refine that limit based on the max value of n for the current
2836 * query, but it hardly seems worth any extra effort to do so.
2837 *
2838 * Note this also gives enough room for the commented-out ", ..." list
2839 * syntax used by constant squashing.
2840 */
2841 norm_query_buflen = query_len + jstate->clocations_count * 10;
2842
2843 /* Allocate result buffer */
2844 norm_query = palloc(norm_query_buflen + 1);
2845
2846 for (i = 0; i < jstate->clocations_count; i++)
2847 {
2848 int off, /* Offset from start for cur tok */
2849 tok_len; /* Length (in bytes) of that tok */
2850
2851 off = jstate->clocations[i].location;
2852
2853 /* Adjust recorded location if we're dealing with partial string */
2854 off -= query_loc;
2855
2856 tok_len = jstate->clocations[i].length;
2857
2858 if (tok_len < 0)
2859 continue; /* ignore any duplicates */
2860
2861 /*
2862 * What to do next depends on whether we're squashing constant lists,
2863 * and whether we're already in a run of such constants.
2864 */
2865 if (!jstate->clocations[i].squashed)
2866 {
2867 /*
2868 * This location corresponds to a constant not to be squashed.
2869 * Print what comes before the constant ...
2870 */
2871 len_to_wrt = off - last_off;
2872 len_to_wrt -= last_tok_len;
2873
2874 Assert(len_to_wrt >= 0);
2875
2876 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2877 n_quer_loc += len_to_wrt;
2878
2879 /* ... and then a param symbol replacing the constant itself */
2880 n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
2881 i + 1 + jstate->highest_extern_param_id - skipped_constants);
2882
2883 /* In case previous constants were merged away, stop doing that */
2884 in_squashed = false;
2885 }
2886 else if (!in_squashed)
2887 {
2888 /*
2889 * This location is the start position of a run of constants to be
2890 * squashed, so we need to print the representation of starting a
2891 * group of stashed constants.
2892 *
2893 * Print what comes before the constant ...
2894 */
2895 len_to_wrt = off - last_off;
2896 len_to_wrt -= last_tok_len;
2897 Assert(len_to_wrt >= 0);
2898 Assert(i + 1 < jstate->clocations_count);
2899 Assert(jstate->clocations[i + 1].squashed);
2900 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2901 n_quer_loc += len_to_wrt;
2902
2903 /* ... and then start a run of squashed constants */
2904 n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
2905 i + 1 + jstate->highest_extern_param_id - skipped_constants);
2906
2907 /* The next location will match the block below, to end the run */
2908 in_squashed = true;
2909
2910 skipped_constants++;
2911 }
2912 else
2913 {
2914 /*
2915 * The second location of a run of squashable elements; this
2916 * indicates its end.
2917 */
2918 in_squashed = false;
2919 }
2920
2921 /* Otherwise the constant is squashed away -- move forward */
2922 quer_loc = off + tok_len;
2923 last_off = off;
2924 last_tok_len = tok_len;
2925 }
2926
2927 /*
2928 * We've copied up until the last ignorable constant. Copy over the
2929 * remaining bytes of the original query string.
2930 */
2931 len_to_wrt = query_len - quer_loc;
2932
2933 Assert(len_to_wrt >= 0);
2934 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2935 n_quer_loc += len_to_wrt;
2936
2937 Assert(n_quer_loc <= norm_query_buflen);
2938 norm_query[n_quer_loc] = '\0';
2939
2940 *query_len_p = n_quer_loc;
2941 return norm_query;
2942}
2943
2944/*
2945 * Given a valid SQL string and an array of constant-location records,
2946 * fill in the textual lengths of those constants.
2947 *
2948 * The constants may use any allowed constant syntax, such as float literals,
2949 * bit-strings, single-quoted strings and dollar-quoted strings. This is
2950 * accomplished by using the public API for the core scanner.
2951 *
2952 * It is the caller's job to ensure that the string is a valid SQL statement
2953 * with constants at the indicated locations. Since in practice the string
2954 * has already been parsed, and the locations that the caller provides will
2955 * have originated from within the authoritative parser, this should not be
2956 * a problem.
2957 *
2958 * Duplicate constant pointers are possible, and will have their lengths
2959 * marked as '-1', so that they are later ignored. (Actually, we assume the
2960 * lengths were initialized as -1 to start with, and don't change them here.)
2961 *
2962 * If query_loc > 0, then "query" has been advanced by that much compared to
2963 * the original string start, so we need to translate the provided locations
2964 * to compensate. (This lets us avoid re-scanning statements before the one
2965 * of interest, so it's worth doing.)
2966 *
2967 * N.B. There is an assumption that a '-' character at a Const location begins
2968 * a negative numeric constant. This precludes there ever being another
2969 * reason for a constant to start with a '-'.
2970 */
2971static void
2972fill_in_constant_lengths(JumbleState *jstate, const char *query,
2973 int query_loc)
2974{
2975 LocationLen *locs;
2976 core_yyscan_t yyscanner;
2978 core_YYSTYPE yylval;
2980 int last_loc = -1;
2981 int i;
2982
2983 /*
2984 * Sort the records by location so that we can process them in order while
2985 * scanning the query text.
2986 */
2987 if (jstate->clocations_count > 1)
2988 qsort(jstate->clocations, jstate->clocations_count,
2989 sizeof(LocationLen), comp_location);
2990 locs = jstate->clocations;
2991
2992 /* initialize the flex scanner --- should match raw_parser() */
2993 yyscanner = scanner_init(query,
2994 &yyextra,
2995 &ScanKeywords,
2997
2998 /* we don't want to re-emit any escape string warnings */
2999 yyextra.escape_string_warning = false;
3000
3001 /* Search for each constant, in sequence */
3002 for (i = 0; i < jstate->clocations_count; i++)
3003 {
3004 int loc = locs[i].location;
3005 int tok;
3006
3007 /* Adjust recorded location if we're dealing with partial string */
3008 loc -= query_loc;
3009
3010 Assert(loc >= 0);
3011
3012 if (loc <= last_loc)
3013 continue; /* Duplicate constant, ignore */
3014
3015 /* Lex tokens until we find the desired constant */
3016 for (;;)
3017 {
3018 tok = core_yylex(&yylval, &yylloc, yyscanner);
3019
3020 /* We should not hit end-of-string, but if we do, behave sanely */
3021 if (tok == 0)
3022 break; /* out of inner for-loop */
3023
3024 /*
3025 * We should find the token position exactly, but if we somehow
3026 * run past it, work with that.
3027 */
3028 if (yylloc >= loc)
3029 {
3030 if (query[loc] == '-')
3031 {
3032 /*
3033 * It's a negative value - this is the one and only case
3034 * where we replace more than a single token.
3035 *
3036 * Do not compensate for the core system's special-case
3037 * adjustment of location to that of the leading '-'
3038 * operator in the event of a negative constant. It is
3039 * also useful for our purposes to start from the minus
3040 * symbol. In this way, queries like "select * from foo
3041 * where bar = 1" and "select * from foo where bar = -2"
3042 * will have identical normalized query strings.
3043 */
3044 tok = core_yylex(&yylval, &yylloc, yyscanner);
3045 if (tok == 0)
3046 break; /* out of inner for-loop */
3047 }
3048
3049 /*
3050 * We now rely on the assumption that flex has placed a zero
3051 * byte after the text of the current token in scanbuf.
3052 */
3053 locs[i].length = strlen(yyextra.scanbuf + loc);
3054 break; /* out of inner for-loop */
3055 }
3056 }
3057
3058 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
3059 if (tok == 0)
3060 break;
3061
3062 last_loc = loc;
3063 }
3064
3065 scanner_finish(yyscanner);
3066}
3067
3068/*
3069 * comp_location: comparator for qsorting LocationLen structs by location
3070 */
3071static int
3072comp_location(const void *a, const void *b)
3073{
3074 int l = ((const LocationLen *) a)->location;
3075 int r = ((const LocationLen *) b)->location;
3076
3077 return pg_cmp_s32(l, r);
3078}
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:5268
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:637
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define CStringGetTextDatum(s)
Definition: builtins.h:97
#define Min(x, y)
Definition: c.h:975
#define PG_BINARY_R
Definition: c.h:1246
#define MAXALIGN(LEN)
Definition: c.h:782
#define Max(x, y)
Definition: c.h:969
int64_t int64
Definition: c.h:499
#define PG_BINARY
Definition: c.h:1244
#define UINT64_FORMAT
Definition: c.h:521
int32_t int32
Definition: c.h:498
uint64_t uint64
Definition: c.h:503
#define unlikely(x)
Definition: c.h:347
uint32_t uint32
Definition: c.h:502
#define PG_BINARY_W
Definition: c.h:1247
#define UINT64CONST(x)
Definition: c.h:517
size_t Size
Definition: c.h:576
enc
int64 TimestampTz
Definition: timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:956
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1421
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1515
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1342
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:784
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1386
int errcode_for_file_access(void)
Definition: elog.c:877
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define LOG
Definition: elog.h:31
#define PG_TRY(...)
Definition: elog.h:371
#define PG_END_TRY(...)
Definition: elog.h:396
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_FINALLY(...)
Definition: elog.h:388
#define ereport(elevel,...)
Definition: elog.h:149
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:71
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:70
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:68
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:142
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:69
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition: execMain.c:308
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:476
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:416
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:85
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition: executor.h:79
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:75
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:89
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int CloseTransientFile(int fd)
Definition: fd.c:2871
int FreeFile(FILE *file)
Definition: fd.c:2843
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2644
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:686
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
bool IsUnderPostmaster
Definition: globals.c:120
Oid MyDatabaseId
Definition: globals.c:94
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5244
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5133
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5280
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5159
@ PGC_SUSET
Definition: guc.h:78
@ PGC_POSTMASTER
Definition: guc.h:74
@ PGC_SIGHUP
Definition: guc.h:75
Assert(PointerIsAligned(start, uint64))
return str start
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:191
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:287
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
@ INSTRUMENT_ALL
Definition: instrument.h:66
static int pg_cmp_s32(int32 a, int32 b)
Definition: int.h:646
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:58
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:74
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int i
Definition: isn.c:77
PGDLLIMPORT const ScanKeywordList ScanKeywords
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1180
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:585
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:682
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1900
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:677
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1548
void pfree(void *pointer)
Definition: mcxt.c:1528
void * palloc(Size size)
Definition: mcxt.c:1321
#define MaxAllocHugeSize
Definition: memutils.h:45
void(* shmem_request_hook_type)(void)
Definition: miscadmin.h:532
Oid GetUserId(void)
Definition: miscinit.c:520
shmem_request_hook_type shmem_request_hook
Definition: miscinit.c:1840
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1837
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:59
void * arg
const void size_t len
int32 encoding
Definition: pg_database.h:41
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static ExecutorRun_hook_type prev_ExecutorRun
struct pgssSharedState pgssSharedState
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
void _PG_init(void)
static void gc_qtexts(void)
static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
struct Counters Counters
PG_MODULE_MAGIC_EXT(.name="pg_stat_statements",.version=PG_VERSION)
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
PGSSTrackLevel
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void pgss_shmem_request(void)
static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
pgssStoreKind
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
#define PGSS_NUMKIND
static bool pgss_save
static void pgss_shmem_startup(void)
static int nesting_level
struct pgssGlobalStats pgssGlobalStats
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
struct pgssEntry pgssEntry
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
struct pgssHashKey pgssHashKey
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static char * buf
Definition: pg_test_fsync.c:72
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281
static int duration
Definition: pgbench.c:175
planner_hook_type planner_hook
Definition: planner.c:73
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.c:302
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams)
Definition: planner.h:26
#define sprintf
Definition: port.h:241
#define pg_pwrite
Definition: port.h:227
#define snprintf
Definition: port.h:239
#define qsort(a, b, c, d)
Definition: port.h:479
#define Int64GetDatumFast(X)
Definition: postgres.h:559
uintptr_t Datum
Definition: postgres.h:69
#define Float8GetDatumFast(X)
Definition: postgres.h:561
static Datum BoolGetDatum(bool X)
Definition: postgres.h:107
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:355
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:217
unsigned int Oid
Definition: postgres_ext.h:30
static int fd(const char *x, int i)
Definition: preproc-init.c:105
const char * YYLTYPE
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:717
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
Definition: scan.l:1249
#define yylloc
Definition: scan.l:1122
void scanner_finish(core_yyscan_t yyscanner)
Definition: scan.l:1291
#define yyextra
Definition: scan.l:1118
const uint16 ScanKeywordTokens[]
Definition: scan.l:81
void * core_yyscan_t
Definition: scanner.h:121
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
ScanDirection
Definition: sdir.h:25
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:332
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
static void error(void)
Definition: sql-dyntest.c:147
instr_time local_blk_read_time
Definition: instrument.h:38
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
instr_time temp_blk_write_time
Definition: instrument.h:41
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
int64 local_blks_written
Definition: instrument.h:33
instr_time temp_blk_read_time
Definition: instrument.h:40
instr_time local_blk_write_time
Definition: instrument.h:39
int64 temp_blks_read
Definition: instrument.h:34
int64 shared_blks_read
Definition: instrument.h:27
int64 shared_blks_written
Definition: instrument.h:29
int64 temp_blks_written
Definition: instrument.h:35
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 wal_buffers_full
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
int64 temp_blks_read
double min_time[PGSS_NUMKIND]
int64 local_blks_written
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
int64 jit_deform_count
double jit_emission_time
int64 shared_blks_hit
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 local_blks_dirtied
int64 jit_inlining_count
int64 shared_blks_read
int64 local_blks_hit
double jit_deform_time
int64 parallel_workers_to_launch
int64 local_blks_read
double shared_blk_read_time
double jit_inlining_time
int es_parallel_workers_to_launch
Definition: execnodes.h:742
struct JitContext * es_jit
Definition: execnodes.h:760
uint64 es_total_processed
Definition: execnodes.h:712
MemoryContext es_query_cxt
Definition: execnodes.h:706
int es_parallel_workers_launched
Definition: execnodes.h:744
fmNodePtr resultinfo
Definition: fmgr.h:89
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:220
WalUsage walusage
Definition: instrument.h:93
BufferUsage bufusage
Definition: instrument.h:92
JitInstrumentation instr
Definition: jit.h:62
instr_time generation_counter
Definition: jit.h:33
size_t created_functions
Definition: jit.h:30
instr_time optimization_counter
Definition: jit.h:42
instr_time deform_counter
Definition: jit.h:36
instr_time emission_counter
Definition: jit.h:45
instr_time inlining_counter
Definition: jit.h:39
int highest_extern_param_id
Definition: queryjumble.h:56
LocationLen * clocations
Definition: queryjumble.h:47
int clocations_count
Definition: queryjumble.h:53
Definition: lwlock.h:42
bool squashed
Definition: queryjumble.h:31
Definition: nodes.h:135
const char * p_sourcetext
Definition: parse_node.h:209
ParseLoc stmt_len
Definition: plannodes.h:138
ParseLoc stmt_location
Definition: plannodes.h:136
Node * utilityStmt
Definition: plannodes.h:132
uint64 queryId
Definition: plannodes.h:56
uint64 nprocessed
Definition: cmdtag.h:32
CommandTag commandTag
Definition: cmdtag.h:31
const char * sourceText
Definition: execdesc.h:38
EState * estate
Definition: execdesc.h:48
PlannedStmt * plannedstmt
Definition: execdesc.h:37
struct Instrumentation * totaltime
Definition: execdesc.h:55
Node * utilityStmt
Definition: parsenodes.h:136
ParseLoc stmt_location
Definition: parsenodes.h:249
TupleDesc setDesc
Definition: execnodes.h:358
Tuplestorestate * setResult
Definition: execnodes.h:357
int64 wal_buffers_full
Definition: instrument.h:56
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
Definition: guc.h:174
Counters counters
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
TimestampTz stats_reset
pgssGlobalStats stats
__int64 st_size
Definition: win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:543
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:70
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
ProcessUtilityContext
Definition: utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition: timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition: timestamp.h:68
const char * name
#define fstat
Definition: win32_port.h:273