summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Haas2023-06-23 16:23:49 +0000
committerRobert Haas2023-06-23 16:23:49 +0000
commit5b7396f887a52a8ea7888fa0dd0f884cc5eda76d (patch)
tree53afd09a25e74ddabd035bf351ecc740fdf23de0
parentf5c446e3367527f9db1506d7c38d2f56e20950b6 (diff)
pgfruit: Demonstration of how to synchronize table contents to memory.
Lightly tested, but probably has bugs.
-rw-r--r--contrib/pgfruit/Makefile21
-rw-r--r--contrib/pgfruit/pgfruit--1.0.sql35
-rw-r--r--contrib/pgfruit/pgfruit.c847
-rw-r--r--contrib/pgfruit/pgfruit.control6
4 files changed, 909 insertions, 0 deletions
diff --git a/contrib/pgfruit/Makefile b/contrib/pgfruit/Makefile
new file mode 100644
index 0000000000..5d7843a9c3
--- /dev/null
+++ b/contrib/pgfruit/Makefile
@@ -0,0 +1,21 @@
+# contrib/pgfruit/Makefile
+
+MODULE_big = pgfruit
+OBJS = \
+	$(WIN32RES) \
+	pgfruit.o
+
+EXTENSION = pgfruit
+DATA = pgfruit--1.0.sql
+PGFILEDESC = "pgfruit - track fruit properties in memory"
+
+# Out-of-tree builds go through PGXS. In-tree builds use the contrib
+# infrastructure, for which subdir must name this module's own directory.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pgfruit
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pgfruit/pgfruit--1.0.sql b/contrib/pgfruit/pgfruit--1.0.sql
new file mode 100644
index 0000000000..c8fa3d74cc
--- /dev/null
+++ b/contrib/pgfruit/pgfruit--1.0.sql
@@ -0,0 +1,35 @@
+/* contrib/pgfruit/pgfruit--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pgfruit" to load this file. \quit
+
+-- Register the functions.
+
+-- Ask the background worker to reread the row with the given id.
+CREATE FUNCTION pgfruit_poke(int4)
+RETURNS void
+AS 'MODULE_PATHNAME', 'pgfruit_poke'
+LANGUAGE C PARALLEL SAFE;
+
+-- Row-level AFTER trigger that tells the background worker which ids changed.
+CREATE FUNCTION pgfruit_trigger()
+RETURNS trigger
+AS 'MODULE_PATHNAME', 'pgfruit_trigger'
+LANGUAGE C PARALLEL SAFE;
+
+-- Return the current contents of the in-memory hash table.
+CREATE FUNCTION pgfruit_contents(OUT id int4,
+	OUT name text,
+	OUT color text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pgfruit_contents'
+LANGUAGE C PARALLEL SAFE;
+
+-- Create the table.
+CREATE TABLE fruit (
+	id int4 primary key,
+	name text,
+	color text
+);
+
+-- And put a trigger on it.
+CREATE TRIGGER fruit_trigger
+	AFTER INSERT OR UPDATE OR DELETE ON fruit
+	FOR EACH ROW EXECUTE FUNCTION pgfruit_trigger();
diff --git a/contrib/pgfruit/pgfruit.c b/contrib/pgfruit/pgfruit.c
new file mode 100644
index 0000000000..9a47611c66
--- /dev/null
+++ b/contrib/pgfruit/pgfruit.c
@@ -0,0 +1,847 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgfruit.c
+ * Track fruit properties in memory.
+ *
+ * This code is a proof of concept to demonstrate a technique that can be
+ * used to sync a hash table in shared memory with the contents of a
+ * specific database table. It has not been thoroughly tested or debugged
+ * but seems to work in casual testing. I suspect that it may leak memory.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/relation.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "catalog/pg_am_d.h"
+#include "catalog/pg_type.h"
+#include "commands/trigger.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "postmaster/bgworker.h"
+#include "postmaster/interrupt.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lmgr.h"
+#include "storage/lwlock.h"
+#include "storage/procsignal.h"
+#include "storage/shmem.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/wait_event.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(pgfruit_contents);
+PG_FUNCTION_INFO_V1(pgfruit_poke);
+PG_FUNCTION_INFO_V1(pgfruit_trigger);
+
+/*
+ * This code is just for demonstration purposes, so I've chosen small values
+ * for these constants to make it easier to test out what happens if we
+ * exceed them. For production use, larger values would likely be appropriate.
+ *
+ * MAX_FRUIT_LENGTH and MAX_COLOR_LENGTH are the sizes of the char arrays in
+ * fruit_entry; MAX_HINTS is the size of the hinted_ids array in fruit_data.
+ */
+#define MAX_FRUIT_LENGTH 32
+#define MAX_COLOR_LENGTH 32
+#define MAX_HINTS 4
+
+/*
+ * The details we need to remember for one fruit.
+ *
+ * This is the entry type of the shared hash table (pgfruit_hash); the table
+ * is created with keysize = sizeof(int) and is searched by fruit id.
+ */
+typedef struct
+{
+ /* fruit id; this is the hash key */
+ int id;
+ /* fruit name, filled in with strlcpy(..., MAX_FRUIT_LENGTH) */
+ char name[MAX_FRUIT_LENGTH];
+ /* fruit color, filled in with strlcpy(..., MAX_COLOR_LENGTH) */
+ char color[MAX_COLOR_LENGTH];
+ /*
+  * Used by the full-table reread in pgfruit_reread(): cleared on every entry
+  * before the scan, set on each entry the scan stores, and entries still
+  * unmarked afterwards are removed.
+  */
+ bool marked;
+} fruit_entry;
+
+/*
+ * Shared-memory data structure to facilitate incremental rereading of fruit
+ * data.
+ *
+ * Backends that modify the table append ids to hinted_ids; the background
+ * worker copies the hints out and resets nhints to 0 on each pass through
+ * its main loop.
+ */
+typedef struct
+{
+ /* LWLock taken around accesses to this struct and to pgfruit_hash */
+ LWLock *lock;
+ /* PID advertised by the background worker, or InvalidPid if none */
+ pid_t bgworker_pid;
+ /*
+  * < 0: reread the whole table; 0: nothing to do; > 0: number of valid
+  * entries in hinted_ids.
+  */
+ int nhints;
+ /* ids of the rows that should be reread */
+ int hinted_ids[MAX_HINTS];
+} fruit_data;
+
+/* Background worker entry point; _PG_init stores its name in bgw_function_name. */
+extern PGDLLEXPORT void pgfruit_main(Datum main_arg);
+
+/* Hook values that were installed before ours, saved by _PG_init. */
+static shmem_request_hook_type prev_shmem_request_hook = NULL;
+static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+/* Value of the pgfruit.max GUC: size of the shared hash table. */
+static int pgfruit_max = 100;
+/* Value of the pgfruit.db GUC: database the background worker connects to. */
+static char *pgfruit_db = "";
+/* Shared hash table and control struct, set up by pgfruit_shmem_startup(). */
+static HTAB *pgfruit_hash = NULL;
+static fruit_data *pgfruit_data = NULL;
+
+/* Prototypes for functions local to this file. */
+static int pgfruit_find_column_by_name(TupleDesc td, char *name, Oid type_oid);
+static Relation pgfruit_open_relevant_index(Relation heapRel, int i_id);
+static void pgfruit_reread(int nhints, int *hinted_ids);
+static void pgfruit_shmem_exit(int code, Datum arg);
+static void pgfruit_shmem_request(void);
+static void pgfruit_shmem_startup(void);
+
+/*
+ * Load-time initialization.
+ *
+ * Defines the pgfruit.max and pgfruit.db GUCs, installs the shared memory
+ * request and startup hooks, and registers the background worker unless
+ * pgfruit.db is empty.
+ *
+ * All of this depends on running while shared_preload_libraries is being
+ * processed: both GUCs are PGC_POSTMASTER, and the shared memory hooks are
+ * of no use once the server is up. Refuse any other attempt to load the
+ * library right away, before anything has been defined or hooked, so that
+ * the user gets a message that says what to do.
+ */
+void
+_PG_init(void)
+{
+	if (!process_shared_preload_libraries_in_progress)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("pgfruit must be loaded via shared_preload_libraries")));
+
+	/*
+	 * GUC variable to set size of shared hash table
+	 *
+	 * If this hash table were backend-local, we could permit it to grow
+	 * arbitrarily large, but as we've chosen to store it in shared memory it
+	 * must have a maximum size, so we need a GUC to control that.
+	 */
+	DefineCustomIntVariable("pgfruit.max",
+							"Sets the maximum number of fruits.",
+							NULL,
+							&pgfruit_max,
+							100,
+							1,
+							INT_MAX,
+							PGC_POSTMASTER,
+							0,
+							NULL,
+							NULL,
+							NULL);
+
+	/*
+	 * GUC variable to set the database to which the background worker should
+	 * connect to get fruit information.
+	 */
+	DefineCustomStringVariable("pgfruit.db",
+							   "Database containing public.fruit table.",
+							   NULL,
+							   &pgfruit_db,
+							   "",
+							   PGC_POSTMASTER,
+							   0,
+							   NULL,
+							   NULL,
+							   NULL);
+
+	/* Install shared memory request and startup hooks. */
+	prev_shmem_request_hook = shmem_request_hook;
+	shmem_request_hook = pgfruit_shmem_request;
+	prev_shmem_startup_hook = shmem_startup_hook;
+	shmem_startup_hook = pgfruit_shmem_startup;
+
+	/*
+	 * Register the background worker unless the database name is empty;
+	 * interpret that case as a request to disable the worker.
+	 */
+	if (pgfruit_db[0] != '\0')
+	{
+		BackgroundWorker worker;
+
+		memset(&worker, 0, sizeof(BackgroundWorker));
+		strlcpy(worker.bgw_name, "pgfruit", sizeof(worker.bgw_name));
+		strlcpy(worker.bgw_type, "pgfruit", sizeof(worker.bgw_type));
+		worker.bgw_flags = BGWORKER_SHMEM_ACCESS
+			| BGWORKER_BACKEND_DATABASE_CONNECTION;
+		worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
+		/* If the worker exits, have it restarted after 10 seconds. */
+		worker.bgw_restart_time = 10;
+		strlcpy(worker.bgw_library_name, "pgfruit",
+				sizeof(worker.bgw_library_name));
+		strlcpy(worker.bgw_function_name, "pgfruit_main",
+				sizeof(worker.bgw_function_name));
+		RegisterBackgroundWorker(&worker);
+	}
+}
+
+/*
+ * Request shared memory.
+ *
+ * Reserves room for the hash table (pgfruit_max entries) and for the
+ * fruit_data control struct, plus one LWLock in a tranche named "pgfruit".
+ */
+static void
+pgfruit_shmem_request(void)
+{
+	Size		sz;
+
+	/*
+	 * _PG_init saved whatever hook was installed before ours; call it so
+	 * that modules loaded earlier still get to make their own requests.
+	 */
+	if (prev_shmem_request_hook)
+		prev_shmem_request_hook();
+
+	sz = hash_estimate_size(pgfruit_max, sizeof(fruit_entry));
+	sz = add_size(sz, MAXALIGN(sizeof(fruit_data)));
+
+	RequestAddinShmemSpace(sz);
+	RequestNamedLWLockTranche("pgfruit", 1);
+}
+
+/*
+ * Shared memory startup hook: create or attach to the "pgfruit hash" table
+ * and the "pgfruit data" control struct, initializing the latter if this is
+ * the first process to find it.
+ */
+static void
+pgfruit_shmem_startup(void)
+{
+	HASHCTL		hash_ctl;
+	bool		already_initialized;
+
+	/* Give the previously installed hook its turn first. */
+	if (prev_shmem_startup_hook != NULL)
+		prev_shmem_startup_hook();
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+
+	/* Entries are fruit_entry structs keyed by their leading int id. */
+	hash_ctl.keysize = sizeof(int);
+	hash_ctl.entrysize = sizeof(fruit_entry);
+	pgfruit_hash = ShmemInitHash("pgfruit hash",
+								 pgfruit_max, pgfruit_max,
+								 &hash_ctl,
+								 HASH_ELEM | HASH_BLOBS);
+
+	pgfruit_data = ShmemInitStruct("pgfruit data", sizeof(fruit_data),
+								   &already_initialized);
+	if (!already_initialized)
+	{
+		/* nhints = -1 asks the worker for a full reread on its first pass. */
+		pgfruit_data->lock = &(GetNamedLWLockTranche("pgfruit"))->lock;
+		pgfruit_data->bgworker_pid = InvalidPid;
+		pgfruit_data->nhints = -1;
+	}
+
+	LWLockRelease(AddinShmemInitLock);
+}
+
+/*
+ * Entry point for background worker.
+ *
+ * Connects to the database named by pgfruit.db, advertises our PID in shared
+ * memory, and then loops until shutdown is requested: pick up any pending
+ * hints, reread the corresponding data, and sleep until woken up or until a
+ * minute has passed.
+ *
+ * main_arg is unused.
+ */
+void
+pgfruit_main(Datum main_arg)
+{
+	bool		already_running;
+
+	/* Establish signal handlers; once that's done, unblock signals. */
+	pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
+	pqsignal(SIGHUP, SignalHandlerForConfigReload);
+	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+	BackgroundWorkerUnblockSignals();
+
+	/*
+	 * Connect to the target database.
+	 *
+	 * Since pgfruit.db is PGC_POSTMASTER, we don't have to worry about it
+	 * changing. If we wanted to allow it to be changed at runtime, then we'd
+	 * probably want to just exit if it changed, so that the postmaster would
+	 * relaunch us and we'd reconnect to the new DB.
+	 */
+	BackgroundWorkerInitializeConnection(pgfruit_db, NULL, 0);
+
+	/*
+	 * Advertise our PID in shared memory, and arrange to de-advertise it
+	 * when we exit.
+	 *
+	 * The exit callback is registered only after we have actually claimed
+	 * the PID slot. pgfruit_shmem_exit() resets the shared state, so if it
+	 * were registered before the check below, a worker that is refused here
+	 * would wipe out the state of the worker that is legitimately running.
+	 * For the same reason the error is raised only after the lock has been
+	 * released: the callback takes that lock itself.
+	 */
+	LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+	already_running = (pgfruit_data->bgworker_pid != InvalidPid);
+	if (!already_running)
+		pgfruit_data->bgworker_pid = MyProcPid;
+	LWLockRelease(pgfruit_data->lock);
+
+	if (already_running)
+		ereport(FATAL,
+				errmsg("pgfruit background worker is already running"));
+
+	on_shmem_exit(pgfruit_shmem_exit, 0);
+
+	/* Main loop. */
+	while (!ShutdownRequestPending)
+	{
+		int			nhints;
+		int			hinted_ids[MAX_HINTS];
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* In case of a SIGHUP, just reload the configuration. */
+		if (ConfigReloadPending)
+		{
+			ConfigReloadPending = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Transfer hints from shared memory into our memory space. */
+		LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+		nhints = pgfruit_data->nhints;
+		if (nhints > 0)
+		{
+			Assert(nhints <= MAX_HINTS);
+			memcpy(hinted_ids, pgfruit_data->hinted_ids,
+				   nhints * sizeof(int));
+		}
+		pgfruit_data->nhints = 0;
+		LWLockRelease(pgfruit_data->lock);
+
+		/*
+		 * If nhints < 0, that means that we've been told to reread all the
+		 * data. If nhints > 0, we've been told to reread the data for a
+		 * specific list of fruit IDs. If nhints == 0, we haven't been told
+		 * to do anything at all.
+		 */
+		if (nhints != 0)
+			pgfruit_reread(nhints, hinted_ids);
+
+		/* Sleep until woken up, or for a full minute. */
+		(void) WaitLatch(MyLatch,
+						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+						 60000,
+						 PG_WAIT_EXTENSION);
+		ResetLatch(MyLatch);
+	}
+}
+
+/*
+ * Look up a column by name in a tuple descriptor and verify its type.
+ *
+ * Returns the attribute number (tuple descriptor offset plus one) of the
+ * first attribute called "name". Raises an error if there is no such
+ * attribute, or if its type OID is not type_oid.
+ */
+static int
+pgfruit_find_column_by_name(TupleDesc td, char *name, Oid type_oid)
+{
+	int			attoff = 0;
+
+	/* Advance to the first attribute carrying the requested name. */
+	while (attoff < td->natts &&
+		   strcmp(NameStr(TupleDescAttr(td, attoff)->attname), name) != 0)
+		attoff++;
+
+	if (attoff >= td->natts)
+		ereport(ERROR,
+				(errmsg("column \"public.fruit.%s\" does not exist", name)));
+
+	if (TupleDescAttr(td, attoff)->atttypid != type_oid)
+	{
+		HeapTuple	typtup;
+
+		/* Fetch the expected type's name so the message can mention it. */
+		typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid));
+		if (!HeapTupleIsValid(typtup))
+			elog(ERROR, "cache lookup failed for type %u", type_oid);
+
+		ereport(ERROR,
+				(errmsg("column \"public.fruit.%s\" should have type \"%s\"",
+						name,
+						NameStr(((Form_pg_type) GETSTRUCT(typtup))->typname))));
+	}
+
+	/* Attribute numbers are one-based; descriptor offsets are zero-based. */
+	return attoff + 1;
+}
+
+/*
+ * Find the usable index on the relation's "id" column, if there is one.
+ *
+ * Use the value from pgfruit_find_column_by_name(..., "id", ...) for i_id.
+ *
+ * On success the index is returned opened with AccessShareLock; the caller
+ * must close it. Returns NULL if no suitable index is identified.
+ */
+static Relation
+pgfruit_open_relevant_index(Relation heapRel, int i_id)
+{
+	List	   *indexoidlist = RelationGetIndexList(heapRel);
+	Relation	result = NULL;
+	ListCell   *lc;
+
+	foreach(lc, indexoidlist)
+	{
+		Oid			indexoid = lfirst_oid(lc);
+		Relation	indexRel;
+		Form_pg_index index;
+		bool		ok;
+
+		indexRel = index_open(indexoid, AccessShareLock);
+		index = indexRel->rd_index;
+
+		/*
+		 * See get_relation_info() for the motivation behind these first few
+		 * tests. The index must be valid and, if created concurrently, must
+		 * be old enough that none of its HOT chains will appear broken to
+		 * us.
+		 *
+		 * We're looking for a single-column btree index on the id column.
+		 * It must not be a partial index: the caller treats "no index entry
+		 * for this id" as "the row no longer exists", which would be wrong
+		 * for any row that merely fails the index predicate.
+		 */
+		if (!index->indisvalid)
+			ok = false;
+		else if (index->indcheckxmin &&
+				 !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRel->rd_indextuple->t_data), TransactionXmin))
+			ok = false;
+		else if (indexRel->rd_rel->relam != BTREE_AM_OID)
+			ok = false;
+		else if (index->indnatts != 1)
+			ok = false;
+		else if (index->indkey.values[0] != i_id)
+			ok = false;
+		else if (RelationGetIndexPredicate(indexRel) != NIL)
+			ok = false;
+		else
+			ok = true;
+
+		if (ok)
+		{
+			result = indexRel;
+			break;
+		}
+
+		index_close(indexRel, NoLock);
+	}
+
+	/* RelationGetIndexList() hands back a copy of the list; release it. */
+	list_free(indexoidlist);
+
+	return result;
+}
+
+/*
+ * Fetch attribute number i from a slot, insisting that it is not null.
+ *
+ * i should be a value returned by pgfruit_find_column_by_name; name is the
+ * column name and is used only in the error message raised when the
+ * attribute turns out to be null.
+ */
+static Datum
+pgfruit_get_datum(TupleTableSlot *slot, int i, char *name)
+{
+	bool		attr_is_null;
+	Datum		result = slot_getattr(slot, i, &attr_is_null);
+
+	if (!attr_is_null)
+		return result;
+
+	ereport(ERROR,
+			errmsg("\"public.fruit.%s\" should not be null", name));
+
+	return (Datum) 0;			/* not reached; keeps the compiler quiet */
+}
+
+/*
+ * Copy the fruit held in "slot" into the shared hash table, creating the
+ * entry if needed, and set its "marked" flag.
+ *
+ * i_id, i_name and i_color are the attribute numbers returned by
+ * pgfruit_find_column_by_name.
+ *
+ * Throw an error here if a fruit or color name is too long.
+ *
+ * In some ways, this is unfriendly behavior, because if we throw an error
+ * here, then the table doesn't get updated at all. We could fix this by (1)
+ * not throwing an error here and just accepting truncation below, or (2)
+ * moving the hash table into backend-private memory and replacing the char[]
+ * members with char * members that could point to a string of arbitrary
+ * length, or (3) requiring the table columns to have VARCHAR type instead of
+ * text and create a script that creates them as VARCHAR(n).
+ *
+ * Note that if the hash table fills up, hash_search will error out, with
+ * similarly unfriendly results. Again, this wouldn't be a problem if we
+ * chose to store the data in backend-private memory, but it has no real
+ * solution in shared memory. In some contexts other coping strategies might
+ * be possible (e.g. dropping less important or older entries).
+ */
+static void
+pgfruit_store_tuple(TupleTableSlot *slot, int i_id, int i_name, int i_color)
+{
+	int32		id;
+	char	   *name;
+	char	   *color;
+	fruit_entry *fe;
+
+	/* Extract column values. */
+	id = DatumGetInt32(pgfruit_get_datum(slot, i_id, "id"));
+	name = TextDatumGetCString(pgfruit_get_datum(slot, i_name, "name"));
+	color = TextDatumGetCString(pgfruit_get_datum(slot, i_color, "color"));
+
+	/*
+	 * The arrays in fruit_entry must also hold the terminating NUL, so the
+	 * longest string that fits without truncation is one byte shorter than
+	 * the array.
+	 */
+	if (strlen(name) >= MAX_FRUIT_LENGTH)
+		ereport(ERROR,
+				errmsg("fruit name for id %d is too long", id));
+	if (strlen(color) >= MAX_COLOR_LENGTH)
+		ereport(ERROR,
+				errmsg("color name for id %d is too long", id));
+
+	LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+	fe = hash_search(pgfruit_hash, &id, HASH_ENTER, NULL);
+	strlcpy(fe->name, name, MAX_FRUIT_LENGTH);
+	strlcpy(fe->color, color, MAX_COLOR_LENGTH);
+	fe->marked = true;
+	LWLockRelease(pgfruit_data->lock);
+
+	/* Don't let per-row strings pile up over the course of a long scan. */
+	pfree(name);
+	pfree(color);
+}
+
+/*
+ * Reread data from the table.
+ *
+ * If nhints < 0, then we need to reread the entire table. This always happens
+ * at startup, and can happen later if the shared memory area that stores hints
+ * about which rows to reread overflows.
+ *
+ * If nhints > 0, then we only need to reread the specific rows that are
+ * listed in hinted_ids.
+ */
+static void
+pgfruit_reread(int nhints, int *hinted_ids)
+{
+	Relation	rel;
+	Relation	indexRel = NULL;
+	Snapshot	snapshot;
+	TupleTableSlot *slot;
+	int			i_id;
+	int			i_name;
+	int			i_color;
+
+	/* We need a transaction to read a database table. */
+	StartTransactionCommand();
+
+	/* Open the relation and find the columns that we care about. */
+	rel = relation_openrv(makeRangeVar("public", "fruit", -1),
+						  AccessShareLock);
+	i_id = pgfruit_find_column_by_name(rel->rd_att, "id", INT4OID);
+	i_name = pgfruit_find_column_by_name(rel->rd_att, "name", TEXTOID);
+	i_color = pgfruit_find_column_by_name(rel->rd_att, "color", TEXTOID);
+
+	/* Create a slot to store the tuples we fetch */
+	slot = table_slot_create(rel, NULL);
+
+	/*
+	 * Using a trigger to notify the background worker that something needs
+	 * to be reread has a fundamental race condition: the trigger fires
+	 * before the commit actually executes, meaning that any modifications
+	 * are not yet visible; but we need to reread the data after commit, so
+	 * that we actually see the modifications and reflect them in our hash
+	 * table.
+	 *
+	 * We could narrow the race condition by using a constraint trigger that
+	 * is INITIALLY DEFERRED, but there would still be a small window of time
+	 * after the trigger has fired and before the transaction has
+	 * definitively committed.
+	 *
+	 * To avoid this problem, we have the process that sends us a hint first
+	 * acquire a heavyweight lock, and we here acquire that same lock in a
+	 * conflicting mode. That way, we can be certain that once we acquire
+	 * this lock, all processes that have sent us hints have also committed
+	 * or aborted prior to our reading the data.
+	 *
+	 * Note that it's sufficient here to acquire and release the lock. We
+	 * don't care if new processes insert hints after this point -- we only
+	 * care that the ones that inserted the hints we're now processing are
+	 * gone.
+	 *
+	 * Note that the lock taken by LockDatabaseObject(RelationRelationId,
+	 * ...) does not conflict with the lock that would have been taken by
+	 * LockRelation(...), but in this case, that's intentional. We need a way
+	 * to coordinate between processes that are updating this relation and
+	 * the background worker without actually blocking access.
+	 */
+	LockDatabaseObject(RelationRelationId, RelationGetRelid(rel),
+					   0, AccessExclusiveLock);
+	UnlockDatabaseObject(RelationRelationId, RelationGetRelid(rel),
+						 0, AccessExclusiveLock);
+
+	/* We also need a snapshot. */
+	snapshot = RegisterSnapshot(GetLatestSnapshot());
+
+	/*
+	 * If nhints < 0, we're supposed to reread the whole relation. Otherwise,
+	 * if a relevant index exists, we can use that to reread just certain
+	 * particular rows. If there isn't a relevant index, we can fall back to
+	 * rescanning the whole relation.
+	 */
+	if (nhints > 0)
+		indexRel = pgfruit_open_relevant_index(rel, i_id);
+
+	if (indexRel != NULL)
+	{
+		IndexScanDesc scan;
+		int			i;
+
+		/* Prepare to repeatedly scan the index on the id column */
+		scan = index_beginscan(rel, indexRel, snapshot, 1, 0);
+
+		for (i = 0; i < nhints; ++i)
+		{
+			ScanKeyData skey;
+			int32		id = hinted_ids[i];
+
+			/*
+			 * The attribute number in an index scan key identifies a column
+			 * of the index, not of the table. The index we were given has
+			 * exactly one column, so that is always column 1, regardless of
+			 * where "id" sits within the table.
+			 */
+			ScanKeyInit(&skey, 1, BTEqualStrategyNumber, F_INT4EQ,
+						Int32GetDatum(id));
+			index_rescan(scan, &skey, 1, NULL, 0);
+
+			if (!index_getnext_slot(scan, ForwardScanDirection, slot))
+			{
+				/* No visible row with this id, so forget about it. */
+				LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+				hash_search(pgfruit_hash, &id, HASH_REMOVE, NULL);
+				LWLockRelease(pgfruit_data->lock);
+				continue;
+			}
+
+			pgfruit_store_tuple(slot, i_id, i_name, i_color);
+
+			if (index_getnext_slot(scan, ForwardScanDirection, slot))
+				ereport(ERROR,
+						(errmsg("multiple fruits with id = %d",
+								hinted_ids[i])));
+		}
+
+		index_endscan(scan);
+		index_close(indexRel, NoLock);
+	}
+	else
+	{
+		TableScanDesc scan;
+		HASH_SEQ_STATUS seq;
+		fruit_entry *fe;
+
+		/* Unmark all hash table entries. */
+		LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+		hash_seq_init(&seq, pgfruit_hash);
+		while ((fe = hash_seq_search(&seq)) != NULL)
+			fe->marked = false;
+		LWLockRelease(pgfruit_data->lock);
+
+		/* Prepare to scan the entire relation. */
+		scan = table_beginscan(rel, snapshot, 0, NULL);
+
+		/* Scan the relation, storing and marking every row we see. */
+		while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
+			pgfruit_store_tuple(slot, i_id, i_name, i_color);
+
+		/* Post-scan cleanup. */
+		table_endscan(scan);
+
+		/*
+		 * Any entries that are still unmarked were not found in our scan,
+		 * and thus don't exist any more, and thus should be removed.
+		 */
+		LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+		hash_seq_init(&seq, pgfruit_hash);
+		while ((fe = hash_seq_search(&seq)) != NULL)
+			if (!fe->marked)
+				hash_search(pgfruit_hash, &fe->id, HASH_REMOVE, NULL);
+		LWLockRelease(pgfruit_data->lock);
+	}
+
+	/* Clean up. */
+	ExecDropSingleTupleTableSlot(slot);
+	UnregisterSnapshot(snapshot);
+	relation_close(rel, AccessShareLock);
+	CommitTransactionCommand();
+}
+
+/*
+ * Clear our PID from shared state. Also tell the next worker process to
+ * reread all the data, in case we removed hints from shared memory but weren't
+ * able to finish processing them before we ran into trouble.
+ *
+ * Registered by pgfruit_main() via on_shmem_exit(); code and arg are unused.
+ *
+ * The shared state is only touched if the advertised PID is our own, so that
+ * a process which never became the advertised worker cannot wipe out the
+ * state of the one that did.
+ */
+static void
+pgfruit_shmem_exit(int code, Datum arg)
+{
+	LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+	if (pgfruit_data->bgworker_pid == MyProcPid)
+	{
+		pgfruit_data->bgworker_pid = InvalidPid;
+		pgfruit_data->nhints = -1;
+	}
+	LWLockRelease(pgfruit_data->lock);
+}
+
+/*
+ * SQL-callable set-returning function: emit one (id, name, color) row for
+ * every entry currently in the shared hash table.
+ *
+ * The table is walked with pgfruit_data->lock held in shared mode, and the
+ * rows are collected in the tuplestore set up by InitMaterializedSRF().
+ */
+Datum
+pgfruit_contents(PG_FUNCTION_ARGS)
+{
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	HASH_SEQ_STATUS status;
+	fruit_entry *entry;
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	hash_seq_init(&status, pgfruit_hash);
+	LWLockAcquire(pgfruit_data->lock, LW_SHARED);
+	for (entry = hash_seq_search(&status);
+		 entry != NULL;
+		 entry = hash_seq_search(&status))
+	{
+		/* None of the three output columns is ever null. */
+		bool		nulls[3] = {false, false, false};
+		Datum		values[3];
+
+		values[0] = Int32GetDatum(entry->id);
+		values[1] = CStringGetTextDatum(entry->name);
+		values[2] = CStringGetTextDatum(entry->color);
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+							 values, nulls);
+	}
+	LWLockRelease(pgfruit_data->lock);
+
+	return (Datum) 0;
+}
+
+/*
+ * SQL-callable function: ask the pgfruit background worker, if one has
+ * advertised its PID, to reread the entry for the given id.
+ *
+ * With pgfruit_trigger() installed on the table this is not strictly
+ * necessary, but it can be handy for debugging or for recovering after some
+ * bug, and since everything in this file is a demonstration anyway, it is
+ * included too.
+ */
+Datum
+pgfruit_poke(PG_FUNCTION_ARGS)
+{
+	int32		id = PG_GETARG_INT32(0);
+	Relation	rel;
+	pid_t		worker_pid;
+	int			nhints;
+
+	/*
+	 * Take the heavyweight lock that pgfruit_reread() waits on before we
+	 * publish the hint; it is not released here. See pgfruit_reread() for
+	 * the rationale.
+	 */
+	rel = relation_openrv(makeRangeVar("public", "fruit", -1),
+						  AccessShareLock);
+	LockDatabaseObject(RelationRelationId, RelationGetRelid(rel),
+					   0, AccessShareLock);
+	relation_close(rel, AccessShareLock);
+
+	LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+
+	/* Remember whom to signal once the lock has been released. */
+	worker_pid = pgfruit_data->bgworker_pid;
+
+	nhints = pgfruit_data->nhints;
+	if (nhints >= MAX_HINTS)
+	{
+		/* The hint array is full: request a reread of the whole table. */
+		pgfruit_data->nhints = -1;
+	}
+	else if (nhints >= 0)
+	{
+		bool		already_hinted = false;
+		int			i;
+
+		/* Is this id among the pending hints already? */
+		for (i = 0; i < nhints; ++i)
+		{
+			if (pgfruit_data->hinted_ids[i] == id)
+			{
+				already_hinted = true;
+				break;
+			}
+		}
+
+		/* If not, append it. */
+		if (!already_hinted)
+		{
+			pgfruit_data->hinted_ids[nhints] = id;
+			pgfruit_data->nhints = nhints + 1;
+		}
+	}
+	/* A negative nhints means a full reread is pending; nothing to add. */
+
+	LWLockRelease(pgfruit_data->lock);
+
+	/* If there's a valid pid, SIGUSR1 should set the process latch. */
+	if (worker_pid != InvalidPid)
+		kill(worker_pid, SIGUSR1);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Trigger function, meant to be installed AFTER INSERT OR UPDATE OR DELETE
+ * FOR EACH ROW. It collects the id value from each of the trigger's tuple
+ * slots that is present, records those ids as hints in shared memory, and
+ * signals the background worker so that it rereads the affected rows.
+ *
+ * Raises an error if it is not called as a row-level AFTER trigger. Always
+ * returns a null pointer datum.
+ */
+Datum
+pgfruit_trigger(PG_FUNCTION_ARGS)
+{
+	TriggerData *trigdata = (TriggerData *) fcinfo->context;
+	TupleTableSlot *slots[2];
+	int			changed_ids[2];
+	int			nchanged = 0;
+	int			id_attno;
+	pid_t		worker_pid;
+	int			k;
+
+	/* make sure it's called as a trigger */
+	if (!CALLED_AS_TRIGGER(fcinfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+				 errmsg("pgfruit_trigger: must be called as trigger")));
+
+	/* and that it's called after the change */
+	if (!TRIGGER_FIRED_AFTER(trigdata->tg_event))
+		ereport(ERROR,
+				(errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+				 errmsg("pgfruit_trigger: must be called after the change")));
+
+	/* and that it's called for each row */
+	if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
+		ereport(ERROR,
+				(errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+				 errmsg("pgfruit_trigger: must be called for each row")));
+
+	/* Locate the "id" column within the trigger relation. */
+	id_attno = pgfruit_find_column_by_name(trigdata->tg_relation->rd_att,
+										   "id", INT4OID);
+
+	/* Pull the id out of whichever of the two trigger slots are present. */
+	slots[0] = trigdata->tg_trigslot;
+	slots[1] = trigdata->tg_newslot;
+	for (k = 0; k < 2; ++k)
+	{
+		if (slots[k] == NULL)
+			continue;
+
+		changed_ids[nchanged++] =
+			DatumGetInt32(pgfruit_get_datum(slots[k], id_attno, "id"));
+	}
+
+	/*
+	 * Take the heavyweight lock that pgfruit_reread() waits on before we
+	 * publish any hints; it is not released here. See pgfruit_reread() for
+	 * the rationale.
+	 */
+	LockDatabaseObject(RelationRelationId,
+					   RelationGetRelid(trigdata->tg_relation),
+					   0, AccessShareLock);
+
+	LWLockAcquire(pgfruit_data->lock, LW_EXCLUSIVE);
+
+	/* Remember whom to signal once the lock has been released. */
+	worker_pid = pgfruit_data->bgworker_pid;
+
+	for (k = 0; k < nchanged; ++k)
+	{
+		int			count = pgfruit_data->nhints;
+		bool		known = false;
+		int			j;
+
+		if (count < 0 || count >= MAX_HINTS)
+		{
+			/* No room (or full reread pending): reread the whole thing. */
+			pgfruit_data->nhints = -1;
+			break;
+		}
+
+		/* Is this id among the pending hints already? */
+		for (j = 0; j < count; ++j)
+		{
+			if (pgfruit_data->hinted_ids[j] == changed_ids[k])
+			{
+				known = true;
+				break;
+			}
+		}
+
+		/* If not, append it. */
+		if (!known)
+		{
+			pgfruit_data->hinted_ids[count] = changed_ids[k];
+			pgfruit_data->nhints = count + 1;
+		}
+	}
+
+	LWLockRelease(pgfruit_data->lock);
+
+	/* If there's a valid pid, SIGUSR1 should set the process latch. */
+	if (worker_pid != InvalidPid)
+		kill(worker_pid, SIGUSR1);
+
+	return PointerGetDatum(NULL);
+}
diff --git a/contrib/pgfruit/pgfruit.control b/contrib/pgfruit/pgfruit.control
new file mode 100644
index 0000000000..cc12701507
--- /dev/null
+++ b/contrib/pgfruit/pgfruit.control
@@ -0,0 +1,6 @@
+# pgfruit extension
+comment = 'pgfruit'
+default_version = '1.0'
+module_pathname = '$libdir/pgfruit'
+relocatable = false
+schema = public