diff --git a/contrib/Makefile b/contrib/Makefile index 2f0a88d3f774..26cf6e94ff10 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -44,6 +44,7 @@ SUBDIRS = \ pg_walinspect \ postgres_fdw \ seg \ + snowflake \ spi \ tablefunc \ tcn \ diff --git a/contrib/meson.build b/contrib/meson.build index ed30ee7d639f..e3bd2b464172 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -59,6 +59,7 @@ subdir('pg_walinspect') subdir('postgres_fdw') subdir('seg') subdir('sepgsql') +subdir('snowflake') subdir('spi') subdir('sslinfo') # start-scripts doesn't contain build products diff --git a/contrib/snowflake/.gitignore b/contrib/snowflake/.gitignore new file mode 100644 index 000000000000..44d119cfcc24 --- /dev/null +++ b/contrib/snowflake/.gitignore @@ -0,0 +1,3 @@ +# Generated subdirectories +/log/ +/results/ diff --git a/contrib/snowflake/Makefile b/contrib/snowflake/Makefile new file mode 100644 index 000000000000..fa5b48d565d8 --- /dev/null +++ b/contrib/snowflake/Makefile @@ -0,0 +1,19 @@ +# contrib/snowflake/Makefile + +MODULES = snowflake + +EXTENSION = snowflake +DATA = snowflake--1.0.sql + +REGRESS = snowflake + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/snowflake +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/snowflake/expected/snowflake.out b/contrib/snowflake/expected/snowflake.out new file mode 100644 index 000000000000..b7e469bf7384 --- /dev/null +++ b/contrib/snowflake/expected/snowflake.out @@ -0,0 +1,73 @@ +CREATE EXTENSION snowflake; +CREATE SEQUENCE snowflake_seq USING snowflake; +SET snowflake.machine_id = 2000; -- error +ERROR: 2000 is outside the valid range for parameter "snowflake.machine_id" (0 .. 
1023) +SET snowflake.machine_id = 4; -- ok +SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq')); + machine | counter +---------+--------- + 4 | 2 +(1 row) + +SELECT machine, counter FROM snowflake_get(lastval()); + machine | counter +---------+--------- + 4 | 2 +(1 row) + +SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq')); + machine | counter +---------+--------- + 4 | 3 +(1 row) + +SELECT machine, counter FROM snowflake_get(currval('snowflake_seq')); + machine | counter +---------+--------- + 4 | 3 +(1 row) + +-- Sequence relation exists, is unlogged and remains unlogged. +SELECT * FROM snowflake_seq; + count | is_called +-------+----------- + 3 | t +(1 row) + +ALTER SEQUENCE snowflake_seq SET LOGGED; +SELECT relpersistence FROM pg_class where relname = 'snowflake_seq'; + relpersistence +---------------- + u +(1 row) + +ALTER SEQUENCE snowflake_seq RESTART; +SELECT * FROM snowflake_seq; + count | is_called +-------+----------- + 1 | f +(1 row) + +-- Identity column, where cache affects value. 
+SET default_sequence_access_method = 'snowflake'; +CREATE TABLE snowflake_tab (a int GENERATED ALWAYS AS IDENTITY, b int); +INSERT INTO snowflake_tab VALUES (DEFAULT, generate_series(1, 10)); +SELECT data.machine, data.counter + FROM snowflake_tab, LATERAL snowflake_get(a) AS data; + machine | counter +---------+--------- + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 5 + 4 | 6 + 4 | 7 + 4 | 8 + 4 | 9 + 4 | 10 + 4 | 11 +(10 rows) + +DROP TABLE snowflake_tab; +DROP SEQUENCE snowflake_seq; +DROP EXTENSION snowflake; diff --git a/contrib/snowflake/meson.build b/contrib/snowflake/meson.build new file mode 100644 index 000000000000..567669eea790 --- /dev/null +++ b/contrib/snowflake/meson.build @@ -0,0 +1,34 @@ +# Copyright (c) 2024, PostgreSQL Global Development Group + +snowflake_sources = files( + 'snowflake.c', +) + +if host_system == 'windows' + snowflake_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'snowflake', + '--FILEDESC', 'snowflake - sequence access method',]) +endif + +snowflake = shared_module('snowflake', + snowflake_sources, + kwargs: contrib_mod_args, +) +contrib_targets += snowflake + +install_data( + 'snowflake.control', + 'snowflake--1.0.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'snowflake', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'snowflake', + ], + }, +} diff --git a/contrib/snowflake/snowflake--1.0.sql b/contrib/snowflake/snowflake--1.0.sql new file mode 100644 index 000000000000..bcb9d754f1b4 --- /dev/null +++ b/contrib/snowflake/snowflake--1.0.sql @@ -0,0 +1,21 @@ +/* contrib/snowflake/snowflake--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION snowflake" to load this file. 
\quit
+
+CREATE FUNCTION snowflake_sequenceam_handler(internal)
+  RETURNS sequence_am_handler
+  AS 'MODULE_PATHNAME'
+  LANGUAGE C;
+
+CREATE ACCESS METHOD snowflake
+  TYPE SEQUENCE HANDLER snowflake_sequenceam_handler;
+COMMENT ON ACCESS METHOD snowflake IS 'snowflake sequence access method';
+
+CREATE FUNCTION snowflake_get(IN raw int8,
+    OUT time_ms int8,
+    OUT machine int4,
+    OUT counter int4)
+  RETURNS record
+  AS 'MODULE_PATHNAME'
+  LANGUAGE C STRICT;
diff --git a/contrib/snowflake/snowflake.c b/contrib/snowflake/snowflake.c
new file mode 100644
index 000000000000..be4c4039ecd0
--- /dev/null
+++ b/contrib/snowflake/snowflake.c
@@ -0,0 +1,570 @@
+/*-------------------------------------------------------------------------
+ *
+ * snowflake.c
+ *	  Sequence access method generating snowflake IDs.
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  contrib/snowflake/snowflake.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/time.h>
+
+#include "access/generic_xlog.h"
+#include "access/sequenceam.h"
+#include "access/xact.h"
+#include "catalog/storage_xlog.h"
+#include "commands/tablecmds.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "storage/bufmgr.h"
+#include "utils/guc.h"
+#include "utils/numeric.h"
+
+PG_MODULE_MAGIC;
+
+/* "special area" of a snowflake's buffer page. */
+#define SNOWFLAKE_MAGIC	0x01
+
+typedef struct snowflake_magic
+{
+	uint32		magic;
+} snowflake_magic;
+
+/* -----------------------------------------------------------------------
+ * Snowflake IDs are 64-bit based, with the following structure:
+ * - 41 bits for an epoch-based timestamp, in milli-seconds.
+ * - 10 bits for a machine ID.
+ * - 12 bits for a sequence counter.
+ *
+ * The timestamp can be cut to an offset.  The machine ID is controlled
+ * by a superuser GUC.
Sequence properties apply to the sequence counter,
+ * as the other two are environment-dependent.
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * Helper routines to convert a snowflake ID from/to an int64.
+ *
+ * Each component is described by a mask (its width) and a shift (its
+ * position in the 64-bit value): the counter sits in the lowest 12 bits,
+ * the machine ID in the 10 bits above it, and the timestamp in the 41 bits
+ * above those.
+ *
+ * snowflake_id_to_int64() packs a snowflake_id into "raw", and
+ * int64_to_snowflake_id() does the reverse.  Both are brace-enclosed
+ * statement macros, so they have to be used as standalone statements.
+ */
+#define SNOWFLAKE_COUNTER_MASK		0x0000000000000FFF	/* 12 bits */
+#define SNOWFLAKE_COUNTER_SHIFT		0
+#define SNOWFLAKE_MACHINE_ID_MASK	0x00000000000003FF	/* 10 bits */
+#define SNOWFLAKE_MACHINE_ID_SHIFT	12	/* counter */
+#define SNOWFLAKE_TIMESTAMP_MASK	0x000001FFFFFFFFFF	/* 41 bits */
+#define SNOWFLAKE_TIMESTAMP_SHIFT	22	/* machine ID + counter sizes */
+
+typedef struct snowflake_id
+{
+	uint32		count;			/* sequence counter component */
+	uint32		machine;		/* machine ID component */
+	uint64		time_ms;		/* timestamp component, in milliseconds */
+} snowflake_id;
+
+#define snowflake_id_to_int64(id, raw) { \
+	raw = (((id).count) & SNOWFLAKE_COUNTER_MASK) << SNOWFLAKE_COUNTER_SHIFT | \
+		(((id).machine) & SNOWFLAKE_MACHINE_ID_MASK) << SNOWFLAKE_MACHINE_ID_SHIFT | \
+		(((id).time_ms) & SNOWFLAKE_TIMESTAMP_MASK) << SNOWFLAKE_TIMESTAMP_SHIFT; \
+}
+
+#define int64_to_snowflake_id(raw, id) { \
+	(id).count = ((raw) >> SNOWFLAKE_COUNTER_SHIFT) & SNOWFLAKE_COUNTER_MASK; \
+	(id).machine = ((raw) >> SNOWFLAKE_MACHINE_ID_SHIFT) & SNOWFLAKE_MACHINE_ID_MASK; \
+	(id).time_ms = ((raw) >> SNOWFLAKE_TIMESTAMP_SHIFT) & SNOWFLAKE_TIMESTAMP_MASK; \
+}
+
+/*
+ * Format of tuples stored in heap table associated to snowflake sequence.
+ *
+ * Only the counter and the is_called flag are stored; the timestamp and
+ * the machine ID are computed each time a value is generated.
+ */
+typedef struct FormData_snowflake_data
+{
+	/* enough to cover 12 bits of the internal counter */
+	int16		count;
+	bool		is_called;
+} FormData_snowflake_data;
+
+typedef FormData_snowflake_data *Form_snowflake_data;
+
+/*
+ * Columns of a snowflake sequence relation.
+ */
+#define SNOWFLAKE_COL_COUNT		1
+#define SNOWFLAKE_COL_CALLED	2
+
+#define SNOWFLAKE_COLS			2
+
+/* GUC parameter */
+static int	snowflake_machine_id = 1;	/* snowflake.machine_id */
+
+PG_FUNCTION_INFO_V1(snowflake_sequenceam_handler);
+
+/* -----------------------------------------------------------------------
+ * Interfaces for relation manipulation.
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * Initialize snowflake relation's fork with some data.
+ *
+ * The given fork of "rel" is extended by one block, which has to be block
+ * 0.  The page is initialized with a special area holding SNOWFLAKE_MAGIC,
+ * and "tuple" is stored as the first item of the page after its header
+ * has been frozen.  Only the init fork is WAL-logged, through a generic
+ * WAL record holding a full image of the page.
+ */
+static void
+fill_snowflake_fork(Relation rel, HeapTuple tuple, ForkNumber forkNum)
+{
+	Buffer		buf;
+	Page		page;
+	snowflake_magic *sm;
+	OffsetNumber offnum;
+	GenericXLogState *state = NULL;
+
+	/* Initialize first page of relation with special magic number */
+
+	buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL,
+							EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
+	Assert(BufferGetBlockNumber(buf) == 0);
+
+	page = BufferGetPage(buf);
+
+	PageInit(page, BufferGetPageSize(buf), sizeof(snowflake_magic));
+	sm = (snowflake_magic *) PageGetSpecialPointer(page);
+	sm->magic = SNOWFLAKE_MAGIC;
+
+	/* Now insert sequence tuple */
+
+	/*
+	 * Since VACUUM does not process sequences, we have to force the tuple to
+	 * have xmin = FrozenTransactionId now.  Otherwise it would become
+	 * invisible to SELECTs after 2G transactions.  It is okay to do this
+	 * because if the current transaction aborts, no other xact will ever
+	 * examine the sequence tuple anyway.
+	 */
+	HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
+	HeapTupleHeaderSetXminFrozen(tuple->t_data);
+	HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
+	HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+	tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+	ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
+
+	/*
+	 * Initialize before entering in the critical section, as this does
+	 * allocations.
+	 */
+	if (forkNum == INIT_FORKNUM)
+		state = GenericXLogStart(rel);
+
+	START_CRIT_SECTION();
+
+	MarkBufferDirty(buf);
+
+	offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
+						 InvalidOffsetNumber, false, false);
+	if (offnum != FirstOffsetNumber)
+		elog(ERROR, "failed to add sequence tuple to page");
+
+	/*
+	 * Init forks have to be logged.  These go through generic WAL records
+	 * for simplicity's sake to save from the need of a custom RMGR.
+	 */
+	if (forkNum == INIT_FORKNUM)
+	{
+		(void) GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
+		GenericXLogFinish(state);
+	}
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Initialize snowflake relation.
+ *
+ * This needs to handle both the initial and main forks.
+ *
+ * The relation is expected to be unlogged.  The tuple is first written to
+ * the main fork, then the init fork is created, logged and filled with
+ * the same tuple before the relation's buffers are flushed.
+ */
+static void
+fill_snowflake(Relation rel, HeapTuple tuple)
+{
+	SMgrRelation srel;
+
+	Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
+
+	fill_snowflake_fork(rel, tuple, MAIN_FORKNUM);
+
+	/* init fork */
+	srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER);
+	smgrcreate(srel, INIT_FORKNUM, false);
+	log_smgrcreate(&rel->rd_locator, INIT_FORKNUM);
+	fill_snowflake_fork(rel, tuple, INIT_FORKNUM);
+	FlushRelationBuffers(rel);
+	smgrclose(srel);
+}
+
+/*
+ * Read the current state of a snowflake sequence
+ *
+ * Given an opened sequence relation, lock the page buffer and find the tuple.
+ *
+ * *buf receives the reference to the pinned-and-ex-locked buffer.
+ * *seqdatatuple receives the reference to the sequence tuple proper.
+ *
+ * Returned value points to the data payload of the tuple.
+ *
+ * An error is raised if the special area of the page does not hold
+ * SNOWFLAKE_MAGIC.  The returned pointer refers to the shared buffer
+ * contents; the callers in this file release the buffer with
+ * UnlockReleaseBuffer() once done with it.
+ */
+static Form_snowflake_data
+read_snowflake(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
+{
+	Page		page;
+	ItemId		lp;
+	snowflake_magic *sm;
+	Form_snowflake_data seq;
+
+	*buf = ReadBuffer(rel, 0);	/* the sequence tuple is on block 0 */
+	LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
+
+	page = BufferGetPage(*buf);
+	sm = (snowflake_magic *) PageGetSpecialPointer(page);
+
+	if (sm->magic != SNOWFLAKE_MAGIC)
+		elog(ERROR, "bad magic number in sequence \"%s\": %08X",
+			 RelationGetRelationName(rel), sm->magic);
+
+	lp = PageGetItemId(page, FirstOffsetNumber);
+	Assert(ItemIdIsNormal(lp));
+
+	/* Note we currently only bother to set these two fields of *seqdatatuple */
+	seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
+	seqdatatuple->t_len = ItemIdGetLength(lp);
+
+	/*
+	 * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
+	 * a sequence, which would leave a non-frozen XID in the sequence tuple's
+	 * xmax, which eventually leads to clog access failures or worse.  If we
+	 * see this has happened, clean up after it.  We treat this like a hint
+	 * bit update, ie, don't bother to WAL-log it, since we can certainly do
+	 * this again if the update gets lost.
+	 */
+	Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
+	if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
+	{
+		HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
+		seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
+		seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+		MarkBufferDirtyHint(*buf, true);
+	}
+
+	seq = (Form_snowflake_data) GETSTRUCT(seqdatatuple);
+
+	return seq;
+}
+
+
+/* ------------------------------------------------------------------------
+ * Callbacks for the snowflake sequence access method.
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * Return the table access method used by this sequence.
+ *
+ * This is just an on-memory sequence, so anything is fine.
+ */
+static const char *
+snowflake_sequenceam_get_table_am(void)
+{
+	return "heap";
+}
+
+/*
+ * snowflake_sequenceam_init
+ *
+ * Initialize relation of a snowflake sequence.  This stores the sequence
+ * counter in an unlogged relation as timestamps ensure value uniqueness.
+ *
+ * The "count" (int2) and "is_called" (bool) columns are added to the
+ * relation, its storage is switched to unlogged, and a first tuple is
+ * written.  last_value is cast to int16 and masked to the 12 bits of the
+ * counter before being stored.
+ */
+static void
+snowflake_sequenceam_init(Relation rel, int64 last_value, bool is_called)
+{
+	Datum		values[SNOWFLAKE_COLS];
+	bool		nulls[SNOWFLAKE_COLS];
+	int16		counter;
+	TupleDesc	tupdesc;
+	HeapTuple	tuple;
+	List	   *elts = NIL;
+	ListCell   *lc;
+	ColumnDef  *coldef = NULL;
+	AlterTableCmd *atcmd;
+	List	   *atcmds = NIL;
+
+	/* Adjust last_value, depending on the defaults given */
+	counter = ((int16) last_value) & SNOWFLAKE_COUNTER_MASK;
+
+	/*
+	 * Create unlogged relation with its attributes.
+	 */
+	coldef = makeColumnDef("count", INT2OID, -1, InvalidOid);
+	coldef->is_not_null = true;
+	elts = lappend(elts, coldef);
+	coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid);
+	coldef->is_not_null = true;
+	elts = lappend(elts, coldef);
+
+	foreach(lc, elts)
+	{
+		atcmd = makeNode(AlterTableCmd);
+		atcmd->subtype = AT_AddColumnToSequence;
+		atcmd->def = (Node *) lfirst(lc);
+		atcmds = lappend(atcmds, atcmd);
+	}
+
+	/*
+	 * No recursion needed.  Note that EventTriggerAlterTableStart() should
+	 * have been called.
+	 */
+	AlterTableInternal(RelationGetRelid(rel), atcmds, false);
+	CommandCounterIncrement();
+
+	/*
+	 * Switch the relation to be unlogged.  This forces a rewrite, but
+	 * the relation is empty so that's OK.
+	 */
+	RelationSetNewRelfilenumber(rel, RELPERSISTENCE_UNLOGGED);
+
+	/* And insert its first tuple */
+	values[0] = Int16GetDatum(counter);
+	nulls[0] = false;
+	values[1] = BoolGetDatum(is_called);
+	nulls[1] = false;
+
+	tupdesc = RelationGetDescr(rel);
+	tuple = heap_form_tuple(tupdesc, values, nulls);
+	fill_snowflake(rel, tuple);
+}
+
+/*
+ * snowflake_sequenceam_nextval
+ *
+ * Return the next value for a snowflake sequence.
+ *
+ * The counter stored in the sequence relation is advanced while its buffer
+ * is exclusively locked, then combined with the current time (in
+ * milliseconds) and the value of snowflake.machine_id to build the ID
+ * returned.  The same ID is stored in *last, for lastval().
+ *
+ * incby, maxv, minv, cache and cycle are not used by this function.
+ */
+static int64
+snowflake_sequenceam_nextval(Relation rel, int64 incby, int64 maxv,
+							 int64 minv, int64 cache, bool cycle,
+							 int64 *last)
+{
+	Buffer		buf;
+	Form_snowflake_data seq;
+	HeapTupleData seqdatatuple;
+	int64		result = 0;
+	snowflake_id id = {0};
+	struct timeval tp;
+
+	/* lock page buffer and read tuple */
+	seq = read_snowflake(rel, &buf, &seqdatatuple);
+
+	/*
+	 * The logic here is quite simple, increment the counter until its
+	 * threshold is reached and get back to the start.  If the threshold
+	 * is reached, wait 1ms to ensure a unique timestamp.  There is no
+	 * need to do a retry as the buffer is already locked.
+	 */
+	id.count = seq->count;
+	id.count++;
+
+	if (id.count > (PG_INT16_MAX & SNOWFLAKE_COUNTER_MASK))
+	{
+		/*
+		 * Threshold reached, so wait a bit to force the clock to a new
+		 * timestamp.  Note that the counter restarts at 1, not 0.
+		 */
+		id.count = 1;
+		pg_usleep(1000L);	/* 1ms */
+	}
+
+	/* Compute timestamp, with buffer locked */
+	gettimeofday(&tp, NULL);
+	id.time_ms = (uint64) tp.tv_sec * 1000 +
+		tp.tv_usec / 1000;
+
+	/* Machine ID */
+	id.machine = snowflake_machine_id;
+
+	/*
+	 * Ready to change the on-disk (or really, in-buffer) tuple.  The buffer
+	 * has to be marked dirty, as done in snowflake_sequenceam_setval(), or
+	 * the new counter could be lost if the buffer gets evicted, which
+	 * would allow a counter value to be handed out twice.
+	 */
+	START_CRIT_SECTION();
+	MarkBufferDirty(buf);
+	seq->count = id.count;
+	seq->is_called = true;
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+
+	/* Store the last value computed for lastval() */
+	snowflake_id_to_int64(id, result);
+	*last = result;
+	return result;
+}
+
+/*
+ * snowflake_sequenceam_setval
+ *
+ * Set the sequence value, manipulating only the sequence counter.
+ *
+ * Only the bits of "next" covered by SNOWFLAKE_COUNTER_MASK are stored,
+ * together with "iscalled".
+ */
+static void
+snowflake_sequenceam_setval(Relation rel, int64 next, bool iscalled)
+{
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	Form_snowflake_data seq;
+
+	/* lock page buffer and read tuple */
+	seq = read_snowflake(rel, &buf, &seqdatatuple);
+
+	/* Change the in-buffer tuple */
+	START_CRIT_SECTION();
+	seq->count = (next & SNOWFLAKE_COUNTER_MASK);
+	seq->is_called = iscalled;
+	MarkBufferDirty(buf);
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * snowflake_sequenceam_get_state
+ *
+ * Return the last sequence counter value.
+ *
+ * *last_value receives the counter stored in the sequence tuple and
+ * *is_called its is_called flag.
+ */
+static void
+snowflake_sequenceam_get_state(Relation rel, int64 *last_value, bool *is_called)
+{
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	Form_snowflake_data seq;
+
+	seq = read_snowflake(rel, &buf, &seqdatatuple);
+	*last_value = seq->count;
+	*is_called = seq->is_called;
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * snowflake_sequenceam_reset
+ *
+ * Reset the sequence, coming down to resetting its counter.
+ *
+ * A copy of the current tuple is updated with the masked start value and
+ * "is_called", new storage is assigned to the relation with its current
+ * persistence, and the copy is written there.  reset_state is not used
+ * by this function.
+ */
+static void
+snowflake_sequenceam_reset(Relation rel, int64 startv, bool is_called,
+						   bool reset_state)
+{
+	HeapTupleData seqdatatuple;
+	HeapTuple	tuple;
+	Form_snowflake_data seq;
+	Buffer		buf;
+
+	/* lock buffer page and read tuple */
+	(void) read_snowflake(rel, &buf, &seqdatatuple);
+
+	/* copy the existing tuple */
+	tuple = heap_copytuple(&seqdatatuple);
+
+	/* Now we're done with the old page */
+	UnlockReleaseBuffer(buf);
+
+	/*
+	 * Modify the copied tuple to execute the restart (compare the RESTART
+	 * action in AlterSequence)
+	 */
+	seq = (Form_snowflake_data) GETSTRUCT(tuple);
+	seq->count = (startv & SNOWFLAKE_COUNTER_MASK);
+	seq->is_called = is_called;
+
+	/* create new storage */
+	RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence);
+
+	/* insert the modified tuple into the page */
+	fill_snowflake(rel, tuple);
+}
+
+/*
+ * snowflake_sequenceam_change_persistence
+ *
+ * There is nothing to do here, the underneath relation has to remain
+ * unlogged and is set as such
when creating the sequence.
+ */
+static void
+snowflake_sequenceam_change_persistence(Relation rel, char newrelpersistence)
+{
+	/* Nothing to do here */
+}
+
+/* ------------------------------------------------------------------------
+ * Definition of the snowflake sequence access method.
+ * ------------------------------------------------------------------------
+ */
+
+static const SequenceAmRoutine snowflake_sequenceam_methods = {
+	.type = T_SequenceAmRoutine,
+	.get_table_am = snowflake_sequenceam_get_table_am,
+	.init = snowflake_sequenceam_init,
+	.nextval = snowflake_sequenceam_nextval,
+	.setval = snowflake_sequenceam_setval,
+	.get_state = snowflake_sequenceam_get_state,
+	.reset = snowflake_sequenceam_reset,
+	.change_persistence = snowflake_sequenceam_change_persistence
+};
+
+Datum
+snowflake_sequenceam_handler(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_POINTER(&snowflake_sequenceam_methods);	/* static routine table */
+}
+
+/* Utility functions */
+
+/*
+ * snowflake_get
+ *
+ * Return a tuple worth of snowflake ID data, in a readable shape.
+ *
+ * The int8 argument is split with int64_to_snowflake_id(), and the result
+ * is a record made of the timestamp in milliseconds, the machine ID and
+ * the counter, in this order.
+ */
+PG_FUNCTION_INFO_V1(snowflake_get);
+Datum
+snowflake_get(PG_FUNCTION_ARGS)
+{
+#define SNOWFLAKE_GET_COLS	3
+	int64		raw = PG_GETARG_INT64(0);
+	Datum	   *values;
+	bool	   *nulls;
+	TupleDesc	tupdesc;
+	snowflake_id id;
+
+	/* determine result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	int64_to_snowflake_id(raw, id);
+
+	nulls = palloc0(sizeof(bool) * tupdesc->natts);	/* no NULL columns */
+	values = palloc0(sizeof(Datum) * tupdesc->natts);
+
+	values[0] = Int64GetDatum(id.time_ms);
+	values[1] = Int32GetDatum(id.machine);
+	values[2] = Int32GetDatum(id.count);
+
+	/* Returns the record as Datum */
+	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Entry point when loading extension.
+ *
+ * Defines the integer GUC snowflake.machine_id, stored in
+ * snowflake_machine_id, with a default of 1 and a valid range of 0 to 1023.
+ */
+void
+_PG_init(void)
+{
+	DefineCustomIntVariable("snowflake.machine_id",
+							"Machine ID to use with snowflake sequence.",
+							"Default value is 1.",
+							&snowflake_machine_id,
+							1, 0, 1023, /* 10 bits as max */
+							PGC_SUSET,
+							0, NULL, NULL, NULL);
+}
diff --git a/contrib/snowflake/snowflake.control b/contrib/snowflake/snowflake.control
new file mode 100644
index 000000000000..7b8c6089c25f
--- /dev/null
+++ b/contrib/snowflake/snowflake.control
@@ -0,0 +1,5 @@
+# snowflake extension
+comment = 'snowflake - sequence access method'
+default_version = '1.0'
+module_pathname = '$libdir/snowflake'
+relocatable = true
diff --git a/contrib/snowflake/sql/snowflake.sql b/contrib/snowflake/sql/snowflake.sql
new file mode 100644
index 000000000000..395d166ba4bc
--- /dev/null
+++ b/contrib/snowflake/sql/snowflake.sql
@@ -0,0 +1,29 @@
+CREATE EXTENSION snowflake;
+
+CREATE SEQUENCE snowflake_seq USING snowflake;
+
+SET snowflake.machine_id = 2000; -- error
+SET snowflake.machine_id = 4; -- ok
+SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq'));
+SELECT machine, counter FROM snowflake_get(lastval());
+SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq'));
+SELECT machine, counter FROM snowflake_get(currval('snowflake_seq'));
+
+-- Sequence relation exists, is unlogged and remains unlogged.
+SELECT * FROM snowflake_seq;
+ALTER SEQUENCE snowflake_seq SET LOGGED;
+SELECT relpersistence FROM pg_class where relname = 'snowflake_seq';
+
+ALTER SEQUENCE snowflake_seq RESTART;
+SELECT * FROM snowflake_seq;
+
+-- Identity column, where cache affects value.
+SET default_sequence_access_method = 'snowflake'; +CREATE TABLE snowflake_tab (a int GENERATED ALWAYS AS IDENTITY, b int); +INSERT INTO snowflake_tab VALUES (DEFAULT, generate_series(1, 10)); +SELECT data.machine, data.counter + FROM snowflake_tab, LATERAL snowflake_get(a) AS data; +DROP TABLE snowflake_tab; + +DROP SEQUENCE snowflake_seq; +DROP EXTENSION snowflake; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index fd6e3e028907..659a37960579 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9752,6 +9752,22 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + default_sequence_access_method (string) + + default_sequence_access_method configuration parameter + + + + + This parameter specifies the default sequence access method to use when + creating sequences if the CREATE SEQUENCE + command does not explicitly specify an access method. The default is + local. + + + + default_tablespace (string) diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 24b706b29adc..5e26d5baacc9 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -168,6 +168,7 @@ CREATE EXTENSION extension_name; &seg; &sepgsql; &contrib-spi; + &snowflake; &sslinfo; &tablefunc; &tcn; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index fef9584f908e..bee964856ece 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -95,6 +95,7 @@ + @@ -158,6 +159,7 @@ + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index af476c82fcc1..9f369cc89930 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -258,6 +258,7 @@ break is not needed in a wider output rendering. 
&geqo; &tableam; &indexam; + &sequenceam; &wal-for-extensions; &indextypes; &storage; diff --git a/doc/src/sgml/ref/create_access_method.sgml b/doc/src/sgml/ref/create_access_method.sgml index dae43dbaed58..3067dc4d4df0 100644 --- a/doc/src/sgml/ref/create_access_method.sgml +++ b/doc/src/sgml/ref/create_access_method.sgml @@ -61,8 +61,8 @@ CREATE ACCESS METHOD name This clause specifies the type of access method to define. - Only TABLE and INDEX - are supported at present. + Only TABLE, SEQUENCE and + INDEX are supported at present. @@ -77,12 +77,15 @@ CREATE ACCESS METHOD name declared to take a single argument of type internal, and its return type depends on the type of access method; for TABLE access methods, it must - be table_am_handler and for INDEX - access methods, it must be index_am_handler. + be table_am_handler; for INDEX + access methods, it must be index_am_handler; + and for SEQUENCE access methods, it must be + sequence_am_handler. The C-level API that the handler function must implement varies depending on the type of access method. The table access method API - is described in and the index access method - API is described in . + is described in , the index access method + API is described in and the sequence access + method API is described in . diff --git a/doc/src/sgml/ref/create_sequence.sgml b/doc/src/sgml/ref/create_sequence.sgml index 1e283f13d15c..52c6096e4ba2 100644 --- a/doc/src/sgml/ref/create_sequence.sgml +++ b/doc/src/sgml/ref/create_sequence.sgml @@ -29,6 +29,7 @@ CREATE [ { TEMPORARY | TEMP } | UNLOGGED ] SEQUENCE [ IF NOT EXISTS ] start ] [ CACHE cache ] [ OWNED BY { table_name.column_name | NONE } ] + [ USING access_method ] @@ -263,6 +264,17 @@ SELECT * FROM name; + + + USING access_method + + + The USING option specifies which sequence access + method will be used when generating the sequence numbers. The default + is local. 
+ + + diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index b757d27ebd0b..dda753e1cf25 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1173,6 +1173,23 @@ PostgreSQL documentation + + + + + Do not output commands to select sequence access methods. + With this option, all objects will be created with whichever + sequence access method is the default during restore. + + + + This option is ignored when emitting an archive (non-text) output + file. For the archive formats, you can specify the option when you + call pg_restore. + + + + diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 43fdab2d77ed..17681045996a 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -605,6 +605,17 @@ exclude database PATTERN + + + + + Do not output commands to select sequence access methods. + With this option, all objects will be created with whichever + sequence access method is the default during restore. + + + + diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index b6de497aee18..78d5cd7e6011 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -829,6 +829,17 @@ PostgreSQL documentation + + + + + Do not output commands to select sequence access methods. + With this option, all objects will be created with whichever + sequence access method is the default during restore. + + + + diff --git a/doc/src/sgml/sequenceam.sgml b/doc/src/sgml/sequenceam.sgml new file mode 100644 index 000000000000..a96170bfac03 --- /dev/null +++ b/doc/src/sgml/sequenceam.sgml @@ -0,0 +1,80 @@ + + + + Sequence Access Method Interface Definition + + + Sequence Access Method + + + sequenceam + Sequence Access Method + + + + This chapter explains the interface between the core + PostgreSQL system and sequence access + methods, which manage the operations around sequences . 
The core + system knows little about these access methods beyond what is specified here, + so it is possible to develop entirely new access method types by writing + add-on code. + + + + Each sequence access method is described by a row in the + pg_am system + catalog. The pg_am entry specifies a name and a + handler function for the sequence access method. These + entries can be created and deleted using the + and + SQL commands. + + + + A sequence access method handler function must be declared to accept a single + argument of type internal and to return the pseudo-type + sequence_am_handler. The argument is a dummy value that simply + serves to prevent handler functions from being called directly from SQL commands. + + The result of the function must be a pointer to a struct of type + SequenceAmRoutine, which contains everything that the + core code needs to know to make use of the sequence access method. The return + value needs to be of server lifetime, which is typically achieved by + defining it as a static const variable in global + scope. The SequenceAmRoutine struct, also called the + access method's API struct, defines the behavior of + the access method using callbacks. These callbacks are pointers to plain C + functions and are not visible or callable at the SQL level. All the + callbacks and their behavior are defined in the + SequenceAmRoutine structure (with comments inside + the struct defining the requirements for callbacks). Most callbacks have + wrapper functions, which are documented from the point of view of a user + (rather than an implementor) of the sequence access method. For details, + please refer to the + src/include/access/sequenceam.h file. + + + + Currently, the way a sequence access method stores data is fairly + unconstrained, and it is possible to use a predefined + Table Access Method to store sequence + data. + + + + For crash safety, a sequence access method can use + WAL, or a custom + implementation. 
+ If WAL is chosen, either + Generic WAL Records can be used, or a + Custom WAL Resource Manager can be + implemented. + + + + Any developer of a new sequence access method can refer to + the existing local implementation present in + src/backend/access/sequence/seqlocalam.c for details of + its implementation. + + diff --git a/doc/src/sgml/snowflake.sgml b/doc/src/sgml/snowflake.sgml new file mode 100644 index 000000000000..060699e7ecd1 --- /dev/null +++ b/doc/src/sgml/snowflake.sgml @@ -0,0 +1,100 @@ + + + + snowflake — sequence access method + + + snowflake + + + + snowflake provides a sequence access method based on + Snowflake IDs. + + + + A Snowflake ID (or snowflake) is a unique 64-bit identifier made of three + components: + + 41 bits for a timestamp, epoch-adjusted in milli-seconds + 10 bits for machine ID + 12 bits for a sequence number + + + + + Functions + + + + + snowflake_get(raw int8) returns record + + snowflake_get + function + + + + + + Returns a record made of the timestamp in milli-seconds, the machine ID + and the sequence number for a single snowflake ID. + + + + + + + + Configuration Parameters + + + + + snowflake.machine_id + + snowflake.machine_id configuration parameter + + + + + Machine ID assigned to the snowflake IDs used in the sequence. The + default value is 1. 
+ + + + + + + + Examples + + + This is an example of creating a snowflake sequence: + + + +CREATE SEQUENCE snowflake_seq USING snowflake; + + + + Similarly to the default sequence access method, snowflake sequences + can be queried as a table: + + + + =# SELECT * FROM snowflake_seq; + count | is_called +-------+----------- + 1 | f +(1 row) +=# SELECT to_timestamp(time_ms / 1000), machine, counter + FROM snowflake_get(nextval('snowflake_seq')); + to_timestamp | machine | counter +------------------------+---------+--------- + 2024-04-26 14:28:26+09 | 1 | 2 +(1 row) + + + + diff --git a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile index cd95eec37f14..e5900ed77af5 100644 --- a/src/backend/access/rmgrdesc/Makefile +++ b/src/backend/access/rmgrdesc/Makefile @@ -24,7 +24,7 @@ OBJS = \ relmapdesc.o \ replorigindesc.o \ rmgrdesc_utils.o \ - seqdesc.o \ + seqlocaldesc.o \ smgrdesc.o \ spgdesc.o \ standbydesc.o \ diff --git a/src/backend/access/rmgrdesc/meson.build b/src/backend/access/rmgrdesc/meson.build index 96c98e800c22..6b3b3e1e44c1 100644 --- a/src/backend/access/rmgrdesc/meson.build +++ b/src/backend/access/rmgrdesc/meson.build @@ -17,7 +17,7 @@ rmgr_desc_sources = files( 'relmapdesc.c', 'replorigindesc.c', 'rmgrdesc_utils.c', - 'seqdesc.c', + 'seqlocaldesc.c', 'smgrdesc.c', 'spgdesc.c', 'standbydesc.c', diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqlocaldesc.c similarity index 64% rename from src/backend/access/rmgrdesc/seqdesc.c rename to src/backend/access/rmgrdesc/seqlocaldesc.c index 0d289d77fcf7..6ffbcb2c4735 100644 --- a/src/backend/access/rmgrdesc/seqdesc.c +++ b/src/backend/access/rmgrdesc/seqlocaldesc.c @@ -1,44 +1,44 @@ /*------------------------------------------------------------------------- * - * seqdesc.c - * rmgr descriptor routines for commands/sequence.c + * seqlocaldesc.c + * rmgr descriptor routines for sequence/seqlocal.c * * Portions Copyright (c) 1996-2025, PostgreSQL Global 
Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * src/backend/access/rmgrdesc/seqdesc.c + * src/backend/access/rmgrdesc/seqlocaldesc.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "commands/sequence.h" +#include "access/seqlocalam.h" void -seq_desc(StringInfo buf, XLogReaderState *record) +seq_local_desc(StringInfo buf, XLogReaderState *record) { char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; - xl_seq_rec *xlrec = (xl_seq_rec *) rec; + xl_seq_local_rec *xlrec = (xl_seq_local_rec *) rec; - if (info == XLOG_SEQ_LOG) + if (info == XLOG_SEQ_LOCAL_LOG) appendStringInfo(buf, "rel %u/%u/%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber); } const char * -seq_identify(uint8 info) +seq_local_identify(uint8 info) { const char *id = NULL; switch (info & ~XLR_INFO_MASK) { - case XLOG_SEQ_LOG: - id = "LOG"; + case XLOG_SEQ_LOCAL_LOG: + id = "SEQ_LOCAL_LOG"; break; } diff --git a/src/backend/access/sequence/Makefile b/src/backend/access/sequence/Makefile index 9f9d31f5425a..62006165a15f 100644 --- a/src/backend/access/sequence/Makefile +++ b/src/backend/access/sequence/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/access/sequence top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = sequence.o +OBJS = seqlocalam.o seqlocalxlog.o sequence.o sequenceamapi.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/sequence/meson.build b/src/backend/access/sequence/meson.build index ec9ab9b7e9db..d82af34d538c 100644 --- a/src/backend/access/sequence/meson.build +++ b/src/backend/access/sequence/meson.build @@ -1,5 +1,8 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group backend_sources += files( + 'seqlocalam.c', + 'seqlocalxlog.c', 'sequence.c', + 'sequenceamapi.c', ) diff --git a/src/backend/access/sequence/seqlocalam.c b/src/backend/access/sequence/seqlocalam.c new file mode 100644 index 000000000000..5dec9d51ec82 --- /dev/null +++ b/src/backend/access/sequence/seqlocalam.c @@ -0,0 +1,657 @@ +/*------------------------------------------------------------------------- + * + * seqlocalam.c + * Local sequence access manager + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/sequence/seqlocalam.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/multixact.h" +#include "access/seqlocalam.h" +#include "access/sequenceam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "access/xlogutils.h" +#include "catalog/storage_xlog.h" +#include "commands/tablecmds.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "utils/builtins.h" + + +/* + * We don't want to log each fetching of a value from a sequence, + * so we pre-log a few fetches in advance. In the event of + * crash we can lose (skip over) as many values as we pre-logged. 
+ */ +#define SEQ_LOCAL_LOG_VALS 32 + +/* Format of tuples stored in heap table associated to local sequences */ +typedef struct FormData_pg_seq_local_data +{ + int64 last_value; + int64 log_cnt; + bool is_called; +} FormData_pg_seq_local_data; + +typedef FormData_pg_seq_local_data *Form_pg_seq_local_data; + +/* + * Columns of a local sequence relation + */ +#define SEQ_LOCAL_COL_LASTVAL 1 +#define SEQ_LOCAL_COL_LOG 2 +#define SEQ_LOCAL_COL_CALLED 3 + +#define SEQ_LOCAL_COL_FIRSTCOL SEQ_LOCAL_COL_LASTVAL +#define SEQ_LOCAL_COL_LASTCOL SEQ_LOCAL_COL_CALLED + + +/* + * We don't want to log each fetching of a value from a sequence, + * so we pre-log a few fetches in advance. In the event of + * crash we can lose (skip over) as many values as we pre-logged. + */ +#define SEQ_LOCAL_LOG_VALS 32 + +static Form_pg_seq_local_data read_seq_tuple(Relation rel, + Buffer *buf, + HeapTuple seqdatatuple); +static void fill_seq_with_data(Relation rel, HeapTuple tuple); +static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, + ForkNumber forkNum); + +/* + * Given an opened sequence relation, lock the page buffer and find the tuple + * + * *buf receives the reference to the pinned-and-ex-locked buffer + * *seqdatatuple receives the reference to the sequence tuple proper + * (this arg should point to a local variable of type HeapTupleData) + * + * Function's return value points to the data payload of the tuple + */ +static Form_pg_seq_local_data +read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) +{ + Page page; + ItemId lp; + seq_local_magic *sm; + Form_pg_seq_local_data seq; + + *buf = ReadBuffer(rel, 0); + LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(*buf); + sm = (seq_local_magic *) PageGetSpecialPointer(page); + + if (sm->magic != SEQ_LOCAL_MAGIC) + elog(ERROR, "bad magic number in sequence \"%s\": %08X", + RelationGetRelationName(rel), sm->magic); + + lp = PageGetItemId(page, FirstOffsetNumber); + Assert(ItemIdIsNormal(lp)); + + 
/* Note we currently only bother to set these two fields of *seqdatatuple */ + seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); + seqdatatuple->t_len = ItemIdGetLength(lp); + + /* + * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on + * a sequence, which would leave a non-frozen XID in the sequence tuple's + * xmax, which eventually leads to clog access failures or worse. If we + * see this has happened, clean up after it. We treat this like a hint + * bit update, ie, don't bother to WAL-log it, since we can certainly do + * this again if the update gets lost. + */ + Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); + if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) + { + HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); + seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; + seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; + MarkBufferDirtyHint(*buf, true); + } + + seq = (Form_pg_seq_local_data) GETSTRUCT(seqdatatuple); + + return seq; +} + +/* + * Initialize a sequence's relation with the specified tuple as content + * + * This handles unlogged sequences by writing to both the main and the init + * fork as necessary. 
+ */
+static void
+fill_seq_with_data(Relation rel, HeapTuple tuple)
+{
+	fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM);	/* main fork is always filled */
+
+	if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
+	{
+		SMgrRelation srel;
+
+		srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER);
+		smgrcreate(srel, INIT_FORKNUM, false);	/* unlogged: create the init fork too */
+		log_smgrcreate(&rel->rd_locator, INIT_FORKNUM);
+		fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
+		FlushRelationBuffers(rel);
+		smgrclose(srel);
+	}
+}
+
+/*
+ * Initialize a sequence's relation fork with the specified tuple as content
+ *
+ * The fork is extended by one page (expected to be block 0), the page gets
+ * the seq_local_magic special area, and the tuple is stored as its first
+ * item.  A WAL record is written when the relation needs WAL or when the
+ * init fork is being filled.
+ */
+static void
+fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
+{
+	Buffer		buf;
+	Page		page;
+	seq_local_magic *sm;
+	OffsetNumber offnum;
+
+	/* Initialize first page of relation with special magic number */
+
+	buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL,
+							EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
+	Assert(BufferGetBlockNumber(buf) == 0);
+
+	page = BufferGetPage(buf);
+
+	PageInit(page, BufferGetPageSize(buf), sizeof(seq_local_magic));
+	sm = (seq_local_magic *) PageGetSpecialPointer(page);
+	sm->magic = SEQ_LOCAL_MAGIC;
+
+	/* Now insert sequence tuple */
+
+	/*
+	 * Since VACUUM does not process sequences, we have to force the tuple to
+	 * have xmin = FrozenTransactionId now.  Otherwise it would become
+	 * invisible to SELECTs after 2G transactions.  It is okay to do this
+	 * because if the current transaction aborts, no other xact will ever
+	 * examine the sequence tuple anyway.
+	 */
+	HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
+	HeapTupleHeaderSetXminFrozen(tuple->t_data);
+	HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
+	HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+	tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+	ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
+
+	/* see the comment above the equivalent call in seq_local_nextval() */
+	if (RelationNeedsWAL(rel))
+		GetTopTransactionId();
+
+	START_CRIT_SECTION();
+
+	MarkBufferDirty(buf);
+
+	offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
+						 InvalidOffsetNumber, false, false);
+	if (offnum != FirstOffsetNumber)
+		elog(ERROR, "failed to add sequence tuple to page");
+
+	/* XLOG stuff */
+	if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM)
+	{
+		xl_seq_local_rec xlrec;
+		XLogRecPtr	recptr;
+
+		XLogBeginInsert();
+		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+		xlrec.locator = rel->rd_locator;
+
+		/* record data: fixed header first, then the whole tuple */
+		XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec));
+		XLogRegisterData(tuple->t_data, tuple->t_len);
+
+		recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG);
+
+		PageSetLSN(page, recptr);
+	}
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * seq_local_nextval()
+ *
+ * Allocate a new value for a local sequence, based on the sequence
+ * configuration.
+ *
+ * incby, maxv, minv and cycle describe the increment, the bounds and whether
+ * the sequence wraps around once a bound is reached.  Up to "cache" values
+ * are fetched in one go: the first of them is returned and the last one is
+ * stored in *last.  An ERROR is raised when a bound is reached before any
+ * value could be fetched and cycle is false.
+ */
+static int64
+seq_local_nextval(Relation rel, int64 incby, int64 maxv,
+				  int64 minv, int64 cache, bool cycle,
+				  int64 *last)
+{
+	int64		result;
+	int64		fetch;
+	int64		next;
+	int64		rescnt = 0;
+	int64		log;
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	Form_pg_seq_local_data seq;
+	Page		page;
+	bool		logit = false;
+
+	/* lock page buffer and read tuple */
+	seq = read_seq_tuple(rel, &buf, &seqdatatuple);
+	page = BufferGetPage(buf);
+
+	*last = next = result = seq->last_value;
+	fetch = cache;
+	log = seq->log_cnt;
+
+	if (!seq->is_called)
+	{
+		rescnt++;				/* return last_value if not is_called */
+		fetch--;
+	}
+
+	/*
+	 * Decide whether we should emit a WAL log record.  If so, force up the
+	 * fetch count to grab SEQ_LOCAL_LOG_VALS more values than we actually
+	 * need to cache.  (These will then be usable without logging.)
+	 *
+	 * If this is the first nextval after a checkpoint, we must force a new
+	 * WAL record to be written anyway, else replay starting from the
+	 * checkpoint would fail to advance the sequence past the logged values.
+	 * In this case we may as well fetch extra values.
+	 */
+	if (log < fetch || !seq->is_called)
+	{
+		/* forced log to satisfy local demand for values */
+		fetch = log = fetch + SEQ_LOCAL_LOG_VALS;
+		logit = true;
+	}
+	else
+	{
+		XLogRecPtr	redoptr = GetRedoRecPtr();
+
+		if (PageGetLSN(page) <= redoptr)
+		{
+			/* last update of seq was before checkpoint */
+			fetch = log = fetch + SEQ_LOCAL_LOG_VALS;
+			logit = true;
+		}
+	}
+
+	while (fetch)				/* try to fetch cache [+ log ] numbers */
+	{
+		/*
+		 * Check MAXVALUE for ascending sequences and MINVALUE for descending
+		 * sequences
+		 */
+		if (incby > 0)
+		{
+			/* ascending sequence */
+			if ((maxv >= 0 && next > maxv - incby) ||
+				(maxv < 0 && next + incby > maxv))
+			{
+				if (rescnt > 0)
+					break;		/* stop fetching */
+				if (!cycle)
+					ereport(ERROR,
+							(errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
+							 errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")",
+									RelationGetRelationName(rel),
+									maxv)));
+				next = minv;
+			}
+			else
+				next += incby;
+		}
+		else
+		{
+			/* descending sequence */
+			if ((minv < 0 && next < minv - incby) ||
+				(minv >= 0 && next + incby < minv))
+			{
+				if (rescnt > 0)
+					break;		/* stop fetching */
+				if (!cycle)
+					ereport(ERROR,
+							(errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
+							 errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")",
+									RelationGetRelationName(rel),
+									minv)));
+				next = maxv;
+			}
+			else
+				next += incby;
+		}
+		fetch--;
+		if (rescnt < cache)
+		{
+			log--;
+			rescnt++;
+			*last = next;
+			if (rescnt == 1)	/* if it's first result - */
+				result = next;	/* it's what to return */
+		}
+	}
+
+	log -= fetch;				/* adjust for any unfetched numbers */
+	Assert(log >= 0);
+
+	/*
+	 * If something needs to be WAL logged, acquire an xid, so this
+	 * transaction's commit will trigger a WAL flush and wait for syncrep.
+	 * It's sufficient to ensure the toplevel transaction has an xid, no need
+	 * to assign xids to subxacts, that'll already trigger an appropriate wait.
+	 * (Have to do that here, so we're outside the critical section)
+	 */
+	if (logit && RelationNeedsWAL(rel))
+		GetTopTransactionId();
+
+	/* ready to change the on-disk (or really, in-buffer) tuple */
+	START_CRIT_SECTION();
+
+	/*
+	 * We must mark the buffer dirty before doing XLogInsert(); see notes in
+	 * SyncOneBuffer().  However, we don't apply the desired changes just yet.
+	 * This looks like a violation of the buffer update protocol, but it is in
+	 * fact safe because we hold exclusive lock on the buffer.  Any other
+	 * process, including a checkpoint, that tries to examine the buffer
+	 * contents will block until we release the lock, and then will see the
+	 * final state that we install below.
+	 */
+	MarkBufferDirty(buf);
+
+	/* XLOG stuff */
+	if (logit && RelationNeedsWAL(rel))
+	{
+		xl_seq_local_rec xlrec;
+		XLogRecPtr	recptr;
+
+		/*
+		 * We don't log the current state of the tuple, but rather the state
+		 * as it would appear after "log" more fetches.  This lets us skip
+		 * that many future WAL records, at the cost that we lose those
+		 * sequence values if we crash.
+		 */
+		XLogBeginInsert();
+		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+		/* set values that will be saved in xlog */
+		seq->last_value = next;
+		seq->is_called = true;
+		seq->log_cnt = 0;
+
+		xlrec.locator = rel->rd_locator;
+
+		XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec));
+		XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len);
+
+		recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG);
+
+		PageSetLSN(page, recptr);
+	}
+
+	/* Now update sequence tuple to the intended final state */
+	seq->last_value = *last;	/* last fetched number */
+	seq->is_called = true;
+	seq->log_cnt = log;			/* how much is logged */
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+
+	return result;
+}
+
+/*
+ * seq_local_get_table_am()
+ *
+ * Return the table access method used by this sequence.
+ */
+static const char *
+seq_local_get_table_am(void)
+{
+	return "heap";
+}
+
+/*
+ * seq_local_init()
+ *
+ * Add the sequence attributes to the relation created for this sequence
+ * AM and insert a tuple of metadata into the sequence relation, based on
+ * the last_value and is_called given by the caller.  This is the first tuple
+ * inserted after the relation has been created, filling in its heap
+ * table.
+ *
+ * The columns added are last_value, log_cnt and is_called, all NOT NULL;
+ * log_cnt always starts at 0.
+ */
+static void
+seq_local_init(Relation rel, int64 last_value, bool is_called)
+{
+	Datum		value[SEQ_LOCAL_COL_LASTCOL];
+	bool		null[SEQ_LOCAL_COL_LASTCOL];
+	List	   *elts = NIL;
+	List	   *atcmds = NIL;
+	ListCell   *lc;
+	TupleDesc	tupdesc;
+	HeapTuple	tuple;
+
+	/*
+	 * Build the column definitions (and fill value[] and null[] for the
+	 * initial tuple).  Column numbers are 1-based, the arrays 0-based.
+	 */
+	for (int i = SEQ_LOCAL_COL_FIRSTCOL; i <= SEQ_LOCAL_COL_LASTCOL; i++)
+	{
+		ColumnDef  *coldef = NULL;
+
+		switch (i)
+		{
+			case SEQ_LOCAL_COL_LASTVAL:
+				coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid);
+				value[i - 1] = Int64GetDatumFast(last_value);
+				break;
+			case SEQ_LOCAL_COL_LOG:
+				coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid);
+				value[i - 1] = Int64GetDatum(0);	/* nothing pre-logged yet */
+				break;
+			case SEQ_LOCAL_COL_CALLED:
+				coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid);
+				value[i - 1] = BoolGetDatum(is_called);
+				break;
+		}
+
+		coldef->is_not_null = true;
+		null[i - 1] = false;
+		elts = lappend(elts, coldef);
+	}
+
+	/* Add all the attributes to the sequence */
+	foreach(lc, elts)
+	{
+		AlterTableCmd *atcmd;
+
+		atcmd = makeNode(AlterTableCmd);
+		atcmd->subtype = AT_AddColumnToSequence;
+		atcmd->def = (Node *) lfirst(lc);
+		atcmds = lappend(atcmds, atcmd);
+	}
+
+	/*
+	 * No recursion needed.  Note that EventTriggerAlterTableStart() should
+	 * have been called.
+	 */
+	AlterTableInternal(RelationGetRelid(rel), atcmds, false);
+	CommandCounterIncrement();
+
+	/* the descriptor is fetched only after the new columns became visible */
+	tupdesc = RelationGetDescr(rel);
+	tuple = heap_form_tuple(tupdesc, value, null);
+	fill_seq_with_data(rel, tuple);
+}
+
+/*
+ * seq_local_setval()
+ *
+ * Callback for setval().
+ *
+ * Overwrites last_value and is_called in place, resets log_cnt to 0 and
+ * WAL-logs the resulting tuple when the relation needs WAL.
+ *
+ * NOTE(review): unlike seq_local_nextval() and fill_seq_fork_with_data(),
+ * no GetTopTransactionId() call precedes the critical section here;
+ * presumably the caller has already assigned an xid -- TODO confirm.
+ */
+static void
+seq_local_setval(Relation rel, int64 next, bool iscalled)
+{
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	Form_pg_seq_local_data seq;
+
+	/* lock page buffer and read tuple */
+	seq = read_seq_tuple(rel, &buf, &seqdatatuple);
+
+	/* ready to change the on-disk (or really, in-buffer) tuple */
+	START_CRIT_SECTION();
+	seq->last_value = next;		/* last fetched number */
+	seq->is_called = iscalled;
+	seq->log_cnt = 0;
+
+	MarkBufferDirty(buf);
+
+	/* XLOG stuff */
+	if (RelationNeedsWAL(rel))
+	{
+		xl_seq_local_rec xlrec;
+		XLogRecPtr	recptr;
+		Page		page = BufferGetPage(buf);
+
+		XLogBeginInsert();
+		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+		xlrec.locator = rel->rd_locator;
+		XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec));
+		XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len);
+
+		recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG);
+
+		PageSetLSN(page, recptr);
+	}
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * seq_local_reset()
+ *
+ * Perform a hard reset on the local sequence, rewriting its heap data
+ * entirely.
+ *
+ * The current tuple is copied, last_value and is_called are replaced by
+ * startv and is_called, and log_cnt is zeroed only when reset_state is true.
+ * The copy is then written into a newly assigned relfilenumber.
+ */
+static void
+seq_local_reset(Relation rel, int64 startv, bool is_called, bool reset_state)
+{
+	Form_pg_seq_local_data seq;
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	HeapTuple	tuple;
+
+	/* lock buffer page and read tuple */
+	(void) read_seq_tuple(rel, &buf, &seqdatatuple);
+
+	/*
+	 * Copy the existing sequence tuple.
+	 */
+	tuple = heap_copytuple(&seqdatatuple);
+
+	/* Now we're done with the old page */
+	UnlockReleaseBuffer(buf);
+
+	/*
+	 * Modify the copied tuple to execute the restart (compare the RESTART
+	 * action in AlterSequence)
+	 */
+	seq = (Form_pg_seq_local_data) GETSTRUCT(tuple);
+	seq->last_value = startv;
+	seq->is_called = is_called;
+	if (reset_state)
+		seq->log_cnt = 0;
+
+	/*
+	 * Create a new storage file for the sequence.
+	 */
+	RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence);
+
+	/*
+	 * Ensure sequence's relfrozenxid is at 0, since it won't contain any
+	 * unfrozen XIDs.  Same with relminmxid, since a sequence will never
+	 * contain multixacts.
+	 */
+	Assert(rel->rd_rel->relfrozenxid == InvalidTransactionId);
+	Assert(rel->rd_rel->relminmxid == InvalidMultiXactId);
+
+	/*
+	 * Insert the modified tuple into the new storage file.
+	 */
+	fill_seq_with_data(rel, tuple);
+}
+
+/*
+ * seq_local_get_state()
+ *
+ * Retrieve the state of a local sequence.
+ *
+ * *last_value and *is_called receive the values currently stored in the
+ * sequence tuple; the page buffer is released before returning.
+ */
+static void
+seq_local_get_state(Relation rel, int64 *last_value, bool *is_called)
+{
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	Form_pg_seq_local_data seq;
+
+	/* lock page buffer and read tuple */
+	seq = read_seq_tuple(rel, &buf, &seqdatatuple);
+
+	*last_value = seq->last_value;
+	*is_called = seq->is_called;
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * seq_local_change_persistence()
+ *
+ * Persistence change for the local sequence Relation.
+ *
+ * The current tuple is read, a new relfilenumber with the requested
+ * persistence is assigned, and the tuple is written into the new storage.
+ * The old page's buffer stays locked until the new storage has been filled,
+ * as the tuple handed to fill_seq_with_data() still points into it.
+ */
+static void
+seq_local_change_persistence(Relation rel, char newrelpersistence)
+{
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+
+	(void) read_seq_tuple(rel, &buf, &seqdatatuple);
+	RelationSetNewRelfilenumber(rel, newrelpersistence);
+	fill_seq_with_data(rel, &seqdatatuple);
+	UnlockReleaseBuffer(buf);
+}
+
+/* ------------------------------------------------------------------------
+ * Definition of the local sequence access method.
+ * ------------------------------------------------------------------------ + */ +static const SequenceAmRoutine seq_local_methods = { + .type = T_SequenceAmRoutine, + .get_table_am = seq_local_get_table_am, + .init = seq_local_init, + .nextval = seq_local_nextval, + .setval = seq_local_setval, + .reset = seq_local_reset, + .get_state = seq_local_get_state, + .change_persistence = seq_local_change_persistence +}; + +Datum +seq_local_sequenceam_handler(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(&seq_local_methods); +} diff --git a/src/backend/access/sequence/seqlocalxlog.c b/src/backend/access/sequence/seqlocalxlog.c new file mode 100644 index 000000000000..db0ad969dbf2 --- /dev/null +++ b/src/backend/access/sequence/seqlocalxlog.c @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * seqlocalxlog.c + * WAL replay logic for local sequence access manager + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/sequence/seqlocalxlog.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/bufmask.h" +#include "access/seqlocalam.h" +#include "access/xlogutils.h" +#include "storage/block.h" + +/* + * Mask a Sequence page before performing consistency checks on it. 
+ */ +void +seq_local_mask(char *page, BlockNumber blkno) +{ + mask_page_lsn_and_checksum(page); + + mask_unused_space(page); +} + +void +seq_local_redo(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + Buffer buffer; + Page page; + Page localpage; + char *item; + Size itemsz; + xl_seq_local_rec *xlrec = (xl_seq_local_rec *) XLogRecGetData(record); + seq_local_magic *sm; + + if (info != XLOG_SEQ_LOCAL_LOG) + elog(PANIC, "seq_redo: unknown op code %u", info); + + buffer = XLogInitBufferForRedo(record, 0); + page = (Page) BufferGetPage(buffer); + + /* + * We always reinit the page. However, since this WAL record type is also + * used for updating sequences, it's possible that a hot-standby backend + * is examining the page concurrently; so we mustn't transiently trash the + * buffer. The solution is to build the correct new page contents in + * local workspace and then memcpy into the buffer. Then only bytes that + * are supposed to change will change, even transiently. We must palloc + * the local page for alignment reasons. 
+ */ + localpage = (Page) palloc(BufferGetPageSize(buffer)); + + PageInit(localpage, BufferGetPageSize(buffer), sizeof(seq_local_magic)); + sm = (seq_local_magic *) PageGetSpecialPointer(localpage); + sm->magic = SEQ_LOCAL_MAGIC; + + item = (char *) xlrec + sizeof(xl_seq_local_rec); + itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_local_rec); + + if (PageAddItem(localpage, (Item) item, itemsz, + FirstOffsetNumber, false, false) == InvalidOffsetNumber) + elog(PANIC, "seq_local_redo: failed to add item to page"); + + PageSetLSN(localpage, lsn); + + memcpy(page, localpage, BufferGetPageSize(buffer)); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + + pfree(localpage); +} diff --git a/src/backend/access/sequence/sequence.c b/src/backend/access/sequence/sequence.c index 8b5303553702..f30317d1feae 100644 --- a/src/backend/access/sequence/sequence.c +++ b/src/backend/access/sequence/sequence.c @@ -13,7 +13,8 @@ * * NOTES * This file contains sequence_ routines that implement access to sequences - * (in contrast to other relation types like indexes). + * (in contrast to other relation types like indexes) that are independent + * of individual sequence access methods. 
* *------------------------------------------------------------------------- */ diff --git a/src/backend/access/sequence/sequenceamapi.c b/src/backend/access/sequence/sequenceamapi.c new file mode 100644 index 000000000000..dd1a60d827a4 --- /dev/null +++ b/src/backend/access/sequence/sequenceamapi.c @@ -0,0 +1,145 @@ +/*------------------------------------------------------------------------- + * + * sequenceamapi.c + * general sequence access method routines + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/sequence/sequenceamapi.c + * + * + * Sequence access method allows the SQL Standard Sequence objects to be + * managed according to either the default access method or a pluggable + * replacement. Each sequence can only use one access method at a time, + * though different sequence access methods can be in use by different + * sequences at the same time. + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/xact.h" +#include "access/sequenceam.h" +#include "catalog/pg_am.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "utils/guc_hooks.h" +#include "utils/syscache.h" + + +/* GUC */ +char *default_sequence_access_method = DEFAULT_SEQUENCE_ACCESS_METHOD; + +/* + * GetSequenceAmRoutine + * Call the specified access method handler routine to get its + * SequenceAmRoutine struct, which will be palloc'd in the caller's + * memory context. 
+ */ +const SequenceAmRoutine * +GetSequenceAmRoutine(Oid amhandler) +{ + Datum datum; + SequenceAmRoutine *routine; + + datum = OidFunctionCall0(amhandler); + routine = (SequenceAmRoutine *) DatumGetPointer(datum); + + if (routine == NULL || !IsA(routine, SequenceAmRoutine)) + elog(ERROR, "sequence access method handler %u did not return a SequenceAmRoutine struct", + amhandler); + + /* + * Assert that all required callbacks are present. That makes it a bit + * easier to keep AMs up to date, e.g. when forward porting them to a new + * major version. + */ + Assert(routine->get_table_am != NULL); + Assert(routine->init != NULL); + Assert(routine->nextval != NULL); + Assert(routine->setval != NULL); + Assert(routine->reset != NULL); + Assert(routine->get_state != NULL); + Assert(routine->change_persistence != NULL); + + return routine; +} + +/* + * GetSequenceAmRoutineId + * Call pg_am and retrieve the OID of the access method handler. + */ +Oid +GetSequenceAmRoutineId(Oid amoid) +{ + Oid amhandleroid; + HeapTuple tuple; + Form_pg_am aform; + + tuple = SearchSysCache1(AMOID, + ObjectIdGetDatum(amoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", amoid); + aform = (Form_pg_am) GETSTRUCT(tuple); + Assert(aform->amtype == AMTYPE_SEQUENCE); + amhandleroid = aform->amhandler; + ReleaseSysCache(tuple); + + return amhandleroid; +} + +/* check_hook: validate new default_sequence_access_method */ +bool +check_default_sequence_access_method(char **newval, void **extra, + GucSource source) +{ + if (**newval == '\0') + { + GUC_check_errdetail("%s cannot be empty.", + "default_sequence_access_method"); + return false; + } + + if (strlen(*newval) >= NAMEDATALEN) + { + GUC_check_errdetail("%s is too long (maximum %d characters).", + "default_sequence_access_method", NAMEDATALEN - 1); + return false; + } + + /* + * If we aren't inside a transaction, or not connected to a database, we + * cannot do the catalog access necessary to verify 
the method. Must + * accept the value on faith. + */ + if (IsTransactionState() && MyDatabaseId != InvalidOid) + { + if (!OidIsValid(get_sequence_am_oid(*newval, true))) + { + /* + * When source == PGC_S_TEST, don't throw a hard error for a + * nonexistent sequence access method, only a NOTICE. See comments + * in guc.h. + */ + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("sequence access method \"%s\" does not exist", + *newval))); + } + else + { + GUC_check_errdetail("sequence access method \"%s\" does not exist.", + *newval); + return false; + } + } + } + + return true; +} diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index 1b7499726eb0..cc92268937b1 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -27,6 +27,7 @@ #include "access/gistxlog.h" #include "access/hash_xlog.h" #include "access/heapam_xlog.h" +#include "access/seqlocalam.h" #include "access/multixact.h" #include "access/nbtxlog.h" #include "access/spgxlog.h" diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index fbaed5359ad7..558e9a8f9695 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1480,9 +1480,13 @@ heap_create_with_catalog(const char *relname, * No need to add an explicit dependency for the toast table, as the * main table depends on it. Partitioned tables may not have an * access method set. + * + * Sequences and tables are created with their access method ID + * given by the caller of this function. 
*/
 		if ((RELKIND_HAS_TABLE_AM(relkind) && relkind != RELKIND_TOASTVALUE) ||
-			(relkind == RELKIND_PARTITIONED_TABLE && OidIsValid(accessmtd)))
+			(relkind == RELKIND_PARTITIONED_TABLE && OidIsValid(accessmtd)) ||
+			RELKIND_HAS_SEQUENCE_AM(relkind))
 		{
 			ObjectAddressSet(referenced, AccessMethodRelationId, accessmtd);
 			add_exact_object_address(&referenced, addrs);
diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c
index 58ed9d216cc0..e44633d13b60 100644
--- a/src/backend/commands/amcmds.c
+++ b/src/backend/commands/amcmds.c
@@ -15,6 +15,7 @@
 
 #include "access/htup_details.h"
 #include "access/table.h"
+#include "access/sequenceam.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
@@ -175,6 +176,16 @@ get_table_am_oid(const char *amname, bool missing_ok)
 	return get_am_type_oid(amname, AMTYPE_TABLE, missing_ok);
 }
 
+/*
+ * get_sequence_am_oid - given an access method name, look up its OID
+ *		and verify it corresponds to a sequence AM.
+ */
+Oid
+get_sequence_am_oid(const char *amname, bool missing_ok)
+{
+	return get_am_type_oid(amname, AMTYPE_SEQUENCE, missing_ok);
+}
+
 /*
  * get_am_oid - given an access method name, look up its OID.
  *		The type is not checked.
@@ -215,6 +226,8 @@ get_am_type_string(char amtype) { case AMTYPE_INDEX: return "INDEX"; + case AMTYPE_SEQUENCE: + return "SEQUENCE"; case AMTYPE_TABLE: return "TABLE"; default: @@ -251,6 +264,9 @@ lookup_am_handler_func(List *handler_name, char amtype) case AMTYPE_INDEX: expectedType = INDEX_AM_HANDLEROID; break; + case AMTYPE_SEQUENCE: + expectedType = SEQUENCE_AM_HANDLEROID; + break; case AMTYPE_TABLE: expectedType = TABLE_AM_HANDLEROID; break; diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 451ae6f7f694..fc2c18df4dc3 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -19,6 +19,7 @@ #include "access/multixact.h" #include "access/relation.h" #include "access/sequence.h" +#include "access/sequenceam.h" #include "access/table.h" #include "access/transam.h" #include "access/xact.h" @@ -50,23 +51,6 @@ #include "utils/varlena.h" -/* - * We don't want to log each fetching of a value from a sequence, - * so we pre-log a few fetches in advance. In the event of - * crash we can lose (skip over) as many values as we pre-logged. - */ -#define SEQ_LOG_VALS 32 - -/* - * The "special area" of a sequence's buffer page looks like this. - */ -#define SEQ_MAGIC 0x1717 - -typedef struct sequence_magic -{ - uint32 magic; -} sequence_magic; - /* * We store a SeqTable item for every sequence we have touched in the current * session. This is needed to hold onto nextval/currval state. 
(We can't @@ -96,17 +80,15 @@ static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */ */ static SeqTableData *last_used_seq = NULL; -static void fill_seq_with_data(Relation rel, HeapTuple tuple); -static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum); static Relation lock_and_open_sequence(SeqTable seq); static void create_seq_hashtable(void); static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel); -static Form_pg_sequence_data read_seq_tuple(Relation rel, - Buffer *buf, HeapTuple seqdatatuple); static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, - Form_pg_sequence_data seqdataform, + int64 *last_value, + bool *reset_state, + bool *is_called, bool *need_seq_rewrite, List **owned_by); static void do_setval(Oid relid, int64 next, bool iscalled); @@ -121,7 +103,9 @@ ObjectAddress DefineSequence(ParseState *pstate, CreateSeqStmt *seq) { FormData_pg_sequence seqform; - FormData_pg_sequence_data seqdataform; + int64 last_value; + bool reset_state; + bool is_called; bool need_seq_rewrite; List *owned_by; CreateStmt *stmt = makeNode(CreateStmt); @@ -130,11 +114,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) Relation rel; HeapTuple tuple; TupleDesc tupDesc; - Datum value[SEQ_COL_LASTCOL]; - bool null[SEQ_COL_LASTCOL]; Datum pgs_values[Natts_pg_sequence]; bool pgs_nulls[Natts_pg_sequence]; - int i; /* * If if_not_exists was given and a relation with the same name already @@ -164,57 +145,36 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) /* Check and set all option values */ init_params(pstate, seq->options, seq->for_identity, true, - &seqform, &seqdataform, + &seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); - /* - * Create relation (and fill value[] and null[] for the tuple) - */ - stmt->tableElts = NIL; - for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++) - { - ColumnDef *coldef = NULL; - - 
switch (i) - { - case SEQ_COL_LASTVAL: - coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid); - value[i - 1] = Int64GetDatumFast(seqdataform.last_value); - break; - case SEQ_COL_LOG: - coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid); - value[i - 1] = Int64GetDatum((int64) 0); - break; - case SEQ_COL_CALLED: - coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid); - value[i - 1] = BoolGetDatum(false); - break; - } - - coldef->is_not_null = true; - null[i - 1] = false; - - stmt->tableElts = lappend(stmt->tableElts, coldef); - } - stmt->relation = seq->sequence; stmt->inhRelations = NIL; stmt->constraints = NIL; stmt->options = NIL; + stmt->accessMethod = seq->accessMethod ? pstrdup(seq->accessMethod) : NULL; stmt->oncommit = ONCOMMIT_NOOP; stmt->tablespacename = NULL; stmt->if_not_exists = seq->if_not_exists; + /* + * Initial relation has no attributes, these are added later. + */ + stmt->tableElts = NIL; + + /* + * Initial relation has no attributes, these can be added later via the + * "init" AM callback. + */ + stmt->tableElts = NIL; address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL); seqoid = address.objectId; Assert(seqoid != InvalidOid); rel = sequence_open(seqoid, AccessExclusiveLock); - tupDesc = RelationGetDescr(rel); - /* now initialize the sequence's data */ - tuple = heap_form_tuple(tupDesc, value, null); - fill_seq_with_data(rel, tuple); + /* now initialize the sequence table structure and its data */ + sequence_init(rel, last_value, is_called); /* process OWNED BY if given */ if (owned_by) @@ -263,10 +223,6 @@ ResetSequence(Oid seq_relid) { Relation seq_rel; SeqTable elm; - Form_pg_sequence_data seq; - Buffer buf; - HeapTupleData seqdatatuple; - HeapTuple tuple; HeapTuple pgstuple; Form_pg_sequence pgsform; int64 startv; @@ -277,7 +233,6 @@ ResetSequence(Oid seq_relid) * indeed a sequence. 
*/ init_sequence(seq_relid, &elm, &seq_rel); - (void) read_seq_tuple(seq_rel, &buf, &seqdatatuple); pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid)); if (!HeapTupleIsValid(pgstuple)) @@ -286,40 +241,8 @@ ResetSequence(Oid seq_relid) startv = pgsform->seqstart; ReleaseSysCache(pgstuple); - /* - * Copy the existing sequence tuple. - */ - tuple = heap_copytuple(&seqdatatuple); - - /* Now we're done with the old page */ - UnlockReleaseBuffer(buf); - - /* - * Modify the copied tuple to execute the restart (compare the RESTART - * action in AlterSequence) - */ - seq = (Form_pg_sequence_data) GETSTRUCT(tuple); - seq->last_value = startv; - seq->is_called = false; - seq->log_cnt = 0; - - /* - * Create a new storage file for the sequence. - */ - RelationSetNewRelfilenumber(seq_rel, seq_rel->rd_rel->relpersistence); - - /* - * Ensure sequence's relfrozenxid is at 0, since it won't contain any - * unfrozen XIDs. Same with relminmxid, since a sequence will never - * contain multixacts. - */ - Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId); - Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId); - - /* - * Insert the modified tuple into the new storage file. - */ - fill_seq_with_data(seq_rel, tuple); + /* Sequence state is forcibly reset here. */ + sequence_reset(seq_rel, startv, false, true); /* Clear local cache so that we don't think we have cached numbers */ /* Note that we do not change the currval() state */ @@ -328,106 +251,6 @@ ResetSequence(Oid seq_relid) sequence_close(seq_rel, NoLock); } -/* - * Initialize a sequence's relation with the specified tuple as content - * - * This handles unlogged sequences by writing to both the main and the init - * fork as necessary. 
- */ -static void -fill_seq_with_data(Relation rel, HeapTuple tuple) -{ - fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM); - - if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) - { - SMgrRelation srel; - - srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER); - smgrcreate(srel, INIT_FORKNUM, false); - log_smgrcreate(&rel->rd_locator, INIT_FORKNUM); - fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM); - FlushRelationBuffers(rel); - smgrclose(srel); - } -} - -/* - * Initialize a sequence's relation fork with the specified tuple as content - */ -static void -fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) -{ - Buffer buf; - Page page; - sequence_magic *sm; - OffsetNumber offnum; - - /* Initialize first page of relation with special magic number */ - - buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); - Assert(BufferGetBlockNumber(buf) == 0); - - page = BufferGetPage(buf); - - PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(page); - sm->magic = SEQ_MAGIC; - - /* Now insert sequence tuple */ - - /* - * Since VACUUM does not process sequences, we have to force the tuple to - * have xmin = FrozenTransactionId now. Otherwise it would become - * invisible to SELECTs after 2G transactions. It is okay to do this - * because if the current transaction aborts, no other xact will ever - * examine the sequence tuple anyway. - */ - HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); - HeapTupleHeaderSetXminFrozen(tuple->t_data); - HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); - tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; - ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); - - /* check the comment above nextval_internal()'s equivalent call. 
*/ - if (RelationNeedsWAL(rel)) - GetTopTransactionId(); - - START_CRIT_SECTION(); - - MarkBufferDirty(buf); - - offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len, - InvalidOffsetNumber, false, false); - if (offnum != FirstOffsetNumber) - elog(ERROR, "failed to add sequence tuple to page"); - - /* XLOG stuff */ - if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - xlrec.locator = rel->rd_locator; - - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(tuple->t_data, tuple->t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); - - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); -} - /* * AlterSequence * @@ -439,16 +262,15 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) Oid relid; SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData datatuple; Form_pg_sequence seqform; - Form_pg_sequence_data newdataform; bool need_seq_rewrite; List *owned_by; ObjectAddress address; Relation rel; HeapTuple seqtuple; - HeapTuple newdatatuple; + bool reset_state = false; + bool is_called; + int64 last_value; /* Open and lock sequence, and check for ownership along the way. 
*/ relid = RangeVarGetRelidExtended(stmt->sequence, @@ -475,45 +297,21 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) seqform = (Form_pg_sequence) GETSTRUCT(seqtuple); - /* lock page buffer and read tuple into new sequence structure */ - (void) read_seq_tuple(seqrel, &buf, &datatuple); - - /* copy the existing sequence data tuple, so it can be modified locally */ - newdatatuple = heap_copytuple(&datatuple); - newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple); - - UnlockReleaseBuffer(buf); + /* Read sequence data */ + sequence_get_state(seqrel, &last_value, &is_called); /* Check and set new values */ init_params(pstate, stmt->options, stmt->for_identity, false, - seqform, newdataform, + seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); /* If needed, rewrite the sequence relation itself */ if (need_seq_rewrite) { - /* check the comment above nextval_internal()'s equivalent call. */ if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - /* - * Create a new storage file for the sequence, making the state - * changes transactional. - */ - RelationSetNewRelfilenumber(seqrel, seqrel->rd_rel->relpersistence); - - /* - * Ensure sequence's relfrozenxid is at 0, since it won't contain any - * unfrozen XIDs. Same with relminmxid, since a sequence will never - * contain multixacts. - */ - Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId); - Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId); - - /* - * Insert the modified tuple into the new storage file. - */ - fill_seq_with_data(seqrel, newdatatuple); + sequence_reset(seqrel, last_value, is_called, reset_state); } /* Clear local cache so that we don't think we have cached numbers */ @@ -542,8 +340,6 @@ SequenceChangePersistence(Oid relid, char newrelpersistence) { SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData seqdatatuple; /* * ALTER SEQUENCE acquires this lock earlier. 
If we're processing an @@ -558,10 +354,7 @@ SequenceChangePersistence(Oid relid, char newrelpersistence) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - (void) read_seq_tuple(seqrel, &buf, &seqdatatuple); - RelationSetNewRelfilenumber(seqrel, newrelpersistence); - fill_seq_with_data(seqrel, &seqdatatuple); - UnlockReleaseBuffer(buf); + sequence_change_persistence(seqrel, newrelpersistence); sequence_close(seqrel, NoLock); } @@ -624,24 +417,15 @@ nextval_internal(Oid relid, bool check_permissions) { SeqTable elm; Relation seqrel; - Buffer buf; - Page page; HeapTuple pgstuple; Form_pg_sequence pgsform; - HeapTupleData seqdatatuple; - Form_pg_sequence_data seq; int64 incby, maxv, minv, cache, - log, - fetch, last; - int64 result, - next, - rescnt = 0; + int64 result; bool cycle; - bool logit = false; /* open and lock sequence */ init_sequence(relid, &elm, &seqrel); @@ -686,105 +470,9 @@ nextval_internal(Oid relid, bool check_permissions) cycle = pgsform->seqcycle; ReleaseSysCache(pgstuple); - /* lock page buffer and read tuple */ - seq = read_seq_tuple(seqrel, &buf, &seqdatatuple); - page = BufferGetPage(buf); - - last = next = result = seq->last_value; - fetch = cache; - log = seq->log_cnt; - - if (!seq->is_called) - { - rescnt++; /* return last_value if not is_called */ - fetch--; - } - - /* - * Decide whether we should emit a WAL log record. If so, force up the - * fetch count to grab SEQ_LOG_VALS more values than we actually need to - * cache. (These will then be usable without logging.) - * - * If this is the first nextval after a checkpoint, we must force a new - * WAL record to be written anyway, else replay starting from the - * checkpoint would fail to advance the sequence past the logged values. - * In this case we may as well fetch extra values. 
- */ - if (log < fetch || !seq->is_called) - { - /* forced log to satisfy local demand for values */ - fetch = log = fetch + SEQ_LOG_VALS; - logit = true; - } - else - { - XLogRecPtr redoptr = GetRedoRecPtr(); - - if (PageGetLSN(page) <= redoptr) - { - /* last update of seq was before checkpoint */ - fetch = log = fetch + SEQ_LOG_VALS; - logit = true; - } - } - - while (fetch) /* try to fetch cache [+ log ] numbers */ - { - /* - * Check MAXVALUE for ascending sequences and MINVALUE for descending - * sequences - */ - if (incby > 0) - { - /* ascending sequence */ - if ((maxv >= 0 && next > maxv - incby) || - (maxv < 0 && next + incby > maxv)) - { - if (rescnt > 0) - break; /* stop fetching */ - if (!cycle) - ereport(ERROR, - (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), - errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")", - RelationGetRelationName(seqrel), - maxv))); - next = minv; - } - else - next += incby; - } - else - { - /* descending sequence */ - if ((minv < 0 && next < minv - incby) || - (minv >= 0 && next + incby < minv)) - { - if (rescnt > 0) - break; /* stop fetching */ - if (!cycle) - ereport(ERROR, - (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), - errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")", - RelationGetRelationName(seqrel), - minv))); - next = maxv; - } - else - next += incby; - } - fetch--; - if (rescnt < cache) - { - log--; - rescnt++; - last = next; - if (rescnt == 1) /* if it's first result - */ - result = next; /* it's what to return */ - } - } - - log -= fetch; /* adjust for any unfetched numbers */ - Assert(log >= 0); + /* retrieve next value from the access method */ + result = sequence_nextval(seqrel, incby, maxv, minv, cache, cycle, + &last); /* save info in local cache */ elm->increment = incby; @@ -794,69 +482,6 @@ nextval_internal(Oid relid, bool check_permissions) last_used_seq = elm; - /* - * If something needs to be WAL logged, acquire an xid, so this - * transaction's 
commit will trigger a WAL flush and wait for syncrep. - * It's sufficient to ensure the toplevel transaction has an xid, no need - * to assign xids subxacts, that'll already trigger an appropriate wait. - * (Have to do that here, so we're outside the critical section) - */ - if (logit && RelationNeedsWAL(seqrel)) - GetTopTransactionId(); - - /* ready to change the on-disk (or really, in-buffer) tuple */ - START_CRIT_SECTION(); - - /* - * We must mark the buffer dirty before doing XLogInsert(); see notes in - * SyncOneBuffer(). However, we don't apply the desired changes just yet. - * This looks like a violation of the buffer update protocol, but it is in - * fact safe because we hold exclusive lock on the buffer. Any other - * process, including a checkpoint, that tries to examine the buffer - * contents will block until we release the lock, and then will see the - * final state that we install below. - */ - MarkBufferDirty(buf); - - /* XLOG stuff */ - if (logit && RelationNeedsWAL(seqrel)) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - - /* - * We don't log the current state of the tuple, but rather the state - * as it would appear after "log" more fetches. This lets us skip - * that many future WAL records, at the cost that we lose those - * sequence values if we crash. 
- */ - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - /* set values that will be saved in xlog */ - seq->last_value = next; - seq->is_called = true; - seq->log_cnt = 0; - - xlrec.locator = seqrel->rd_locator; - - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); - - PageSetLSN(page, recptr); - } - - /* Now update sequence tuple to the intended final state */ - seq->last_value = last; /* last fetched number */ - seq->is_called = true; - seq->log_cnt = log; /* how much is logged */ - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); - sequence_close(seqrel, NoLock); return result; @@ -946,9 +571,6 @@ do_setval(Oid relid, int64 next, bool iscalled) { SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData seqdatatuple; - Form_pg_sequence_data seq; HeapTuple pgstuple; Form_pg_sequence pgsform; int64 maxv, @@ -982,9 +604,6 @@ do_setval(Oid relid, int64 next, bool iscalled) */ PreventCommandIfParallelMode("setval()"); - /* lock page buffer and read tuple */ - seq = read_seq_tuple(seqrel, &buf, &seqdatatuple); - if ((next < minv) || (next > maxv)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), @@ -1006,37 +625,8 @@ do_setval(Oid relid, int64 next, bool iscalled) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - /* ready to change the on-disk (or really, in-buffer) tuple */ - START_CRIT_SECTION(); - - seq->last_value = next; /* last fetched number */ - seq->is_called = iscalled; - seq->log_cnt = 0; - - MarkBufferDirty(buf); - - /* XLOG stuff */ - if (RelationNeedsWAL(seqrel)) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - Page page = BufferGetPage(buf); - - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - xlrec.locator = seqrel->rd_locator; - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); 
- - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); + /* Call the access method callback */ + sequence_setval(seqrel, next, iscalled); sequence_close(seqrel, NoLock); } @@ -1177,76 +767,22 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel) } -/* - * Given an opened sequence relation, lock the page buffer and find the tuple - * - * *buf receives the reference to the pinned-and-ex-locked buffer - * *seqdatatuple receives the reference to the sequence tuple proper - * (this arg should point to a local variable of type HeapTupleData) - * - * Function's return value points to the data payload of the tuple - */ -static Form_pg_sequence_data -read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) -{ - Page page; - ItemId lp; - sequence_magic *sm; - Form_pg_sequence_data seq; - - *buf = ReadBuffer(rel, 0); - LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); - - page = BufferGetPage(*buf); - sm = (sequence_magic *) PageGetSpecialPointer(page); - - if (sm->magic != SEQ_MAGIC) - elog(ERROR, "bad magic number in sequence \"%s\": %08X", - RelationGetRelationName(rel), sm->magic); - - lp = PageGetItemId(page, FirstOffsetNumber); - Assert(ItemIdIsNormal(lp)); - - /* Note we currently only bother to set these two fields of *seqdatatuple */ - seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); - seqdatatuple->t_len = ItemIdGetLength(lp); - - /* - * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on - * a sequence, which would leave a non-frozen XID in the sequence tuple's - * xmax, which eventually leads to clog access failures or worse. If we - * see this has happened, clean up after it. We treat this like a hint - * bit update, ie, don't bother to WAL-log it, since we can certainly do - * this again if the update gets lost. 
- */ - Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) - { - HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); - seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; - seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; - MarkBufferDirtyHint(*buf, true); - } - - seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple); - - return seq; -} - /* * init_params: process the options list of CREATE or ALTER SEQUENCE, and * store the values into appropriate fields of seqform, for changes that go - * into the pg_sequence catalog, and fields of seqdataform for changes to the - * sequence relation itself. Set *need_seq_rewrite to true if we changed any - * parameters that require rewriting the sequence's relation (interesting for - * ALTER SEQUENCE). Also set *owned_by to any OWNED BY option, or to NIL if - * there is none. + * into the pg_sequence catalog, and fields for changes to the sequence + * relation itself (is_called, last_value or any state it may hold). Set + * *need_seq_rewrite to true if we changed any parameters that require + * rewriting the sequence's relation (interesting for ALTER SEQUENCE). Also + * set *owned_by to any OWNED BY option, or to NIL if there is none. Set + * *reset_state if the internal state of the sequence needs to change on a + * follow-up nextval(). * * If isInit is true, fill any unspecified options with default values; * otherwise, do not change existing options that aren't explicitly overridden. * * Note: we force a sequence rewrite whenever we change parameters that affect - * generation of future sequence values, even if the seqdataform per se is not + * generation of future sequence values, even if the metadata per se is not * changed. This allows ALTER SEQUENCE to behave transactionally. Currently, * the only option that doesn't cause that is OWNED BY. 
It's *necessary* for * ALTER SEQUENCE OWNED BY to not rewrite the sequence, because that would @@ -1257,7 +793,9 @@ static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, - Form_pg_sequence_data seqdataform, + int64 *last_value, + bool *reset_state, + bool *is_called, bool *need_seq_rewrite, List **owned_by) { @@ -1363,11 +901,11 @@ init_params(ParseState *pstate, List *options, bool for_identity, } /* - * We must reset log_cnt when isInit or when changing any parameters that - * would affect future nextval allocations. + * We must reset the state when isInit or when changing any parameters + * that would affect future nextval allocations. */ if (isInit) - seqdataform->log_cnt = 0; + *reset_state = true; /* AS type */ if (as_type != NULL) @@ -1416,7 +954,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("INCREMENT must not be zero"))); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { @@ -1428,7 +966,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, { seqform->seqcycle = boolVal(is_cycled->arg); Assert(BoolIsValid(seqform->seqcycle)); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { @@ -1439,7 +977,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (max_value != NULL && max_value->arg) { seqform->seqmax = defGetInt64(max_value); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit || max_value != NULL || reset_max_value) { @@ -1455,7 +993,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, } else seqform->seqmax = -1; /* descending seq */ - seqdataform->log_cnt = 0; + *reset_state = true; } /* Validate maximum value. 
No need to check INT8 as seqmax is an int64 */ @@ -1471,7 +1009,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (min_value != NULL && min_value->arg) { seqform->seqmin = defGetInt64(min_value); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit || min_value != NULL || reset_min_value) { @@ -1487,7 +1025,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, } else seqform->seqmin = 1; /* ascending seq */ - seqdataform->log_cnt = 0; + *reset_state = true; } /* Validate minimum value. No need to check INT8 as seqmin is an int64 */ @@ -1538,30 +1076,30 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (restart_value != NULL) { if (restart_value->arg != NULL) - seqdataform->last_value = defGetInt64(restart_value); + *last_value = defGetInt64(restart_value); else - seqdataform->last_value = seqform->seqstart; - seqdataform->is_called = false; - seqdataform->log_cnt = 0; + *last_value = seqform->seqstart; + *is_called = false; + *reset_state = true; } else if (isInit) { - seqdataform->last_value = seqform->seqstart; - seqdataform->is_called = false; + *last_value = seqform->seqstart; + *is_called = false; } /* crosscheck RESTART (or current value, if changing MIN/MAX) */ - if (seqdataform->last_value < seqform->seqmin) + if (*last_value < seqform->seqmin) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%" PRId64 ") cannot be less than MINVALUE (%" PRId64 ")", - seqdataform->last_value, + *last_value, seqform->seqmin))); - if (seqdataform->last_value > seqform->seqmax) + if (*last_value > seqform->seqmax) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%" PRId64 ") cannot be greater than MAXVALUE (%" PRId64 ")", - seqdataform->last_value, + *last_value, seqform->seqmax))); /* CACHE */ @@ -1573,7 +1111,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("CACHE 
(%" PRId64 ") must be greater than zero", seqform->seqcache))); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { @@ -1813,16 +1351,13 @@ pg_get_sequence_data(PG_FUNCTION_ARGS) !RELATION_IS_OTHER_TEMP(seqrel) && (RelationIsPermanent(seqrel) || !RecoveryInProgress())) { - Buffer buf; - HeapTupleData seqtuple; - Form_pg_sequence_data seq; - - seq = read_seq_tuple(seqrel, &buf, &seqtuple); + bool is_called; + int64 last_value; - values[0] = Int64GetDatum(seq->last_value); - values[1] = BoolGetDatum(seq->is_called); + sequence_get_state(seqrel, &last_value, &is_called); - UnlockReleaseBuffer(buf); + values[0] = Int64GetDatum(last_value); + values[1] = BoolGetDatum(is_called); } else memset(isnull, true, sizeof(isnull)); @@ -1866,17 +1401,9 @@ pg_sequence_last_value(PG_FUNCTION_ARGS) !RELATION_IS_OTHER_TEMP(seqrel) && (RelationIsPermanent(seqrel) || !RecoveryInProgress())) { - Buffer buf; - HeapTupleData seqtuple; - Form_pg_sequence_data seq; - - seq = read_seq_tuple(seqrel, &buf, &seqtuple); - - is_called = seq->is_called; - result = seq->last_value; - - UnlockReleaseBuffer(buf); + sequence_get_state(seqrel, &result, &is_called); } + sequence_close(seqrel, NoLock); if (is_called) @@ -1885,57 +1412,6 @@ pg_sequence_last_value(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } - -void -seq_redo(XLogReaderState *record) -{ - XLogRecPtr lsn = record->EndRecPtr; - uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; - Buffer buffer; - Page page; - Page localpage; - char *item; - Size itemsz; - xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record); - sequence_magic *sm; - - if (info != XLOG_SEQ_LOG) - elog(PANIC, "seq_redo: unknown op code %u", info); - - buffer = XLogInitBufferForRedo(record, 0); - page = (Page) BufferGetPage(buffer); - - /* - * We always reinit the page. 
However, since this WAL record type is also - * used for updating sequences, it's possible that a hot-standby backend - * is examining the page concurrently; so we mustn't transiently trash the - * buffer. The solution is to build the correct new page contents in - * local workspace and then memcpy into the buffer. Then only bytes that - * are supposed to change will change, even transiently. We must palloc - * the local page for alignment reasons. - */ - localpage = (Page) palloc(BufferGetPageSize(buffer)); - - PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(localpage); - sm->magic = SEQ_MAGIC; - - item = (char *) xlrec + sizeof(xl_seq_rec); - itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec); - - if (PageAddItem(localpage, (Item) item, itemsz, - FirstOffsetNumber, false, false) == InvalidOffsetNumber) - elog(PANIC, "seq_redo: failed to add item to page"); - - PageSetLSN(localpage, lsn); - - memcpy(page, localpage, BufferGetPageSize(buffer)); - MarkBufferDirty(buffer); - UnlockReleaseBuffer(buffer); - - pfree(localpage); -} - /* * Flush cached sequence information. */ @@ -1950,14 +1426,3 @@ ResetSequenceCaches(void) last_used_seq = NULL; } - -/* - * Mask a Sequence page before performing consistency checks on it. 
- */ -void -seq_mask(char *page, BlockNumber blkno) -{ - mask_page_lsn_and_checksum(page); - - mask_unused_space(page); -} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 2705cf11330d..5fa8928119f9 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -23,6 +23,7 @@ #include "access/reloptions.h" #include "access/relscan.h" #include "access/sysattr.h" +#include "access/sequenceam.h" #include "access/tableam.h" #include "access/toast_compression.h" #include "access/xact.h" @@ -1025,14 +1026,18 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, } /* - * For relations with table AM and partitioned tables, select access - * method to use: an explicitly indicated one, or (in the case of a + * For relations with table AM, partitioned tables or sequences, select + * access method to use: an explicitly indicated one, or (in the case of a * partitioned table) the parent's, if it has one. */ if (stmt->accessMethod != NULL) { - Assert(RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE); - accessMethodId = get_table_am_oid(stmt->accessMethod, false); + Assert(RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE || + RELKIND_HAS_SEQUENCE_AM(relkind)); + if (RELKIND_HAS_SEQUENCE_AM(relkind)) + accessMethodId = get_sequence_am_oid(stmt->accessMethod, false); + else + accessMethodId = get_table_am_oid(stmt->accessMethod, false); } else if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE) { @@ -1045,6 +1050,10 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, if (RELKIND_HAS_TABLE_AM(relkind) && !OidIsValid(accessMethodId)) accessMethodId = get_table_am_oid(default_table_access_method, false); } + else if (RELKIND_HAS_SEQUENCE_AM(relkind)) + { + accessMethodId = get_sequence_am_oid(default_sequence_access_method, false); + } /* * Create the relation. 
Inherited defaults and CHECK constraints are @@ -4646,6 +4655,7 @@ AlterTableGetLockLevel(List *cmds) * Subcommands that may be visible to concurrent SELECTs */ case AT_DropColumn: /* change visible to SELECT */ + case AT_AddColumnToSequence: /* CREATE SEQUENCE */ case AT_AddColumnToView: /* CREATE VIEW */ case AT_DropOids: /* used to equiv to DropColumn */ case AT_EnableAlwaysRule: /* may change SELECT rules */ @@ -4941,6 +4951,13 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, /* Recursion occurs during execution phase */ pass = AT_PASS_ADD_COL; break; + case AT_AddColumnToSequence: /* add column via CREATE SEQUENCE */ + ATSimplePermissions(cmd->subtype, rel, ATT_SEQUENCE); + ATPrepAddColumn(wqueue, rel, recurse, recursing, false, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_ADD_COL; + break; case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ ATSimplePermissions(cmd->subtype, rel, ATT_VIEW); ATPrepAddColumn(wqueue, rel, recurse, recursing, true, cmd, @@ -5373,6 +5390,7 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, switch (cmd->subtype) { case AT_AddColumn: /* ADD COLUMN */ + case AT_AddColumnToSequence: /* add column via CREATE SEQUENCE */ case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ address = ATExecAddColumn(wqueue, tab, rel, &cmd, cmd->recurse, false, @@ -6588,6 +6606,7 @@ alter_table_type_to_string(AlterTableType cmdtype) switch (cmdtype) { case AT_AddColumn: + case AT_AddColumnToSequence: case AT_AddColumnToView: return "ADD COLUMN"; case AT_ColumnDefault: diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile index 77ddb9ca53f1..64d4dccc936f 100644 --- a/src/backend/nodes/Makefile +++ b/src/backend/nodes/Makefile @@ -48,6 +48,7 @@ node_headers = \ access/amapi.h \ access/cmptype.h \ access/sdir.h \ + access/sequenceam.h \ access/tableam.h \ access/tsmapi.h \ commands/event_trigger.h \ diff --git a/src/backend/nodes/gen_node_support.pl 
b/src/backend/nodes/gen_node_support.pl index 77659b0f7602..5e4ff23cf1e4 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -60,6 +60,7 @@ sub elem access/amapi.h access/cmptype.h access/sdir.h + access/sequenceam.h access/tableam.h access/tsmapi.h commands/event_trigger.h @@ -84,6 +85,7 @@ sub elem nodes/execnodes.h access/amapi.h access/sdir.h + access/sequenceam.h access/tableam.h access/tsmapi.h commands/event_trigger.h diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 3c4268b271a4..3a287c7f87a9 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -381,6 +381,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <str> copy_file_name access_method_clause attr_name table_access_method_clause name cursor_name file_name + sequence_access_method_clause cluster_index_specification %type <list> func_name handler_name qual_Op qual_all_Op subquery_Op @@ -4941,23 +4942,26 @@ RefreshMatViewStmt: CreateSeqStmt: CREATE OptTemp SEQUENCE qualified_name OptSeqOptList + sequence_access_method_clause { CreateSeqStmt *n = makeNode(CreateSeqStmt); - $4->relpersistence = $2; n->sequence = $4; n->options = $5; + n->accessMethod = $6; n->ownerId = InvalidOid; n->if_not_exists = false; $$ = (Node *) n; } | CREATE OptTemp SEQUENCE IF_P NOT EXISTS qualified_name OptSeqOptList + sequence_access_method_clause { CreateSeqStmt *n = makeNode(CreateSeqStmt); $7->relpersistence = $2; n->sequence = $7; n->options = $8; + n->accessMethod = $9; n->ownerId = InvalidOid; n->if_not_exists = true; $$ = (Node *) n; @@ -4994,6 +4998,11 @@ OptParenthesizedSeqOptList: '(' SeqOptList ')' { $$ = $2; } | /*EMPTY*/ { $$ = NIL; } ; +sequence_access_method_clause: + USING name { $$ = $2; } + | /*EMPTY*/ { $$ = NULL; } + ; + SeqOptList: SeqOptElem { $$ = list_make1($1); } | SeqOptList SeqOptElem { $$ = lappend($1, $2); } ; @@ -5997,6 +6006,7 @@ CreateAmStmt: CREATE ACCESS METHOD name TYPE_P 
am_type HANDLER handler_name am_type: INDEX { $$ = AMTYPE_INDEX; } + | SEQUENCE { $$ = AMTYPE_SEQUENCE; } | TABLE { $$ = AMTYPE_TABLE; } ; diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 62015431fdf1..9ba0febe63c4 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -26,6 +26,7 @@ #include "access/htup_details.h" #include "access/relation.h" #include "access/reloptions.h" +#include "access/sequenceam.h" #include "access/table.h" #include "access/toast_compression.h" #include "catalog/dependency.h" @@ -518,6 +519,7 @@ generateSerialExtraStmts(CreateStmtContext *cxt, ColumnDef *column, seqstmt->sequence = makeRangeVar(snamespace, sname, -1); seqstmt->sequence->relpersistence = seqpersistence; seqstmt->options = seqoptions; + seqstmt->accessMethod = NULL; /* * If a sequence data type was specified, add it to the options. Prepend diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 25fe3d580166..0629dc5387c0 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1667,7 +1667,11 @@ ProcessUtilitySlow(ParseState *pstate, break; case T_CreateSeqStmt: + EventTriggerAlterTableStart(parsetree); address = DefineSequence(pstate, (CreateSeqStmt *) parsetree); + /* stashed internally */ + commandCollected = true; + EventTriggerAlterTableEnd(); break; case T_AlterSeqStmt: diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c index 317a1f2b282f..68f160dda7d3 100644 --- a/src/backend/utils/adt/pseudotypes.c +++ b/src/backend/utils/adt/pseudotypes.c @@ -369,6 +369,7 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(language_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(fdw_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(index_am_handler); +PSEUDOTYPE_DUMMY_IO_FUNCS(sequence_am_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(tsm_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(internal); PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement); diff --git 
a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 68ff67de549a..2f6c0a12f3e3 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -34,6 +34,7 @@ #include "access/multixact.h" #include "access/parallel.h" #include "access/reloptions.h" +#include "access/sequenceam.h" #include "access/sysattr.h" #include "access/table.h" #include "access/tableam.h" @@ -64,6 +65,7 @@ #include "catalog/pg_type.h" #include "catalog/schemapg.h" #include "catalog/storage.h" +#include "commands/defrem.h" #include "commands/policy.h" #include "commands/publicationcmds.h" #include "commands/trigger.h" @@ -302,6 +304,7 @@ static void RelationParseRelOptions(Relation relation, HeapTuple tuple); static void RelationBuildTupleDesc(Relation relation); static Relation RelationBuildDesc(Oid targetRelId, bool insertIt); static void RelationInitPhysicalAddr(Relation relation); +static void RelationInitSequenceAccessMethod(Relation relation); static void load_critical_index(Oid indexoid, Oid heapoid); static TupleDesc GetPgClassDescriptor(void); static TupleDesc GetPgIndexDescriptor(void); @@ -1225,8 +1228,7 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) if (relation->rd_rel->relkind == RELKIND_INDEX || relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) RelationInitIndexAccessInfo(relation); - else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) || - relation->rd_rel->relkind == RELKIND_SEQUENCE) + else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind)) RelationInitTableAccessMethod(relation); else if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { @@ -1235,6 +1237,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) * inherit. 
*/ } + else if (RELKIND_HAS_SEQUENCE_AM(relation->rd_rel->relkind)) + RelationInitSequenceAccessMethod(relation); else Assert(relation->rd_rel->relam == InvalidOid); @@ -1831,17 +1835,9 @@ RelationInitTableAccessMethod(Relation relation) HeapTuple tuple; Form_pg_am aform; - if (relation->rd_rel->relkind == RELKIND_SEQUENCE) - { - /* - * Sequences are currently accessed like heap tables, but it doesn't - * seem prudent to show that in the catalog. So just overwrite it - * here. - */ - Assert(relation->rd_rel->relam == InvalidOid); - relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER; - } - else if (IsCatalogRelation(relation)) + Assert(RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind)); + + if (IsCatalogRelation(relation)) { /* * Avoid doing a syscache lookup for catalog tables. @@ -1872,6 +1868,49 @@ RelationInitTableAccessMethod(Relation relation) InitTableAmRoutine(relation); } +/* + * Initialize sequence-access-method support data for a sequence relation + */ +static void +RelationInitSequenceAccessMethod(Relation relation) +{ + HeapTuple tuple; + Form_pg_am aform; + const char *tableam_name; + Oid tableam_oid; + Oid tableam_handler; + + Assert(RELKIND_HAS_SEQUENCE_AM(relation->rd_rel->relkind)); + + /* + * Look up the sequence access method, save the OID of its handler + * function. + */ + Assert(relation->rd_rel->relam != InvalidOid); + relation->rd_amhandler = GetSequenceAmRoutineId(relation->rd_rel->relam); + + /* + * Now we can fetch the sequence AM's API struct. + */ + relation->rd_sequenceam = GetSequenceAmRoutine(relation->rd_amhandler); + + /* + * From the sequence AM, set its expected table access method. 
+ */ + tableam_name = sequence_get_table_am(relation); + tableam_oid = get_table_am_oid(tableam_name, false); + + tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(tableam_oid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", + tableam_oid); + aform = (Form_pg_am) GETSTRUCT(tuple); + tableam_handler = aform->amhandler; + ReleaseSysCache(tuple); + + relation->rd_tableam = GetTableAmRoutine(tableam_handler); +} + /* * formrdesc * @@ -3711,14 +3750,17 @@ RelationBuildLocalRelation(const char *relname, rel->rd_rel->relam = accessmtd; /* - * RelationInitTableAccessMethod will do syscache lookups, so we mustn't - * run it in CacheMemoryContext. Fortunately, the remaining steps don't - * require a long-lived current context. + * RelationInitTableAccessMethod() and RelationInitSequenceAccessMethod() + * will do syscache lookups, so we mustn't run them in CacheMemoryContext. + * Fortunately, the remaining steps don't require a long-lived current + * context. 
*/ MemoryContextSwitchTo(oldcxt); - if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_SEQUENCE) + if (RELKIND_HAS_TABLE_AM(relkind)) RelationInitTableAccessMethod(rel); + else if (relkind == RELKIND_SEQUENCE) + RelationInitSequenceAccessMethod(rel); /* * Leave index access method uninitialized, because the pg_index row has @@ -4343,13 +4385,21 @@ RelationCacheInitializePhase3(void) /* Reload tableam data if needed */ if (relation->rd_tableam == NULL && - (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) || relation->rd_rel->relkind == RELKIND_SEQUENCE)) + (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))) { RelationInitTableAccessMethod(relation); Assert(relation->rd_tableam != NULL); restart = true; } + else if (relation->rd_sequenceam == NULL && + relation->rd_rel->relkind == RELKIND_SEQUENCE) + { + RelationInitSequenceAccessMethod(relation); + Assert(relation->rd_sequenceam != NULL); + + restart = true; + } /* Release hold on the relation */ RelationDecrementReferenceCount(relation); @@ -6422,8 +6472,10 @@ load_relcache_init_file(bool shared) nailed_rels++; /* Load table AM data */ - if (RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind) || rel->rd_rel->relkind == RELKIND_SEQUENCE) + if (RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind)) RelationInitTableAccessMethod(rel); + else if (rel->rd_rel->relkind == RELKIND_SEQUENCE) + RelationInitSequenceAccessMethod(rel); Assert(rel->rd_index == NULL); Assert(rel->rd_indextuple == NULL); @@ -6435,6 +6487,7 @@ load_relcache_init_file(bool shared) Assert(rel->rd_supportinfo == NULL); Assert(rel->rd_indoption == NULL); Assert(rel->rd_indcollation == NULL); + Assert(rel->rd_sequenceam == NULL); Assert(rel->rd_opcoptions == NULL); } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 2f8cbd867599..daa550113a1f 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -33,6 +33,7 @@ #include "access/gin.h" #include "access/slru.h" #include 
"access/toast_compression.h" +#include "access/sequenceam.h" #include "access/twophase.h" #include "access/xlog_internal.h" #include "access/xlogprefetcher.h" @@ -4341,6 +4342,17 @@ struct config_string ConfigureNamesString[] = check_default_table_access_method, NULL, NULL }, + { + {"default_sequence_access_method", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the default sequence access method for new sequences."), + NULL, + GUC_IS_NAME + }, + &default_sequence_access_method, + DEFAULT_SEQUENCE_ACCESS_METHOD, + check_default_sequence_access_method, NULL, NULL + }, + { {"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the default tablespace to create tables and indexes in."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 34826d01380b..911528bd0da6 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -754,6 +754,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate # error #search_path = '"$user", public' # schema names #row_security = on +#default_sequence_access_method = 'seqlocal' #default_table_access_method = 'heap' #default_tablespace = '' # a tablespace name, '' uses the default #default_toast_compression = 'pglz' # 'pglz' or 'lz4' diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index af0007fb6d2f..62d86d69db45 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -97,6 +97,7 @@ typedef struct _restoreOptions { int createDB; /* Issue commands to create the database */ int noOwner; /* Don't try to match original object owner */ + int noSequenceAm; /* Don't issue sequence-AM-related commands */ int noTableAm; /* Don't issue table-AM-related commands */ int noTablespace; /* Don't issue tablespace-related commands */ int disable_triggers; /* disable triggers during data-only @@ -190,6 +191,7 @@ typedef struct _dumpOptions int 
no_unlogged_table_data; int serializable_deferrable; int disable_triggers; + int outputNoSequenceAm; int outputNoTableAm; int outputNoTablespaces; int use_setsessauth; diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index afa42337b110..3d63d8f6f6b5 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -181,6 +181,7 @@ dumpOptionsFromRestoreOptions(RestoreOptions *ropt) dopt->outputSuperuser = ropt->superuser; dopt->outputCreateDB = ropt->createDB; dopt->outputNoOwner = ropt->noOwner; + dopt->outputNoSequenceAm = ropt->noSequenceAm; dopt->outputNoTableAm = ropt->noTableAm; dopt->outputNoTablespaces = ropt->noTablespace; dopt->disable_triggers = ropt->disable_triggers; @@ -1246,6 +1247,7 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId, newToc->tag = pg_strdup(opts->tag); newToc->namespace = opts->namespace ? pg_strdup(opts->namespace) : NULL; newToc->tablespace = opts->tablespace ? pg_strdup(opts->tablespace) : NULL; + newToc->sequenceam = opts->sequenceam ? pg_strdup(opts->sequenceam) : NULL; newToc->tableam = opts->tableam ? pg_strdup(opts->tableam) : NULL; newToc->relkind = opts->relkind; newToc->owner = opts->owner ? 
pg_strdup(opts->owner) : NULL; @@ -2405,6 +2407,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, AH->currUser = NULL; /* unknown */ AH->currSchema = NULL; /* ditto */ + AH->currSequenceAm = NULL; /* ditto */ AH->currTablespace = NULL; /* ditto */ AH->currTableAm = NULL; /* ditto */ @@ -2672,6 +2675,7 @@ WriteToc(ArchiveHandle *AH) WriteStr(AH, te->copyStmt); WriteStr(AH, te->namespace); WriteStr(AH, te->tablespace); + WriteStr(AH, te->sequenceam); WriteStr(AH, te->tableam); WriteInt(AH, te->relkind); WriteStr(AH, te->owner); @@ -2776,6 +2780,9 @@ ReadToc(ArchiveHandle *AH) if (AH->version >= K_VERS_1_10) te->tablespace = ReadStr(AH); + if (AH->version >= K_VERS_1_17) + te->sequenceam = ReadStr(AH); + if (AH->version >= K_VERS_1_14) te->tableam = ReadStr(AH); @@ -3462,6 +3469,9 @@ _reconnectToDB(ArchiveHandle *AH, const char *dbname) free(AH->currSchema); AH->currSchema = NULL; + free(AH->currSequenceAm); + AH->currSequenceAm = NULL; + free(AH->currTableAm); AH->currTableAm = NULL; @@ -3624,6 +3634,57 @@ _selectTablespace(ArchiveHandle *AH, const char *tablespace) destroyPQExpBuffer(qry); } +/* + * Set the proper default_sequence_access_method value for the sequence. 
+ */ +static void +_selectSequenceAccessMethod(ArchiveHandle *AH, const char *sequenceam) +{ + RestoreOptions *ropt = AH->public.ropt; + PQExpBuffer cmd; + const char *want, + *have; + + /* do nothing in --no-sequence-access-method mode */ + if (ropt->noSequenceAm) + return; + + have = AH->currSequenceAm; + want = sequenceam; + + if (!want) + return; + + if (have && strcmp(want, have) == 0) + return; + + cmd = createPQExpBuffer(); + appendPQExpBuffer(cmd, + "SET default_sequence_access_method = %s;", + fmtId(want)); + + if (RestoringToDB(AH)) + { + PGresult *res; + + res = PQexec(AH->connection, cmd->data); + + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + warn_or_exit_horribly(AH, + "could not set default_sequence_access_method: %s", + PQerrorMessage(AH->connection)); + + PQclear(res); + } + else + ahprintf(AH, "%s\n\n", cmd->data); + + destroyPQExpBuffer(cmd); + + free(AH->currSequenceAm); + AH->currSequenceAm = pg_strdup(want); +} + /* * Set the proper default_table_access_method value for the table. 
*/ @@ -3833,6 +3894,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) _becomeOwner(AH, te); _selectOutputSchema(AH, te->namespace); _selectTablespace(AH, te->tablespace); + _selectSequenceAccessMethod(AH, te->sequenceam); if (te->relkind != RELKIND_PARTITIONED_TABLE) _selectTableAccessMethod(AH, te->tableam); @@ -4389,6 +4451,8 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list) AH->currUser = NULL; free(AH->currSchema); AH->currSchema = NULL; + free(AH->currSequenceAm); + AH->currSequenceAm = NULL; free(AH->currTablespace); AH->currTablespace = NULL; free(AH->currTableAm); @@ -5128,6 +5192,7 @@ CloneArchive(ArchiveHandle *AH) clone->connCancel = NULL; clone->currUser = NULL; clone->currSchema = NULL; + clone->currSequenceAm = NULL; clone->currTableAm = NULL; clone->currTablespace = NULL; @@ -5187,6 +5252,7 @@ DeCloneArchive(ArchiveHandle *AH) /* Clear any connection-local state */ free(AH->currUser); free(AH->currSchema); + free(AH->currSequenceAm); free(AH->currTablespace); free(AH->currTableAm); free(AH->savedPassword); diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 365073b3eae4..89b5cec22ac1 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -71,10 +71,11 @@ #define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0) /* BLOB METADATA entries * and multiple BLOBS, * relkind */ +#define K_VERS_1_17 MAKE_ARCHIVE_VERSION(1, 17, 0) /* add sequenceam */ /* Current archive version number (the format we can output) */ #define K_VERS_MAJOR 1 -#define K_VERS_MINOR 16 +#define K_VERS_MINOR 17 #define K_VERS_REV 0 #define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV) @@ -323,6 +324,7 @@ struct _archiveHandle /* these vars track state to avoid sending redundant SET commands */ char *currUser; /* current username, or NULL if unknown */ char *currSchema; /* current schema, or NULL */ + char *currSequenceAm; /* current 
sequence access method, or NULL */ char *currTablespace; /* current tablespace, or NULL */ char *currTableAm; /* current table access method, or NULL */ @@ -358,6 +360,7 @@ struct _tocEntry char *namespace; /* null or empty string if not in a schema */ char *tablespace; /* null if not in a tablespace; empty string * means use database default */ + char *sequenceam; /* sequence access method, only for SEQUENCE tags */ char *tableam; /* table access method, only for TABLE tags */ char relkind; /* relation kind, only for TABLE tags */ char *owner; @@ -404,6 +407,7 @@ typedef struct _archiveOpts const char *tag; const char *namespace; const char *tablespace; + const char *sequenceam; const char *tableam; char relkind; const char *owner; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e2e7975b34e0..543a1d958436 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -134,6 +134,7 @@ typedef struct int64 cache; /* cache size */ int64 last_value; /* last value of sequence */ bool is_called; /* whether nextval advances before returning */ + char *seqam; /* access method of sequence */ } SequenceItem; typedef enum OidOptions @@ -495,6 +496,7 @@ main(int argc, char **argv) {"if-exists", no_argument, &dopt.if_exists, 1}, {"inserts", no_argument, NULL, 9}, {"lock-wait-timeout", required_argument, NULL, 2}, + {"no-sequence-access-method", no_argument, &dopt.outputNoSequenceAm, 1}, {"no-table-access-method", no_argument, &dopt.outputNoTableAm, 1}, {"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1}, {"quote-all-identifiers", no_argument, &quote_all_identifiers, 1}, @@ -1182,6 +1184,7 @@ main(int argc, char **argv) ropt->superuser = dopt.outputSuperuser; ropt->createDB = dopt.outputCreateDB; ropt->noOwner = dopt.outputNoOwner; + ropt->noSequenceAm = dopt.outputNoSequenceAm; ropt->noTableAm = dopt.outputNoTableAm; ropt->noTablespace = dopt.outputNoTablespaces; ropt->disable_triggers = dopt.disable_triggers; @@ -1303,6 +1306,7 @@ 
help(const char *progname) printf(_(" --no-security-labels do not dump security label assignments\n")); printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); + printf(_(" --no-sequence-access-method do not dump sequence access methods\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); printf(_(" --no-toast-compression do not dump TOAST compression methods\n")); @@ -14084,6 +14088,9 @@ dumpAccessMethod(Archive *fout, const AccessMethodInfo *aminfo) case AMTYPE_INDEX: appendPQExpBufferStr(q, "TYPE INDEX "); break; + case AMTYPE_SEQUENCE: + appendPQExpBufferStr(q, "TYPE SEQUENCE "); + break; case AMTYPE_TABLE: appendPQExpBufferStr(q, "TYPE TABLE "); break; @@ -18492,26 +18499,40 @@ collectSequences(Archive *fout) * * Since version 18, we can gather the sequence data in this query with * pg_get_sequence_data(), but we only do so for non-schema-only dumps. + * + * Access methods for sequences are supported since version 18. 
*/ if (fout->remoteVersion < 100000) return; - else if (fout->remoteVersion < 180000 || - (!fout->dopt->dumpData && !fout->dopt->sequence_data)) + else if (fout->remoteVersion < 180000) query = "SELECT seqrelid, format_type(seqtypid, NULL), " "seqstart, seqincrement, " "seqmax, seqmin, " "seqcache, seqcycle, " - "NULL, 'f' " + "NULL, 'f', NULL " "FROM pg_catalog.pg_sequence " "ORDER BY seqrelid"; + else if (!fout->dopt->dumpData && !fout->dopt->sequence_data) + query = "SELECT s.seqrelid, format_type(s.seqtypid, NULL), " + "s.seqstart, s.seqincrement, " + "s.seqmax, s.seqmin, " + "s.seqcache, s.seqcycle, " + "NULL, 'f', a.amname AS seqam " + "FROM pg_catalog.pg_sequence s " + "JOIN pg_class c ON (c.oid = s.seqrelid) " + "JOIN pg_am a ON (a.oid = c.relam) " + "ORDER BY seqrelid"; else - query = "SELECT seqrelid, format_type(seqtypid, NULL), " - "seqstart, seqincrement, " - "seqmax, seqmin, " - "seqcache, seqcycle, " - "last_value, is_called " - "FROM pg_catalog.pg_sequence, " - "pg_get_sequence_data(seqrelid) " + query = "SELECT s.seqrelid, format_type(s.seqtypid, NULL), " + "s.seqstart, s.seqincrement, " + "s.seqmax, s.seqmin, " + "s.seqcache, s.seqcycle, " + "r.last_value, r.is_called, " + "a.amname AS seqam " + "FROM pg_catalog.pg_sequence s " + "JOIN pg_class c ON (c.oid = s.seqrelid) " + "JOIN pg_am a ON (a.oid = c.relam), " + "pg_get_sequence_data(s.seqrelid) r " "ORDER BY seqrelid;"; res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK); @@ -18531,6 +18552,10 @@ collectSequences(Archive *fout) sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0); sequences[i].last_value = strtoi64(PQgetvalue(res, i, 8), NULL, 10); sequences[i].is_called = (strcmp(PQgetvalue(res, i, 9), "t") == 0); + if (!PQgetisnull(res, i, 10)) + sequences[i].seqam = pg_strdup(PQgetvalue(res, i, 10)); + else + sequences[i].seqam = NULL; } PQclear(res); @@ -18602,6 +18627,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) seq->minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10); 
seq->cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10); seq->cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0); + seq->seqam = NULL; PQclear(res); } @@ -18724,6 +18750,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) ARCHIVE_OPTS(.tag = tbinfo->dobj.name, .namespace = tbinfo->dobj.namespace->dobj.name, .owner = tbinfo->rolname, + .sequenceam = seq->seqam, .description = "SEQUENCE", .section = SECTION_PRE_DATA, .createStmt = query->data, diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 946a6d0fafc6..f8c6a74320ad 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -93,6 +93,7 @@ static int disable_dollar_quoting = 0; static int disable_triggers = 0; static int if_exists = 0; static int inserts = 0; +static int no_sequence_access_method = 0; static int no_table_access_method = 0; static int no_tablespaces = 0; static int use_setsessauth = 0; @@ -164,6 +165,7 @@ main(int argc, char *argv[]) {"if-exists", no_argument, &if_exists, 1}, {"inserts", no_argument, &inserts, 1}, {"lock-wait-timeout", required_argument, NULL, 2}, + {"no-sequence-access-method", no_argument, &no_sequence_access_method, 1}, {"no-table-access-method", no_argument, &no_table_access_method, 1}, {"no-tablespaces", no_argument, &no_tablespaces, 1}, {"quote-all-identifiers", no_argument, &quote_all_identifiers, 1}, @@ -465,6 +467,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --disable-triggers"); if (inserts) appendPQExpBufferStr(pgdumpopts, " --inserts"); + if (no_sequence_access_method) + appendPQExpBufferStr(pgdumpopts, " --no-sequence-access-method"); if (no_table_access_method) appendPQExpBufferStr(pgdumpopts, " --no-table-access-method"); if (no_tablespaces) @@ -738,6 +742,7 @@ help(void) printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); + printf(_(" 
--no-sequence-access-method do not dump sequence access methods\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); printf(_(" --no-toast-compression do not dump TOAST compression methods\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index f2182e918256..73826fed9803 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -98,6 +98,7 @@ main(int argc, char **argv) static int enable_row_security = 0; static int if_exists = 0; static int no_data_for_failed_tables = 0; + static int outputNoSequenceAm = 0; static int outputNoTableAm = 0; static int outputNoTablespaces = 0; static int use_setsessauth = 0; @@ -154,6 +155,7 @@ main(int argc, char **argv) {"enable-row-security", no_argument, &enable_row_security, 1}, {"if-exists", no_argument, &if_exists, 1}, {"no-data-for-failed-tables", no_argument, &no_data_for_failed_tables, 1}, + {"no-sequence-access-method", no_argument, &outputNoSequenceAm, 1}, {"no-table-access-method", no_argument, &outputNoTableAm, 1}, {"no-tablespaces", no_argument, &outputNoTablespaces, 1}, {"role", required_argument, NULL, 2}, @@ -459,6 +461,7 @@ main(int argc, char **argv) opts->disable_triggers = disable_triggers; opts->enable_row_security = enable_row_security; opts->noDataForFailedTables = no_data_for_failed_tables; + opts->noSequenceAm = outputNoSequenceAm; opts->noTableAm = outputNoTableAm; opts->noTablespace = outputNoTablespaces; opts->use_setsessauth = use_setsessauth; @@ -702,6 +705,7 @@ usage(const char *progname) printf(_(" --no-security-labels do not restore security labels\n")); printf(_(" --no-statistics do not restore statistics\n")); printf(_(" --no-subscriptions do not restore subscriptions\n")); + printf(_(" --no-sequence-access-method do not restore sequence access methods\n")); printf(_(" --no-table-access-method do not restore table access methods\n")); printf(_(" 
--no-tablespaces do not restore tablespace assignments\n")); printf(_(" --section=SECTION restore named section (pre-data, data, or post-data)\n")); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 55d892d9c162..6c913d568b44 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -604,6 +604,13 @@ 'postgres', ], }, + no_sequence_access_method => { + dump_cmd => [ + 'pg_dump', '--no-sync', + "--file=$tempdir/no_sequence_access_method.sql", + '--no-sequence-access-method', 'postgres', + ], + }, no_table_access_method => { dump_cmd => [ 'pg_dump', '--no-sync', @@ -822,6 +829,7 @@ no_policies => 1, no_privs => 1, no_statistics => 1, + no_sequence_access_method => 1, no_table_access_method => 1, pg_dumpall_dbprivs => 1, pg_dumpall_exclude => 1, @@ -4773,6 +4781,18 @@ }, }, + 'CREATE ACCESS METHOD regress_test_sequence_am' => { + create_order => 11, + create_sql => + 'CREATE ACCESS METHOD regress_sequence_am TYPE SEQUENCE HANDLER seq_local_sequenceam_handler;', + regexp => qr/^ + \QCREATE ACCESS METHOD regress_sequence_am TYPE SEQUENCE HANDLER seq_local_sequenceam_handler;\E + \n/xm, + like => { + %full_runs, section_pre_data => 1, + }, + }, + # It's a bit tricky to ensure that the proper SET of default table # AM occurs. To achieve that we create a table with the standard # AM, test AM, standard AM. That guarantees that there needs to be @@ -4801,6 +4821,35 @@ }, }, + + # This uses the same trick as for materialized views and tables, + # but this time with a sequence access method, checking that a + # correct set of SET queries are created. 
+ 'CREATE SEQUENCE regress_pg_dump_seq_am' => { + create_order => 12, + create_sql => ' + CREATE SEQUENCE dump_test.regress_pg_dump_seq_am_0 USING seqlocal; + CREATE SEQUENCE dump_test.regress_pg_dump_seq_am_1 USING regress_sequence_am; + CREATE SEQUENCE dump_test.regress_pg_dump_seq_am_2 USING seqlocal;', + regexp => qr/^ + \QSET default_sequence_access_method = regress_sequence_am;\E + (\n(?!SET[^;]+;)[^\n]*)* + \n\QCREATE SEQUENCE dump_test.regress_pg_dump_seq_am_1\E + \n\s+\QSTART WITH 1\E + \n\s+\QINCREMENT BY 1\E + \n\s+\QNO MINVALUE\E + \n\s+\QNO MAXVALUE\E + \n\s+\QCACHE 1;\E\n/xm, + like => { + %full_runs, %dump_test_schema_runs, section_pre_data => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + no_sequence_access_method => 1, + only_dump_measurement => 1, + }, + }, + 'CREATE MATERIALIZED VIEW regress_pg_dump_matview_am' => { create_order => 13, create_sql => ' diff --git a/src/bin/pg_waldump/.gitignore b/src/bin/pg_waldump/.gitignore index ec51f41c767e..8d1195de2637 100644 --- a/src/bin/pg_waldump/.gitignore +++ b/src/bin/pg_waldump/.gitignore @@ -10,13 +10,13 @@ /gistdesc.c /hashdesc.c /heapdesc.c +/seqlocaldesc.c /logicalmsgdesc.c /mxactdesc.c /nbtdesc.c /relmapdesc.c /replorigindesc.c /rmgrdesc_utils.c -/seqdesc.c /smgrdesc.c /spgdesc.c /standbydesc.c diff --git a/src/bin/pg_waldump/rmgrdesc.c b/src/bin/pg_waldump/rmgrdesc.c index fac509ed134e..2fcf9fc4392a 100644 --- a/src/bin/pg_waldump/rmgrdesc.c +++ b/src/bin/pg_waldump/rmgrdesc.c @@ -16,6 +16,7 @@ #include "access/gistxlog.h" #include "access/hash_xlog.h" #include "access/heapam_xlog.h" +#include "access/seqlocalam.h" #include "access/multixact.h" #include "access/nbtxlog.h" #include "access/rmgr.h" diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl index f26d75e01cfd..2495a28d26a1 100644 --- a/src/bin/pg_waldump/t/001_basic.pl +++ b/src/bin/pg_waldump/t/001_basic.pl @@ -67,7 +67,7 @@ Hash Gin Gist -Sequence +SequenceLocal SPGist BRIN CommitTs diff --git 
a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 1d08268393e3..4c7a9446b96c 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -168,10 +168,12 @@ describeAccessMethods(const char *pattern, bool verbose) "SELECT amname AS \"%s\",\n" " CASE amtype" " WHEN " CppAsString2(AMTYPE_INDEX) " THEN '%s'" + " WHEN " CppAsString2(AMTYPE_SEQUENCE) " THEN '%s'" " WHEN " CppAsString2(AMTYPE_TABLE) " THEN '%s'" " END AS \"%s\"", gettext_noop("Name"), gettext_noop("Index"), + gettext_noop("Sequence"), gettext_noop("Table"), gettext_noop("Type")); diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index c916b9299a80..5f8a5989adc5 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -2562,7 +2562,7 @@ match_previous_words(int pattern_id, else if (Matches("ALTER", "SEQUENCE", MatchAny)) COMPLETE_WITH("AS", "INCREMENT", "MINVALUE", "MAXVALUE", "RESTART", "START", "NO", "CACHE", "CYCLE", "SET", "OWNED BY", - "OWNER TO", "RENAME TO"); + "OWNER TO", "RENAME TO", "USING"); /* ALTER SEQUENCE <name> AS */ else if (TailMatches("ALTER", "SEQUENCE", MatchAny, "AS")) COMPLETE_WITH_CS("smallint", "integer", "bigint"); @@ -3605,7 +3605,7 @@ match_previous_words(int pattern_id, else if (TailMatches("CREATE", "SEQUENCE", MatchAny) || TailMatches("CREATE", "TEMP|TEMPORARY", "SEQUENCE", MatchAny)) COMPLETE_WITH("AS", "INCREMENT BY", "MINVALUE", "MAXVALUE", "NO", - "CACHE", "CYCLE", "OWNED BY", "START WITH"); + "CACHE", "CYCLE", "OWNED BY", "START WITH", "USING"); else if (TailMatches("CREATE", "SEQUENCE", MatchAny, "AS") || TailMatches("CREATE", "TEMP|TEMPORARY", "SEQUENCE", MatchAny, "AS")) COMPLETE_WITH_CS("smallint", "integer", "bigint"); diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index 8e7fc9db8778..b942b25350bc 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -40,7 +40,7 @@ PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog
PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask, NULL) PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask, NULL) PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask, NULL) -PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask, NULL) +PG_RMGR(RM_SEQ_LOCAL_ID, "SequenceLocal", seq_local_redo, seq_local_desc, seq_local_identify, NULL, NULL, seq_local_mask, NULL) PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask, NULL) PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask, NULL) PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL, NULL) diff --git a/src/include/access/seqlocalam.h b/src/include/access/seqlocalam.h new file mode 100644 index 000000000000..21936511ac2b --- /dev/null +++ b/src/include/access/seqlocalam.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * + * seqlocalam.h + * Local sequence access method. + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/seqlocalam.h + * + *------------------------------------------------------------------------- + */ +#ifndef SEQLOCALAM_H +#define SEQLOCALAM_H + +#include "access/xlogreader.h" +#include "storage/relfilelocator.h" + +/* XLOG stuff */ +#define XLOG_SEQ_LOCAL_LOG 0x00 + +typedef struct xl_seq_local_rec +{ + RelFileLocator locator; + /* SEQUENCE TUPLE DATA FOLLOWS AT THE END */ +} xl_seq_local_rec; + +/* + * The "special area" of a local sequence's buffer page looks like this. 
+ */ +#define SEQ_LOCAL_MAGIC 0x1717 + +typedef struct seq_local_magic +{ + uint32 magic; +} seq_local_magic; + +extern void seq_local_redo(XLogReaderState *record); +extern void seq_local_desc(StringInfo buf, XLogReaderState *record); +extern const char *seq_local_identify(uint8 info); +extern void seq_local_mask(char *page, BlockNumber blkno); + +#endif /* SEQLOCALAM_H */ diff --git a/src/include/access/sequenceam.h b/src/include/access/sequenceam.h new file mode 100644 index 000000000000..ac48c8b468be --- /dev/null +++ b/src/include/access/sequenceam.h @@ -0,0 +1,181 @@ +/*------------------------------------------------------------------------- + * + * sequenceam.h + * POSTGRES sequence access method definitions. + * + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/sequenceam.h + * + * NOTES + * See sequenceam.sgml for higher level documentation. + * + *------------------------------------------------------------------------- + */ +#ifndef SEQUENCEAM_H +#define SEQUENCEAM_H + +#include "utils/rel.h" + +#define DEFAULT_SEQUENCE_ACCESS_METHOD "seqlocal" + +/* GUCs */ +extern PGDLLIMPORT char *default_sequence_access_method; + +/* + * API struct for a sequence AM. Note this must be allocated in a + * server-lifetime manner, typically as a static const struct, which then gets + * returned by FormData_pg_am.amhandler. + * + * In most cases it's not appropriate to call the callbacks directly, use the + * sequence_* wrapper functions instead. + * + * GetSequenceAmRoutine() asserts that required callbacks are filled in, + * remember to update when adding a callback. + */ +typedef struct SequenceAmRoutine +{ + /* this must be set to T_SequenceAmRoutine */ + NodeTag type; + + /* + * Retrieve table access method used by a sequence to store its metadata. 
+ */ + const char *(*get_table_am) (void); + + /* + * Initialize sequence after creating a sequence Relation in pg_class, + * setting up the sequence for use. "last_value" and "is_called" are + * guessed from the options set for the sequence in CREATE SEQUENCE, based + * on the configuration of pg_sequence. + */ + void (*init) (Relation rel, int64 last_value, bool is_called); + + /* + * Retrieve a result for nextval(), based on the options retrieved from + * the sequence's options in pg_sequence. "last" is the last value + * calculated, stored in the session's local cache, for lastval(). + */ + int64 (*nextval) (Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last); + + /* + * Callback to set the state of a sequence, based on the input arguments + * from setval(). + */ + void (*setval) (Relation rel, int64 next, bool iscalled); + + /* + * Reset a sequence to its initial value. "reset_state", if set to true, + * means that the sequence parameters have changed, hence its internal + * state may need to be reset as well. "startv" and "is_called" are + * values guessed from the configuration of the sequence, based on the + * contents of pg_sequence. + */ + void (*reset) (Relation rel, int64 startv, bool is_called, + bool reset_state); + + /* + * Returns the current state of a sequence, returning data for + * pg_sequence_last_value() and related DDLs like ALTER SEQUENCE. + * "last_value" and "is_called" should be assigned to the values retrieved + * from the sequence Relation. + */ + void (*get_state) (Relation rel, int64 *last_value, bool *is_called); + + /* + * Callback used when switching persistence of a sequence Relation, to + * reset the sequence based on its new persistence "newrelpersistence". + */ + void (*change_persistence) (Relation rel, char newrelpersistence); + +} SequenceAmRoutine; + + +/* --------------------------------------------------------------------------- + * Wrapper functions for each callback. 
+ * --------------------------------------------------------------------------- + */ + +/* + * Returns the name of the table access method used by this sequence. + */ +static inline const char * +sequence_get_table_am(Relation rel) +{ + return rel->rd_sequenceam->get_table_am(); +} + +/* + * Insert tuple data based on the information guessed from the contents + * of pg_sequence. + */ +static inline void +sequence_init(Relation rel, int64 last_value, bool is_called) +{ + rel->rd_sequenceam->init(rel, last_value, is_called); +} + +/* + * Allocate a set of values for the given sequence. "last" is the last value + * allocated. The result returned is the next value of the sequence computed. + */ +static inline int64 +sequence_nextval(Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last) +{ + return rel->rd_sequenceam->nextval(rel, incby, maxv, minv, cache, + cycle, last); +} + +/* + * Callback to set the state of a sequence, based on the input arguments + * from setval(). + */ +static inline void +sequence_setval(Relation rel, int64 next, bool iscalled) +{ + rel->rd_sequenceam->setval(rel, next, iscalled); +} + +/* + * Reset a sequence to its initial state. + */ +static inline void +sequence_reset(Relation rel, int64 startv, bool is_called, + bool reset_state) +{ + rel->rd_sequenceam->reset(rel, startv, is_called, reset_state); +} + +/* + * Retrieve sequence metadata. + */ +static inline void +sequence_get_state(Relation rel, int64 *last_value, bool *is_called) +{ + rel->rd_sequenceam->get_state(rel, last_value, is_called); +} + +/* + * Callback to change the persistence of a sequence Relation. 
+ */ +static inline void +sequence_change_persistence(Relation rel, char newrelpersistence) +{ + rel->rd_sequenceam->change_persistence(rel, newrelpersistence); +} + +/* ---------------------------------------------------------------------------- + * Functions in sequenceamapi.c + * ---------------------------------------------------------------------------- + */ + +extern const SequenceAmRoutine *GetSequenceAmRoutine(Oid amhandler); +extern Oid GetSequenceAmRoutineId(Oid amoid); + +#endif /* SEQUENCEAM_H */ diff --git a/src/include/catalog/pg_am.dat b/src/include/catalog/pg_am.dat index 26d15928a155..8f076fcec958 100644 --- a/src/include/catalog/pg_am.dat +++ b/src/include/catalog/pg_am.dat @@ -15,6 +15,9 @@ { oid => '2', oid_symbol => 'HEAP_TABLE_AM_OID', descr => 'heap table access method', amname => 'heap', amhandler => 'heap_tableam_handler', amtype => 't' }, +{ oid => '8051', oid_symbol => 'LOCAL_SEQUENCE_AM_OID', + descr => 'local sequence access method', + amname => 'seqlocal', amhandler => 'seq_local_sequenceam_handler', amtype => 's' }, { oid => '403', oid_symbol => 'BTREE_AM_OID', descr => 'b-tree index access method', amname => 'btree', amhandler => 'bthandler', amtype => 'i' }, diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 6e98a0930c27..080bea5031a9 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -59,6 +59,7 @@ MAKE_SYSCACHE(AMOID, pg_am_oid_index, 4); * Allowed values for amtype */ #define AMTYPE_INDEX 'i' /* index access method */ +#define AMTYPE_SEQUENCE 's' /* sequence access method */ #define AMTYPE_TABLE 't' /* table access method */ #endif /* EXPOSE_TO_CLIENT_CODE */ diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 07d182da796a..012a2863c037 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -231,6 +231,12 @@ MAKE_SYSCACHE(RELNAMENSP, pg_class_relname_nsp_index, 128); (relkind) == RELKIND_TOASTVALUE || \ (relkind) == 
RELKIND_MATVIEW) +/* + * Relation kinds with a sequence access method (rd_sequenceam). + */ +#define RELKIND_HAS_SEQUENCE_AM(relkind) \ + ((relkind) == RELKIND_SEQUENCE) + #endif /* EXPOSE_TO_CLIENT_CODE */ extern int errdetail_relkind_not_supported(char relkind); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 62beb71da288..191489fcfcca 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -913,6 +913,12 @@ prorettype => 'table_am_handler', proargtypes => 'internal', prosrc => 'heap_tableam_handler' }, +# Sequence access method handlers +{ oid => '8209', descr => 'local sequence access method handler', + proname => 'seq_local_sequenceam_handler', provolatile => 'v', + prorettype => 'sequence_am_handler', proargtypes => 'internal', + prosrc => 'seq_local_sequenceam_handler' }, + # Index access method handlers { oid => '330', descr => 'btree index access method handler', proname => 'bthandler', provolatile => 'v', prorettype => 'index_am_handler', @@ -7882,6 +7888,13 @@ { oid => '327', descr => 'I/O', proname => 'index_am_handler_out', prorettype => 'cstring', proargtypes => 'index_am_handler', prosrc => 'index_am_handler_out' }, +{ oid => '8207', descr => 'I/O', + proname => 'sequence_am_handler_in', proisstrict => 'f', + prorettype => 'sequence_am_handler', proargtypes => 'cstring', + prosrc => 'sequence_am_handler_in' }, +{ oid => '8208', descr => 'I/O', + proname => 'sequence_am_handler_out', prorettype => 'cstring', + proargtypes => 'sequence_am_handler', prosrc => 'sequence_am_handler_out' }, { oid => '3311', descr => 'I/O', proname => 'tsm_handler_in', proisstrict => 'f', prorettype => 'tsm_handler', proargtypes => 'cstring', prosrc => 'tsm_handler_in' }, diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 6dca77e0a22f..c42531b25533 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -627,6 +627,12 @@ typcategory => 'P', 
typinput => 'index_am_handler_in', typoutput => 'index_am_handler_out', typreceive => '-', typsend => '-', typalign => 'i' }, +{ oid => '8210', + descr => 'pseudo-type for the result of a sequence AM handler function', + typname => 'sequence_am_handler', typlen => '4', typbyval => 't', + typtype => 'p', typcategory => 'P', typinput => 'sequence_am_handler_in', + typoutput => 'sequence_am_handler_out', typreceive => '-', typsend => '-', + typalign => 'i' }, { oid => '3310', descr => 'pseudo-type for the result of a tablesample method function', typname => 'tsm_handler', typlen => '4', typbyval => 't', typtype => 'p', diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index dd22b5efdfd9..6790728aced3 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -145,6 +145,7 @@ extern Datum transformGenericOptions(Oid catalogId, extern ObjectAddress CreateAccessMethod(CreateAmStmt *stmt); extern Oid get_index_am_oid(const char *amname, bool missing_ok); extern Oid get_table_am_oid(const char *amname, bool missing_ok); +extern Oid get_sequence_am_oid(const char *amname, bool missing_ok); extern Oid get_am_oid(const char *amname, bool missing_ok); extern char *get_am_name(Oid amOid); diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h index 9ac0b67683d3..7693e9941fc9 100644 --- a/src/include/commands/sequence.h +++ b/src/include/commands/sequence.h @@ -22,35 +22,6 @@ #include "storage/relfilelocator.h" -typedef struct FormData_pg_sequence_data -{ - int64 last_value; - int64 log_cnt; - bool is_called; -} FormData_pg_sequence_data; - -typedef FormData_pg_sequence_data *Form_pg_sequence_data; - -/* - * Columns of a sequence relation - */ - -#define SEQ_COL_LASTVAL 1 -#define SEQ_COL_LOG 2 -#define SEQ_COL_CALLED 3 - -#define SEQ_COL_FIRSTCOL SEQ_COL_LASTVAL -#define SEQ_COL_LASTCOL SEQ_COL_CALLED - -/* XLOG stuff */ -#define XLOG_SEQ_LOG 0x00 - -typedef struct xl_seq_rec -{ - RelFileLocator locator; - 
/* SEQUENCE TUPLE DATA FOLLOWS AT THE END */ -} xl_seq_rec; - extern int64 nextval_internal(Oid relid, bool check_permissions); extern Datum nextval(PG_FUNCTION_ARGS); extern List *sequence_options(Oid relid); @@ -62,9 +33,4 @@ extern void DeleteSequenceTuple(Oid relid); extern void ResetSequence(Oid seq_relid); extern void ResetSequenceCaches(void); -extern void seq_redo(XLogReaderState *record); -extern void seq_desc(StringInfo buf, XLogReaderState *record); -extern const char *seq_identify(uint8 info); -extern void seq_mask(char *page, BlockNumber blkno); - #endif /* SEQUENCE_H */ diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build index d1ca24dd32f0..b1c4155c9a91 100644 --- a/src/include/nodes/meson.build +++ b/src/include/nodes/meson.build @@ -10,6 +10,7 @@ node_support_input_i = [ 'access/amapi.h', 'access/cmptype.h', 'access/sdir.h', + 'access/sequenceam.h', 'access/tableam.h', 'access/tsmapi.h', 'commands/event_trigger.h', diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4610fc61293b..79465fe4c54d 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2407,6 +2407,7 @@ typedef struct AlterTableStmt typedef enum AlterTableType { AT_AddColumn, /* add column */ + AT_AddColumnToSequence, /* implicitly via CREATE SEQUENCE */ AT_AddColumnToView, /* implicitly via CREATE OR REPLACE VIEW */ AT_ColumnDefault, /* alter column default */ AT_CookedColumnDefault, /* add a pre-cooked column default */ @@ -3215,6 +3216,7 @@ typedef struct CreateSeqStmt List *options; Oid ownerId; /* ID of owner, or InvalidOid for default */ bool for_identity; + char *accessMethod; /* USING name of access method (e.g. seqlocal) */ bool if_not_exists; /* just do nothing if it already exists? 
*/ } CreateSeqStmt; diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h index 799fa7ace684..82bb81561671 100644 --- a/src/include/utils/guc_hooks.h +++ b/src/include/utils/guc_hooks.h @@ -55,6 +55,8 @@ extern bool check_log_connections(char **newval, void **extra, GucSource source) extern void assign_log_connections(const char *newval, void *extra); extern bool check_default_table_access_method(char **newval, void **extra, GucSource source); +extern bool check_default_sequence_access_method(char **newval, void **extra, + GucSource source); extern bool check_default_tablespace(char **newval, void **extra, GucSource source); extern bool check_default_text_search_config(char **newval, void **extra, GucSource source); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index b552359915f1..4e418968253d 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -188,6 +188,11 @@ typedef struct RelationData */ const struct TableAmRoutine *rd_tableam; + /* + * Sequence access method. 
+ */ + const struct SequenceAmRoutine *rd_sequenceam; + /* These are non-NULL only for an index relation: */ Form_pg_index rd_index; /* pg_index tuple describing this index */ /* use "struct" here to avoid needing to include htup.h: */ diff --git a/src/test/modules/test_ddl_deparse/expected/alter_table.out b/src/test/modules/test_ddl_deparse/expected/alter_table.out index 50d0354a3417..ed31059ef584 100644 --- a/src/test/modules/test_ddl_deparse/expected/alter_table.out +++ b/src/test/modules/test_ddl_deparse/expected/alter_table.out @@ -25,7 +25,10 @@ NOTICE: DDL test: type simple, tag CREATE TABLE CREATE TABLE grandchild () INHERITS (child); NOTICE: DDL test: type simple, tag CREATE TABLE ALTER TABLE parent ADD COLUMN b serial; -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence parent_b_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence parent_b_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence parent_b_seq NOTICE: DDL test: type alter table, tag ALTER TABLE NOTICE: subcommand: type ADD COLUMN (and recurse) desc column b of table parent NOTICE: subcommand: type ADD CONSTRAINT (and recurse) desc constraint parent_b_not_null on table parent @@ -70,7 +73,10 @@ ALTER TABLE parent ALTER COLUMN a SET NOT NULL; NOTICE: DDL test: type alter table, tag ALTER TABLE NOTICE: subcommand: type SET NOT NULL (and recurse) desc constraint parent_a_not_null on table parent ALTER TABLE parent ALTER COLUMN a ADD GENERATED ALWAYS AS IDENTITY; -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence parent_a_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence parent_a_seq +NOTICE: subcommand: type ADD COLUMN TO 
SEQUENCE desc column is_called of sequence parent_a_seq NOTICE: DDL test: type simple, tag ALTER SEQUENCE NOTICE: DDL test: type alter table, tag ALTER TABLE NOTICE: subcommand: type ADD IDENTITY (and recurse) desc column a of table parent diff --git a/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out b/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out index 5837ea484e40..310ce5a6baf5 100644 --- a/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out +++ b/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out @@ -8,4 +8,7 @@ CREATE SEQUENCE fkey_table_seq START 10 CACHE 10 CYCLE; -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence fkey_table_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence fkey_table_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence fkey_table_seq diff --git a/src/test/modules/test_ddl_deparse/expected/create_table.out b/src/test/modules/test_ddl_deparse/expected/create_table.out index 14915f661a89..527c67995a94 100644 --- a/src/test/modules/test_ddl_deparse/expected/create_table.out +++ b/src/test/modules/test_ddl_deparse/expected/create_table.out @@ -50,9 +50,18 @@ CREATE TABLE datatype_table ( PRIMARY KEY (id), UNIQUE (id_big) ); -NOTICE: DDL test: type simple, tag CREATE SEQUENCE -NOTICE: DDL test: type simple, tag CREATE SEQUENCE -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence datatype_table_id_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence datatype_table_id_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence datatype_table_id_seq +NOTICE: DDL test: type alter table, 
tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence datatype_table_id_big_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence datatype_table_id_big_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence datatype_table_id_big_seq +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence datatype_table_is_small_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence datatype_table_is_small_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence datatype_table_is_small_seq NOTICE: DDL test: type simple, tag CREATE TABLE NOTICE: DDL test: type simple, tag CREATE INDEX NOTICE: DDL test: type simple, tag CREATE INDEX diff --git a/src/test/modules/test_ddl_deparse/test_ddl_deparse.c b/src/test/modules/test_ddl_deparse/test_ddl_deparse.c index 193669f2bc1e..254fdf90c79a 100644 --- a/src/test/modules/test_ddl_deparse/test_ddl_deparse.c +++ b/src/test/modules/test_ddl_deparse/test_ddl_deparse.c @@ -113,6 +113,9 @@ get_altertable_subcmdinfo(PG_FUNCTION_ARGS) case AT_AddColumn: strtype = "ADD COLUMN"; break; + case AT_AddColumnToSequence: + strtype = "ADD COLUMN TO SEQUENCE"; + break; case AT_AddColumnToView: strtype = "ADD COLUMN TO VIEW"; break; diff --git a/src/test/regress/expected/create_am.out b/src/test/regress/expected/create_am.out index c1a951572512..784870e603d1 100644 --- a/src/test/regress/expected/create_am.out +++ b/src/test/regress/expected/create_am.out @@ -163,11 +163,6 @@ CREATE VIEW tableam_view_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; ERROR: syntax error at or near "USING" LINE 1: CREATE VIEW tableam_view_heap2 USING heap2 AS SELECT * FROM ... 
^ --- CREATE SEQUENCE doesn't support USING -CREATE SEQUENCE tableam_seq_heap2 USING heap2; -ERROR: syntax error at or near "USING" -LINE 1: CREATE SEQUENCE tableam_seq_heap2 USING heap2; - ^ -- CREATE MATERIALIZED VIEW does support USING CREATE MATERIALIZED VIEW tableam_tblmv_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; SELECT f1 FROM tableam_tblmv_heap2 ORDER BY f1; @@ -514,9 +509,12 @@ CREATE TABLE tableam_parted_heapx (a text, b int) PARTITION BY list (a); CREATE TABLE tableam_parted_1_heapx PARTITION OF tableam_parted_heapx FOR VALUES IN ('a', 'b'); -- but an explicitly set AM overrides it CREATE TABLE tableam_parted_2_heapx PARTITION OF tableam_parted_heapx FOR VALUES IN ('c', 'd') USING heap; --- sequences, views and foreign servers shouldn't have an AM -CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; +-- sequences have an AM +SET LOCAL default_sequence_access_method = 'seqlocal'; CREATE SEQUENCE tableam_seq_heapx; +RESET default_sequence_access_method; +-- views and foreign servers shouldn't have an AM +CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; CREATE FOREIGN DATA WRAPPER fdw_heap2 VALIDATOR postgresql_fdw_validator; CREATE SERVER fs_heap2 FOREIGN DATA WRAPPER fdw_heap2 ; CREATE FOREIGN table tableam_fdw_heapx () SERVER fs_heap2; @@ -533,18 +531,18 @@ FROM pg_class AS pc LEFT JOIN pg_am AS pa ON (pa.oid = pc.relam) WHERE pc.relname LIKE 'tableam_%_heapx' ORDER BY 3, 1, 2; - relkind | amname | relname ----------+--------+----------------------------- - f | | tableam_fdw_heapx - r | heap2 | tableam_parted_1_heapx - r | heap | tableam_parted_2_heapx - p | | tableam_parted_heapx - S | | tableam_seq_heapx - r | heap2 | tableam_tbl_heapx - r | heap2 | tableam_tblas_heapx - m | heap2 | tableam_tblmv_heapx - r | heap2 | tableam_tblselectinto_heapx - v | | tableam_view_heapx + relkind | amname | relname +---------+----------+----------------------------- + f | | tableam_fdw_heapx + r | heap2 | 
tableam_parted_1_heapx + r | heap | tableam_parted_2_heapx + p | | tableam_parted_heapx + S | seqlocal | tableam_seq_heapx + r | heap2 | tableam_tbl_heapx + r | heap2 | tableam_tblas_heapx + m | heap2 | tableam_tblmv_heapx + r | heap2 | tableam_tblselectinto_heapx + v | | tableam_view_heapx (10 rows) -- don't want to keep those tables, nor the default @@ -574,3 +572,22 @@ table tableam_parted_b_heap2 depends on access method heap2 table tableam_parted_d_heap2 depends on access method heap2 HINT: Use DROP ... CASCADE to drop the dependent objects too. -- we intentionally leave the objects created above alive, to verify pg_dump support +-- Checks for sequence access methods +-- Create new sequence access method which uses standard local handler +CREATE ACCESS METHOD local2 TYPE SEQUENCE HANDLER seq_local_sequenceam_handler; +-- Create and use sequence +CREATE SEQUENCE test_seqam USING local2; +SELECT nextval('test_seqam'::regclass); + nextval +--------- + 1 +(1 row) + +-- Try to drop and fail on dependency +DROP ACCESS METHOD local2; +ERROR: cannot drop access method local2 because other objects depend on it +DETAIL: sequence test_seqam depends on access method local2 +HINT: Use DROP ... CASCADE to drop the dependent objects too. 
+-- And cleanup +DROP SEQUENCE test_seqam; +DROP ACCESS METHOD local2; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 20bf9ea9cdf7..20b3d8520315 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1943,6 +1943,18 @@ WHERE p1.oid = a1.amhandler AND a1.amtype = 't' AND -----+--------+-----+--------- (0 rows) +-- check for sequence amhandler functions with the wrong signature +SELECT a1.oid, a1.amname, p1.oid, p1.proname +FROM pg_am AS a1, pg_proc AS p1 +WHERE p1.oid = a1.amhandler AND a1.amtype = 's' AND + (p1.prorettype != 'sequence_am_handler'::regtype + OR p1.proretset + OR p1.pronargs != 1 + OR p1.proargtypes[0] != 'internal'::regtype); + oid | amname | oid | proname +-----+--------+-----+--------- +(0 rows) + -- **************** pg_amop **************** -- Look for illegal values in pg_amop fields SELECT a1.amopfamily, a1.amopstrategy diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index cf48ae6d0c2e..2b7d6c69e1e1 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -5127,31 +5127,33 @@ Indexes: -- check printing info about access methods \dA List of access methods - Name | Type ---------+------- - brin | Index - btree | Index - gin | Index - gist | Index - hash | Index - heap | Table - heap2 | Table - spgist | Index -(8 rows) + Name | Type +----------+---------- + brin | Index + btree | Index + gin | Index + gist | Index + hash | Index + heap | Table + heap2 | Table + seqlocal | Sequence + spgist | Index +(9 rows) \dA * List of access methods - Name | Type ---------+------- - brin | Index - btree | Index - gin | Index - gist | Index - hash | Index - heap | Table - heap2 | Table - spgist | Index -(8 rows) + Name | Type +----------+---------- + brin | Index + btree | Index + gin | Index + gist | Index + hash | Index + heap | Table + heap2 | Table + seqlocal | Sequence + spgist | 
Index +(9 rows) \dA h* List of access methods @@ -5176,32 +5178,34 @@ List of access methods \dA: extra argument "bar" ignored \dA+ - List of access methods - Name | Type | Handler | Description ---------+-------+----------------------+---------------------------------------- - brin | Index | brinhandler | block range index (BRIN) access method - btree | Index | bthandler | b-tree index access method - gin | Index | ginhandler | GIN index access method - gist | Index | gisthandler | GiST index access method - hash | Index | hashhandler | hash index access method - heap | Table | heap_tableam_handler | heap table access method - heap2 | Table | heap_tableam_handler | - spgist | Index | spghandler | SP-GiST index access method -(8 rows) + List of access methods + Name | Type | Handler | Description +----------+----------+------------------------------+---------------------------------------- + brin | Index | brinhandler | block range index (BRIN) access method + btree | Index | bthandler | b-tree index access method + gin | Index | ginhandler | GIN index access method + gist | Index | gisthandler | GiST index access method + hash | Index | hashhandler | hash index access method + heap | Table | heap_tableam_handler | heap table access method + heap2 | Table | heap_tableam_handler | + seqlocal | Sequence | seq_local_sequenceam_handler | local sequence access method + spgist | Index | spghandler | SP-GiST index access method +(9 rows) \dA+ * - List of access methods - Name | Type | Handler | Description ---------+-------+----------------------+---------------------------------------- - brin | Index | brinhandler | block range index (BRIN) access method - btree | Index | bthandler | b-tree index access method - gin | Index | ginhandler | GIN index access method - gist | Index | gisthandler | GiST index access method - hash | Index | hashhandler | hash index access method - heap | Table | heap_tableam_handler | heap table access method - heap2 | Table | 
heap_tableam_handler | - spgist | Index | spghandler | SP-GiST index access method -(8 rows) + List of access methods + Name | Type | Handler | Description +----------+----------+------------------------------+---------------------------------------- + brin | Index | brinhandler | block range index (BRIN) access method + btree | Index | bthandler | b-tree index access method + gin | Index | ginhandler | GIN index access method + gist | Index | gisthandler | GiST index access method + hash | Index | hashhandler | hash index access method + heap | Table | heap_tableam_handler | heap table access method + heap2 | Table | heap_tableam_handler | + seqlocal | Sequence | seq_local_sequenceam_handler | local sequence access method + spgist | Index | spghandler | SP-GiST index access method +(9 rows) \dA+ h* List of access methods diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index dd0c52ab08b5..c3218a36bc48 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -511,21 +511,21 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p', 'I') OR -----+--------- (0 rows) --- All tables, indexes, partitioned indexes and matviews should have an --- access method. +-- All tables, indexes, partitioned indexes, matviews and sequences should have +-- an access method. SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind NOT IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind NOT IN ('v', 'f', 'c', 'p') and c1.relam = 0; oid | relname -----+--------- (0 rows) --- Conversely, sequences, views, foreign tables, types and partitioned --- tables shouldn't have them. +-- Conversely, views, foreign tables, types and partitioned tables +-- shouldn't have them. 
SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind IN ('v', 'f', 'c', 'p') and c1.relam != 0; oid | relname -----+--------- diff --git a/src/test/regress/sql/create_am.sql b/src/test/regress/sql/create_am.sql index 754fe0c694bc..76a91cf8dd68 100644 --- a/src/test/regress/sql/create_am.sql +++ b/src/test/regress/sql/create_am.sql @@ -117,9 +117,6 @@ SELECT INTO tableam_tblselectinto_heap2 USING heap2 FROM tableam_tbl_heap2; -- CREATE VIEW doesn't support USING CREATE VIEW tableam_view_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; --- CREATE SEQUENCE doesn't support USING -CREATE SEQUENCE tableam_seq_heap2 USING heap2; - -- CREATE MATERIALIZED VIEW does support USING CREATE MATERIALIZED VIEW tableam_tblmv_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; SELECT f1 FROM tableam_tblmv_heap2 ORDER BY f1; @@ -327,9 +324,13 @@ CREATE TABLE tableam_parted_1_heapx PARTITION OF tableam_parted_heapx FOR VALUES -- but an explicitly set AM overrides it CREATE TABLE tableam_parted_2_heapx PARTITION OF tableam_parted_heapx FOR VALUES IN ('c', 'd') USING heap; --- sequences, views and foreign servers shouldn't have an AM -CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; +-- sequences have an AM +SET LOCAL default_sequence_access_method = 'seqlocal'; CREATE SEQUENCE tableam_seq_heapx; +RESET default_sequence_access_method; + +-- views and foreign servers shouldn't have an AM +CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; CREATE FOREIGN DATA WRAPPER fdw_heap2 VALIDATOR postgresql_fdw_validator; CREATE SERVER fs_heap2 FOREIGN DATA WRAPPER fdw_heap2 ; CREATE FOREIGN table tableam_fdw_heapx () SERVER fs_heap2; @@ -365,3 +366,16 @@ CREATE FOREIGN TABLE fp PARTITION OF tableam_parted_a_heap2 DEFAULT SERVER x; DROP ACCESS METHOD heap2; -- we intentionally leave the objects created above alive, to verify pg_dump support + +-- Checks for sequence access methods + +-- Create new 
sequence access method which uses standard local handler +CREATE ACCESS METHOD local2 TYPE SEQUENCE HANDLER seq_local_sequenceam_handler; +-- Create and use sequence +CREATE SEQUENCE test_seqam USING local2; +SELECT nextval('test_seqam'::regclass); +-- Try to drop and fail on dependency +DROP ACCESS METHOD local2; +-- And cleanup +DROP SEQUENCE test_seqam; +DROP ACCESS METHOD local2; diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 2fb3a8528781..e60eeab33c02 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -1236,6 +1236,16 @@ WHERE p1.oid = a1.amhandler AND a1.amtype = 't' AND OR p1.pronargs != 1 OR p1.proargtypes[0] != 'internal'::regtype); +-- check for sequence amhandler functions with the wrong signature + +SELECT a1.oid, a1.amname, p1.oid, p1.proname +FROM pg_am AS a1, pg_proc AS p1 +WHERE p1.oid = a1.amhandler AND a1.amtype = 's' AND + (p1.prorettype != 'sequence_am_handler'::regtype + OR p1.proretset + OR p1.pronargs != 1 + OR p1.proargtypes[0] != 'internal'::regtype); + -- **************** pg_amop **************** -- Look for illegal values in pg_amop fields diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index c94dd83d3061..f1b6cd1091ef 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -370,18 +370,18 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p', 'I') OR relpersistence NOT IN ('p', 'u', 't') OR relreplident NOT IN ('d', 'n', 'f', 'i'); --- All tables, indexes, partitioned indexes and matviews should have an --- access method. +-- All tables, indexes, partitioned indexes, matviews and sequences should have +-- an access method. 
SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind NOT IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind NOT IN ('v', 'f', 'c', 'p') and c1.relam = 0; --- Conversely, sequences, views, foreign tables, types and partitioned --- tables shouldn't have them. +-- Conversely, views, foreign tables, types and partitioned tables +-- shouldn't have them. SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind IN ('v', 'f', 'c', 'p') and c1.relam != 0; -- Indexes and partitioned indexes should have AMs of type 'i'. diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e5879e00dffe..e0654ddecd48 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2679,6 +2679,7 @@ SeqScanState SeqTable SeqTableData SeqType +SequenceAmRoutine SequenceItem SerCommitSeqNo SerialControl @@ -3723,6 +3724,7 @@ list_sort_comparator loc_chunk local_relopt local_relopts +local_sequence_magic local_source local_ts_iter local_ts_radix_tree @@ -4011,7 +4013,6 @@ scram_state_enum script_error_callback_arg security_class_t sem_t -sepgsql_context_info_t sequence_magic set_join_pathlist_hook_type set_rel_pathlist_hook_type @@ -4236,6 +4237,7 @@ xl_heap_visible xl_invalid_page xl_invalid_page_key xl_invalidations +xl_local_seq_rec xl_logical_message xl_multi_insert_tuple xl_multixact_create @@ -4247,7 +4249,6 @@ xl_replorigin_drop xl_replorigin_set xl_restore_point xl_running_xacts -xl_seq_rec xl_smgr_create xl_smgr_truncate xl_standby_lock