From 7889f5e37a853506785dbc36c777e742fea4658c Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 1 Dec 2023 12:00:45 +0900 Subject: [PATCH 1/7] Remove FormData_pg_sequence_data from init_params()/sequence.c init_params() sets up "last_value" and "is_called" for a sequence, based on the sequence properties in pg_sequence. This simplifies the logic around log_cnt, which is reset to 0 whenever an update of the sequence properties requires its metadata to start afresh. --- src/backend/commands/sequence.c | 81 ++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 451ae6f7f694..08744c3e9112 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -106,7 +106,9 @@ static Form_pg_sequence_data read_seq_tuple(Relation rel, static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, - Form_pg_sequence_data seqdataform, + int64 *last_value, + bool *reset_state, + bool *is_called, bool *need_seq_rewrite, List **owned_by); static void do_setval(Oid relid, int64 next, bool iscalled); @@ -121,7 +123,9 @@ ObjectAddress DefineSequence(ParseState *pstate, CreateSeqStmt *seq) { FormData_pg_sequence seqform; - FormData_pg_sequence_data seqdataform; + int64 last_value; + bool reset_state; + bool is_called; bool need_seq_rewrite; List *owned_by; CreateStmt *stmt = makeNode(CreateStmt); @@ -164,7 +168,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) /* Check and set all option values */ init_params(pstate, seq->options, seq->for_identity, true, - &seqform, &seqdataform, + &seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); /* @@ -179,7 +183,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) { case SEQ_COL_LASTVAL: coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid); - value[i - 1] = 
Int64GetDatumFast(seqdataform.last_value); + value[i - 1] = Int64GetDatumFast(last_value); break; case SEQ_COL_LOG: coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid); @@ -448,6 +452,9 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) ObjectAddress address; Relation rel; HeapTuple seqtuple; + bool reset_state = false; + bool is_called; + int64 last_value; HeapTuple newdatatuple; /* Open and lock sequence, and check for ownership along the way. */ @@ -481,12 +488,14 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) /* copy the existing sequence data tuple, so it can be modified locally */ newdatatuple = heap_copytuple(&datatuple); newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple); + last_value = newdataform->last_value; + is_called = newdataform->is_called; UnlockReleaseBuffer(buf); /* Check and set new values */ init_params(pstate, stmt->options, stmt->for_identity, false, - seqform, newdataform, + seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); /* If needed, rewrite the sequence relation itself */ @@ -513,6 +522,10 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) /* * Insert the modified tuple into the new storage file. */ + newdataform->last_value = last_value; + newdataform->is_called = is_called; + if (reset_state) + newdataform->log_cnt = 0; fill_seq_with_data(seqrel, newdatatuple); } @@ -1236,17 +1249,19 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) /* * init_params: process the options list of CREATE or ALTER SEQUENCE, and * store the values into appropriate fields of seqform, for changes that go - * into the pg_sequence catalog, and fields of seqdataform for changes to the - * sequence relation itself. Set *need_seq_rewrite to true if we changed any - * parameters that require rewriting the sequence's relation (interesting for - * ALTER SEQUENCE). Also set *owned_by to any OWNED BY option, or to NIL if - * there is none. 
+ * into the pg_sequence catalog, and fields for changes to the sequence + * relation itself (is_called, last_value or any state it may hold). Set + * *need_seq_rewrite to true if we changed any parameters that require + * rewriting the sequence's relation (interesting for ALTER SEQUENCE). Also + * set *owned_by to any OWNED BY option, or to NIL if there is none. Set + * *reset_state if the internal state of the sequence needs to change on a + * follow-up nextval(). * * If isInit is true, fill any unspecified options with default values; * otherwise, do not change existing options that aren't explicitly overridden. * * Note: we force a sequence rewrite whenever we change parameters that affect - * generation of future sequence values, even if the seqdataform per se is not + * generation of future sequence values, even if the metadata per se is not * changed. This allows ALTER SEQUENCE to behave transactionally. Currently, * the only option that doesn't cause that is OWNED BY. It's *necessary* for * ALTER SEQUENCE OWNED BY to not rewrite the sequence, because that would @@ -1257,7 +1272,9 @@ static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, - Form_pg_sequence_data seqdataform, + int64 *last_value, + bool *reset_state, + bool *is_called, bool *need_seq_rewrite, List **owned_by) { @@ -1363,11 +1380,11 @@ init_params(ParseState *pstate, List *options, bool for_identity, } /* - * We must reset log_cnt when isInit or when changing any parameters that - * would affect future nextval allocations. + * We must reset the state when isInit or when changing any parameters + * that would affect future nextval allocations. 
*/ if (isInit) - seqdataform->log_cnt = 0; + *reset_state = true; /* AS type */ if (as_type != NULL) @@ -1416,7 +1433,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("INCREMENT must not be zero"))); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { @@ -1428,7 +1445,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, { seqform->seqcycle = boolVal(is_cycled->arg); Assert(BoolIsValid(seqform->seqcycle)); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { @@ -1439,7 +1456,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (max_value != NULL && max_value->arg) { seqform->seqmax = defGetInt64(max_value); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit || max_value != NULL || reset_max_value) { @@ -1455,7 +1472,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, } else seqform->seqmax = -1; /* descending seq */ - seqdataform->log_cnt = 0; + *reset_state = true; } /* Validate maximum value. No need to check INT8 as seqmax is an int64 */ @@ -1471,7 +1488,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (min_value != NULL && min_value->arg) { seqform->seqmin = defGetInt64(min_value); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit || min_value != NULL || reset_min_value) { @@ -1487,7 +1504,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, } else seqform->seqmin = 1; /* ascending seq */ - seqdataform->log_cnt = 0; + *reset_state = true; } /* Validate minimum value. 
No need to check INT8 as seqmin is an int64 */ @@ -1538,30 +1555,30 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (restart_value != NULL) { if (restart_value->arg != NULL) - seqdataform->last_value = defGetInt64(restart_value); + *last_value = defGetInt64(restart_value); else - seqdataform->last_value = seqform->seqstart; - seqdataform->is_called = false; - seqdataform->log_cnt = 0; + *last_value = seqform->seqstart; + *is_called = false; + *reset_state = true; } else if (isInit) { - seqdataform->last_value = seqform->seqstart; - seqdataform->is_called = false; + *last_value = seqform->seqstart; + *is_called = false; } /* crosscheck RESTART (or current value, if changing MIN/MAX) */ - if (seqdataform->last_value < seqform->seqmin) + if (*last_value < seqform->seqmin) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%" PRId64 ") cannot be less than MINVALUE (%" PRId64 ")", - seqdataform->last_value, + *last_value, seqform->seqmin))); - if (seqdataform->last_value > seqform->seqmax) + if (*last_value > seqform->seqmax) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%" PRId64 ") cannot be greater than MAXVALUE (%" PRId64 ")", - seqdataform->last_value, + *last_value, seqform->seqmax))); /* CACHE */ @@ -1573,7 +1590,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("CACHE (%" PRId64 ") must be greater than zero", seqform->seqcache))); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { From 5fa8ed8ca7a62f98e2c1e381112eaade9c511647 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 19 Jan 2024 15:00:14 +0900 Subject: [PATCH 2/7] Integrate addition of attributes for sequences with ALTER TABLE This is a process similar to CREATE OR REPLACE VIEW, where attributes are added to a sequence after the initial creation of its Relation. 
This gives more flexibility to sequence AMs, as these may want to force their own set of attributes to use when coupled with their computation methods and/or underlying table AM. --- src/backend/commands/sequence.c | 29 +++++++++++++++++-- src/backend/commands/tablecmds.c | 10 +++++++ src/backend/tcop/utility.c | 4 +++ src/include/nodes/parsenodes.h | 1 + .../test_ddl_deparse/expected/alter_table.out | 10 +++++-- .../expected/create_sequence_1.out | 5 +++- .../expected/create_table.out | 15 ++++++++-- .../test_ddl_deparse/test_ddl_deparse.c | 3 ++ 8 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 08744c3e9112..20e8978c9346 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -136,6 +136,9 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) TupleDesc tupDesc; Datum value[SEQ_COL_LASTCOL]; bool null[SEQ_COL_LASTCOL]; + List *elts = NIL; + List *atcmds = NIL; + ListCell *lc; Datum pgs_values[Natts_pg_sequence]; bool pgs_nulls[Natts_pg_sequence]; int i; @@ -174,7 +177,6 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) /* * Create relation (and fill value[] and null[] for the tuple) */ - stmt->tableElts = NIL; for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++) { ColumnDef *coldef = NULL; @@ -198,7 +200,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) coldef->is_not_null = true; null[i - 1] = false; - stmt->tableElts = lappend(stmt->tableElts, coldef); + elts = lappend(elts, coldef); } stmt->relation = seq->sequence; @@ -208,12 +210,35 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) stmt->oncommit = ONCOMMIT_NOOP; stmt->tablespacename = NULL; stmt->if_not_exists = seq->if_not_exists; + /* + * Initial relation has no attributes, these are added later. 
+ */ + stmt->tableElts = NIL; address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL); seqoid = address.objectId; Assert(seqoid != InvalidOid); rel = sequence_open(seqoid, AccessExclusiveLock); + + /* Add all the attributes to the sequence */ + foreach(lc, elts) + { + AlterTableCmd *atcmd; + + atcmd = makeNode(AlterTableCmd); + atcmd->subtype = AT_AddColumnToSequence; + atcmd->def = (Node *) lfirst(lc); + atcmds = lappend(atcmds, atcmd); + } + + /* + * No recursion needed. Note that EventTriggerAlterTableStart() should + * have been called. + */ + AlterTableInternal(RelationGetRelid(rel), atcmds, false); + CommandCounterIncrement(); + tupDesc = RelationGetDescr(rel); /* now initialize the sequence's data */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 2705cf11330d..dc4d459653a8 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -4646,6 +4646,7 @@ AlterTableGetLockLevel(List *cmds) * Subcommands that may be visible to concurrent SELECTs */ case AT_DropColumn: /* change visible to SELECT */ + case AT_AddColumnToSequence: /* CREATE SEQUENCE */ case AT_AddColumnToView: /* CREATE VIEW */ case AT_DropOids: /* used to equiv to DropColumn */ case AT_EnableAlwaysRule: /* may change SELECT rules */ @@ -4941,6 +4942,13 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, /* Recursion occurs during execution phase */ pass = AT_PASS_ADD_COL; break; + case AT_AddColumnToSequence: /* add column via CREATE SEQUENCE */ + ATSimplePermissions(cmd->subtype, rel, ATT_SEQUENCE); + ATPrepAddColumn(wqueue, rel, recurse, recursing, false, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_ADD_COL; + break; case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ ATSimplePermissions(cmd->subtype, rel, ATT_VIEW); ATPrepAddColumn(wqueue, rel, recurse, recursing, true, cmd, @@ -5373,6 +5381,7 @@ ATExecCmd(List **wqueue, AlteredTableInfo 
*tab, switch (cmd->subtype) { case AT_AddColumn: /* ADD COLUMN */ + case AT_AddColumnToSequence: /* add column via CREATE SEQUENCE */ case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ address = ATExecAddColumn(wqueue, tab, rel, &cmd, cmd->recurse, false, @@ -6588,6 +6597,7 @@ alter_table_type_to_string(AlterTableType cmdtype) switch (cmdtype) { case AT_AddColumn: + case AT_AddColumnToSequence: case AT_AddColumnToView: return "ADD COLUMN"; case AT_ColumnDefault: diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 25fe3d580166..0629dc5387c0 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1667,7 +1667,11 @@ ProcessUtilitySlow(ParseState *pstate, break; case T_CreateSeqStmt: + EventTriggerAlterTableStart(parsetree); address = DefineSequence(pstate, (CreateSeqStmt *) parsetree); + /* stashed internally */ + commandCollected = true; + EventTriggerAlterTableEnd(); break; case T_AlterSeqStmt: diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4610fc61293b..5bdea762fc72 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2407,6 +2407,7 @@ typedef struct AlterTableStmt typedef enum AlterTableType { AT_AddColumn, /* add column */ + AT_AddColumnToSequence, /* implicitly via CREATE SEQUENCE */ AT_AddColumnToView, /* implicitly via CREATE OR REPLACE VIEW */ AT_ColumnDefault, /* alter column default */ AT_CookedColumnDefault, /* add a pre-cooked column default */ diff --git a/src/test/modules/test_ddl_deparse/expected/alter_table.out b/src/test/modules/test_ddl_deparse/expected/alter_table.out index 50d0354a3417..ed31059ef584 100644 --- a/src/test/modules/test_ddl_deparse/expected/alter_table.out +++ b/src/test/modules/test_ddl_deparse/expected/alter_table.out @@ -25,7 +25,10 @@ NOTICE: DDL test: type simple, tag CREATE TABLE CREATE TABLE grandchild () INHERITS (child); NOTICE: DDL test: type simple, tag CREATE TABLE ALTER TABLE parent ADD COLUMN 
b serial; -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence parent_b_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence parent_b_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence parent_b_seq NOTICE: DDL test: type alter table, tag ALTER TABLE NOTICE: subcommand: type ADD COLUMN (and recurse) desc column b of table parent NOTICE: subcommand: type ADD CONSTRAINT (and recurse) desc constraint parent_b_not_null on table parent @@ -70,7 +73,10 @@ ALTER TABLE parent ALTER COLUMN a SET NOT NULL; NOTICE: DDL test: type alter table, tag ALTER TABLE NOTICE: subcommand: type SET NOT NULL (and recurse) desc constraint parent_a_not_null on table parent ALTER TABLE parent ALTER COLUMN a ADD GENERATED ALWAYS AS IDENTITY; -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence parent_a_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence parent_a_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence parent_a_seq NOTICE: DDL test: type simple, tag ALTER SEQUENCE NOTICE: DDL test: type alter table, tag ALTER TABLE NOTICE: subcommand: type ADD IDENTITY (and recurse) desc column a of table parent diff --git a/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out b/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out index 5837ea484e40..310ce5a6baf5 100644 --- a/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out +++ b/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out @@ -8,4 +8,7 @@ CREATE SEQUENCE fkey_table_seq START 10 CACHE 10 CYCLE; -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, 
tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence fkey_table_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence fkey_table_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence fkey_table_seq diff --git a/src/test/modules/test_ddl_deparse/expected/create_table.out b/src/test/modules/test_ddl_deparse/expected/create_table.out index 14915f661a89..527c67995a94 100644 --- a/src/test/modules/test_ddl_deparse/expected/create_table.out +++ b/src/test/modules/test_ddl_deparse/expected/create_table.out @@ -50,9 +50,18 @@ CREATE TABLE datatype_table ( PRIMARY KEY (id), UNIQUE (id_big) ); -NOTICE: DDL test: type simple, tag CREATE SEQUENCE -NOTICE: DDL test: type simple, tag CREATE SEQUENCE -NOTICE: DDL test: type simple, tag CREATE SEQUENCE +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence datatype_table_id_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence datatype_table_id_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence datatype_table_id_seq +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence datatype_table_id_big_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence datatype_table_id_big_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence datatype_table_id_big_seq +NOTICE: DDL test: type alter table, tag CREATE SEQUENCE +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column last_value of sequence datatype_table_is_small_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column log_cnt of sequence datatype_table_is_small_seq +NOTICE: subcommand: type ADD COLUMN TO SEQUENCE desc column is_called of sequence 
datatype_table_is_small_seq NOTICE: DDL test: type simple, tag CREATE TABLE NOTICE: DDL test: type simple, tag CREATE INDEX NOTICE: DDL test: type simple, tag CREATE INDEX diff --git a/src/test/modules/test_ddl_deparse/test_ddl_deparse.c b/src/test/modules/test_ddl_deparse/test_ddl_deparse.c index 193669f2bc1e..254fdf90c79a 100644 --- a/src/test/modules/test_ddl_deparse/test_ddl_deparse.c +++ b/src/test/modules/test_ddl_deparse/test_ddl_deparse.c @@ -113,6 +113,9 @@ get_altertable_subcmdinfo(PG_FUNCTION_ARGS) case AT_AddColumn: strtype = "ADD COLUMN"; break; + case AT_AddColumnToSequence: + strtype = "ADD COLUMN TO SEQUENCE"; + break; case AT_AddColumnToView: strtype = "ADD COLUMN TO VIEW"; break; From f6c267e40c26a4ca270f29a71d50b0b5068442d3 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 30 Apr 2025 08:26:43 +0900 Subject: [PATCH 3/7] Refactor code for in-core "local" sequences This commit restructures the code of in-core sequences into a new set of files: - seqdesc.c is renamed to seqlocaldesc.c. - seqlocalxlog.c holds the code for the WAL replay logic. - seqlocalam.c stores a set of routines called from sequence.c, finishing the separation between the main sequence logic and the in-core sequences. - seqlocalam.h stores the AM-specific structures and routines related to the in-core sequences. WAL records are renamed to "SequenceLocal" with structures, variables and file structures mapping to that. 
--- src/backend/access/rmgrdesc/Makefile | 2 +- src/backend/access/rmgrdesc/meson.build | 2 +- .../rmgrdesc/{seqdesc.c => seqlocaldesc.c} | 20 +- src/backend/access/sequence/Makefile | 2 +- src/backend/access/sequence/meson.build | 2 + src/backend/access/sequence/seqlocalam.c | 634 ++++++++++++++++++ src/backend/access/sequence/seqlocalxlog.c | 82 +++ src/backend/access/transam/rmgr.c | 1 + src/backend/commands/sequence.c | 634 +----------------- src/bin/pg_waldump/.gitignore | 2 +- src/bin/pg_waldump/rmgrdesc.c | 1 + src/bin/pg_waldump/t/001_basic.pl | 2 +- src/include/access/rmgrlist.h | 2 +- src/include/access/seqlocalam.h | 58 ++ 14 files changed, 822 insertions(+), 622 deletions(-) rename src/backend/access/rmgrdesc/{seqdesc.c => seqlocaldesc.c} (64%) create mode 100644 src/backend/access/sequence/seqlocalam.c create mode 100644 src/backend/access/sequence/seqlocalxlog.c create mode 100644 src/include/access/seqlocalam.h diff --git a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile index cd95eec37f14..e5900ed77af5 100644 --- a/src/backend/access/rmgrdesc/Makefile +++ b/src/backend/access/rmgrdesc/Makefile @@ -24,7 +24,7 @@ OBJS = \ relmapdesc.o \ replorigindesc.o \ rmgrdesc_utils.o \ - seqdesc.o \ + seqlocaldesc.o \ smgrdesc.o \ spgdesc.o \ standbydesc.o \ diff --git a/src/backend/access/rmgrdesc/meson.build b/src/backend/access/rmgrdesc/meson.build index 96c98e800c22..6b3b3e1e44c1 100644 --- a/src/backend/access/rmgrdesc/meson.build +++ b/src/backend/access/rmgrdesc/meson.build @@ -17,7 +17,7 @@ rmgr_desc_sources = files( 'relmapdesc.c', 'replorigindesc.c', 'rmgrdesc_utils.c', - 'seqdesc.c', + 'seqlocaldesc.c', 'smgrdesc.c', 'spgdesc.c', 'standbydesc.c', diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqlocaldesc.c similarity index 64% rename from src/backend/access/rmgrdesc/seqdesc.c rename to src/backend/access/rmgrdesc/seqlocaldesc.c index 0d289d77fcf7..6ffbcb2c4735 100644 --- 
a/src/backend/access/rmgrdesc/seqdesc.c +++ b/src/backend/access/rmgrdesc/seqlocaldesc.c @@ -1,44 +1,44 @@ /*------------------------------------------------------------------------- * - * seqdesc.c - * rmgr descriptor routines for commands/sequence.c + * seqlocaldesc.c + * rmgr descriptor routines for access/sequence/seqlocalam.c * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * src/backend/access/rmgrdesc/seqdesc.c + * src/backend/access/rmgrdesc/seqlocaldesc.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "commands/sequence.h" +#include "access/seqlocalam.h" void -seq_desc(StringInfo buf, XLogReaderState *record) +seq_local_desc(StringInfo buf, XLogReaderState *record) { char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; - xl_seq_rec *xlrec = (xl_seq_rec *) rec; + xl_seq_local_rec *xlrec = (xl_seq_local_rec *) rec; - if (info == XLOG_SEQ_LOG) + if (info == XLOG_SEQ_LOCAL_LOG) appendStringInfo(buf, "rel %u/%u/%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber); } const char * -seq_identify(uint8 info) +seq_local_identify(uint8 info) { const char *id = NULL; switch (info & ~XLR_INFO_MASK) { - case XLOG_SEQ_LOG: - id = "LOG"; + case XLOG_SEQ_LOCAL_LOG: + id = "SEQ_LOCAL_LOG"; break; } diff --git a/src/backend/access/sequence/Makefile b/src/backend/access/sequence/Makefile index 9f9d31f5425a..a15ceec1c0a0 100644 --- a/src/backend/access/sequence/Makefile +++ b/src/backend/access/sequence/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/access/sequence top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = sequence.o +OBJS = seqlocalam.o seqlocalxlog.o sequence.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/sequence/meson.build b/src/backend/access/sequence/meson.build index ec9ab9b7e9db..8bc0e95e68c0 100644 --- a/src/backend/access/sequence/meson.build +++ b/src/backend/access/sequence/meson.build @@ -1,5 +1,7 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group backend_sources += files( + 'seqlocalam.c', + 'seqlocalxlog.c', 'sequence.c', ) diff --git a/src/backend/access/sequence/seqlocalam.c b/src/backend/access/sequence/seqlocalam.c new file mode 100644 index 000000000000..e019a6f5a95d --- /dev/null +++ b/src/backend/access/sequence/seqlocalam.c @@ -0,0 +1,634 @@ +/*------------------------------------------------------------------------- + * + * seqlocalam.c + * Local sequence access manager + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/sequence/seqlocalam.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/multixact.h" +#include "access/seqlocalam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "access/xlogutils.h" +#include "catalog/storage_xlog.h" +#include "commands/tablecmds.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" + + +/* + * We don't want to log each fetching of a value from a sequence, + * so we pre-log a few fetches in advance. In the event of + * crash we can lose (skip over) as many values as we pre-logged. 
+ */ +#define SEQ_LOCAL_LOG_VALS 32 + +/* Format of tuples stored in heap table associated to local sequences */ +typedef struct FormData_pg_seq_local_data +{ + int64 last_value; + int64 log_cnt; + bool is_called; +} FormData_pg_seq_local_data; + +typedef FormData_pg_seq_local_data *Form_pg_seq_local_data; + +/* + * Columns of a local sequence relation + */ +#define SEQ_LOCAL_COL_LASTVAL 1 +#define SEQ_LOCAL_COL_LOG 2 +#define SEQ_LOCAL_COL_CALLED 3 + +#define SEQ_LOCAL_COL_FIRSTCOL SEQ_LOCAL_COL_LASTVAL +#define SEQ_LOCAL_COL_LASTCOL SEQ_LOCAL_COL_CALLED + + +/* + * We don't want to log each fetching of a value from a sequence, + * so we pre-log a few fetches in advance. In the event of + * crash we can lose (skip over) as many values as we pre-logged. + */ +#define SEQ_LOCAL_LOG_VALS 32 + +static Form_pg_seq_local_data read_seq_tuple(Relation rel, + Buffer *buf, + HeapTuple seqdatatuple); +static void fill_seq_with_data(Relation rel, HeapTuple tuple); +static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, + ForkNumber forkNum); + +/* + * Given an opened sequence relation, lock the page buffer and find the tuple + * + * *buf receives the reference to the pinned-and-ex-locked buffer + * *seqdatatuple receives the reference to the sequence tuple proper + * (this arg should point to a local variable of type HeapTupleData) + * + * Function's return value points to the data payload of the tuple + */ +static Form_pg_seq_local_data +read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) +{ + Page page; + ItemId lp; + seq_local_magic *sm; + Form_pg_seq_local_data seq; + + *buf = ReadBuffer(rel, 0); + LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(*buf); + sm = (seq_local_magic *) PageGetSpecialPointer(page); + + if (sm->magic != SEQ_LOCAL_MAGIC) + elog(ERROR, "bad magic number in sequence \"%s\": %08X", + RelationGetRelationName(rel), sm->magic); + + lp = PageGetItemId(page, FirstOffsetNumber); + Assert(ItemIdIsNormal(lp)); + + 
/* Note we currently only bother to set these two fields of *seqdatatuple */ + seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); + seqdatatuple->t_len = ItemIdGetLength(lp); + + /* + * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on + * a sequence, which would leave a non-frozen XID in the sequence tuple's + * xmax, which eventually leads to clog access failures or worse. If we + * see this has happened, clean up after it. We treat this like a hint + * bit update, ie, don't bother to WAL-log it, since we can certainly do + * this again if the update gets lost. + */ + Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); + if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) + { + HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); + seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; + seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; + MarkBufferDirtyHint(*buf, true); + } + + seq = (Form_pg_seq_local_data) GETSTRUCT(seqdatatuple); + + return seq; +} + +/* + * Initialize a sequence's relation with the specified tuple as content + * + * This handles unlogged sequences by writing to both the main and the init + * fork as necessary. 
+ */ +static void +fill_seq_with_data(Relation rel, HeapTuple tuple) +{ + fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM); + + if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) + { + SMgrRelation srel; + + srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER); + smgrcreate(srel, INIT_FORKNUM, false); + log_smgrcreate(&rel->rd_locator, INIT_FORKNUM); + fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM); + FlushRelationBuffers(rel); + smgrclose(srel); + } +} + +/* + * Initialize a sequence's relation fork with the specified tuple as content + */ +static void +fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) +{ + Buffer buf; + Page page; + seq_local_magic *sm; + OffsetNumber offnum; + + /* Initialize first page of relation with special magic number */ + + buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL, + EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + Assert(BufferGetBlockNumber(buf) == 0); + + page = BufferGetPage(buf); + + PageInit(page, BufferGetPageSize(buf), sizeof(seq_local_magic)); + sm = (seq_local_magic *) PageGetSpecialPointer(page); + sm->magic = SEQ_LOCAL_MAGIC; + + /* Now insert sequence tuple */ + + /* + * Since VACUUM does not process sequences, we have to force the tuple to + * have xmin = FrozenTransactionId now. Otherwise it would become + * invisible to SELECTs after 2G transactions. It is okay to do this + * because if the current transaction aborts, no other xact will ever + * examine the sequence tuple anyway. + */ + HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); + HeapTupleHeaderSetXminFrozen(tuple->t_data); + HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); + HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); + tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; + ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); + + /* check the comment above nextval_internal()'s equivalent call. 
*/ + if (RelationNeedsWAL(rel)) + GetTopTransactionId(); + + START_CRIT_SECTION(); + + MarkBufferDirty(buf); + + offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len, + InvalidOffsetNumber, false, false); + if (offnum != FirstOffsetNumber) + elog(ERROR, "failed to add sequence tuple to page"); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM) + { + xl_seq_local_rec xlrec; + XLogRecPtr recptr; + + XLogBeginInsert(); + XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); + + xlrec.locator = rel->rd_locator; + + XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec)); + XLogRegisterData(tuple->t_data, tuple->t_len); + + recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); +} + +/* + * seq_local_nextval() + * + * Allocate a new value for a local sequence, based on the sequence + * configuration. + */ +int64 +seq_local_nextval(Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last) +{ + int64 result; + int64 fetch; + int64 next; + int64 rescnt = 0; + int64 log; + Buffer buf; + HeapTupleData seqdatatuple; + Form_pg_seq_local_data seq; + Page page; + bool logit = false; + + /* lock page buffer and read tuple */ + seq = read_seq_tuple(rel, &buf, &seqdatatuple); + page = BufferGetPage(buf); + + *last = next = result = seq->last_value; + fetch = cache; + log = seq->log_cnt; + + if (!seq->is_called) + { + rescnt++; /* return last_value if not is_called */ + fetch--; + } + + /* + * Decide whether we should emit a WAL log record. If so, force up the + * fetch count to grab SEQ_LOCAL_LOG_VALS more values than we actually + * need to cache. (These will then be usable without logging.) + * + * If this is the first nextval after a checkpoint, we must force a new + * WAL record to be written anyway, else replay starting from the + * checkpoint would fail to advance the sequence past the logged values. 
+ * In this case we may as well fetch extra values. + */ + if (log < fetch || !seq->is_called) + { + /* forced log to satisfy local demand for values */ + fetch = log = fetch + SEQ_LOCAL_LOG_VALS; + logit = true; + } + else + { + XLogRecPtr redoptr = GetRedoRecPtr(); + + if (PageGetLSN(page) <= redoptr) + { + /* last update of seq was before checkpoint */ + fetch = log = fetch + SEQ_LOCAL_LOG_VALS; + logit = true; + } + } + + while (fetch) /* try to fetch cache [+ log ] numbers */ + { + /* + * Check MAXVALUE for ascending sequences and MINVALUE for descending + * sequences + */ + if (incby > 0) + { + /* ascending sequence */ + if ((maxv >= 0 && next > maxv - incby) || + (maxv < 0 && next + incby > maxv)) + { + if (rescnt > 0) + break; /* stop fetching */ + if (!cycle) + ereport(ERROR, + (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), + errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")", + RelationGetRelationName(rel), + maxv))); + next = minv; + } + else + next += incby; + } + else + { + /* descending sequence */ + if ((minv < 0 && next < minv - incby) || + (minv >= 0 && next + incby < minv)) + { + if (rescnt > 0) + break; /* stop fetching */ + if (!cycle) + ereport(ERROR, + (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), + errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")", + RelationGetRelationName(rel), + minv))); + next = maxv; + } + else + next += incby; + } + fetch--; + if (rescnt < cache) + { + log--; + rescnt++; + *last = next; + if (rescnt == 1) /* if it's first result - */ + result = next; /* it's what to return */ + } + } + + log -= fetch; /* adjust for any unfetched numbers */ + Assert(log >= 0); + + /* + * If something needs to be WAL logged, acquire an xid, so this + * transaction's commit will trigger a WAL flush and wait for syncrep. + * It's sufficient to ensure the toplevel transaction has an xid, no need + * to assign xids to subxacts, that'll already trigger an appropriate wait.
+ * (Have to do that here, so we're outside the critical section) + */ + if (logit && RelationNeedsWAL(rel)) + GetTopTransactionId(); + + /* ready to change the on-disk (or really, in-buffer) tuple */ + START_CRIT_SECTION(); + + /* + * We must mark the buffer dirty before doing XLogInsert(); see notes in + * SyncOneBuffer(). However, we don't apply the desired changes just yet. + * This looks like a violation of the buffer update protocol, but it is in + * fact safe because we hold exclusive lock on the buffer. Any other + * process, including a checkpoint, that tries to examine the buffer + * contents will block until we release the lock, and then will see the + * final state that we install below. + */ + MarkBufferDirty(buf); + + /* XLOG stuff */ + if (logit && RelationNeedsWAL(rel)) + { + xl_seq_local_rec xlrec; + XLogRecPtr recptr; + + /* + * We don't log the current state of the tuple, but rather the state + * as it would appear after "log" more fetches. This lets us skip + * that many future WAL records, at the cost that we lose those + * sequence values if we crash. + */ + XLogBeginInsert(); + XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); + + /* set values that will be saved in xlog */ + seq->last_value = next; + seq->is_called = true; + seq->log_cnt = 0; + + xlrec.locator = rel->rd_locator; + + XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec)); + XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); + + recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG); + + PageSetLSN(page, recptr); + } + + /* Now update sequence tuple to the intended final state */ + seq->last_value = *last; /* last fetched number */ + seq->is_called = true; + seq->log_cnt = log; /* how much is logged */ + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); + + return result; +} + +/* + * seq_local_get_table_am() + * + * Return the table access method used by this sequence. 
+ */ +const char * +seq_local_get_table_am(void) +{ + return "heap"; +} + +/* + * seq_local_init() + * + * Add the sequence attributes to the relation created for this sequence + * AM and insert a tuple of metadata into the sequence relation, based on + * the information guessed from pg_sequences. This is the first tuple + * inserted after the relation has been created, filling in its heap + * table. + */ +void +seq_local_init(Relation rel, int64 last_value, bool is_called) +{ + Datum value[SEQ_LOCAL_COL_LASTCOL]; + bool null[SEQ_LOCAL_COL_LASTCOL]; + List *elts = NIL; + List *atcmds = NIL; + ListCell *lc; + TupleDesc tupdesc; + HeapTuple tuple; + + /* + * Create relation (and fill value[] and null[] for the initial tuple). + */ + for (int i = SEQ_LOCAL_COL_FIRSTCOL; i <= SEQ_LOCAL_COL_LASTCOL; i++) + { + ColumnDef *coldef = NULL; + + switch (i) + { + case SEQ_LOCAL_COL_LASTVAL: + coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid); + value[i - 1] = Int64GetDatumFast(last_value); + break; + case SEQ_LOCAL_COL_LOG: + coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid); + value[i - 1] = Int64GetDatum(0); + break; + case SEQ_LOCAL_COL_CALLED: + coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid); + value[i - 1] = BoolGetDatum(is_called); + break; + } + + coldef->is_not_null = true; + null[i - 1] = false; + elts = lappend(elts, coldef); + } + + /* Add all the attributes to the sequence */ + foreach(lc, elts) + { + AlterTableCmd *atcmd; + + atcmd = makeNode(AlterTableCmd); + atcmd->subtype = AT_AddColumnToSequence; + atcmd->def = (Node *) lfirst(lc); + atcmds = lappend(atcmds, atcmd); + } + + /* + * No recursion needed. Note that EventTriggerAlterTableStart() should + * have been called. 
+ */ + AlterTableInternal(RelationGetRelid(rel), atcmds, false); + CommandCounterIncrement(); + + tupdesc = RelationGetDescr(rel); + tuple = heap_form_tuple(tupdesc, value, null); + fill_seq_with_data(rel, tuple); +} + +/* + * seq_local_setval() + * + * Callback for setval(). + */ +void +seq_local_setval(Relation rel, int64 next, bool iscalled) +{ + Buffer buf; + HeapTupleData seqdatatuple; + Form_pg_seq_local_data seq; + + /* lock page buffer and read tuple */ + seq = read_seq_tuple(rel, &buf, &seqdatatuple); + + /* ready to change the on-disk (or really, in-buffer) tuple */ + START_CRIT_SECTION(); + seq->last_value = next; /* last fetched number */ + seq->is_called = iscalled; + seq->log_cnt = 0; + + MarkBufferDirty(buf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_seq_local_rec xlrec; + XLogRecPtr recptr; + Page page = BufferGetPage(buf); + + XLogBeginInsert(); + XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); + + xlrec.locator = rel->rd_locator; + XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec)); + XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); + + recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); +} + +/* + * seq_local_reset() + * + * Perform a hard reset on the local sequence, rewriting its heap data + * entirely. + */ +void +seq_local_reset(Relation rel, int64 startv, bool is_called, bool reset_state) +{ + Form_pg_seq_local_data seq; + Buffer buf; + HeapTupleData seqdatatuple; + HeapTuple tuple; + + /* lock buffer page and read tuple */ + (void) read_seq_tuple(rel, &buf, &seqdatatuple); + + /* + * Copy the existing sequence tuple. 
+ */ + tuple = heap_copytuple(&seqdatatuple); + + /* Now we're done with the old page */ + UnlockReleaseBuffer(buf); + + /* + * Modify the copied tuple to execute the restart (compare the RESTART + * action in AlterSequence) + */ + seq = (Form_pg_seq_local_data) GETSTRUCT(tuple); + seq->last_value = startv; + seq->is_called = is_called; + if (reset_state) + seq->log_cnt = 0; + + /* + * Create a new storage file for the sequence. + */ + RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence); + + /* + * Ensure sequence's relfrozenxid is at 0, since it won't contain any + * unfrozen XIDs. Same with relminmxid, since a sequence will never + * contain multixacts. + */ + Assert(rel->rd_rel->relfrozenxid == InvalidTransactionId); + Assert(rel->rd_rel->relminmxid == InvalidMultiXactId); + + /* + * Insert the modified tuple into the new storage file. + */ + fill_seq_with_data(rel, tuple); +} + +/* + * seq_local_get_state() + * + * Retrieve the state of a local sequence. + */ +void +seq_local_get_state(Relation rel, int64 *last_value, bool *is_called) +{ + Buffer buf; + HeapTupleData seqdatatuple; + Form_pg_seq_local_data seq; + + /* lock page buffer and read tuple */ + seq = read_seq_tuple(rel, &buf, &seqdatatuple); + + *last_value = seq->last_value; + *is_called = seq->is_called; + + UnlockReleaseBuffer(buf); +} + +/* + * seq_local_change_persistence() + * + * Persistence change for the local sequence Relation. 
+ */ +void +seq_local_change_persistence(Relation rel, char newrelpersistence) +{ + Buffer buf; + HeapTupleData seqdatatuple; + + (void) read_seq_tuple(rel, &buf, &seqdatatuple); + RelationSetNewRelfilenumber(rel, newrelpersistence); + fill_seq_with_data(rel, &seqdatatuple); + UnlockReleaseBuffer(buf); +} diff --git a/src/backend/access/sequence/seqlocalxlog.c b/src/backend/access/sequence/seqlocalxlog.c new file mode 100644 index 000000000000..db0ad969dbf2 --- /dev/null +++ b/src/backend/access/sequence/seqlocalxlog.c @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * seqlocalxlog.c + * WAL replay logic for local sequence access manager + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/sequence/seqlocalxlog.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/bufmask.h" +#include "access/seqlocalam.h" +#include "access/xlogutils.h" +#include "storage/block.h" + +/* + * Mask a Sequence page before performing consistency checks on it. + */ +void +seq_local_mask(char *page, BlockNumber blkno) +{ + mask_page_lsn_and_checksum(page); + + mask_unused_space(page); +} + +void +seq_local_redo(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + Buffer buffer; + Page page; + Page localpage; + char *item; + Size itemsz; + xl_seq_local_rec *xlrec = (xl_seq_local_rec *) XLogRecGetData(record); + seq_local_magic *sm; + + if (info != XLOG_SEQ_LOCAL_LOG) + elog(PANIC, "seq_redo: unknown op code %u", info); + + buffer = XLogInitBufferForRedo(record, 0); + page = (Page) BufferGetPage(buffer); + + /* + * We always reinit the page. 
However, since this WAL record type is also + * used for updating sequences, it's possible that a hot-standby backend + * is examining the page concurrently; so we mustn't transiently trash the + * buffer. The solution is to build the correct new page contents in + * local workspace and then memcpy into the buffer. Then only bytes that + * are supposed to change will change, even transiently. We must palloc + * the local page for alignment reasons. + */ + localpage = (Page) palloc(BufferGetPageSize(buffer)); + + PageInit(localpage, BufferGetPageSize(buffer), sizeof(seq_local_magic)); + sm = (seq_local_magic *) PageGetSpecialPointer(localpage); + sm->magic = SEQ_LOCAL_MAGIC; + + item = (char *) xlrec + sizeof(xl_seq_local_rec); + itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_local_rec); + + if (PageAddItem(localpage, (Item) item, itemsz, + FirstOffsetNumber, false, false) == InvalidOffsetNumber) + elog(PANIC, "seq_local_redo: failed to add item to page"); + + PageSetLSN(localpage, lsn); + + memcpy(page, localpage, BufferGetPageSize(buffer)); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + + pfree(localpage); +} diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index 1b7499726eb0..cc92268937b1 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -27,6 +27,7 @@ #include "access/gistxlog.h" #include "access/hash_xlog.h" #include "access/heapam_xlog.h" +#include "access/seqlocalam.h" #include "access/multixact.h" #include "access/nbtxlog.h" #include "access/spgxlog.h" diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 20e8978c9346..15ea0e24970c 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -16,6 +16,7 @@ #include "access/bufmask.h" #include "access/htup_details.h" +#include "access/seqlocalam.h" #include "access/multixact.h" #include "access/relation.h" #include "access/sequence.h" @@ -50,23 +51,6 @@ #include 
"utils/varlena.h" -/* - * We don't want to log each fetching of a value from a sequence, - * so we pre-log a few fetches in advance. In the event of - * crash we can lose (skip over) as many values as we pre-logged. - */ -#define SEQ_LOG_VALS 32 - -/* - * The "special area" of a sequence's buffer page looks like this. - */ -#define SEQ_MAGIC 0x1717 - -typedef struct sequence_magic -{ - uint32 magic; -} sequence_magic; - /* * We store a SeqTable item for every sequence we have touched in the current * session. This is needed to hold onto nextval/currval state. (We can't @@ -96,13 +80,9 @@ static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */ */ static SeqTableData *last_used_seq = NULL; -static void fill_seq_with_data(Relation rel, HeapTuple tuple); -static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum); static Relation lock_and_open_sequence(SeqTable seq); static void create_seq_hashtable(void); static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel); -static Form_pg_sequence_data read_seq_tuple(Relation rel, - Buffer *buf, HeapTuple seqdatatuple); static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, @@ -134,14 +114,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) Relation rel; HeapTuple tuple; TupleDesc tupDesc; - Datum value[SEQ_COL_LASTCOL]; - bool null[SEQ_COL_LASTCOL]; - List *elts = NIL; - List *atcmds = NIL; - ListCell *lc; Datum pgs_values[Natts_pg_sequence]; bool pgs_nulls[Natts_pg_sequence]; - int i; /* * If if_not_exists was given and a relation with the same name already @@ -174,35 +148,6 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) &seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); - /* - * Create relation (and fill value[] and null[] for the tuple) - */ - for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++) - { - ColumnDef *coldef = NULL; - - switch (i) - { - case 
SEQ_COL_LASTVAL: - coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid); - value[i - 1] = Int64GetDatumFast(last_value); - break; - case SEQ_COL_LOG: - coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid); - value[i - 1] = Int64GetDatum((int64) 0); - break; - case SEQ_COL_CALLED: - coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid); - value[i - 1] = BoolGetDatum(false); - break; - } - - coldef->is_not_null = true; - null[i - 1] = false; - - elts = lappend(elts, coldef); - } - stmt->relation = seq->sequence; stmt->inhRelations = NIL; stmt->constraints = NIL; @@ -215,35 +160,20 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) */ stmt->tableElts = NIL; + /* + * Initial relation has no attributes, these can be added later via the + * "init" AM callback. + */ + stmt->tableElts = NIL; + address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL); seqoid = address.objectId; Assert(seqoid != InvalidOid); rel = sequence_open(seqoid, AccessExclusiveLock); - /* Add all the attributes to the sequence */ - foreach(lc, elts) - { - AlterTableCmd *atcmd; - - atcmd = makeNode(AlterTableCmd); - atcmd->subtype = AT_AddColumnToSequence; - atcmd->def = (Node *) lfirst(lc); - atcmds = lappend(atcmds, atcmd); - } - - /* - * No recursion needed. Note that EventTriggerAlterTableStart() should - * have been called. 
- */ - AlterTableInternal(RelationGetRelid(rel), atcmds, false); - CommandCounterIncrement(); - - tupDesc = RelationGetDescr(rel); - - /* now initialize the sequence's data */ - tuple = heap_form_tuple(tupDesc, value, null); - fill_seq_with_data(rel, tuple); + /* now initialize the sequence table structure and its data */ + seq_local_init(rel, last_value, is_called); /* process OWNED BY if given */ if (owned_by) @@ -292,10 +222,6 @@ ResetSequence(Oid seq_relid) { Relation seq_rel; SeqTable elm; - Form_pg_sequence_data seq; - Buffer buf; - HeapTupleData seqdatatuple; - HeapTuple tuple; HeapTuple pgstuple; Form_pg_sequence pgsform; int64 startv; @@ -306,7 +232,6 @@ ResetSequence(Oid seq_relid) * indeed a sequence. */ init_sequence(seq_relid, &elm, &seq_rel); - (void) read_seq_tuple(seq_rel, &buf, &seqdatatuple); pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid)); if (!HeapTupleIsValid(pgstuple)) @@ -315,40 +240,8 @@ ResetSequence(Oid seq_relid) startv = pgsform->seqstart; ReleaseSysCache(pgstuple); - /* - * Copy the existing sequence tuple. - */ - tuple = heap_copytuple(&seqdatatuple); - - /* Now we're done with the old page */ - UnlockReleaseBuffer(buf); - - /* - * Modify the copied tuple to execute the restart (compare the RESTART - * action in AlterSequence) - */ - seq = (Form_pg_sequence_data) GETSTRUCT(tuple); - seq->last_value = startv; - seq->is_called = false; - seq->log_cnt = 0; - - /* - * Create a new storage file for the sequence. - */ - RelationSetNewRelfilenumber(seq_rel, seq_rel->rd_rel->relpersistence); - - /* - * Ensure sequence's relfrozenxid is at 0, since it won't contain any - * unfrozen XIDs. Same with relminmxid, since a sequence will never - * contain multixacts. - */ - Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId); - Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId); - - /* - * Insert the modified tuple into the new storage file. 
- */ - fill_seq_with_data(seq_rel, tuple); + /* Sequence state is forcibly reset here. */ + seq_local_reset(seq_rel, startv, false, true); /* Clear local cache so that we don't think we have cached numbers */ /* Note that we do not change the currval() state */ @@ -357,106 +250,6 @@ ResetSequence(Oid seq_relid) sequence_close(seq_rel, NoLock); } -/* - * Initialize a sequence's relation with the specified tuple as content - * - * This handles unlogged sequences by writing to both the main and the init - * fork as necessary. - */ -static void -fill_seq_with_data(Relation rel, HeapTuple tuple) -{ - fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM); - - if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) - { - SMgrRelation srel; - - srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER); - smgrcreate(srel, INIT_FORKNUM, false); - log_smgrcreate(&rel->rd_locator, INIT_FORKNUM); - fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM); - FlushRelationBuffers(rel); - smgrclose(srel); - } -} - -/* - * Initialize a sequence's relation fork with the specified tuple as content - */ -static void -fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) -{ - Buffer buf; - Page page; - sequence_magic *sm; - OffsetNumber offnum; - - /* Initialize first page of relation with special magic number */ - - buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); - Assert(BufferGetBlockNumber(buf) == 0); - - page = BufferGetPage(buf); - - PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(page); - sm->magic = SEQ_MAGIC; - - /* Now insert sequence tuple */ - - /* - * Since VACUUM does not process sequences, we have to force the tuple to - * have xmin = FrozenTransactionId now. Otherwise it would become - * invisible to SELECTs after 2G transactions. 
It is okay to do this - * because if the current transaction aborts, no other xact will ever - * examine the sequence tuple anyway. - */ - HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); - HeapTupleHeaderSetXminFrozen(tuple->t_data); - HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); - tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; - ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); - - /* check the comment above nextval_internal()'s equivalent call. */ - if (RelationNeedsWAL(rel)) - GetTopTransactionId(); - - START_CRIT_SECTION(); - - MarkBufferDirty(buf); - - offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len, - InvalidOffsetNumber, false, false); - if (offnum != FirstOffsetNumber) - elog(ERROR, "failed to add sequence tuple to page"); - - /* XLOG stuff */ - if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - xlrec.locator = rel->rd_locator; - - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(tuple->t_data, tuple->t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); - - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); -} - /* * AlterSequence * @@ -468,10 +261,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) Oid relid; SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData datatuple; Form_pg_sequence seqform; - Form_pg_sequence_data newdataform; bool need_seq_rewrite; List *owned_by; ObjectAddress address; @@ -480,7 +270,6 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) bool reset_state = false; bool is_called; int64 last_value; - HeapTuple newdatatuple; /* Open and lock sequence, and check for ownership along the way. 
*/ relid = RangeVarGetRelidExtended(stmt->sequence, @@ -507,16 +296,8 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) seqform = (Form_pg_sequence) GETSTRUCT(seqtuple); - /* lock page buffer and read tuple into new sequence structure */ - (void) read_seq_tuple(seqrel, &buf, &datatuple); - - /* copy the existing sequence data tuple, so it can be modified locally */ - newdatatuple = heap_copytuple(&datatuple); - newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple); - last_value = newdataform->last_value; - is_called = newdataform->is_called; - - UnlockReleaseBuffer(buf); + /* Read sequence data */ + seq_local_get_state(seqrel, &last_value, &is_called); /* Check and set new values */ init_params(pstate, stmt->options, stmt->for_identity, false, @@ -526,32 +307,10 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) /* If needed, rewrite the sequence relation itself */ if (need_seq_rewrite) { - /* check the comment above nextval_internal()'s equivalent call. */ if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - /* - * Create a new storage file for the sequence, making the state - * changes transactional. - */ - RelationSetNewRelfilenumber(seqrel, seqrel->rd_rel->relpersistence); - - /* - * Ensure sequence's relfrozenxid is at 0, since it won't contain any - * unfrozen XIDs. Same with relminmxid, since a sequence will never - * contain multixacts. - */ - Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId); - Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId); - - /* - * Insert the modified tuple into the new storage file. 
- */ - newdataform->last_value = last_value; - newdataform->is_called = is_called; - if (reset_state) - newdataform->log_cnt = 0; - fill_seq_with_data(seqrel, newdatatuple); + seq_local_reset(seqrel, last_value, is_called, reset_state); } /* Clear local cache so that we don't think we have cached numbers */ @@ -580,8 +339,6 @@ SequenceChangePersistence(Oid relid, char newrelpersistence) { SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData seqdatatuple; /* * ALTER SEQUENCE acquires this lock earlier. If we're processing an @@ -596,10 +353,7 @@ SequenceChangePersistence(Oid relid, char newrelpersistence) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - (void) read_seq_tuple(seqrel, &buf, &seqdatatuple); - RelationSetNewRelfilenumber(seqrel, newrelpersistence); - fill_seq_with_data(seqrel, &seqdatatuple); - UnlockReleaseBuffer(buf); + seq_local_change_persistence(seqrel, newrelpersistence); sequence_close(seqrel, NoLock); } @@ -662,24 +416,15 @@ nextval_internal(Oid relid, bool check_permissions) { SeqTable elm; Relation seqrel; - Buffer buf; - Page page; HeapTuple pgstuple; Form_pg_sequence pgsform; - HeapTupleData seqdatatuple; - Form_pg_sequence_data seq; int64 incby, maxv, minv, cache, - log, - fetch, last; - int64 result, - next, - rescnt = 0; + int64 result; bool cycle; - bool logit = false; /* open and lock sequence */ init_sequence(relid, &elm, &seqrel); @@ -724,105 +469,9 @@ nextval_internal(Oid relid, bool check_permissions) cycle = pgsform->seqcycle; ReleaseSysCache(pgstuple); - /* lock page buffer and read tuple */ - seq = read_seq_tuple(seqrel, &buf, &seqdatatuple); - page = BufferGetPage(buf); - - last = next = result = seq->last_value; - fetch = cache; - log = seq->log_cnt; - - if (!seq->is_called) - { - rescnt++; /* return last_value if not is_called */ - fetch--; - } - - /* - * Decide whether we should emit a WAL log record. If so, force up the - * fetch count to grab SEQ_LOG_VALS more values than we actually need to - * cache. 
(These will then be usable without logging.) - * - * If this is the first nextval after a checkpoint, we must force a new - * WAL record to be written anyway, else replay starting from the - * checkpoint would fail to advance the sequence past the logged values. - * In this case we may as well fetch extra values. - */ - if (log < fetch || !seq->is_called) - { - /* forced log to satisfy local demand for values */ - fetch = log = fetch + SEQ_LOG_VALS; - logit = true; - } - else - { - XLogRecPtr redoptr = GetRedoRecPtr(); - - if (PageGetLSN(page) <= redoptr) - { - /* last update of seq was before checkpoint */ - fetch = log = fetch + SEQ_LOG_VALS; - logit = true; - } - } - - while (fetch) /* try to fetch cache [+ log ] numbers */ - { - /* - * Check MAXVALUE for ascending sequences and MINVALUE for descending - * sequences - */ - if (incby > 0) - { - /* ascending sequence */ - if ((maxv >= 0 && next > maxv - incby) || - (maxv < 0 && next + incby > maxv)) - { - if (rescnt > 0) - break; /* stop fetching */ - if (!cycle) - ereport(ERROR, - (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), - errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")", - RelationGetRelationName(seqrel), - maxv))); - next = minv; - } - else - next += incby; - } - else - { - /* descending sequence */ - if ((minv < 0 && next < minv - incby) || - (minv >= 0 && next + incby < minv)) - { - if (rescnt > 0) - break; /* stop fetching */ - if (!cycle) - ereport(ERROR, - (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), - errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")", - RelationGetRelationName(seqrel), - minv))); - next = maxv; - } - else - next += incby; - } - fetch--; - if (rescnt < cache) - { - log--; - rescnt++; - last = next; - if (rescnt == 1) /* if it's first result - */ - result = next; /* it's what to return */ - } - } - - log -= fetch; /* adjust for any unfetched numbers */ - Assert(log >= 0); + /* retrieve next value from the access method */ + 
result = seq_local_nextval(seqrel, incby, maxv, minv, cache, cycle, + &last); /* save info in local cache */ elm->increment = incby; @@ -832,69 +481,6 @@ nextval_internal(Oid relid, bool check_permissions) last_used_seq = elm; - /* - * If something needs to be WAL logged, acquire an xid, so this - * transaction's commit will trigger a WAL flush and wait for syncrep. - * It's sufficient to ensure the toplevel transaction has an xid, no need - * to assign xids subxacts, that'll already trigger an appropriate wait. - * (Have to do that here, so we're outside the critical section) - */ - if (logit && RelationNeedsWAL(seqrel)) - GetTopTransactionId(); - - /* ready to change the on-disk (or really, in-buffer) tuple */ - START_CRIT_SECTION(); - - /* - * We must mark the buffer dirty before doing XLogInsert(); see notes in - * SyncOneBuffer(). However, we don't apply the desired changes just yet. - * This looks like a violation of the buffer update protocol, but it is in - * fact safe because we hold exclusive lock on the buffer. Any other - * process, including a checkpoint, that tries to examine the buffer - * contents will block until we release the lock, and then will see the - * final state that we install below. - */ - MarkBufferDirty(buf); - - /* XLOG stuff */ - if (logit && RelationNeedsWAL(seqrel)) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - - /* - * We don't log the current state of the tuple, but rather the state - * as it would appear after "log" more fetches. This lets us skip - * that many future WAL records, at the cost that we lose those - * sequence values if we crash. 
- */ - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - /* set values that will be saved in xlog */ - seq->last_value = next; - seq->is_called = true; - seq->log_cnt = 0; - - xlrec.locator = seqrel->rd_locator; - - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); - - PageSetLSN(page, recptr); - } - - /* Now update sequence tuple to the intended final state */ - seq->last_value = last; /* last fetched number */ - seq->is_called = true; - seq->log_cnt = log; /* how much is logged */ - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); - sequence_close(seqrel, NoLock); return result; @@ -984,9 +570,6 @@ do_setval(Oid relid, int64 next, bool iscalled) { SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData seqdatatuple; - Form_pg_sequence_data seq; HeapTuple pgstuple; Form_pg_sequence pgsform; int64 maxv, @@ -1020,9 +603,6 @@ do_setval(Oid relid, int64 next, bool iscalled) */ PreventCommandIfParallelMode("setval()"); - /* lock page buffer and read tuple */ - seq = read_seq_tuple(seqrel, &buf, &seqdatatuple); - if ((next < minv) || (next > maxv)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), @@ -1044,37 +624,8 @@ do_setval(Oid relid, int64 next, bool iscalled) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - /* ready to change the on-disk (or really, in-buffer) tuple */ - START_CRIT_SECTION(); - - seq->last_value = next; /* last fetched number */ - seq->is_called = iscalled; - seq->log_cnt = 0; - - MarkBufferDirty(buf); - - /* XLOG stuff */ - if (RelationNeedsWAL(seqrel)) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - Page page = BufferGetPage(buf); - - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - xlrec.locator = seqrel->rd_locator; - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); 
- - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); + /* Call the access method callback */ + seq_local_setval(seqrel, next, iscalled); sequence_close(seqrel, NoLock); } @@ -1215,62 +766,6 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel) } -/* - * Given an opened sequence relation, lock the page buffer and find the tuple - * - * *buf receives the reference to the pinned-and-ex-locked buffer - * *seqdatatuple receives the reference to the sequence tuple proper - * (this arg should point to a local variable of type HeapTupleData) - * - * Function's return value points to the data payload of the tuple - */ -static Form_pg_sequence_data -read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) -{ - Page page; - ItemId lp; - sequence_magic *sm; - Form_pg_sequence_data seq; - - *buf = ReadBuffer(rel, 0); - LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); - - page = BufferGetPage(*buf); - sm = (sequence_magic *) PageGetSpecialPointer(page); - - if (sm->magic != SEQ_MAGIC) - elog(ERROR, "bad magic number in sequence \"%s\": %08X", - RelationGetRelationName(rel), sm->magic); - - lp = PageGetItemId(page, FirstOffsetNumber); - Assert(ItemIdIsNormal(lp)); - - /* Note we currently only bother to set these two fields of *seqdatatuple */ - seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); - seqdatatuple->t_len = ItemIdGetLength(lp); - - /* - * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on - * a sequence, which would leave a non-frozen XID in the sequence tuple's - * xmax, which eventually leads to clog access failures or worse. If we - * see this has happened, clean up after it. We treat this like a hint - * bit update, ie, don't bother to WAL-log it, since we can certainly do - * this again if the update gets lost. 
- */ - Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) - { - HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); - seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; - seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; - MarkBufferDirtyHint(*buf, true); - } - - seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple); - - return seq; -} - /* * init_params: process the options list of CREATE or ALTER SEQUENCE, and * store the values into appropriate fields of seqform, for changes that go @@ -1855,16 +1350,13 @@ pg_get_sequence_data(PG_FUNCTION_ARGS) !RELATION_IS_OTHER_TEMP(seqrel) && (RelationIsPermanent(seqrel) || !RecoveryInProgress())) { - Buffer buf; - HeapTupleData seqtuple; - Form_pg_sequence_data seq; - - seq = read_seq_tuple(seqrel, &buf, &seqtuple); + bool is_called; + int64 last_value; - values[0] = Int64GetDatum(seq->last_value); - values[1] = BoolGetDatum(seq->is_called); + seq_local_get_state(seqrel, &last_value, &is_called); - UnlockReleaseBuffer(buf); + values[0] = Int64GetDatum(last_value); + values[1] = BoolGetDatum(is_called); } else memset(isnull, true, sizeof(isnull)); @@ -1908,17 +1400,9 @@ pg_sequence_last_value(PG_FUNCTION_ARGS) !RELATION_IS_OTHER_TEMP(seqrel) && (RelationIsPermanent(seqrel) || !RecoveryInProgress())) { - Buffer buf; - HeapTupleData seqtuple; - Form_pg_sequence_data seq; - - seq = read_seq_tuple(seqrel, &buf, &seqtuple); - - is_called = seq->is_called; - result = seq->last_value; - - UnlockReleaseBuffer(buf); + seq_local_get_state(seqrel, &result, &is_called); } + sequence_close(seqrel, NoLock); if (is_called) @@ -1927,57 +1411,6 @@ pg_sequence_last_value(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } - -void -seq_redo(XLogReaderState *record) -{ - XLogRecPtr lsn = record->EndRecPtr; - uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; - Buffer buffer; - Page page; - Page localpage; - char *item; - Size itemsz; - 
xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record); - sequence_magic *sm; - - if (info != XLOG_SEQ_LOG) - elog(PANIC, "seq_redo: unknown op code %u", info); - - buffer = XLogInitBufferForRedo(record, 0); - page = (Page) BufferGetPage(buffer); - - /* - * We always reinit the page. However, since this WAL record type is also - * used for updating sequences, it's possible that a hot-standby backend - * is examining the page concurrently; so we mustn't transiently trash the - * buffer. The solution is to build the correct new page contents in - * local workspace and then memcpy into the buffer. Then only bytes that - * are supposed to change will change, even transiently. We must palloc - * the local page for alignment reasons. - */ - localpage = (Page) palloc(BufferGetPageSize(buffer)); - - PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(localpage); - sm->magic = SEQ_MAGIC; - - item = (char *) xlrec + sizeof(xl_seq_rec); - itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec); - - if (PageAddItem(localpage, (Item) item, itemsz, - FirstOffsetNumber, false, false) == InvalidOffsetNumber) - elog(PANIC, "seq_redo: failed to add item to page"); - - PageSetLSN(localpage, lsn); - - memcpy(page, localpage, BufferGetPageSize(buffer)); - MarkBufferDirty(buffer); - UnlockReleaseBuffer(buffer); - - pfree(localpage); -} - /* * Flush cached sequence information. */ @@ -1992,14 +1425,3 @@ ResetSequenceCaches(void) last_used_seq = NULL; } - -/* - * Mask a Sequence page before performing consistency checks on it. 
- */ -void -seq_mask(char *page, BlockNumber blkno) -{ - mask_page_lsn_and_checksum(page); - - mask_unused_space(page); -} diff --git a/src/bin/pg_waldump/.gitignore b/src/bin/pg_waldump/.gitignore index ec51f41c767e..8d1195de2637 100644 --- a/src/bin/pg_waldump/.gitignore +++ b/src/bin/pg_waldump/.gitignore @@ -10,13 +10,13 @@ /gistdesc.c /hashdesc.c /heapdesc.c +/seqlocaldesc.c /logicalmsgdesc.c /mxactdesc.c /nbtdesc.c /relmapdesc.c /replorigindesc.c /rmgrdesc_utils.c -/seqdesc.c /smgrdesc.c /spgdesc.c /standbydesc.c diff --git a/src/bin/pg_waldump/rmgrdesc.c b/src/bin/pg_waldump/rmgrdesc.c index fac509ed134e..2fcf9fc4392a 100644 --- a/src/bin/pg_waldump/rmgrdesc.c +++ b/src/bin/pg_waldump/rmgrdesc.c @@ -16,6 +16,7 @@ #include "access/gistxlog.h" #include "access/hash_xlog.h" #include "access/heapam_xlog.h" +#include "access/seqlocalam.h" #include "access/multixact.h" #include "access/nbtxlog.h" #include "access/rmgr.h" diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl index f26d75e01cfd..2495a28d26a1 100644 --- a/src/bin/pg_waldump/t/001_basic.pl +++ b/src/bin/pg_waldump/t/001_basic.pl @@ -67,7 +67,7 @@ Hash Gin Gist -Sequence +SequenceLocal SPGist BRIN CommitTs diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index 8e7fc9db8778..b942b25350bc 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -40,7 +40,7 @@ PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask, NULL) PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask, NULL) PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask, NULL) -PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask, NULL) +PG_RMGR(RM_SEQ_LOCAL_ID, "SequenceLocal", seq_local_redo, seq_local_desc, 
seq_local_identify, NULL, NULL, seq_local_mask, NULL) PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask, NULL) PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask, NULL) PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL, NULL) diff --git a/src/include/access/seqlocalam.h b/src/include/access/seqlocalam.h new file mode 100644 index 000000000000..225fb9a2cbeb --- /dev/null +++ b/src/include/access/seqlocalam.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------- + * + * seqlocalam.h + * Local sequence access method. + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/seqlocalam.h + * + *------------------------------------------------------------------------- + */ +#ifndef SEQLOCALAM_H +#define SEQLOCALAM_H + +#include "access/xlogreader.h" +#include "storage/relfilelocator.h" +#include "utils/rel.h" + +/* XLOG stuff */ +#define XLOG_SEQ_LOCAL_LOG 0x00 + +typedef struct xl_seq_local_rec +{ + RelFileLocator locator; + /* SEQUENCE TUPLE DATA FOLLOWS AT THE END */ +} xl_seq_local_rec; + +/* + * The "special area" of a local sequence's buffer page looks like this. 
+ */ +#define SEQ_LOCAL_MAGIC 0x1717 + +typedef struct seq_local_magic +{ + uint32 magic; +} seq_local_magic; + +extern void seq_local_redo(XLogReaderState *record); +extern void seq_local_desc(StringInfo buf, XLogReaderState *record); +extern const char *seq_local_identify(uint8 info); +extern void seq_local_mask(char *page, BlockNumber blkno); + +/* access routines */ +extern int64 seq_local_nextval(Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last); +extern const char *seq_local_get_table_am(void); +extern void seq_local_init(Relation rel, int64 last_value, bool is_called); +extern void seq_local_setval(Relation rel, int64 next, bool iscalled); +extern void seq_local_reset(Relation rel, int64 startv, bool is_called, + bool reset_state); +extern void seq_local_get_state(Relation rel, int64 *last_value, + bool *is_called); +extern void seq_local_change_persistence(Relation rel, + char newrelpersistence); + +#endif /* SEQLOCALAM_H */ From 50509bcc9a7481a366e7302de9f26740d02edac1 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 4 Dec 2024 08:09:46 +0900 Subject: [PATCH 4/7] Sequence access methods - backend support The "seqlocal" sequence AM is now plugged in as a handler in the relcache, and a set of callbacks in sequenceam.h. 
--- src/backend/access/sequence/Makefile | 2 +- src/backend/access/sequence/meson.build | 1 + src/backend/access/sequence/seqlocalam.c | 41 +++- src/backend/access/sequence/sequence.c | 3 +- src/backend/access/sequence/sequenceamapi.c | 145 ++++++++++++++ src/backend/catalog/heap.c | 6 +- src/backend/commands/amcmds.c | 16 ++ src/backend/commands/sequence.c | 23 +-- src/backend/commands/tablecmds.c | 17 +- src/backend/nodes/Makefile | 1 + src/backend/nodes/gen_node_support.pl | 2 + src/backend/parser/gram.y | 12 +- src/backend/parser/parse_utilcmd.c | 2 + src/backend/utils/adt/pseudotypes.c | 1 + src/backend/utils/cache/relcache.c | 91 +++++++-- src/backend/utils/misc/guc_tables.c | 12 ++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/bin/psql/describe.c | 2 + src/bin/psql/tab-complete.in.c | 4 +- src/include/access/seqlocalam.h | 15 -- src/include/access/sequenceam.h | 181 ++++++++++++++++++ src/include/catalog/pg_am.dat | 3 + src/include/catalog/pg_am.h | 1 + src/include/catalog/pg_class.h | 6 + src/include/catalog/pg_proc.dat | 13 ++ src/include/catalog/pg_type.dat | 6 + src/include/commands/defrem.h | 1 + src/include/commands/sequence.h | 34 ---- src/include/nodes/meson.build | 1 + src/include/nodes/parsenodes.h | 1 + src/include/utils/guc_hooks.h | 2 + src/include/utils/rel.h | 5 + src/test/regress/expected/create_am.out | 55 ++++-- src/test/regress/expected/opr_sanity.out | 12 ++ src/test/regress/expected/psql.out | 96 +++++----- src/test/regress/expected/type_sanity.out | 12 +- src/test/regress/sql/create_am.sql | 24 ++- src/test/regress/sql/opr_sanity.sql | 10 + src/test/regress/sql/type_sanity.sql | 12 +- src/tools/pgindent/typedefs.list | 5 +- 40 files changed, 695 insertions(+), 182 deletions(-) create mode 100644 src/backend/access/sequence/sequenceamapi.c create mode 100644 src/include/access/sequenceam.h diff --git a/src/backend/access/sequence/Makefile b/src/backend/access/sequence/Makefile index a15ceec1c0a0..62006165a15f 100644 --- 
a/src/backend/access/sequence/Makefile +++ b/src/backend/access/sequence/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/access/sequence top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = seqlocalam.o seqlocalxlog.o sequence.o +OBJS = seqlocalam.o seqlocalxlog.o sequence.o sequenceamapi.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/sequence/meson.build b/src/backend/access/sequence/meson.build index 8bc0e95e68c0..d82af34d538c 100644 --- a/src/backend/access/sequence/meson.build +++ b/src/backend/access/sequence/meson.build @@ -4,4 +4,5 @@ backend_sources += files( 'seqlocalam.c', 'seqlocalxlog.c', 'sequence.c', + 'sequenceamapi.c', ) diff --git a/src/backend/access/sequence/seqlocalam.c b/src/backend/access/sequence/seqlocalam.c index e019a6f5a95d..5dec9d51ec82 100644 --- a/src/backend/access/sequence/seqlocalam.c +++ b/src/backend/access/sequence/seqlocalam.c @@ -17,6 +17,7 @@ #include "access/multixact.h" #include "access/seqlocalam.h" +#include "access/sequenceam.h" #include "access/xact.h" #include "access/xloginsert.h" #include "access/xlogutils.h" @@ -24,6 +25,7 @@ #include "commands/tablecmds.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "utils/builtins.h" /* @@ -230,10 +232,10 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) * Allocate a new value for a local sequence, based on the sequence * configuration. */ -int64 +static int64 seq_local_nextval(Relation rel, int64 incby, int64 maxv, - int64 minv, int64 cache, bool cycle, - int64 *last) + int64 minv, int64 cache, bool cycle, + int64 *last) { int64 result; int64 fetch; @@ -417,7 +419,7 @@ seq_local_nextval(Relation rel, int64 incby, int64 maxv, * * Return the table access method used by this sequence. 
*/ -const char * +static const char * seq_local_get_table_am(void) { return "heap"; @@ -432,7 +434,7 @@ seq_local_get_table_am(void) * inserted after the relation has been created, filling in its heap * table. */ -void +static void seq_local_init(Relation rel, int64 last_value, bool is_called) { Datum value[SEQ_LOCAL_COL_LASTCOL]; @@ -499,7 +501,7 @@ seq_local_init(Relation rel, int64 last_value, bool is_called) * * Callback for setval(). */ -void +static void seq_local_setval(Relation rel, int64 next, bool iscalled) { Buffer buf; @@ -547,7 +549,7 @@ seq_local_setval(Relation rel, int64 next, bool iscalled) * Perform a hard reset on the local sequence, rewriting its heap data * entirely. */ -void +static void seq_local_reset(Relation rel, int64 startv, bool is_called, bool reset_state) { Form_pg_seq_local_data seq; @@ -600,7 +602,7 @@ seq_local_reset(Relation rel, int64 startv, bool is_called, bool reset_state) * * Retrieve the state of a local sequence. */ -void +static void seq_local_get_state(Relation rel, int64 *last_value, bool *is_called) { Buffer buf; @@ -621,7 +623,7 @@ seq_local_get_state(Relation rel, int64 *last_value, bool *is_called) * * Persistence change for the local sequence Relation. */ -void +static void seq_local_change_persistence(Relation rel, char newrelpersistence) { Buffer buf; @@ -632,3 +634,24 @@ seq_local_change_persistence(Relation rel, char newrelpersistence) fill_seq_with_data(rel, &seqdatatuple); UnlockReleaseBuffer(buf); } + +/* ------------------------------------------------------------------------ + * Definition of the local sequence access method. 
+ * ------------------------------------------------------------------------ + */ +static const SequenceAmRoutine seq_local_methods = { + .type = T_SequenceAmRoutine, + .get_table_am = seq_local_get_table_am, + .init = seq_local_init, + .nextval = seq_local_nextval, + .setval = seq_local_setval, + .reset = seq_local_reset, + .get_state = seq_local_get_state, + .change_persistence = seq_local_change_persistence +}; + +Datum +seq_local_sequenceam_handler(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(&seq_local_methods); +} diff --git a/src/backend/access/sequence/sequence.c b/src/backend/access/sequence/sequence.c index 8b5303553702..f30317d1feae 100644 --- a/src/backend/access/sequence/sequence.c +++ b/src/backend/access/sequence/sequence.c @@ -13,7 +13,8 @@ * * NOTES * This file contains sequence_ routines that implement access to sequences - * (in contrast to other relation types like indexes). + * (in contrast to other relation types like indexes) that are independent + * of individual sequence access methods. * *------------------------------------------------------------------------- */ diff --git a/src/backend/access/sequence/sequenceamapi.c b/src/backend/access/sequence/sequenceamapi.c new file mode 100644 index 000000000000..dd1a60d827a4 --- /dev/null +++ b/src/backend/access/sequence/sequenceamapi.c @@ -0,0 +1,145 @@ +/*------------------------------------------------------------------------- + * + * sequenceamapi.c + * general sequence access method routines + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/sequence/sequenceamapi.c + * + * + * Sequence access method allows the SQL Standard Sequence objects to be + * managed according to either the default access method or a pluggable + * replacement. 
Each sequence can only use one access method at a time, + * though different sequence access methods can be in use by different + * sequences at the same time. + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/xact.h" +#include "access/sequenceam.h" +#include "catalog/pg_am.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "utils/guc_hooks.h" +#include "utils/syscache.h" + + +/* GUC */ +char *default_sequence_access_method = DEFAULT_SEQUENCE_ACCESS_METHOD; + +/* + * GetSequenceAmRoutine + * Call the specified access method handler routine to get its + * SequenceAmRoutine struct, which will be palloc'd in the caller's + * memory context. + */ +const SequenceAmRoutine * +GetSequenceAmRoutine(Oid amhandler) +{ + Datum datum; + SequenceAmRoutine *routine; + + datum = OidFunctionCall0(amhandler); + routine = (SequenceAmRoutine *) DatumGetPointer(datum); + + if (routine == NULL || !IsA(routine, SequenceAmRoutine)) + elog(ERROR, "sequence access method handler %u did not return a SequenceAmRoutine struct", + amhandler); + + /* + * Assert that all required callbacks are present. That makes it a bit + * easier to keep AMs up to date, e.g. when forward porting them to a new + * major version. + */ + Assert(routine->get_table_am != NULL); + Assert(routine->init != NULL); + Assert(routine->nextval != NULL); + Assert(routine->setval != NULL); + Assert(routine->reset != NULL); + Assert(routine->get_state != NULL); + Assert(routine->change_persistence != NULL); + + return routine; +} + +/* + * GetSequenceAmRoutineId + * Call pg_am and retrieve the OID of the access method handler. 
+ */ +Oid +GetSequenceAmRoutineId(Oid amoid) +{ + Oid amhandleroid; + HeapTuple tuple; + Form_pg_am aform; + + tuple = SearchSysCache1(AMOID, + ObjectIdGetDatum(amoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", amoid); + aform = (Form_pg_am) GETSTRUCT(tuple); + Assert(aform->amtype == AMTYPE_SEQUENCE); + amhandleroid = aform->amhandler; + ReleaseSysCache(tuple); + + return amhandleroid; +} + +/* check_hook: validate new default_sequence_access_method */ +bool +check_default_sequence_access_method(char **newval, void **extra, + GucSource source) +{ + if (**newval == '\0') + { + GUC_check_errdetail("%s cannot be empty.", + "default_sequence_access_method"); + return false; + } + + if (strlen(*newval) >= NAMEDATALEN) + { + GUC_check_errdetail("%s is too long (maximum %d characters).", + "default_sequence_access_method", NAMEDATALEN - 1); + return false; + } + + /* + * If we aren't inside a transaction, or not connected to a database, we + * cannot do the catalog access necessary to verify the method. Must + * accept the value on faith. + */ + if (IsTransactionState() && MyDatabaseId != InvalidOid) + { + if (!OidIsValid(get_sequence_am_oid(*newval, true))) + { + /* + * When source == PGC_S_TEST, don't throw a hard error for a + * nonexistent sequence access method, only a NOTICE. See comments + * in guc.h. 
+ */ + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("sequence access method \"%s\" does not exist", + *newval))); + } + else + { + GUC_check_errdetail("sequence access method \"%s\" does not exist.", + *newval); + return false; + } + } + } + + return true; +} diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index fbaed5359ad7..558e9a8f9695 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1480,9 +1480,13 @@ heap_create_with_catalog(const char *relname, * No need to add an explicit dependency for the toast table, as the * main table depends on it. Partitioned tables may not have an * access method set. + * + * Sequences and tables are created with their access method ID + * given by the caller of this function. */ if ((RELKIND_HAS_TABLE_AM(relkind) && relkind != RELKIND_TOASTVALUE) || - (relkind == RELKIND_PARTITIONED_TABLE && OidIsValid(accessmtd))) + (relkind == RELKIND_PARTITIONED_TABLE && OidIsValid(accessmtd)) || + RELKIND_HAS_SEQUENCE_AM(relkind)) { ObjectAddressSet(referenced, AccessMethodRelationId, accessmtd); add_exact_object_address(&referenced, addrs); diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c index 58ed9d216cc0..e44633d13b60 100644 --- a/src/backend/commands/amcmds.c +++ b/src/backend/commands/amcmds.c @@ -15,6 +15,7 @@ #include "access/htup_details.h" #include "access/table.h" +#include "access/sequenceam.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/indexing.h" @@ -175,6 +176,16 @@ get_table_am_oid(const char *amname, bool missing_ok) return get_am_type_oid(amname, AMTYPE_TABLE, missing_ok); } +/* + * get_sequence_am_oid - given an access method name, look up its OID + * and verify it corresponds to a sequence AM. 
+ */ +Oid +get_sequence_am_oid(const char *amname, bool missing_ok) +{ + return get_am_type_oid(amname, AMTYPE_SEQUENCE, missing_ok); +} + /* * get_am_oid - given an access method name, look up its OID. * The type is not checked. @@ -215,6 +226,8 @@ get_am_type_string(char amtype) { case AMTYPE_INDEX: return "INDEX"; + case AMTYPE_SEQUENCE: + return "SEQUENCE"; case AMTYPE_TABLE: return "TABLE"; default: @@ -251,6 +264,9 @@ lookup_am_handler_func(List *handler_name, char amtype) case AMTYPE_INDEX: expectedType = INDEX_AM_HANDLEROID; break; + case AMTYPE_SEQUENCE: + expectedType = SEQUENCE_AM_HANDLEROID; + break; case AMTYPE_TABLE: expectedType = TABLE_AM_HANDLEROID; break; diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 15ea0e24970c..fc2c18df4dc3 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -16,10 +16,10 @@ #include "access/bufmask.h" #include "access/htup_details.h" -#include "access/seqlocalam.h" #include "access/multixact.h" #include "access/relation.h" #include "access/sequence.h" +#include "access/sequenceam.h" #include "access/table.h" #include "access/transam.h" #include "access/xact.h" @@ -152,6 +152,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) stmt->inhRelations = NIL; stmt->constraints = NIL; stmt->options = NIL; + stmt->accessMethod = seq->accessMethod ? pstrdup(seq->accessMethod) : NULL; stmt->oncommit = ONCOMMIT_NOOP; stmt->tablespacename = NULL; stmt->if_not_exists = seq->if_not_exists; @@ -173,7 +174,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) rel = sequence_open(seqoid, AccessExclusiveLock); /* now initialize the sequence table structure and its data */ - seq_local_init(rel, last_value, is_called); + sequence_init(rel, last_value, is_called); /* process OWNED BY if given */ if (owned_by) @@ -241,7 +242,7 @@ ResetSequence(Oid seq_relid) ReleaseSysCache(pgstuple); /* Sequence state is forcibly reset here. 
*/ - seq_local_reset(seq_rel, startv, false, true); + sequence_reset(seq_rel, startv, false, true); /* Clear local cache so that we don't think we have cached numbers */ /* Note that we do not change the currval() state */ @@ -297,7 +298,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) seqform = (Form_pg_sequence) GETSTRUCT(seqtuple); /* Read sequence data */ - seq_local_get_state(seqrel, &last_value, &is_called); + sequence_get_state(seqrel, &last_value, &is_called); /* Check and set new values */ init_params(pstate, stmt->options, stmt->for_identity, false, @@ -310,7 +311,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - seq_local_reset(seqrel, last_value, is_called, reset_state); + sequence_reset(seqrel, last_value, is_called, reset_state); } /* Clear local cache so that we don't think we have cached numbers */ @@ -353,7 +354,7 @@ SequenceChangePersistence(Oid relid, char newrelpersistence) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - seq_local_change_persistence(seqrel, newrelpersistence); + sequence_change_persistence(seqrel, newrelpersistence); sequence_close(seqrel, NoLock); } @@ -470,8 +471,8 @@ nextval_internal(Oid relid, bool check_permissions) ReleaseSysCache(pgstuple); /* retrieve next value from the access method */ - result = seq_local_nextval(seqrel, incby, maxv, minv, cache, cycle, - &last); + result = sequence_nextval(seqrel, incby, maxv, minv, cache, cycle, + &last); /* save info in local cache */ elm->increment = incby; @@ -625,7 +626,7 @@ do_setval(Oid relid, int64 next, bool iscalled) GetTopTransactionId(); /* Call the access method callback */ - seq_local_setval(seqrel, next, iscalled); + sequence_setval(seqrel, next, iscalled); sequence_close(seqrel, NoLock); } @@ -1353,7 +1354,7 @@ pg_get_sequence_data(PG_FUNCTION_ARGS) bool is_called; int64 last_value; - seq_local_get_state(seqrel, &last_value, &is_called); + sequence_get_state(seqrel, &last_value, 
&is_called); values[0] = Int64GetDatum(last_value); values[1] = BoolGetDatum(is_called); @@ -1400,7 +1401,7 @@ pg_sequence_last_value(PG_FUNCTION_ARGS) !RELATION_IS_OTHER_TEMP(seqrel) && (RelationIsPermanent(seqrel) || !RecoveryInProgress())) { - seq_local_get_state(seqrel, &result, &is_called); + sequence_get_state(seqrel, &result, &is_called); } sequence_close(seqrel, NoLock); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index dc4d459653a8..5fa8928119f9 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -23,6 +23,7 @@ #include "access/reloptions.h" #include "access/relscan.h" #include "access/sysattr.h" +#include "access/sequenceam.h" #include "access/tableam.h" #include "access/toast_compression.h" #include "access/xact.h" @@ -1025,14 +1026,18 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, } /* - * For relations with table AM and partitioned tables, select access - * method to use: an explicitly indicated one, or (in the case of a + * For relations with table AM, partitioned tables or sequences, select + * access method to use: an explicitly indicated one, or (in the case of a * partitioned table) the parent's, if it has one. 
*/ if (stmt->accessMethod != NULL) { - Assert(RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE); - accessMethodId = get_table_am_oid(stmt->accessMethod, false); + Assert(RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE || + RELKIND_HAS_SEQUENCE_AM(relkind)); + if (RELKIND_HAS_SEQUENCE_AM(relkind)) + accessMethodId = get_sequence_am_oid(stmt->accessMethod, false); + else + accessMethodId = get_table_am_oid(stmt->accessMethod, false); } else if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE) { @@ -1045,6 +1050,10 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, if (RELKIND_HAS_TABLE_AM(relkind) && !OidIsValid(accessMethodId)) accessMethodId = get_table_am_oid(default_table_access_method, false); } + else if (RELKIND_HAS_SEQUENCE_AM(relkind)) + { + accessMethodId = get_sequence_am_oid(default_sequence_access_method, false); + } /* * Create the relation. Inherited defaults and CHECK constraints are diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile index 77ddb9ca53f1..64d4dccc936f 100644 --- a/src/backend/nodes/Makefile +++ b/src/backend/nodes/Makefile @@ -48,6 +48,7 @@ node_headers = \ access/amapi.h \ access/cmptype.h \ access/sdir.h \ + access/sequenceam.h \ access/tableam.h \ access/tsmapi.h \ commands/event_trigger.h \ diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 77659b0f7602..5e4ff23cf1e4 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -60,6 +60,7 @@ sub elem access/amapi.h access/cmptype.h access/sdir.h + access/sequenceam.h access/tableam.h access/tsmapi.h commands/event_trigger.h @@ -84,6 +85,7 @@ sub elem nodes/execnodes.h access/amapi.h access/sdir.h + access/sequenceam.h access/tableam.h access/tsmapi.h commands/event_trigger.h diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 3c4268b271a4..3a287c7f87a9 100644 --- 
a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -381,6 +381,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type copy_file_name access_method_clause attr_name table_access_method_clause name cursor_name file_name + sequence_access_method_clause cluster_index_specification %type func_name handler_name qual_Op qual_all_Op subquery_Op @@ -4941,23 +4942,26 @@ RefreshMatViewStmt: CreateSeqStmt: CREATE OptTemp SEQUENCE qualified_name OptSeqOptList + sequence_access_method_clause { CreateSeqStmt *n = makeNode(CreateSeqStmt); - $4->relpersistence = $2; n->sequence = $4; n->options = $5; + n->accessMethod = $6; n->ownerId = InvalidOid; n->if_not_exists = false; $$ = (Node *) n; } | CREATE OptTemp SEQUENCE IF_P NOT EXISTS qualified_name OptSeqOptList + sequence_access_method_clause { CreateSeqStmt *n = makeNode(CreateSeqStmt); $7->relpersistence = $2; n->sequence = $7; n->options = $8; + n->accessMethod = $9; n->ownerId = InvalidOid; n->if_not_exists = true; $$ = (Node *) n; @@ -4994,6 +4998,11 @@ OptParenthesizedSeqOptList: '(' SeqOptList ')' { $$ = $2; } | /*EMPTY*/ { $$ = NIL; } ; +sequence_access_method_clause: + USING name { $$ = $2; } + | /*EMPTY*/ { $$ = NULL; } + ; + SeqOptList: SeqOptElem { $$ = list_make1($1); } | SeqOptList SeqOptElem { $$ = lappend($1, $2); } ; @@ -5997,6 +6006,7 @@ CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name am_type: INDEX { $$ = AMTYPE_INDEX; } + | SEQUENCE { $$ = AMTYPE_SEQUENCE; } | TABLE { $$ = AMTYPE_TABLE; } ; diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 62015431fdf1..9ba0febe63c4 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -26,6 +26,7 @@ #include "access/htup_details.h" #include "access/relation.h" #include "access/reloptions.h" +#include "access/sequenceam.h" #include "access/table.h" #include "access/toast_compression.h" #include "catalog/dependency.h" @@ 
-518,6 +519,7 @@ generateSerialExtraStmts(CreateStmtContext *cxt, ColumnDef *column, seqstmt->sequence = makeRangeVar(snamespace, sname, -1); seqstmt->sequence->relpersistence = seqpersistence; seqstmt->options = seqoptions; + seqstmt->accessMethod = NULL; /* * If a sequence data type was specified, add it to the options. Prepend diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c index 317a1f2b282f..68f160dda7d3 100644 --- a/src/backend/utils/adt/pseudotypes.c +++ b/src/backend/utils/adt/pseudotypes.c @@ -369,6 +369,7 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(language_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(fdw_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(index_am_handler); +PSEUDOTYPE_DUMMY_IO_FUNCS(sequence_am_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(tsm_handler); PSEUDOTYPE_DUMMY_IO_FUNCS(internal); PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 68ff67de549a..2f6c0a12f3e3 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -34,6 +34,7 @@ #include "access/multixact.h" #include "access/parallel.h" #include "access/reloptions.h" +#include "access/sequenceam.h" #include "access/sysattr.h" #include "access/table.h" #include "access/tableam.h" @@ -64,6 +65,7 @@ #include "catalog/pg_type.h" #include "catalog/schemapg.h" #include "catalog/storage.h" +#include "commands/defrem.h" #include "commands/policy.h" #include "commands/publicationcmds.h" #include "commands/trigger.h" @@ -302,6 +304,7 @@ static void RelationParseRelOptions(Relation relation, HeapTuple tuple); static void RelationBuildTupleDesc(Relation relation); static Relation RelationBuildDesc(Oid targetRelId, bool insertIt); static void RelationInitPhysicalAddr(Relation relation); +static void RelationInitSequenceAccessMethod(Relation relation); static void load_critical_index(Oid indexoid, Oid heapoid); static TupleDesc 
GetPgClassDescriptor(void); static TupleDesc GetPgIndexDescriptor(void); @@ -1225,8 +1228,7 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) if (relation->rd_rel->relkind == RELKIND_INDEX || relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) RelationInitIndexAccessInfo(relation); - else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) || - relation->rd_rel->relkind == RELKIND_SEQUENCE) + else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind)) RelationInitTableAccessMethod(relation); else if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { @@ -1235,6 +1237,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) * inherit. */ } + else if (RELKIND_HAS_SEQUENCE_AM(relation->rd_rel->relkind)) + RelationInitSequenceAccessMethod(relation); else Assert(relation->rd_rel->relam == InvalidOid); @@ -1831,17 +1835,9 @@ RelationInitTableAccessMethod(Relation relation) HeapTuple tuple; Form_pg_am aform; - if (relation->rd_rel->relkind == RELKIND_SEQUENCE) - { - /* - * Sequences are currently accessed like heap tables, but it doesn't - * seem prudent to show that in the catalog. So just overwrite it - * here. - */ - Assert(relation->rd_rel->relam == InvalidOid); - relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER; - } - else if (IsCatalogRelation(relation)) + Assert(RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind)); + + if (IsCatalogRelation(relation)) { /* * Avoid doing a syscache lookup for catalog tables. @@ -1872,6 +1868,49 @@ RelationInitTableAccessMethod(Relation relation) InitTableAmRoutine(relation); } +/* + * Initialize sequence-access-method support data for a sequence relation + */ +static void +RelationInitSequenceAccessMethod(Relation relation) +{ + HeapTuple tuple; + Form_pg_am aform; + const char *tableam_name; + Oid tableam_oid; + Oid tableam_handler; + + Assert(RELKIND_HAS_SEQUENCE_AM(relation->rd_rel->relkind)); + + /* + * Look up the sequence access method, save the OID of its handler + * function. 
+ */ + Assert(relation->rd_rel->relam != InvalidOid); + relation->rd_amhandler = GetSequenceAmRoutineId(relation->rd_rel->relam); + + /* + * Now we can fetch the sequence AM's API struct. + */ + relation->rd_sequenceam = GetSequenceAmRoutine(relation->rd_amhandler); + + /* + * From the sequence AM, set its expected table access method. + */ + tableam_name = sequence_get_table_am(relation); + tableam_oid = get_table_am_oid(tableam_name, false); + + tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(tableam_oid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", + tableam_oid); + aform = (Form_pg_am) GETSTRUCT(tuple); + tableam_handler = aform->amhandler; + ReleaseSysCache(tuple); + + relation->rd_tableam = GetTableAmRoutine(tableam_handler); +} + /* * formrdesc * @@ -3711,14 +3750,17 @@ RelationBuildLocalRelation(const char *relname, rel->rd_rel->relam = accessmtd; /* - * RelationInitTableAccessMethod will do syscache lookups, so we mustn't - * run it in CacheMemoryContext. Fortunately, the remaining steps don't - * require a long-lived current context. + * RelationInitTableAccessMethod() and RelationInitSequenceAccessMethod() + * will do syscache lookups, so we mustn't run them in CacheMemoryContext. + * Fortunately, the remaining steps don't require a long-lived current + * context. 
*/ MemoryContextSwitchTo(oldcxt); - if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_SEQUENCE) + if (RELKIND_HAS_TABLE_AM(relkind)) RelationInitTableAccessMethod(rel); + else if (relkind == RELKIND_SEQUENCE) + RelationInitSequenceAccessMethod(rel); /* * Leave index access method uninitialized, because the pg_index row has @@ -4343,13 +4385,21 @@ RelationCacheInitializePhase3(void) /* Reload tableam data if needed */ if (relation->rd_tableam == NULL && - (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) || relation->rd_rel->relkind == RELKIND_SEQUENCE)) + (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))) { RelationInitTableAccessMethod(relation); Assert(relation->rd_tableam != NULL); restart = true; } + else if (relation->rd_sequenceam == NULL && + relation->rd_rel->relkind == RELKIND_SEQUENCE) + { + RelationInitSequenceAccessMethod(relation); + Assert(relation->rd_sequenceam != NULL); + + restart = true; + } /* Release hold on the relation */ RelationDecrementReferenceCount(relation); @@ -6422,8 +6472,10 @@ load_relcache_init_file(bool shared) nailed_rels++; /* Load table AM data */ - if (RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind) || rel->rd_rel->relkind == RELKIND_SEQUENCE) + if (RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind)) RelationInitTableAccessMethod(rel); + else if (rel->rd_rel->relkind == RELKIND_SEQUENCE) + RelationInitSequenceAccessMethod(rel); Assert(rel->rd_index == NULL); Assert(rel->rd_indextuple == NULL); @@ -6435,6 +6487,7 @@ load_relcache_init_file(bool shared) Assert(rel->rd_supportinfo == NULL); Assert(rel->rd_indoption == NULL); Assert(rel->rd_indcollation == NULL); + Assert(rel->rd_sequenceam == NULL); Assert(rel->rd_opcoptions == NULL); } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 2f8cbd867599..daa550113a1f 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -33,6 +33,7 @@ #include "access/gin.h" #include "access/slru.h" #include 
"access/toast_compression.h" +#include "access/sequenceam.h" #include "access/twophase.h" #include "access/xlog_internal.h" #include "access/xlogprefetcher.h" @@ -4341,6 +4342,17 @@ struct config_string ConfigureNamesString[] = check_default_table_access_method, NULL, NULL }, + { + {"default_sequence_access_method", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the default sequence access method for new sequences."), + NULL, + GUC_IS_NAME + }, + &default_sequence_access_method, + DEFAULT_SEQUENCE_ACCESS_METHOD, + check_default_sequence_access_method, NULL, NULL + }, + { {"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the default tablespace to create tables and indexes in."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 34826d01380b..911528bd0da6 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -754,6 +754,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate # error #search_path = '"$user", public' # schema names #row_security = on +#default_sequence_access_method = 'seqlocal' #default_table_access_method = 'heap' #default_tablespace = '' # a tablespace name, '' uses the default #default_toast_compression = 'pglz' # 'pglz' or 'lz4' diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 1d08268393e3..4c7a9446b96c 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -168,10 +168,12 @@ describeAccessMethods(const char *pattern, bool verbose) "SELECT amname AS \"%s\",\n" " CASE amtype" " WHEN " CppAsString2(AMTYPE_INDEX) " THEN '%s'" + " WHEN " CppAsString2(AMTYPE_SEQUENCE) " THEN '%s'" " WHEN " CppAsString2(AMTYPE_TABLE) " THEN '%s'" " END AS \"%s\"", gettext_noop("Name"), gettext_noop("Index"), + gettext_noop("Sequence"), gettext_noop("Table"), gettext_noop("Type")); diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 
c916b9299a80..5f8a5989adc5 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -2562,7 +2562,7 @@ match_previous_words(int pattern_id, else if (Matches("ALTER", "SEQUENCE", MatchAny)) COMPLETE_WITH("AS", "INCREMENT", "MINVALUE", "MAXVALUE", "RESTART", "START", "NO", "CACHE", "CYCLE", "SET", "OWNED BY", - "OWNER TO", "RENAME TO"); + "OWNER TO", "RENAME TO", "USING"); /* ALTER SEQUENCE AS */ else if (TailMatches("ALTER", "SEQUENCE", MatchAny, "AS")) COMPLETE_WITH_CS("smallint", "integer", "bigint"); @@ -3605,7 +3605,7 @@ match_previous_words(int pattern_id, else if (TailMatches("CREATE", "SEQUENCE", MatchAny) || TailMatches("CREATE", "TEMP|TEMPORARY", "SEQUENCE", MatchAny)) COMPLETE_WITH("AS", "INCREMENT BY", "MINVALUE", "MAXVALUE", "NO", - "CACHE", "CYCLE", "OWNED BY", "START WITH"); + "CACHE", "CYCLE", "OWNED BY", "START WITH", "USING"); else if (TailMatches("CREATE", "SEQUENCE", MatchAny, "AS") || TailMatches("CREATE", "TEMP|TEMPORARY", "SEQUENCE", MatchAny, "AS")) COMPLETE_WITH_CS("smallint", "integer", "bigint"); diff --git a/src/include/access/seqlocalam.h b/src/include/access/seqlocalam.h index 225fb9a2cbeb..21936511ac2b 100644 --- a/src/include/access/seqlocalam.h +++ b/src/include/access/seqlocalam.h @@ -15,7 +15,6 @@ #include "access/xlogreader.h" #include "storage/relfilelocator.h" -#include "utils/rel.h" /* XLOG stuff */ #define XLOG_SEQ_LOCAL_LOG 0x00 @@ -41,18 +40,4 @@ extern void seq_local_desc(StringInfo buf, XLogReaderState *record); extern const char *seq_local_identify(uint8 info); extern void seq_local_mask(char *page, BlockNumber blkno); -/* access routines */ -extern int64 seq_local_nextval(Relation rel, int64 incby, int64 maxv, - int64 minv, int64 cache, bool cycle, - int64 *last); -extern const char *seq_local_get_table_am(void); -extern void seq_local_init(Relation rel, int64 last_value, bool is_called); -extern void seq_local_setval(Relation rel, int64 next, bool iscalled); -extern void 
seq_local_reset(Relation rel, int64 startv, bool is_called, - bool reset_state); -extern void seq_local_get_state(Relation rel, int64 *last_value, - bool *is_called); -extern void seq_local_change_persistence(Relation rel, - char newrelpersistence); - #endif /* SEQLOCALAM_H */ diff --git a/src/include/access/sequenceam.h b/src/include/access/sequenceam.h new file mode 100644 index 000000000000..ac48c8b468be --- /dev/null +++ b/src/include/access/sequenceam.h @@ -0,0 +1,181 @@ +/*------------------------------------------------------------------------- + * + * sequenceam.h + * POSTGRES sequence access method definitions. + * + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/sequenceam.h + * + * NOTES + * See sequenceam.sgml for higher level documentation. + * + *------------------------------------------------------------------------- + */ +#ifndef SEQUENCEAM_H +#define SEQUENCEAM_H + +#include "utils/rel.h" + +#define DEFAULT_SEQUENCE_ACCESS_METHOD "seqlocal" + +/* GUCs */ +extern PGDLLIMPORT char *default_sequence_access_method; + +/* + * API struct for a sequence AM. Note this must be allocated in a + * server-lifetime manner, typically as a static const struct, which then gets + * returned by FormData_pg_am.amhandler. + * + * In most cases it's not appropriate to call the callbacks directly, use the + * sequence_* wrapper functions instead. + * + * GetSequenceAmRoutine() asserts that required callbacks are filled in, + * remember to update when adding a callback. + */ +typedef struct SequenceAmRoutine +{ + /* this must be set to T_SequenceAmRoutine */ + NodeTag type; + + /* + * Retrieve table access method used by a sequence to store its metadata. + */ + const char *(*get_table_am) (void); + + /* + * Initialize sequence after creating a sequence Relation in pg_class, + * setting up the sequence for use. 
"last_value" and "is_called" are + * guessed from the options set for the sequence in CREATE SEQUENCE, based + * on the configuration of pg_sequence. + */ + void (*init) (Relation rel, int64 last_value, bool is_called); + + /* + * Retrieve a result for nextval(), based on the options retrieved from + * the sequence's options in pg_sequence. "last" is the last value + * calculated stored in the session's local cache, for lastval(). + */ + int64 (*nextval) (Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last); + + /* + * Callback to set the state of a sequence, based on the input arguments + * from setval(). + */ + void (*setval) (Relation rel, int64 next, bool iscalled); + + /* + * Reset a sequence to its initial value. "reset_state", if set to true, + * means that the sequence parameters have changed, hence its internal + * state may need to be reset as well. "startv" and "is_called" are + * values guessed from the configuration of the sequence, based on the + * contents of pg_sequence. + */ + void (*reset) (Relation rel, int64 startv, bool is_called, + bool reset_state); + + /* + * Returns the current state of a sequence, returning data for + * pg_sequence_last_value() and related DDLs like ALTER SEQUENCE. + * "last_value" and "is_called" should be assigned to the values retrieved + * from the sequence Relation. + */ + void (*get_state) (Relation rel, int64 *last_value, bool *is_called); + + /* + * Callback used when switching persistence of a sequence Relation, to + * reset the sequence based on its new persistence "newrelpersistence". + */ + void (*change_persistence) (Relation rel, char newrelpersistence); + +} SequenceAmRoutine; + + +/* --------------------------------------------------------------------------- + * Wrapper functions for each callback. + * --------------------------------------------------------------------------- + */ + +/* + * Returns the name of the table access method used by this sequence. 
+ */ +static inline const char * +sequence_get_table_am(Relation rel) +{ + return rel->rd_sequenceam->get_table_am(); +} + +/* + * Insert tuple data based on the information guessed from the contents + * of pg_sequence. + */ +static inline void +sequence_init(Relation rel, int64 last_value, bool is_called) +{ + rel->rd_sequenceam->init(rel, last_value, is_called); +} + +/* + * Allocate a set of values for the given sequence. "last" is the last value + * allocated. The result returned is the next value of the sequence computed. + */ +static inline int64 +sequence_nextval(Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last) +{ + return rel->rd_sequenceam->nextval(rel, incby, maxv, minv, cache, + cycle, last); +} + +/* + * Callback to set the state of a sequence, based on the input arguments + * from setval(). + */ +static inline void +sequence_setval(Relation rel, int64 next, bool iscalled) +{ + rel->rd_sequenceam->setval(rel, next, iscalled); +} + +/* + * Reset a sequence to its initial state. + */ +static inline void +sequence_reset(Relation rel, int64 startv, bool is_called, + bool reset_state) +{ + rel->rd_sequenceam->reset(rel, startv, is_called, reset_state); +} + +/* + * Retrieve sequence metadata. + */ +static inline void +sequence_get_state(Relation rel, int64 *last_value, bool *is_called) +{ + rel->rd_sequenceam->get_state(rel, last_value, is_called); +} + +/* + * Callback to change the persistence of a sequence Relation. 
+ */ +static inline void +sequence_change_persistence(Relation rel, char newrelpersistence) +{ + rel->rd_sequenceam->change_persistence(rel, newrelpersistence); +} + +/* ---------------------------------------------------------------------------- + * Functions in sequenceamapi.c + * ---------------------------------------------------------------------------- + */ + +extern const SequenceAmRoutine *GetSequenceAmRoutine(Oid amhandler); +extern Oid GetSequenceAmRoutineId(Oid amoid); + +#endif /* SEQUENCEAM_H */ diff --git a/src/include/catalog/pg_am.dat b/src/include/catalog/pg_am.dat index 26d15928a155..8f076fcec958 100644 --- a/src/include/catalog/pg_am.dat +++ b/src/include/catalog/pg_am.dat @@ -15,6 +15,9 @@ { oid => '2', oid_symbol => 'HEAP_TABLE_AM_OID', descr => 'heap table access method', amname => 'heap', amhandler => 'heap_tableam_handler', amtype => 't' }, +{ oid => '8051', oid_symbol => 'LOCAL_SEQUENCE_AM_OID', + descr => 'local sequence access method', + amname => 'seqlocal', amhandler => 'seq_local_sequenceam_handler', amtype => 's' }, { oid => '403', oid_symbol => 'BTREE_AM_OID', descr => 'b-tree index access method', amname => 'btree', amhandler => 'bthandler', amtype => 'i' }, diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 6e98a0930c27..080bea5031a9 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -59,6 +59,7 @@ MAKE_SYSCACHE(AMOID, pg_am_oid_index, 4); * Allowed values for amtype */ #define AMTYPE_INDEX 'i' /* index access method */ +#define AMTYPE_SEQUENCE 's' /* sequence access method */ #define AMTYPE_TABLE 't' /* table access method */ #endif /* EXPOSE_TO_CLIENT_CODE */ diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 07d182da796a..012a2863c037 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -231,6 +231,12 @@ MAKE_SYSCACHE(RELNAMENSP, pg_class_relname_nsp_index, 128); (relkind) == RELKIND_TOASTVALUE || \ (relkind) == 
RELKIND_MATVIEW) +/* + * Relation kinds with a sequence access method (rd_sequenceam). + */ +#define RELKIND_HAS_SEQUENCE_AM(relkind) \ + ((relkind) == RELKIND_SEQUENCE) + #endif /* EXPOSE_TO_CLIENT_CODE */ extern int errdetail_relkind_not_supported(char relkind); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 62beb71da288..191489fcfcca 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -913,6 +913,12 @@ prorettype => 'table_am_handler', proargtypes => 'internal', prosrc => 'heap_tableam_handler' }, +# Sequence access method handlers +{ oid => '8209', descr => 'local sequence access method handler', + proname => 'seq_local_sequenceam_handler', provolatile => 'v', + prorettype => 'sequence_am_handler', proargtypes => 'internal', + prosrc => 'seq_local_sequenceam_handler' }, + # Index access method handlers { oid => '330', descr => 'btree index access method handler', proname => 'bthandler', provolatile => 'v', prorettype => 'index_am_handler', @@ -7882,6 +7888,13 @@ { oid => '327', descr => 'I/O', proname => 'index_am_handler_out', prorettype => 'cstring', proargtypes => 'index_am_handler', prosrc => 'index_am_handler_out' }, +{ oid => '8207', descr => 'I/O', + proname => 'sequence_am_handler_in', proisstrict => 'f', + prorettype => 'sequence_am_handler', proargtypes => 'cstring', + prosrc => 'sequence_am_handler_in' }, +{ oid => '8208', descr => 'I/O', + proname => 'sequence_am_handler_out', prorettype => 'cstring', + proargtypes => 'sequence_am_handler', prosrc => 'sequence_am_handler_out' }, { oid => '3311', descr => 'I/O', proname => 'tsm_handler_in', proisstrict => 'f', prorettype => 'tsm_handler', proargtypes => 'cstring', prosrc => 'tsm_handler_in' }, diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 6dca77e0a22f..c42531b25533 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -627,6 +627,12 @@ typcategory => 'P', 
typinput => 'index_am_handler_in', typoutput => 'index_am_handler_out', typreceive => '-', typsend => '-', typalign => 'i' }, +{ oid => '8210', + descr => 'pseudo-type for the result of a sequence AM handler function', + typname => 'sequence_am_handler', typlen => '4', typbyval => 't', + typtype => 'p', typcategory => 'P', typinput => 'sequence_am_handler_in', + typoutput => 'sequence_am_handler_out', typreceive => '-', typsend => '-', + typalign => 'i' }, { oid => '3310', descr => 'pseudo-type for the result of a tablesample method function', typname => 'tsm_handler', typlen => '4', typbyval => 't', typtype => 'p', diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index dd22b5efdfd9..6790728aced3 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -145,6 +145,7 @@ extern Datum transformGenericOptions(Oid catalogId, extern ObjectAddress CreateAccessMethod(CreateAmStmt *stmt); extern Oid get_index_am_oid(const char *amname, bool missing_ok); extern Oid get_table_am_oid(const char *amname, bool missing_ok); +extern Oid get_sequence_am_oid(const char *amname, bool missing_ok); extern Oid get_am_oid(const char *amname, bool missing_ok); extern char *get_am_name(Oid amOid); diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h index 9ac0b67683d3..7693e9941fc9 100644 --- a/src/include/commands/sequence.h +++ b/src/include/commands/sequence.h @@ -22,35 +22,6 @@ #include "storage/relfilelocator.h" -typedef struct FormData_pg_sequence_data -{ - int64 last_value; - int64 log_cnt; - bool is_called; -} FormData_pg_sequence_data; - -typedef FormData_pg_sequence_data *Form_pg_sequence_data; - -/* - * Columns of a sequence relation - */ - -#define SEQ_COL_LASTVAL 1 -#define SEQ_COL_LOG 2 -#define SEQ_COL_CALLED 3 - -#define SEQ_COL_FIRSTCOL SEQ_COL_LASTVAL -#define SEQ_COL_LASTCOL SEQ_COL_CALLED - -/* XLOG stuff */ -#define XLOG_SEQ_LOG 0x00 - -typedef struct xl_seq_rec -{ - RelFileLocator locator; - 
/* SEQUENCE TUPLE DATA FOLLOWS AT THE END */ -} xl_seq_rec; - extern int64 nextval_internal(Oid relid, bool check_permissions); extern Datum nextval(PG_FUNCTION_ARGS); extern List *sequence_options(Oid relid); @@ -62,9 +33,4 @@ extern void DeleteSequenceTuple(Oid relid); extern void ResetSequence(Oid seq_relid); extern void ResetSequenceCaches(void); -extern void seq_redo(XLogReaderState *record); -extern void seq_desc(StringInfo buf, XLogReaderState *record); -extern const char *seq_identify(uint8 info); -extern void seq_mask(char *page, BlockNumber blkno); - #endif /* SEQUENCE_H */ diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build index d1ca24dd32f0..b1c4155c9a91 100644 --- a/src/include/nodes/meson.build +++ b/src/include/nodes/meson.build @@ -10,6 +10,7 @@ node_support_input_i = [ 'access/amapi.h', 'access/cmptype.h', 'access/sdir.h', + 'access/sequenceam.h', 'access/tableam.h', 'access/tsmapi.h', 'commands/event_trigger.h', diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 5bdea762fc72..79465fe4c54d 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3216,6 +3216,7 @@ typedef struct CreateSeqStmt List *options; Oid ownerId; /* ID of owner, or InvalidOid for default */ bool for_identity; + char *accessMethod; /* USING name of access method (eg. local) */ bool if_not_exists; /* just do nothing if it already exists? 
*/ } CreateSeqStmt; diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h index 799fa7ace684..82bb81561671 100644 --- a/src/include/utils/guc_hooks.h +++ b/src/include/utils/guc_hooks.h @@ -55,6 +55,8 @@ extern bool check_log_connections(char **newval, void **extra, GucSource source) extern void assign_log_connections(const char *newval, void *extra); extern bool check_default_table_access_method(char **newval, void **extra, GucSource source); +extern bool check_default_sequence_access_method(char **newval, void **extra, + GucSource source); extern bool check_default_tablespace(char **newval, void **extra, GucSource source); extern bool check_default_text_search_config(char **newval, void **extra, GucSource source); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index b552359915f1..4e418968253d 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -188,6 +188,11 @@ typedef struct RelationData */ const struct TableAmRoutine *rd_tableam; + /* + * Sequence access method. + */ + const struct SequenceAmRoutine *rd_sequenceam; + /* These are non-NULL only for an index relation: */ Form_pg_index rd_index; /* pg_index tuple describing this index */ /* use "struct" here to avoid needing to include htup.h: */ diff --git a/src/test/regress/expected/create_am.out b/src/test/regress/expected/create_am.out index c1a951572512..784870e603d1 100644 --- a/src/test/regress/expected/create_am.out +++ b/src/test/regress/expected/create_am.out @@ -163,11 +163,6 @@ CREATE VIEW tableam_view_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; ERROR: syntax error at or near "USING" LINE 1: CREATE VIEW tableam_view_heap2 USING heap2 AS SELECT * FROM ... 
^ --- CREATE SEQUENCE doesn't support USING -CREATE SEQUENCE tableam_seq_heap2 USING heap2; -ERROR: syntax error at or near "USING" -LINE 1: CREATE SEQUENCE tableam_seq_heap2 USING heap2; - ^ -- CREATE MATERIALIZED VIEW does support USING CREATE MATERIALIZED VIEW tableam_tblmv_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; SELECT f1 FROM tableam_tblmv_heap2 ORDER BY f1; @@ -514,9 +509,12 @@ CREATE TABLE tableam_parted_heapx (a text, b int) PARTITION BY list (a); CREATE TABLE tableam_parted_1_heapx PARTITION OF tableam_parted_heapx FOR VALUES IN ('a', 'b'); -- but an explicitly set AM overrides it CREATE TABLE tableam_parted_2_heapx PARTITION OF tableam_parted_heapx FOR VALUES IN ('c', 'd') USING heap; --- sequences, views and foreign servers shouldn't have an AM -CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; +-- sequences have an AM +SET LOCAL default_sequence_access_method = 'seqlocal'; CREATE SEQUENCE tableam_seq_heapx; +RESET default_sequence_access_method; +-- views and foreign servers shouldn't have an AM +CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; CREATE FOREIGN DATA WRAPPER fdw_heap2 VALIDATOR postgresql_fdw_validator; CREATE SERVER fs_heap2 FOREIGN DATA WRAPPER fdw_heap2 ; CREATE FOREIGN table tableam_fdw_heapx () SERVER fs_heap2; @@ -533,18 +531,18 @@ FROM pg_class AS pc LEFT JOIN pg_am AS pa ON (pa.oid = pc.relam) WHERE pc.relname LIKE 'tableam_%_heapx' ORDER BY 3, 1, 2; - relkind | amname | relname ----------+--------+----------------------------- - f | | tableam_fdw_heapx - r | heap2 | tableam_parted_1_heapx - r | heap | tableam_parted_2_heapx - p | | tableam_parted_heapx - S | | tableam_seq_heapx - r | heap2 | tableam_tbl_heapx - r | heap2 | tableam_tblas_heapx - m | heap2 | tableam_tblmv_heapx - r | heap2 | tableam_tblselectinto_heapx - v | | tableam_view_heapx + relkind | amname | relname +---------+----------+----------------------------- + f | | tableam_fdw_heapx + r | heap2 | 
tableam_parted_1_heapx + r | heap | tableam_parted_2_heapx + p | | tableam_parted_heapx + S | seqlocal | tableam_seq_heapx + r | heap2 | tableam_tbl_heapx + r | heap2 | tableam_tblas_heapx + m | heap2 | tableam_tblmv_heapx + r | heap2 | tableam_tblselectinto_heapx + v | | tableam_view_heapx (10 rows) -- don't want to keep those tables, nor the default @@ -574,3 +572,22 @@ table tableam_parted_b_heap2 depends on access method heap2 table tableam_parted_d_heap2 depends on access method heap2 HINT: Use DROP ... CASCADE to drop the dependent objects too. -- we intentionally leave the objects created above alive, to verify pg_dump support +-- Checks for sequence access methods +-- Create new sequence access method which uses standard local handler +CREATE ACCESS METHOD local2 TYPE SEQUENCE HANDLER seq_local_sequenceam_handler; +-- Create and use sequence +CREATE SEQUENCE test_seqam USING local2; +SELECT nextval('test_seqam'::regclass); + nextval +--------- + 1 +(1 row) + +-- Try to drop and fail on dependency +DROP ACCESS METHOD local2; +ERROR: cannot drop access method local2 because other objects depend on it +DETAIL: sequence test_seqam depends on access method local2 +HINT: Use DROP ... CASCADE to drop the dependent objects too. 
+-- And cleanup +DROP SEQUENCE test_seqam; +DROP ACCESS METHOD local2; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 20bf9ea9cdf7..20b3d8520315 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1943,6 +1943,18 @@ WHERE p1.oid = a1.amhandler AND a1.amtype = 't' AND -----+--------+-----+--------- (0 rows) +-- check for sequence amhandler functions with the wrong signature +SELECT a1.oid, a1.amname, p1.oid, p1.proname +FROM pg_am AS a1, pg_proc AS p1 +WHERE p1.oid = a1.amhandler AND a1.amtype = 's' AND + (p1.prorettype != 'sequence_am_handler'::regtype + OR p1.proretset + OR p1.pronargs != 1 + OR p1.proargtypes[0] != 'internal'::regtype); + oid | amname | oid | proname +-----+--------+-----+--------- +(0 rows) + -- **************** pg_amop **************** -- Look for illegal values in pg_amop fields SELECT a1.amopfamily, a1.amopstrategy diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index cf48ae6d0c2e..2b7d6c69e1e1 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -5127,31 +5127,33 @@ Indexes: -- check printing info about access methods \dA List of access methods - Name | Type ---------+------- - brin | Index - btree | Index - gin | Index - gist | Index - hash | Index - heap | Table - heap2 | Table - spgist | Index -(8 rows) + Name | Type +----------+---------- + brin | Index + btree | Index + gin | Index + gist | Index + hash | Index + heap | Table + heap2 | Table + seqlocal | Sequence + spgist | Index +(9 rows) \dA * List of access methods - Name | Type ---------+------- - brin | Index - btree | Index - gin | Index - gist | Index - hash | Index - heap | Table - heap2 | Table - spgist | Index -(8 rows) + Name | Type +----------+---------- + brin | Index + btree | Index + gin | Index + gist | Index + hash | Index + heap | Table + heap2 | Table + seqlocal | Sequence + spgist | 
Index +(9 rows) \dA h* List of access methods @@ -5176,32 +5178,34 @@ List of access methods \dA: extra argument "bar" ignored \dA+ - List of access methods - Name | Type | Handler | Description ---------+-------+----------------------+---------------------------------------- - brin | Index | brinhandler | block range index (BRIN) access method - btree | Index | bthandler | b-tree index access method - gin | Index | ginhandler | GIN index access method - gist | Index | gisthandler | GiST index access method - hash | Index | hashhandler | hash index access method - heap | Table | heap_tableam_handler | heap table access method - heap2 | Table | heap_tableam_handler | - spgist | Index | spghandler | SP-GiST index access method -(8 rows) + List of access methods + Name | Type | Handler | Description +----------+----------+------------------------------+---------------------------------------- + brin | Index | brinhandler | block range index (BRIN) access method + btree | Index | bthandler | b-tree index access method + gin | Index | ginhandler | GIN index access method + gist | Index | gisthandler | GiST index access method + hash | Index | hashhandler | hash index access method + heap | Table | heap_tableam_handler | heap table access method + heap2 | Table | heap_tableam_handler | + seqlocal | Sequence | seq_local_sequenceam_handler | local sequence access method + spgist | Index | spghandler | SP-GiST index access method +(9 rows) \dA+ * - List of access methods - Name | Type | Handler | Description ---------+-------+----------------------+---------------------------------------- - brin | Index | brinhandler | block range index (BRIN) access method - btree | Index | bthandler | b-tree index access method - gin | Index | ginhandler | GIN index access method - gist | Index | gisthandler | GiST index access method - hash | Index | hashhandler | hash index access method - heap | Table | heap_tableam_handler | heap table access method - heap2 | Table | 
heap_tableam_handler | - spgist | Index | spghandler | SP-GiST index access method -(8 rows) + List of access methods + Name | Type | Handler | Description +----------+----------+------------------------------+---------------------------------------- + brin | Index | brinhandler | block range index (BRIN) access method + btree | Index | bthandler | b-tree index access method + gin | Index | ginhandler | GIN index access method + gist | Index | gisthandler | GiST index access method + hash | Index | hashhandler | hash index access method + heap | Table | heap_tableam_handler | heap table access method + heap2 | Table | heap_tableam_handler | + seqlocal | Sequence | seq_local_sequenceam_handler | local sequence access method + spgist | Index | spghandler | SP-GiST index access method +(9 rows) \dA+ h* List of access methods diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index dd0c52ab08b5..c3218a36bc48 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -511,21 +511,21 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p', 'I') OR -----+--------- (0 rows) --- All tables, indexes, partitioned indexes and matviews should have an --- access method. +-- All tables, indexes, partitioned indexes, matviews and sequences should have +-- an access method. SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind NOT IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind NOT IN ('v', 'f', 'c', 'p') and c1.relam = 0; oid | relname -----+--------- (0 rows) --- Conversely, sequences, views, foreign tables, types and partitioned --- tables shouldn't have them. +-- Conversely, views, foreign tables, types and partitioned tables +-- shouldn't have them. 
SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind IN ('v', 'f', 'c', 'p') and c1.relam != 0; oid | relname -----+--------- diff --git a/src/test/regress/sql/create_am.sql b/src/test/regress/sql/create_am.sql index 754fe0c694bc..76a91cf8dd68 100644 --- a/src/test/regress/sql/create_am.sql +++ b/src/test/regress/sql/create_am.sql @@ -117,9 +117,6 @@ SELECT INTO tableam_tblselectinto_heap2 USING heap2 FROM tableam_tbl_heap2; -- CREATE VIEW doesn't support USING CREATE VIEW tableam_view_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; --- CREATE SEQUENCE doesn't support USING -CREATE SEQUENCE tableam_seq_heap2 USING heap2; - -- CREATE MATERIALIZED VIEW does support USING CREATE MATERIALIZED VIEW tableam_tblmv_heap2 USING heap2 AS SELECT * FROM tableam_tbl_heap2; SELECT f1 FROM tableam_tblmv_heap2 ORDER BY f1; @@ -327,9 +324,13 @@ CREATE TABLE tableam_parted_1_heapx PARTITION OF tableam_parted_heapx FOR VALUES -- but an explicitly set AM overrides it CREATE TABLE tableam_parted_2_heapx PARTITION OF tableam_parted_heapx FOR VALUES IN ('c', 'd') USING heap; --- sequences, views and foreign servers shouldn't have an AM -CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; +-- sequences have an AM +SET LOCAL default_sequence_access_method = 'seqlocal'; CREATE SEQUENCE tableam_seq_heapx; +RESET default_sequence_access_method; + +-- views and foreign servers shouldn't have an AM +CREATE VIEW tableam_view_heapx AS SELECT * FROM tableam_tbl_heapx; CREATE FOREIGN DATA WRAPPER fdw_heap2 VALIDATOR postgresql_fdw_validator; CREATE SERVER fs_heap2 FOREIGN DATA WRAPPER fdw_heap2 ; CREATE FOREIGN table tableam_fdw_heapx () SERVER fs_heap2; @@ -365,3 +366,16 @@ CREATE FOREIGN TABLE fp PARTITION OF tableam_parted_a_heap2 DEFAULT SERVER x; DROP ACCESS METHOD heap2; -- we intentionally leave the objects created above alive, to verify pg_dump support + +-- Checks for sequence access methods + +-- Create new 
sequence access method which uses standard local handler +CREATE ACCESS METHOD local2 TYPE SEQUENCE HANDLER seq_local_sequenceam_handler; +-- Create and use sequence +CREATE SEQUENCE test_seqam USING local2; +SELECT nextval('test_seqam'::regclass); +-- Try to drop and fail on dependency +DROP ACCESS METHOD local2; +-- And cleanup +DROP SEQUENCE test_seqam; +DROP ACCESS METHOD local2; diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 2fb3a8528781..e60eeab33c02 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -1236,6 +1236,16 @@ WHERE p1.oid = a1.amhandler AND a1.amtype = 't' AND OR p1.pronargs != 1 OR p1.proargtypes[0] != 'internal'::regtype); +-- check for sequence amhandler functions with the wrong signature + +SELECT a1.oid, a1.amname, p1.oid, p1.proname +FROM pg_am AS a1, pg_proc AS p1 +WHERE p1.oid = a1.amhandler AND a1.amtype = 's' AND + (p1.prorettype != 'sequence_am_handler'::regtype + OR p1.proretset + OR p1.pronargs != 1 + OR p1.proargtypes[0] != 'internal'::regtype); + -- **************** pg_amop **************** -- Look for illegal values in pg_amop fields diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index c94dd83d3061..f1b6cd1091ef 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -370,18 +370,18 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p', 'I') OR relpersistence NOT IN ('p', 'u', 't') OR relreplident NOT IN ('d', 'n', 'f', 'i'); --- All tables, indexes, partitioned indexes and matviews should have an --- access method. +-- All tables, indexes, partitioned indexes, matviews and sequences should have +-- an access method. 
SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind NOT IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind NOT IN ('v', 'f', 'c', 'p') and c1.relam = 0; --- Conversely, sequences, views, foreign tables, types and partitioned --- tables shouldn't have them. +-- Conversely, views, foreign tables, types and partitioned tables +-- shouldn't have them. SELECT c1.oid, c1.relname FROM pg_class as c1 -WHERE c1.relkind IN ('S', 'v', 'f', 'c', 'p') and +WHERE c1.relkind IN ('v', 'f', 'c', 'p') and c1.relam != 0; -- Indexes and partitioned indexes should have AMs of type 'i'. diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e5879e00dffe..e0654ddecd48 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2679,6 +2679,7 @@ SeqScanState SeqTable SeqTableData SeqType +SequenceAmRoutine SequenceItem SerCommitSeqNo SerialControl @@ -3723,6 +3724,7 @@ list_sort_comparator loc_chunk local_relopt local_relopts +local_sequence_magic local_source local_ts_iter local_ts_radix_tree @@ -4011,7 +4013,6 @@ scram_state_enum script_error_callback_arg security_class_t sem_t -sepgsql_context_info_t sequence_magic set_join_pathlist_hook_type set_rel_pathlist_hook_type @@ -4236,6 +4237,7 @@ xl_heap_visible xl_invalid_page xl_invalid_page_key xl_invalidations +xl_local_seq_rec xl_logical_message xl_multi_insert_tuple xl_multixact_create @@ -4247,7 +4249,6 @@ xl_replorigin_drop xl_replorigin_set xl_restore_point xl_running_xacts -xl_seq_rec xl_smgr_create xl_smgr_truncate xl_standby_lock From 81d7fa6c29b57509a31c6915c71307b675100051 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 26 Aug 2024 13:25:19 +0900 Subject: [PATCH 5/7] Sequence access methods - dump/restore support --- doc/src/sgml/ref/pg_dump.sgml | 17 +++++++ doc/src/sgml/ref/pg_dumpall.sgml | 11 +++++ doc/src/sgml/ref/pg_restore.sgml | 11 +++++ src/bin/pg_dump/pg_backup.h | 2 + src/bin/pg_dump/pg_backup_archiver.c | 66 
++++++++++++++++++++++++++++ src/bin/pg_dump/pg_backup_archiver.h | 6 ++- src/bin/pg_dump/pg_dump.c | 47 +++++++++++++++----- src/bin/pg_dump/pg_dumpall.c | 5 +++ src/bin/pg_dump/pg_restore.c | 4 ++ src/bin/pg_dump/t/002_pg_dump.pl | 49 +++++++++++++++++++++ 10 files changed, 207 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index b757d27ebd0b..dda753e1cf25 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1173,6 +1173,23 @@ PostgreSQL documentation + + + + + Do not output commands to select sequence access methods. + With this option, all objects will be created with whichever + sequence access method is the default during restore. + + + + This option is ignored when emitting an archive (non-text) output + file. For the archive formats, you can specify the option when you + call pg_restore. + + + + diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 43fdab2d77ed..17681045996a 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -605,6 +605,17 @@ exclude database PATTERN + + + + + Do not output commands to select sequence access methods. + With this option, all objects will be created with whichever + sequence access method is the default during restore. + + + + diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index b6de497aee18..78d5cd7e6011 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -829,6 +829,17 @@ PostgreSQL documentation + + + + + Do not output commands to select sequence access methods. + With this option, all objects will be created with whichever + sequence access method is the default during restore. 
+ + + + diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index af0007fb6d2f..62d86d69db45 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -97,6 +97,7 @@ typedef struct _restoreOptions { int createDB; /* Issue commands to create the database */ int noOwner; /* Don't try to match original object owner */ + int noSequenceAm; /* Don't issue sequence-AM-related commands */ int noTableAm; /* Don't issue table-AM-related commands */ int noTablespace; /* Don't issue tablespace-related commands */ int disable_triggers; /* disable triggers during data-only @@ -190,6 +191,7 @@ typedef struct _dumpOptions int no_unlogged_table_data; int serializable_deferrable; int disable_triggers; + int outputNoSequenceAm; int outputNoTableAm; int outputNoTablespaces; int use_setsessauth; diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index afa42337b110..3d63d8f6f6b5 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -181,6 +181,7 @@ dumpOptionsFromRestoreOptions(RestoreOptions *ropt) dopt->outputSuperuser = ropt->superuser; dopt->outputCreateDB = ropt->createDB; dopt->outputNoOwner = ropt->noOwner; + dopt->outputNoSequenceAm = ropt->noSequenceAm; dopt->outputNoTableAm = ropt->noTableAm; dopt->outputNoTablespaces = ropt->noTablespace; dopt->disable_triggers = ropt->disable_triggers; @@ -1246,6 +1247,7 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId, newToc->tag = pg_strdup(opts->tag); newToc->namespace = opts->namespace ? pg_strdup(opts->namespace) : NULL; newToc->tablespace = opts->tablespace ? pg_strdup(opts->tablespace) : NULL; + newToc->sequenceam = opts->sequenceam ? pg_strdup(opts->sequenceam) : NULL; newToc->tableam = opts->tableam ? pg_strdup(opts->tableam) : NULL; newToc->relkind = opts->relkind; newToc->owner = opts->owner ? 
pg_strdup(opts->owner) : NULL; @@ -2405,6 +2407,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, AH->currUser = NULL; /* unknown */ AH->currSchema = NULL; /* ditto */ + AH->currSequenceAm = NULL; /* ditto */ AH->currTablespace = NULL; /* ditto */ AH->currTableAm = NULL; /* ditto */ @@ -2672,6 +2675,7 @@ WriteToc(ArchiveHandle *AH) WriteStr(AH, te->copyStmt); WriteStr(AH, te->namespace); WriteStr(AH, te->tablespace); + WriteStr(AH, te->sequenceam); WriteStr(AH, te->tableam); WriteInt(AH, te->relkind); WriteStr(AH, te->owner); @@ -2776,6 +2780,9 @@ ReadToc(ArchiveHandle *AH) if (AH->version >= K_VERS_1_10) te->tablespace = ReadStr(AH); + if (AH->version >= K_VERS_1_17) + te->sequenceam = ReadStr(AH); + if (AH->version >= K_VERS_1_14) te->tableam = ReadStr(AH); @@ -3462,6 +3469,9 @@ _reconnectToDB(ArchiveHandle *AH, const char *dbname) free(AH->currSchema); AH->currSchema = NULL; + free(AH->currSequenceAm); + AH->currSequenceAm = NULL; + free(AH->currTableAm); AH->currTableAm = NULL; @@ -3624,6 +3634,57 @@ _selectTablespace(ArchiveHandle *AH, const char *tablespace) destroyPQExpBuffer(qry); } +/* + * Set the proper default_sequence_access_method value for the sequence. 
+ */ +static void +_selectSequenceAccessMethod(ArchiveHandle *AH, const char *sequenceam) +{ + RestoreOptions *ropt = AH->public.ropt; + PQExpBuffer cmd; + const char *want, + *have; + + /* do nothing in --no-sequence-access-method mode */ + if (ropt->noSequenceAm) + return; + + have = AH->currSequenceAm; + want = sequenceam; + + if (!want) + return; + + if (have && strcmp(want, have) == 0) + return; + + cmd = createPQExpBuffer(); + appendPQExpBuffer(cmd, + "SET default_sequence_access_method = %s;", + fmtId(want)); + + if (RestoringToDB(AH)) + { + PGresult *res; + + res = PQexec(AH->connection, cmd->data); + + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + warn_or_exit_horribly(AH, + "could not set default_sequence_access_method: %s", + PQerrorMessage(AH->connection)); + + PQclear(res); + } + else + ahprintf(AH, "%s\n\n", cmd->data); + + destroyPQExpBuffer(cmd); + + free(AH->currSequenceAm); + AH->currSequenceAm = pg_strdup(want); +} + /* * Set the proper default_table_access_method value for the table. 
*/ @@ -3833,6 +3894,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) _becomeOwner(AH, te); _selectOutputSchema(AH, te->namespace); _selectTablespace(AH, te->tablespace); + _selectSequenceAccessMethod(AH, te->sequenceam); if (te->relkind != RELKIND_PARTITIONED_TABLE) _selectTableAccessMethod(AH, te->tableam); @@ -4389,6 +4451,8 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list) AH->currUser = NULL; free(AH->currSchema); AH->currSchema = NULL; + free(AH->currSequenceAm); + AH->currSequenceAm = NULL; free(AH->currTablespace); AH->currTablespace = NULL; free(AH->currTableAm); @@ -5128,6 +5192,7 @@ CloneArchive(ArchiveHandle *AH) clone->connCancel = NULL; clone->currUser = NULL; clone->currSchema = NULL; + clone->currSequenceAm = NULL; clone->currTableAm = NULL; clone->currTablespace = NULL; @@ -5187,6 +5252,7 @@ DeCloneArchive(ArchiveHandle *AH) /* Clear any connection-local state */ free(AH->currUser); free(AH->currSchema); + free(AH->currSequenceAm); free(AH->currTablespace); free(AH->currTableAm); free(AH->savedPassword); diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 365073b3eae4..89b5cec22ac1 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -71,10 +71,11 @@ #define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0) /* BLOB METADATA entries * and multiple BLOBS, * relkind */ +#define K_VERS_1_17 MAKE_ARCHIVE_VERSION(1, 17, 0) /* add sequenceam */ /* Current archive version number (the format we can output) */ #define K_VERS_MAJOR 1 -#define K_VERS_MINOR 16 +#define K_VERS_MINOR 17 #define K_VERS_REV 0 #define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV) @@ -323,6 +324,7 @@ struct _archiveHandle /* these vars track state to avoid sending redundant SET commands */ char *currUser; /* current username, or NULL if unknown */ char *currSchema; /* current schema, or NULL */ + char *currSequenceAm; /* current 
sequence access method, or NULL */ char *currTablespace; /* current tablespace, or NULL */ char *currTableAm; /* current table access method, or NULL */ @@ -358,6 +360,7 @@ struct _tocEntry char *namespace; /* null or empty string if not in a schema */ char *tablespace; /* null if not in a tablespace; empty string * means use database default */ + char *sequenceam; /* sequence access method, only for SEQUENCE tags */ char *tableam; /* table access method, only for TABLE tags */ char relkind; /* relation kind, only for TABLE tags */ char *owner; @@ -404,6 +407,7 @@ typedef struct _archiveOpts const char *tag; const char *namespace; const char *tablespace; + const char *sequenceam; const char *tableam; char relkind; const char *owner; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e2e7975b34e0..543a1d958436 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -134,6 +134,7 @@ typedef struct int64 cache; /* cache size */ int64 last_value; /* last value of sequence */ bool is_called; /* whether nextval advances before returning */ + char *seqam; /* access method of sequence */ } SequenceItem; typedef enum OidOptions @@ -495,6 +496,7 @@ main(int argc, char **argv) {"if-exists", no_argument, &dopt.if_exists, 1}, {"inserts", no_argument, NULL, 9}, {"lock-wait-timeout", required_argument, NULL, 2}, + {"no-sequence-access-method", no_argument, &dopt.outputNoSequenceAm, 1}, {"no-table-access-method", no_argument, &dopt.outputNoTableAm, 1}, {"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1}, {"quote-all-identifiers", no_argument, &quote_all_identifiers, 1}, @@ -1182,6 +1184,7 @@ main(int argc, char **argv) ropt->superuser = dopt.outputSuperuser; ropt->createDB = dopt.outputCreateDB; ropt->noOwner = dopt.outputNoOwner; + ropt->noSequenceAm = dopt.outputNoSequenceAm; ropt->noTableAm = dopt.outputNoTableAm; ropt->noTablespace = dopt.outputNoTablespaces; ropt->disable_triggers = dopt.disable_triggers; @@ -1303,6 +1306,7 @@
help(const char *progname) printf(_(" --no-security-labels do not dump security label assignments\n")); printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); + printf(_(" --no-sequence-access-method do not dump sequence access methods\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); printf(_(" --no-toast-compression do not dump TOAST compression methods\n")); @@ -14084,6 +14088,9 @@ dumpAccessMethod(Archive *fout, const AccessMethodInfo *aminfo) case AMTYPE_INDEX: appendPQExpBufferStr(q, "TYPE INDEX "); break; + case AMTYPE_SEQUENCE: + appendPQExpBufferStr(q, "TYPE SEQUENCE "); + break; case AMTYPE_TABLE: appendPQExpBufferStr(q, "TYPE TABLE "); break; @@ -18492,26 +18499,40 @@ collectSequences(Archive *fout) * * Since version 18, we can gather the sequence data in this query with * pg_get_sequence_data(), but we only do so for non-schema-only dumps. + * + * Access methods for sequences are supported since version 18.
*/ if (fout->remoteVersion < 100000) return; - else if (fout->remoteVersion < 180000 || - (!fout->dopt->dumpData && !fout->dopt->sequence_data)) + else if (fout->remoteVersion < 180000) query = "SELECT seqrelid, format_type(seqtypid, NULL), " "seqstart, seqincrement, " "seqmax, seqmin, " "seqcache, seqcycle, " - "NULL, 'f' " + "NULL, 'f', NULL " "FROM pg_catalog.pg_sequence " "ORDER BY seqrelid"; + else if (!fout->dopt->dumpData && !fout->dopt->sequence_data) + query = "SELECT s.seqrelid, format_type(s.seqtypid, NULL), " + "s.seqstart, s.seqincrement, " + "s.seqmax, s.seqmin, " + "s.seqcache, s.seqcycle, " + "NULL, 'f', a.amname AS seqam " + "FROM pg_catalog.pg_sequence s " + "JOIN pg_class c ON (c.oid = s.seqrelid) " + "JOIN pg_am a ON (a.oid = c.relam) " + "ORDER BY seqrelid"; else - query = "SELECT seqrelid, format_type(seqtypid, NULL), " - "seqstart, seqincrement, " - "seqmax, seqmin, " - "seqcache, seqcycle, " - "last_value, is_called " - "FROM pg_catalog.pg_sequence, " - "pg_get_sequence_data(seqrelid) " + query = "SELECT s.seqrelid, format_type(s.seqtypid, NULL), " + "s.seqstart, s.seqincrement, " + "s.seqmax, s.seqmin, " + "s.seqcache, s.seqcycle, " + "r.last_value, r.is_called, " + "a.amname AS seqam " + "FROM pg_catalog.pg_sequence s " + "JOIN pg_class c ON (c.oid = s.seqrelid) " + "JOIN pg_am a ON (a.oid = c.relam), " + "pg_get_sequence_data(s.seqrelid) r " "ORDER BY seqrelid;"; res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK); @@ -18531,6 +18552,10 @@ collectSequences(Archive *fout) sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0); sequences[i].last_value = strtoi64(PQgetvalue(res, i, 8), NULL, 10); sequences[i].is_called = (strcmp(PQgetvalue(res, i, 9), "t") == 0); + if (!PQgetisnull(res, i, 10)) + sequences[i].seqam = pg_strdup(PQgetvalue(res, i, 10)); + else + sequences[i].seqam = NULL; } PQclear(res); @@ -18602,6 +18627,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) seq->minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10); 
seq->cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10); seq->cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0); + seq->seqam = NULL; PQclear(res); } @@ -18724,6 +18750,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) ARCHIVE_OPTS(.tag = tbinfo->dobj.name, .namespace = tbinfo->dobj.namespace->dobj.name, .owner = tbinfo->rolname, + .sequenceam = seq->seqam, .description = "SEQUENCE", .section = SECTION_PRE_DATA, .createStmt = query->data, diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 946a6d0fafc6..f8c6a74320ad 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -93,6 +93,7 @@ static int disable_dollar_quoting = 0; static int disable_triggers = 0; static int if_exists = 0; static int inserts = 0; +static int no_sequence_access_method = 0; static int no_table_access_method = 0; static int no_tablespaces = 0; static int use_setsessauth = 0; @@ -164,6 +165,7 @@ main(int argc, char *argv[]) {"if-exists", no_argument, &if_exists, 1}, {"inserts", no_argument, &inserts, 1}, {"lock-wait-timeout", required_argument, NULL, 2}, + {"no-sequence-access-method", no_argument, &no_sequence_access_method, 1}, {"no-table-access-method", no_argument, &no_table_access_method, 1}, {"no-tablespaces", no_argument, &no_tablespaces, 1}, {"quote-all-identifiers", no_argument, &quote_all_identifiers, 1}, @@ -465,6 +467,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --disable-triggers"); if (inserts) appendPQExpBufferStr(pgdumpopts, " --inserts"); + if (no_sequence_access_method) + appendPQExpBufferStr(pgdumpopts, " --no-sequence-access-method"); if (no_table_access_method) appendPQExpBufferStr(pgdumpopts, " --no-table-access-method"); if (no_tablespaces) @@ -738,6 +742,7 @@ help(void) printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); + printf(_("
--no-sequence-access-method do not dump sequence access methods\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); printf(_(" --no-toast-compression do not dump TOAST compression methods\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index f2182e918256..73826fed9803 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -98,6 +98,7 @@ main(int argc, char **argv) static int enable_row_security = 0; static int if_exists = 0; static int no_data_for_failed_tables = 0; + static int outputNoSequenceAm = 0; static int outputNoTableAm = 0; static int outputNoTablespaces = 0; static int use_setsessauth = 0; @@ -154,6 +155,7 @@ main(int argc, char **argv) {"enable-row-security", no_argument, &enable_row_security, 1}, {"if-exists", no_argument, &if_exists, 1}, {"no-data-for-failed-tables", no_argument, &no_data_for_failed_tables, 1}, + {"no-sequence-access-method", no_argument, &outputNoSequenceAm, 1}, {"no-table-access-method", no_argument, &outputNoTableAm, 1}, {"no-tablespaces", no_argument, &outputNoTablespaces, 1}, {"role", required_argument, NULL, 2}, @@ -459,6 +461,7 @@ main(int argc, char **argv) opts->disable_triggers = disable_triggers; opts->enable_row_security = enable_row_security; opts->noDataForFailedTables = no_data_for_failed_tables; + opts->noSequenceAm = outputNoSequenceAm; opts->noTableAm = outputNoTableAm; opts->noTablespace = outputNoTablespaces; opts->use_setsessauth = use_setsessauth; @@ -702,6 +705,7 @@ usage(const char *progname) printf(_(" --no-security-labels do not restore security labels\n")); printf(_(" --no-statistics do not restore statistics\n")); printf(_(" --no-subscriptions do not restore subscriptions\n")); + printf(_(" --no-sequence-access-method do not restore sequence access methods\n")); printf(_(" --no-table-access-method do not restore table access methods\n")); printf(_(" 
--no-tablespaces do not restore tablespace assignments\n")); printf(_(" --section=SECTION restore named section (pre-data, data, or post-data)\n")); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 55d892d9c162..6c913d568b44 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -604,6 +604,13 @@ 'postgres', ], }, + no_sequence_access_method => { + dump_cmd => [ + 'pg_dump', '--no-sync', + "--file=$tempdir/no_sequence_access_method.sql", + '--no-sequence-access-method', 'postgres', + ], + }, no_table_access_method => { dump_cmd => [ 'pg_dump', '--no-sync', @@ -822,6 +829,7 @@ no_policies => 1, no_privs => 1, no_statistics => 1, + no_sequence_access_method => 1, no_table_access_method => 1, pg_dumpall_dbprivs => 1, pg_dumpall_exclude => 1, @@ -4773,6 +4781,18 @@ }, }, + 'CREATE ACCESS METHOD regress_test_sequence_am' => { + create_order => 11, + create_sql => + 'CREATE ACCESS METHOD regress_sequence_am TYPE SEQUENCE HANDLER seq_local_sequenceam_handler;', + regexp => qr/^ + \QCREATE ACCESS METHOD regress_sequence_am TYPE SEQUENCE HANDLER seq_local_sequenceam_handler;\E + \n/xm, + like => { + %full_runs, section_pre_data => 1, + }, + }, + # It's a bit tricky to ensure that the proper SET of default table # AM occurs. To achieve that we create a table with the standard # AM, test AM, standard AM. That guarantees that there needs to be @@ -4801,6 +4821,35 @@ }, }, + + # This uses the same trick as for materialized views and tables, + # but this time with a sequence access method, checking that a + # correct set of SET queries are created. 
+ 'CREATE SEQUENCE regress_pg_dump_seq_am' => { + create_order => 12, + create_sql => ' + CREATE SEQUENCE dump_test.regress_pg_dump_seq_am_0 USING seqlocal; + CREATE SEQUENCE dump_test.regress_pg_dump_seq_am_1 USING regress_sequence_am; + CREATE SEQUENCE dump_test.regress_pg_dump_seq_am_2 USING seqlocal;', + regexp => qr/^ + \QSET default_sequence_access_method = regress_sequence_am;\E + (\n(?!SET[^;]+;)[^\n]*)* + \n\QCREATE SEQUENCE dump_test.regress_pg_dump_seq_am_1\E + \n\s+\QSTART WITH 1\E + \n\s+\QINCREMENT BY 1\E + \n\s+\QNO MINVALUE\E + \n\s+\QNO MAXVALUE\E + \n\s+\QCACHE 1;\E\n/xm, + like => { + %full_runs, %dump_test_schema_runs, section_pre_data => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + no_sequence_access_method => 1, + only_dump_measurement => 1, + }, + }, + 'CREATE MATERIALIZED VIEW regress_pg_dump_matview_am' => { create_order => 13, create_sql => ' From 7019977012a77294c0d5dbddbdaf1a4358c979ef Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 1 Dec 2023 12:55:56 +0900 Subject: [PATCH 6/7] Sequence access methods - core documentation --- doc/src/sgml/config.sgml | 16 +++++ doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/postgres.sgml | 1 + doc/src/sgml/ref/create_access_method.sgml | 15 ++-- doc/src/sgml/ref/create_sequence.sgml | 12 ++++ doc/src/sgml/sequenceam.sgml | 80 ++++++++++++++++++++++ 6 files changed, 119 insertions(+), 6 deletions(-) create mode 100644 doc/src/sgml/sequenceam.sgml diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index fd6e3e028907..659a37960579 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9752,6 +9752,22 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + default_sequence_access_method (string) + + default_sequence_access_method configuration parameter + + + + + This parameter specifies the default sequence access method to use when + creating sequences if the CREATE SEQUENCE + command does not explicitly specify an access method. 
The default is + local. + + + + default_tablespace (string) diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index fef9584f908e..a80de08dd26e 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -95,6 +95,7 @@ + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index af476c82fcc1..9f369cc89930 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -258,6 +258,7 @@ break is not needed in a wider output rendering. &geqo; &tableam; &indexam; + &sequenceam; &wal-for-extensions; &indextypes; &storage; diff --git a/doc/src/sgml/ref/create_access_method.sgml b/doc/src/sgml/ref/create_access_method.sgml index dae43dbaed58..3067dc4d4df0 100644 --- a/doc/src/sgml/ref/create_access_method.sgml +++ b/doc/src/sgml/ref/create_access_method.sgml @@ -61,8 +61,8 @@ CREATE ACCESS METHOD name This clause specifies the type of access method to define. - Only TABLE and INDEX - are supported at present. + Only TABLE, SEQUENCE and + INDEX are supported at present. @@ -77,12 +77,15 @@ CREATE ACCESS METHOD name declared to take a single argument of type internal, and its return type depends on the type of access method; for TABLE access methods, it must - be table_am_handler and for INDEX - access methods, it must be index_am_handler. + be table_am_handler; for INDEX + access methods, it must be index_am_handler; + for SEQUENCE access methods, it must be + sequence_am_handler. The C-level API that the handler function must implement varies depending on the type of access method. The table access method API - is described in and the index access method - API is described in . + is described in , the index access method + API is described in and the sequence access + method API is described in .
diff --git a/doc/src/sgml/ref/create_sequence.sgml b/doc/src/sgml/ref/create_sequence.sgml index 1e283f13d15c..52c6096e4ba2 100644 --- a/doc/src/sgml/ref/create_sequence.sgml +++ b/doc/src/sgml/ref/create_sequence.sgml @@ -29,6 +29,7 @@ CREATE [ { TEMPORARY | TEMP } | UNLOGGED ] SEQUENCE [ IF NOT EXISTS ] start ] [ CACHE cache ] [ OWNED BY { table_name.column_name | NONE } ] + [ USING access_method ] @@ -263,6 +264,17 @@ SELECT * FROM name; + + + USING access_method + + + The USING option specifies which sequence access + method will be used when generating the sequence numbers. The default + is local. + + + diff --git a/doc/src/sgml/sequenceam.sgml b/doc/src/sgml/sequenceam.sgml new file mode 100644 index 000000000000..a96170bfac03 --- /dev/null +++ b/doc/src/sgml/sequenceam.sgml @@ -0,0 +1,80 @@ + + + + Sequence Access Method Interface Definition + + + Sequence Access Method + + + sequenceam + Sequence Access Method + + + + This chapter explains the interface between the core + PostgreSQL system and sequence access + methods, which manage the operations around sequences. The core + system knows little about these access methods beyond what is specified here, + so it is possible to develop entirely new access method types by writing + add-on code. + + + + Each sequence access method is described by a row in the + pg_am system + catalog. The pg_am entry specifies a name and a + handler function for the sequence access method. These + entries can be created and deleted using the + and + SQL commands. + + + + A sequence access method handler function must be declared to accept a single + argument of type internal and to return the pseudo-type + sequence_am_handler. The argument is a dummy value that simply + serves to prevent handler functions from being called directly from SQL commands.
+ + The result of the function must be a pointer to a struct of type + SequenceAmRoutine, which contains everything that the + core code needs to know to make use of the sequence access method. The return + value needs to be of server lifetime, which is typically achieved by + defining it as a static const variable in global + scope. The SequenceAmRoutine struct, also called the + access method's API struct, defines the behavior of + the access method using callbacks. These callbacks are pointers to plain C + functions and are not visible or callable at the SQL level. All the + callbacks and their behavior are defined in the + SequenceAmRoutine structure (with comments inside + the struct defining the requirements for callbacks). Most callbacks have + wrapper functions, which are documented from the point of view of a user + (rather than an implementor) of the sequence access method. For details, + please refer to the + src/include/access/sequenceam.h file. + + + + Currently, the way a sequence access method stores data is fairly + unconstrained, and it is possible to use a predefined + Table Access Method to store sequence + data. + + + + For crash safety, a sequence access method can use + WAL, or a custom + implementation. + If WAL is chosen, either + Generic WAL Records can be used, or a + Custom WAL Resource Manager can be + implemented. + + + + Any developer of a new sequence access method can refer to + the existing local implementation present in + src/backend/access/sequence/local.c for details of + its implementation. + + From 56223d1ee299d5295e200db52924d8b92d7a703e Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 26 Apr 2024 15:09:00 +0900 Subject: [PATCH 7/7] snowflake: Add sequence AM based on it This includes documentation and a basic implementation, though this is in a very early stage.
--- contrib/Makefile | 1 + contrib/meson.build | 1 + contrib/snowflake/.gitignore | 3 + contrib/snowflake/Makefile | 19 + contrib/snowflake/expected/snowflake.out | 73 +++ contrib/snowflake/meson.build | 34 ++ contrib/snowflake/snowflake--1.0.sql | 21 + contrib/snowflake/snowflake.c | 570 +++++++++++++++++++++++ contrib/snowflake/snowflake.control | 5 + contrib/snowflake/sql/snowflake.sql | 29 ++ doc/src/sgml/contrib.sgml | 1 + doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/snowflake.sgml | 100 ++++ 13 files changed, 858 insertions(+) create mode 100644 contrib/snowflake/.gitignore create mode 100644 contrib/snowflake/Makefile create mode 100644 contrib/snowflake/expected/snowflake.out create mode 100644 contrib/snowflake/meson.build create mode 100644 contrib/snowflake/snowflake--1.0.sql create mode 100644 contrib/snowflake/snowflake.c create mode 100644 contrib/snowflake/snowflake.control create mode 100644 contrib/snowflake/sql/snowflake.sql create mode 100644 doc/src/sgml/snowflake.sgml diff --git a/contrib/Makefile b/contrib/Makefile index 2f0a88d3f774..26cf6e94ff10 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -44,6 +44,7 @@ SUBDIRS = \ pg_walinspect \ postgres_fdw \ seg \ + snowflake \ spi \ tablefunc \ tcn \ diff --git a/contrib/meson.build b/contrib/meson.build index ed30ee7d639f..e3bd2b464172 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -59,6 +59,7 @@ subdir('pg_walinspect') subdir('postgres_fdw') subdir('seg') subdir('sepgsql') +subdir('snowflake') subdir('spi') subdir('sslinfo') # start-scripts doesn't contain build products diff --git a/contrib/snowflake/.gitignore b/contrib/snowflake/.gitignore new file mode 100644 index 000000000000..44d119cfcc24 --- /dev/null +++ b/contrib/snowflake/.gitignore @@ -0,0 +1,3 @@ +# Generated subdirectories +/log/ +/results/ diff --git a/contrib/snowflake/Makefile b/contrib/snowflake/Makefile new file mode 100644 index 000000000000..fa5b48d565d8 --- /dev/null +++ b/contrib/snowflake/Makefile @@ 
-0,0 +1,19 @@ +# contrib/snowflake/Makefile + +MODULES = snowflake + +EXTENSION = snowflake +DATA = snowflake--1.0.sql + +REGRESS = snowflake + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/snowflake +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/snowflake/expected/snowflake.out b/contrib/snowflake/expected/snowflake.out new file mode 100644 index 000000000000..b7e469bf7384 --- /dev/null +++ b/contrib/snowflake/expected/snowflake.out @@ -0,0 +1,73 @@ +CREATE EXTENSION snowflake; +CREATE SEQUENCE snowflake_seq USING snowflake; +SET snowflake.machine_id = 2000; -- error +ERROR: 2000 is outside the valid range for parameter "snowflake.machine_id" (0 .. 1023) +SET snowflake.machine_id = 4; -- ok +SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq')); + machine | counter +---------+--------- + 4 | 2 +(1 row) + +SELECT machine, counter FROM snowflake_get(lastval()); + machine | counter +---------+--------- + 4 | 2 +(1 row) + +SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq')); + machine | counter +---------+--------- + 4 | 3 +(1 row) + +SELECT machine, counter FROM snowflake_get(currval('snowflake_seq')); + machine | counter +---------+--------- + 4 | 3 +(1 row) + +-- Sequence relation exists, is unlogged and remains unlogged. +SELECT * FROM snowflake_seq; + count | is_called +-------+----------- + 3 | t +(1 row) + +ALTER SEQUENCE snowflake_seq SET LOGGED; +SELECT relpersistence FROM pg_class where relname = 'snowflake_seq'; + relpersistence +---------------- + u +(1 row) + +ALTER SEQUENCE snowflake_seq RESTART; +SELECT * FROM snowflake_seq; + count | is_called +-------+----------- + 1 | f +(1 row) + +-- Identity column, where cache affects value. 
+SET default_sequence_access_method = 'snowflake'; +CREATE TABLE snowflake_tab (a int GENERATED ALWAYS AS IDENTITY, b int); +INSERT INTO snowflake_tab VALUES (DEFAULT, generate_series(1, 10)); +SELECT data.machine, data.counter + FROM snowflake_tab, LATERAL snowflake_get(a) AS data; + machine | counter +---------+--------- + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 5 + 4 | 6 + 4 | 7 + 4 | 8 + 4 | 9 + 4 | 10 + 4 | 11 +(10 rows) + +DROP TABLE snowflake_tab; +DROP SEQUENCE snowflake_seq; +DROP EXTENSION snowflake;
diff --git a/contrib/snowflake/meson.build b/contrib/snowflake/meson.build
new file mode 100644
index 000000000000..567669eea790
--- /dev/null
+++ b/contrib/snowflake/meson.build
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+snowflake_sources = files(
+  'snowflake.c',
+)
+
+if host_system == 'windows'
+  snowflake_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'snowflake',
+    '--FILEDESC', 'snowflake - sequence access method',])
+endif
+
+snowflake = shared_module('snowflake',
+  snowflake_sources,
+  kwargs: contrib_mod_args,
+)
+contrib_targets += snowflake
+
+install_data(
+  'snowflake.control',
+  'snowflake--1.0.sql',
+  kwargs: contrib_data_args,
+)
+
+tests += {
+  'name': 'snowflake',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'regress': {
+    'sql': [
+      'snowflake',
+    ],
+  },
+}
diff --git a/contrib/snowflake/snowflake--1.0.sql b/contrib/snowflake/snowflake--1.0.sql
new file mode 100644
index 000000000000..bcb9d754f1b4
--- /dev/null
+++ b/contrib/snowflake/snowflake--1.0.sql
@@ -0,0 +1,21 @@
+/* contrib/snowflake/snowflake--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION snowflake" to load this file.
\quit
+
+CREATE FUNCTION snowflake_sequenceam_handler(internal)
+  RETURNS sequence_am_handler
+  AS 'MODULE_PATHNAME'
+  LANGUAGE C;
+
+CREATE ACCESS METHOD snowflake
+  TYPE SEQUENCE HANDLER snowflake_sequenceam_handler;
+COMMENT ON ACCESS METHOD snowflake IS 'snowflake sequence access method';
+
+CREATE FUNCTION snowflake_get(IN raw int8,
+    OUT time_ms int8,
+    OUT machine int4,
+    OUT counter int4)
+  RETURNS record
+  AS 'MODULE_PATHNAME'
+  LANGUAGE C STRICT;
diff --git a/contrib/snowflake/snowflake.c b/contrib/snowflake/snowflake.c
new file mode 100644
index 000000000000..be4c4039ecd0
--- /dev/null
+++ b/contrib/snowflake/snowflake.c
@@ -0,0 +1,570 @@
+/*-------------------------------------------------------------------------
+ *
+ * snowflake.c
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  contrib/snowflake/snowflake.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/time.h>
+
+#include "access/generic_xlog.h"
+#include "access/sequenceam.h"
+#include "access/xact.h"
+#include "catalog/storage_xlog.h"
+#include "commands/tablecmds.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "storage/bufmgr.h"
+#include "utils/guc.h"
+#include "utils/numeric.h"
+
+PG_MODULE_MAGIC;
+
+/* "special area" of a snowflake's buffer page. */
+#define SNOWFLAKE_MAGIC	0x01
+
+typedef struct snowflake_magic
+{
+	uint32		magic;
+} snowflake_magic;
+
+/* -----------------------------------------------------------------------
+ * Snowflake IDs are 64-bit based, with the following structure:
+ * - 41 bits for an epoch-based timestamp, in milliseconds.
+ * - 10 bits for a machine ID.
+ * - 12 bits for a sequence counter.
+ *
+ * The timestamp can be cut to an offset.  The machine ID is controlled
+ * by a superuser GUC.  Sequence properties apply to the sequence counter,
+ * as the other two are environment-dependent.
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * Helper routines to convert a snowflake ID from/to an int64.
+ *
+ * Each field has a mask (its width) and a shift (its position in the
+ * 64-bit value): counter in bits 0-11, machine ID in bits 12-21 and
+ * timestamp in bits 22-62.
+ */
+#define SNOWFLAKE_COUNTER_MASK		0x0000000000000FFF	/* 12 bits */
+#define SNOWFLAKE_COUNTER_SHIFT		0
+#define SNOWFLAKE_MACHINE_ID_MASK	0x00000000000003FF	/* 10 bits */
+#define SNOWFLAKE_MACHINE_ID_SHIFT	12	/* counter */
+#define SNOWFLAKE_TIMESTAMP_MASK	0x000001FFFFFFFFFF	/* 41 bits */
+#define SNOWFLAKE_TIMESTAMP_SHIFT	22	/* machine ID + counter sizes */
+
+/* Decomposed form of a snowflake ID. */
+typedef struct snowflake_id
+{
+	uint32		count;
+	uint32		machine;
+	uint64		time_ms;
+} snowflake_id;
+
+/*
+ * Pack the three fields of "id" into the int64 lvalue "raw".
+ *
+ * Fields are widened to uint64 before being masked and shifted, so all the
+ * arithmetic is done on 64 unsigned bits.  Both arguments are evaluated
+ * more than once: do not pass expressions with side effects.
+ */
+#define snowflake_id_to_int64(id, raw) \
+	do { \
+		(raw) = (int64) \
+			((((uint64) (id).count) & SNOWFLAKE_COUNTER_MASK) << SNOWFLAKE_COUNTER_SHIFT | \
+			 (((uint64) (id).machine) & SNOWFLAKE_MACHINE_ID_MASK) << SNOWFLAKE_MACHINE_ID_SHIFT | \
+			 (((uint64) (id).time_ms) & SNOWFLAKE_TIMESTAMP_MASK) << SNOWFLAKE_TIMESTAMP_SHIFT); \
+	} while (0)
+
+/*
+ * Unpack the int64 "raw" into the three fields of "id".
+ *
+ * The value is shifted as unsigned, which keeps the extraction well-defined
+ * when "raw" is negative; the masks select the same bits either way.  Both
+ * arguments are evaluated more than once.
+ */
+#define int64_to_snowflake_id(raw, id) \
+	do { \
+		(id).count = (((uint64) (raw)) >> SNOWFLAKE_COUNTER_SHIFT) & SNOWFLAKE_COUNTER_MASK; \
+		(id).machine = (((uint64) (raw)) >> SNOWFLAKE_MACHINE_ID_SHIFT) & SNOWFLAKE_MACHINE_ID_MASK; \
+		(id).time_ms = (((uint64) (raw)) >> SNOWFLAKE_TIMESTAMP_SHIFT) & SNOWFLAKE_TIMESTAMP_MASK; \
+	} while (0)
+
+/*
+ * Format of tuples stored in heap table associated to snowflake sequence.
+ */
+typedef struct FormData_snowflake_data
+{
+	/* enough to cover 12 bits of the internal counter */
+	int16		count;
+	bool		is_called;
+} FormData_snowflake_data;
+
+typedef FormData_snowflake_data *Form_snowflake_data;
+
+/*
+ * Columns of a snowflake sequence relation.
+ */
+#define SNOWFLAKE_COL_COUNT			1
+#define SNOWFLAKE_COL_CALLED		2
+
+#define SNOWFLAKE_COLS				2
+
+/* GUC parameter */
+static int	snowflake_machine_id = 1;
+
+PG_FUNCTION_INFO_V1(snowflake_sequenceam_handler);
+
+/* -----------------------------------------------------------------------
+ * Interfaces for relation manipulation.
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * Initialize snowflake relation's fork with some data.
+ *
+ * rel		sequence relation whose fork is extended.
+ * tuple	sequence tuple to store; its header is modified in place
+ *			(frozen xmin, invalid xmax, t_ctid pointing to the first item
+ *			of block 0).
+ * forkNum	fork to fill.  Only INIT_FORKNUM is WAL-logged here.
+ *
+ * The fork is extended by one buffer, asserted to be block 0.  Its page is
+ * initialized with a special area holding SNOWFLAKE_MAGIC, and the tuple is
+ * expected to land at FirstOffsetNumber.
+ */
+static void
+fill_snowflake_fork(Relation rel, HeapTuple tuple, ForkNumber forkNum)
+{
+	Buffer		buf;
+	Page		page;
+	snowflake_magic *sm;
+	OffsetNumber offnum;
+	GenericXLogState *state = NULL;
+
+	/* Initialize first page of relation with special magic number */
+
+	/* the new buffer is returned locked (EB_LOCK_FIRST) */
+	buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL,
+							EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
+	Assert(BufferGetBlockNumber(buf) == 0);
+
+	page = BufferGetPage(buf);
+
+	PageInit(page, BufferGetPageSize(buf), sizeof(snowflake_magic));
+	sm = (snowflake_magic *) PageGetSpecialPointer(page);
+	sm->magic = SNOWFLAKE_MAGIC;
+
+	/* Now insert sequence tuple */
+
+	/*
+	 * Since VACUUM does not process sequences, we have to force the tuple to
+	 * have xmin = FrozenTransactionId now.  Otherwise it would become
+	 * invisible to SELECTs after 2G transactions.  It is okay to do this
+	 * because if the current transaction aborts, no other xact will ever
+	 * examine the sequence tuple anyway.
+	 */
+	HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
+	HeapTupleHeaderSetXminFrozen(tuple->t_data);
+	HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
+	HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+	tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+	ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
+
+	/*
+	 * Initialize before entering in the critical section, as this does
+	 * allocations.
+	 */
+	if (forkNum == INIT_FORKNUM)
+		state = GenericXLogStart(rel);
+
+	START_CRIT_SECTION();
+
+	MarkBufferDirty(buf);
+
+	offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
+						 InvalidOffsetNumber, false, false);
+	/*
+	 * NOTE(review): this ERROR is raised inside the critical section, where
+	 * it presumably gets promoted to PANIC -- confirm this is intended.
+	 */
+	if (offnum != FirstOffsetNumber)
+		elog(ERROR, "failed to add sequence tuple to page");
+
+	/*
+	 * Init forks have to be logged.  These go through generic WAL records
+	 * for simplicity's sake to save from the need of a custom RMGR.
+	 */
+	if (forkNum == INIT_FORKNUM)
+	{
+		/* the page is already filled, so log it as a full image */
+		(void) GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
+		GenericXLogFinish(state);
+	}
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Initialize snowflake relation.
+ *
+ * This needs to handle both the initial and main forks.
+ *
+ * The relation is asserted to be unlogged.  The main fork is filled first,
+ * then the init fork is created through smgr, its creation is logged, it is
+ * filled with the same tuple, and the relation's buffers are flushed.
+ */
+static void
+fill_snowflake(Relation rel, HeapTuple tuple)
+{
+	SMgrRelation srel;
+
+	Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
+
+	fill_snowflake_fork(rel, tuple, MAIN_FORKNUM);
+
+	/* init fork */
+	srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER);
+	smgrcreate(srel, INIT_FORKNUM, false);
+	log_smgrcreate(&rel->rd_locator, INIT_FORKNUM);
+	fill_snowflake_fork(rel, tuple, INIT_FORKNUM);
+	FlushRelationBuffers(rel);
+	smgrclose(srel);
+}
+
+/*
+ * Read the current state of a snowflake sequence
+ *
+ * Given an opened sequence relation, lock the page buffer and find the tuple.
+ *
+ * *buf receives the reference to the pinned-and-ex-locked buffer.
+ * *seqdatatuple receives the reference to the sequence tuple proper.
+ *
+ * The return value points to the data payload of the tuple, located in the
+ * buffer page.  The caller is in charge of releasing the buffer.
+ *
+ * An error is raised if the special area of the page does not hold
+ * SNOWFLAKE_MAGIC.
+ */
+static Form_snowflake_data
+read_snowflake(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
+{
+	Page		page;
+	ItemId		lp;
+	snowflake_magic *sm;
+	Form_snowflake_data seq;
+
+	/* the sequence tuple is always looked up in block 0 */
+	*buf = ReadBuffer(rel, 0);
+	LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
+
+	page = BufferGetPage(*buf);
+	sm = (snowflake_magic *) PageGetSpecialPointer(page);
+
+	if (sm->magic != SNOWFLAKE_MAGIC)
+		elog(ERROR, "bad magic number in sequence \"%s\": %08X",
+			 RelationGetRelationName(rel), sm->magic);
+
+	lp = PageGetItemId(page, FirstOffsetNumber);
+	Assert(ItemIdIsNormal(lp));
+
+	/* Note we currently only bother to set these two fields of *seqdatatuple */
+	seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
+	seqdatatuple->t_len = ItemIdGetLength(lp);
+
+	/*
+	 * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
+	 * a sequence, which would leave a non-frozen XID in the sequence tuple's
+	 * xmax, which eventually leads to clog access failures or worse.  If we
+	 * see this has happened, clean up after it.  We treat this like a hint
+	 * bit update, ie, don't bother to WAL-log it, since we can certainly do
+	 * this again if the update gets lost.
+	 */
+	Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
+	if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
+	{
+		HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
+		seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
+		seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+		MarkBufferDirtyHint(*buf, true);
+	}
+
+	seq = (Form_snowflake_data) GETSTRUCT(seqdatatuple);
+
+	return seq;
+}
+
+
+/* ------------------------------------------------------------------------
+ * Callbacks for the snowflake sequence access method.
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * Return the table access method used by this sequence.
+ *
+ * This is just an on-memory sequence, so anything is fine.
+ */
+static const char *
+snowflake_sequenceam_get_table_am(void)
+{
+	return "heap";
+}
+
+/*
+ * snowflake_sequenceam_init
+ *
+ * Initialize relation of a snowflake sequence.  This stores the sequence
+ * counter in an unlogged relation as timestamps ensure value uniqueness.
+ *
+ * rel			sequence relation to set up.
+ * last_value	initial counter; only its low 12 bits
+ *				(SNOWFLAKE_COUNTER_MASK) are kept.
+ * is_called	initial value of the "is_called" column.
+ *
+ * The columns "count" (int2) and "is_called" (bool), both NOT NULL, are
+ * added to the relation, which is then given new unlogged storage and
+ * filled with its first tuple.
+ */
+static void
+snowflake_sequenceam_init(Relation rel, int64 last_value, bool is_called)
+{
+	Datum		values[SNOWFLAKE_COLS];
+	bool		nulls[SNOWFLAKE_COLS];
+	int16		counter;
+	TupleDesc	tupdesc;
+	HeapTuple	tuple;
+	List	   *elts = NIL;
+	ListCell   *lc;
+	ColumnDef  *coldef = NULL;
+	AlterTableCmd *atcmd;
+	List	   *atcmds = NIL;
+
+	/* Adjust last_value, depending on the defaults given */
+	counter = ((int16) last_value) & SNOWFLAKE_COUNTER_MASK;
+
+	/*
+	 * Create unlogged relation with its attributes.
+	 */
+	coldef = makeColumnDef("count", INT2OID, -1, InvalidOid);
+	coldef->is_not_null = true;
+	elts = lappend(elts, coldef);
+	coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid);
+	coldef->is_not_null = true;
+	elts = lappend(elts, coldef);
+
+	/* one AT_AddColumnToSequence command per column definition */
+	foreach(lc, elts)
+	{
+		atcmd = makeNode(AlterTableCmd);
+		atcmd->subtype = AT_AddColumnToSequence;
+		atcmd->def = (Node *) lfirst(lc);
+		atcmds = lappend(atcmds, atcmd);
+	}
+
+	/*
+	 * No recursion needed.  Note that EventTriggerAlterTableStart() should
+	 * have been called.
+	 */
+	AlterTableInternal(RelationGetRelid(rel), atcmds, false);
+	CommandCounterIncrement();
+
+	/*
+	 * Switch the relation to be unlogged.  This forces a rewrite, but
+	 * the relation is empty so that's OK.
+	 */
+	RelationSetNewRelfilenumber(rel, RELPERSISTENCE_UNLOGGED);
+
+	/* And insert its first tuple */
+	values[0] = Int16GetDatum(counter);
+	nulls[0] = false;
+	values[1] = BoolGetDatum(is_called);
+	nulls[1] = false;
+
+	/* fetched only now, after the columns were added above */
+	tupdesc = RelationGetDescr(rel);
+	tuple = heap_form_tuple(tupdesc, values, nulls);
+	fill_snowflake(rel, tuple);
+}
+
+/*
+ * snowflake_sequenceam_nextval
+ *
+ * Return the next value for a snowflake sequence.
+ */
+static int64
+snowflake_sequenceam_nextval(Relation rel, int64 incby, int64 maxv,
+							 int64 minv, int64 cache, bool cycle,
+							 int64 *last)
+{
+	Buffer		buf;
+	Form_snowflake_data seq;
+	HeapTupleData seqdatatuple;
+	int64		result = 0;
+	snowflake_id id = {0};
+	struct timeval tp;
+
+	/*
+	 * incby, maxv, minv, cache and cycle are not used: the value returned
+	 * is built only from the stored counter, the current clock and the
+	 * machine ID GUC.  *last receives the same value as the one returned.
+	 */
+
+	/* lock page buffer and read tuple */
+	seq = read_snowflake(rel, &buf, &seqdatatuple);
+
+	/*
+	 * The logic here is quite simple, increment the counter until its
+	 * threshold is reached and get back to the start.  If the threshold
+	 * is reached, wait 1ms to ensure a unique timestamp.  There is no
+	 * need to do a retry as the buffer is already locked.
+	 */
+	id.count = seq->count;
+	id.count++;
+
+	if (id.count > (PG_INT16_MAX & SNOWFLAKE_COUNTER_MASK))
+	{
+		/*
+		 * Threshold reached, so wait a bit to force the clock to a new
+		 * timestamp.
+		 */
+		id.count = 1;
+		pg_usleep(1000L);	/* 1ms */
+	}
+
+	/* Compute timestamp, with buffer locked */
+	gettimeofday(&tp, NULL);
+	id.time_ms = (uint64) tp.tv_sec * 1000 +
+		tp.tv_usec / 1000;
+
+	/* Machine ID */
+	id.machine = snowflake_machine_id;
+
+	/* ready to change the on-disk (or really, in-buffer) tuple */
+	START_CRIT_SECTION();
+	seq->count = id.count;
+	seq->is_called = true;
+
+	/*
+	 * The page has been modified, so flag it as dirty.  Without this the
+	 * buffer manager is free to drop the page without writing it, losing
+	 * the new counter value.  This matches what setval does.
+	 */
+	MarkBufferDirty(buf);
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+
+	/* Store the last value computed for lastval() */
+	snowflake_id_to_int64(id, result);
+	*last = result;
+	return result;
+}
+
+/*
+ * snowflake_sequenceam_setval
+ *
+ * Set the sequence value, manipulating only the sequence counter.
+ */
+static void
+snowflake_sequenceam_setval(Relation rel, int64 next, bool iscalled)
+{
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	Form_snowflake_data seq;
+
+	/* lock page buffer and read tuple */
+	seq = read_snowflake(rel, &buf, &seqdatatuple);
+
+	/* Change the in-buffer tuple */
+	START_CRIT_SECTION();
+	/* only the low 12 bits of "next" are stored */
+	seq->count = (next & SNOWFLAKE_COUNTER_MASK);
+	seq->is_called = iscalled;
+	MarkBufferDirty(buf);
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * snowflake_sequenceam_get_state
+ *
+ * Return the last sequence counter value.
+ *
+ * *last_value receives the stored counter (not a full snowflake ID) and
+ * *is_called the stored flag.  The buffer is released before returning.
+ */
+static void
+snowflake_sequenceam_get_state(Relation rel, int64 *last_value, bool *is_called)
+{
+	Buffer		buf;
+	HeapTupleData seqdatatuple;
+	Form_snowflake_data seq;
+
+	seq = read_snowflake(rel, &buf, &seqdatatuple);
+	*last_value = seq->count;
+	*is_called = seq->is_called;
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * snowflake_sequenceam_reset
+ *
+ * Reset the sequence, coming down to resetting its counter.
+ *
+ * The current tuple is copied, its counter is set to the low 12 bits of
+ * startv and its flag to is_called, then the relation gets new storage
+ * filled with the modified copy.  reset_state is not used here.
+ */
+static void
+snowflake_sequenceam_reset(Relation rel, int64 startv, bool is_called,
+						   bool reset_state)
+{
+	HeapTupleData seqdatatuple;
+	HeapTuple	tuple;
+	Form_snowflake_data seq;
+	Buffer		buf;
+
+	/* lock buffer page and read tuple */
+	(void) read_snowflake(rel, &buf, &seqdatatuple);
+
+	/* copy the existing tuple */
+	tuple = heap_copytuple(&seqdatatuple);
+
+	/* Now we're done with the old page */
+	UnlockReleaseBuffer(buf);
+
+	/*
+	 * Modify the copied tuple to execute the restart (compare the RESTART
+	 * action in AlterSequence)
+	 */
+	seq = (Form_snowflake_data) GETSTRUCT(tuple);
+	seq->count = (startv & SNOWFLAKE_COUNTER_MASK);
+	seq->is_called = is_called;
+
+	/* create new storage */
+	RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence);
+
+	/* insert the modified tuple into the page */
+	fill_snowflake(rel, tuple);
+}
+
+/*
+ * snowflake_sequenceam_change_persistence
+ *
+ * There is nothing to do here, the underlying relation has to remain
+ * unlogged and is set as such when creating the sequence.
+ */
+static void
+snowflake_sequenceam_change_persistence(Relation rel, char newrelpersistence)
+{
+	/* Nothing to do here */
+}
+
+/* ------------------------------------------------------------------------
+ * Definition of the snowflake sequence access method.
+ * ------------------------------------------------------------------------
+ */
+
+/* Callback table returned by the handler function below. */
+static const SequenceAmRoutine snowflake_sequenceam_methods = {
+	.type = T_SequenceAmRoutine,
+	.get_table_am = snowflake_sequenceam_get_table_am,
+	.init = snowflake_sequenceam_init,
+	.nextval = snowflake_sequenceam_nextval,
+	.setval = snowflake_sequenceam_setval,
+	.get_state = snowflake_sequenceam_get_state,
+	.reset = snowflake_sequenceam_reset,
+	.change_persistence = snowflake_sequenceam_change_persistence
+};
+
+/*
+ * snowflake_sequenceam_handler
+ *
+ * Return a pointer to the static callback table of this access method.
+ */
+Datum
+snowflake_sequenceam_handler(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_POINTER(&snowflake_sequenceam_methods);
+}
+
+/* Utility functions */
+
+/*
+ * snowflake_get
+ *
+ * Return a tuple worth of snowflake ID data, in a readable shape.
+ *
+ * The int8 argument is split into its timestamp, machine ID and counter,
+ * returned in this order as a composite value.
+ */
+PG_FUNCTION_INFO_V1(snowflake_get);
+Datum
+snowflake_get(PG_FUNCTION_ARGS)
+{
+/* NOTE(review): defined but not referenced in this function */
+#define SNOWFLAKE_GET_COLS	3
+	int64		raw = PG_GETARG_INT64(0);
+	Datum	   *values;
+	bool	   *nulls;
+	TupleDesc	tupdesc;
+	snowflake_id id;
+
+	/* determine result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	int64_to_snowflake_id(raw, id);
+
+	/* arrays are sized after the result descriptor, and zeroed */
+	nulls = palloc0(sizeof(bool) * tupdesc->natts);
+	values = palloc0(sizeof(Datum) * tupdesc->natts);
+
+	values[0] = Int64GetDatum(id.time_ms);
+	values[1] = Int32GetDatum(id.machine);
+	values[2] = Int32GetDatum(id.count);
+
+	/* Returns the record as Datum */
+	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Entry point when loading extension.
+ */
+void
+_PG_init(void)
+{
+	/*
+	 * Integer GUC backing snowflake_machine_id: default 1, accepted range
+	 * 0..1023, defined with the PGC_SUSET context.
+	 */
+	DefineCustomIntVariable("snowflake.machine_id",
+							"Machine ID to use with snowflake sequence.",
+							"Default value is 1.",
+							&snowflake_machine_id,
+							1, 0, 1023, /* 10 bits as max */
+							PGC_SUSET,
+							0, NULL, NULL, NULL);
+}
diff --git a/contrib/snowflake/snowflake.control b/contrib/snowflake/snowflake.control
new file mode 100644
index 000000000000..7b8c6089c25f
--- /dev/null
+++ b/contrib/snowflake/snowflake.control
@@ -0,0 +1,5 @@
+# snowflake extension
+comment = 'snowflake - sequence access method'
+default_version = '1.0'
+module_pathname = '$libdir/snowflake'
+relocatable = true
diff --git a/contrib/snowflake/sql/snowflake.sql b/contrib/snowflake/sql/snowflake.sql
new file mode 100644
index 000000000000..395d166ba4bc
--- /dev/null
+++ b/contrib/snowflake/sql/snowflake.sql
@@ -0,0 +1,29 @@
+CREATE EXTENSION snowflake;
+
+CREATE SEQUENCE snowflake_seq USING snowflake;
+
+SET snowflake.machine_id = 2000; -- error
+SET snowflake.machine_id = 4; -- ok
+SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq'));
+SELECT machine, counter FROM snowflake_get(lastval());
+SELECT machine, counter FROM snowflake_get(nextval('snowflake_seq'));
+SELECT machine, counter FROM snowflake_get(currval('snowflake_seq'));
+
+-- Sequence relation exists, is unlogged and remains unlogged.
+SELECT * FROM snowflake_seq;
+ALTER SEQUENCE snowflake_seq SET LOGGED;
+SELECT relpersistence FROM pg_class where relname = 'snowflake_seq';
+
+ALTER SEQUENCE snowflake_seq RESTART;
+SELECT * FROM snowflake_seq;
+
+-- Identity column, where cache affects value.
+SET default_sequence_access_method = 'snowflake'; +CREATE TABLE snowflake_tab (a int GENERATED ALWAYS AS IDENTITY, b int); +INSERT INTO snowflake_tab VALUES (DEFAULT, generate_series(1, 10)); +SELECT data.machine, data.counter + FROM snowflake_tab, LATERAL snowflake_get(a) AS data; +DROP TABLE snowflake_tab; + +DROP SEQUENCE snowflake_seq; +DROP EXTENSION snowflake; diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 24b706b29adc..5e26d5baacc9 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -168,6 +168,7 @@ CREATE EXTENSION extension_name; &seg; &sepgsql; &contrib-spi; + &snowflake; &sslinfo; &tablefunc; &tcn; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index a80de08dd26e..bee964856ece 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -159,6 +159,7 @@ + diff --git a/doc/src/sgml/snowflake.sgml b/doc/src/sgml/snowflake.sgml new file mode 100644 index 000000000000..060699e7ecd1 --- /dev/null +++ b/doc/src/sgml/snowflake.sgml @@ -0,0 +1,100 @@ + + + + snowflake — sequence access method + + + snowflake + + + + snowflake provides a sequence access method based on + Snowflake IDs. + + + + A Snowflake ID (or snowflake) is a unique 64-bit identifier made of three + components: + + 41 bits for a timestamp, epoch-adjusted in milli-seconds + 10 bits for machine ID + 12 bits for a sequence number + + + + + Functions + + + + + snowflake_get(raw int8) returns record + + snowflake_get + function + + + + + + Returns a record made of the timestamp in milli-seconds, the machine ID + and the sequence number for a single snowflake ID. + + + + + + + + Configuration Parameters + + + + + snowflake.machine_id + + snowflake.machine_id configuration parameter + + + + + Machine ID assigned to the snowflake IDs used in the sequence. The + default value is 1. 
+ + + + + + + + Examples + + + This is an example of creating a snowflake sequence: + + + +CREATE SEQUENCE snowflake_seq USING snowflake; + + + + Similarly to the default sequence access method, snowflake sequences + can be queried as a table: + + + + =# SELECT * FROM snowflake_seq; + count | is_called +-------+----------- + 1 | f +(1 row) +=# SELECT to_timestamp(time_ms / 1000), machine, counter + FROM snowflake_get(nextval('snowflake_seq')); + to_timestamp | machine | counter +------------------------+---------+--------- + 2024-04-26 14:28:26+09 | 1 | 3 +(1 row) + + + +