Skip to content

Commit 93ee38e

Browse files
Teach pageinspect about nbtree deduplication.
Add a new bt_metap() column to display the metapage's allequalimage field.

Also add three new columns to contrib/pageinspect's bt_page_items() function:

* Add a boolean column ("dead") that displays the LP_DEAD bit value for each non-pivot tuple.

* Add a TID column ("htid") that displays a single heap TID value for each tuple. This is the TID that is returned by BTreeTupleGetHeapTID(), so comparable values are shown for pivot tuples, plain non-pivot tuples, and posting list tuples.

* Add a TID array column ("tids") that displays TIDs from each tuple's posting list, if any. This works just like the "tids" column from pageinspect's gin_leafpage_items() function.

No version bump for the pageinspect extension, since there hasn't been a stable Postgres release since the last version bump (the last bump was part of commit 58b4cb3).

Author: Peter Geoghegan
Discussion: https://postgr.es/m/CAH2-WzmSMmU2eNvY9+a4MNP+z02h6sa-uxZvN3un6jY02ZVBSw@mail.gmail.com
1 parent 58c47cc commit 93ee38e

File tree

4 files changed

+273
-53
lines changed

4 files changed

+273
-53
lines changed

contrib/pageinspect/btreefuncs.c

+128-18
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@
3131
#include "access/relation.h"
3232
#include "catalog/namespace.h"
3333
#include "catalog/pg_am.h"
34+
#include "catalog/pg_type.h"
3435
#include "funcapi.h"
3536
#include "miscadmin.h"
3637
#include "pageinspect.h"
38+
#include "utils/array.h"
3739
#include "utils/builtins.h"
3840
#include "utils/rel.h"
3941
#include "utils/varlena.h"
@@ -45,6 +47,8 @@ PG_FUNCTION_INFO_V1(bt_page_stats);
4547

4648
/* True when r's pg_class entry (rd_rel) has relkind RELKIND_INDEX. */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
4749
/* True when r's pg_class entry (rd_rel) has access method BTREE_AM_OID. */
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
50+
/* Reinterpret a pointer-valued Datum as an ItemPointer. */
#define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X))
51+
/* Wrap an ItemPointer as a pointer-valued Datum; the macro itself makes no copy. */
#define ItemPointerGetDatum(X) PointerGetDatum(X)
4852

4953
/* note: BlockNumber is unsigned, hence can't be negative */
5054
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \
@@ -243,6 +247,9 @@ struct user_args
243247
{
244248
Page page;
245249
OffsetNumber offset;
250+
bool leafpage;
251+
bool rightmost;
252+
TupleDesc tupd;
246253
};
247254

248255
/*-------------------------------------------------------
@@ -252,17 +259,25 @@ struct user_args
252259
* ------------------------------------------------------
253260
*/
254261
static Datum
255-
bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
262+
bt_page_print_tuples(FuncCallContext *fctx, struct user_args *uargs)
256263
{
257-
char *values[6];
264+
Page page = uargs->page;
265+
OffsetNumber offset = uargs->offset;
266+
bool leafpage = uargs->leafpage;
267+
bool rightmost = uargs->rightmost;
268+
bool ispivottuple;
269+
Datum values[9];
270+
bool nulls[9];
258271
HeapTuple tuple;
259272
ItemId id;
260273
IndexTuple itup;
261274
int j;
262275
int off;
263276
int dlen;
264-
char *dump;
277+
char *dump,
278+
*datacstring;
265279
char *ptr;
280+
ItemPointer htid;
266281

267282
id = PageGetItemId(page, offset);
268283

@@ -272,27 +287,112 @@ bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
272287
itup = (IndexTuple) PageGetItem(page, id);
273288

274289
j = 0;
275-
values[j++] = psprintf("%d", offset);
276-
values[j++] = psprintf("(%u,%u)",
277-
ItemPointerGetBlockNumberNoCheck(&itup->t_tid),
278-
ItemPointerGetOffsetNumberNoCheck(&itup->t_tid));
279-
values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
280-
values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
281-
values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
290+
memset(nulls, 0, sizeof(nulls));
291+
values[j++] = DatumGetInt16(offset);
292+
values[j++] = ItemPointerGetDatum(&itup->t_tid);
293+
values[j++] = Int32GetDatum((int) IndexTupleSize(itup));
294+
values[j++] = BoolGetDatum(IndexTupleHasNulls(itup));
295+
values[j++] = BoolGetDatum(IndexTupleHasVarwidths(itup));
282296

283297
ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
284298
dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
299+
300+
/*
301+
* Make sure that "data" column does not include posting list or pivot
302+
* tuple representation of heap TID(s).
303+
*
304+
* Note: BTreeTupleIsPivot() won't work reliably on !heapkeyspace indexes
305+
* (those built before BTREE_VERSION 4), but we have no way of determining
306+
* if this page came from a !heapkeyspace index. We may only have a bytea
307+
* nbtree page image to go on, so in general there is no metapage that we
308+
* can check.
309+
*
310+
* That's okay here because BTreeTupleIsPivot() can only return false for
311+
* a !heapkeyspace pivot, never true for a !heapkeyspace non-pivot. Since
312+
* heap TID isn't part of the keyspace in a !heapkeyspace index anyway,
313+
* there cannot possibly be a pivot tuple heap TID representation that we
314+
* fail to make an adjustment for. A !heapkeyspace index can have
315+
* BTreeTupleIsPivot() return true (due to things like suffix truncation
316+
* for INCLUDE indexes in Postgres v11), but when that happens
317+
* BTreeTupleGetHeapTID() can be trusted to work reliably (i.e. return
318+
* NULL).
319+
*
320+
* Note: BTreeTupleIsPosting() always works reliably, even with
321+
* !heapkeyspace indexes.
322+
*/
323+
if (BTreeTupleIsPosting(itup))
324+
dlen -= IndexTupleSize(itup) - BTreeTupleGetPostingOffset(itup);
325+
else if (BTreeTupleIsPivot(itup) && BTreeTupleGetHeapTID(itup) != NULL)
326+
dlen -= MAXALIGN(sizeof(ItemPointerData));
327+
328+
if (dlen < 0 || dlen > INDEX_SIZE_MASK)
329+
elog(ERROR, "invalid tuple length %d for tuple at offset number %u",
330+
dlen, offset);
285331
dump = palloc0(dlen * 3 + 1);
286-
values[j] = dump;
332+
datacstring = dump;
287333
for (off = 0; off < dlen; off++)
288334
{
289335
if (off > 0)
290336
*dump++ = ' ';
291337
sprintf(dump, "%02x", *(ptr + off) & 0xff);
292338
dump += 2;
293339
}
340+
values[j++] = CStringGetTextDatum(datacstring);
341+
pfree(datacstring);
342+
343+
/*
344+
* We need to work around the BTreeTupleIsPivot() !heapkeyspace limitation
345+
* again. Deduce whether or not tuple must be a pivot tuple based on
346+
* whether or not the page is a leaf page, as well as the page offset
347+
* number of the tuple.
348+
*/
349+
ispivottuple = (!leafpage || (!rightmost && offset == P_HIKEY));
350+
351+
/* LP_DEAD bit can never be set for pivot tuples, so show a NULL there */
352+
if (!ispivottuple)
353+
values[j++] = BoolGetDatum(ItemIdIsDead(id));
354+
else
355+
{
356+
Assert(!ItemIdIsDead(id));
357+
nulls[j++] = true;
358+
}
359+
360+
htid = BTreeTupleGetHeapTID(itup);
361+
if (ispivottuple && !BTreeTupleIsPivot(itup))
362+
{
363+
/* Don't show bogus heap TID in !heapkeyspace pivot tuple */
364+
htid = NULL;
365+
}
366+
367+
if (htid)
368+
values[j++] = ItemPointerGetDatum(htid);
369+
else
370+
nulls[j++] = true;
371+
372+
if (BTreeTupleIsPosting(itup))
373+
{
374+
/* Build an array of item pointers */
375+
ItemPointer tids;
376+
Datum *tids_datum;
377+
int nposting;
378+
379+
tids = BTreeTupleGetPosting(itup);
380+
nposting = BTreeTupleGetNPosting(itup);
381+
tids_datum = (Datum *) palloc(nposting * sizeof(Datum));
382+
for (int i = 0; i < nposting; i++)
383+
tids_datum[i] = ItemPointerGetDatum(&tids[i]);
384+
values[j++] = PointerGetDatum(construct_array(tids_datum,
385+
nposting,
386+
TIDOID,
387+
sizeof(ItemPointerData),
388+
false, 's'));
389+
pfree(tids_datum);
390+
}
391+
else
392+
nulls[j++] = true;
294393

295-
tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
394+
/* Build and return the result tuple */
395+
tuple = heap_form_tuple(uargs->tupd, values, nulls);
296396

297397
return HeapTupleGetDatum(tuple);
298398
}
@@ -378,12 +478,15 @@ bt_page_items(PG_FUNCTION_ARGS)
378478
elog(NOTICE, "page is deleted");
379479

380480
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
481+
uargs->leafpage = P_ISLEAF(opaque);
482+
uargs->rightmost = P_RIGHTMOST(opaque);
381483

382484
/* Build a tuple descriptor for our result type */
383485
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
384486
elog(ERROR, "return type must be a row type");
487+
tupleDesc = BlessTupleDesc(tupleDesc);
385488

386-
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
489+
uargs->tupd = tupleDesc;
387490

388491
fctx->user_fctx = uargs;
389492

@@ -395,7 +498,7 @@ bt_page_items(PG_FUNCTION_ARGS)
395498

396499
if (fctx->call_cntr < fctx->max_calls)
397500
{
398-
result = bt_page_print_tuples(fctx, uargs->page, uargs->offset);
501+
result = bt_page_print_tuples(fctx, uargs);
399502
uargs->offset++;
400503
SRF_RETURN_NEXT(fctx, result);
401504
}
@@ -463,12 +566,15 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
463566
elog(NOTICE, "page is deleted");
464567

465568
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
569+
uargs->leafpage = P_ISLEAF(opaque);
570+
uargs->rightmost = P_RIGHTMOST(opaque);
466571

467572
/* Build a tuple descriptor for our result type */
468573
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
469574
elog(ERROR, "return type must be a row type");
575+
tupleDesc = BlessTupleDesc(tupleDesc);
470576

471-
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
577+
uargs->tupd = tupleDesc;
472578

473579
fctx->user_fctx = uargs;
474580

@@ -480,7 +586,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
480586

481587
if (fctx->call_cntr < fctx->max_calls)
482588
{
483-
result = bt_page_print_tuples(fctx, uargs->page, uargs->offset);
589+
result = bt_page_print_tuples(fctx, uargs);
484590
uargs->offset++;
485591
SRF_RETURN_NEXT(fctx, result);
486592
}
@@ -510,7 +616,7 @@ bt_metap(PG_FUNCTION_ARGS)
510616
BTMetaPageData *metad;
511617
TupleDesc tupleDesc;
512618
int j;
513-
char *values[8];
619+
char *values[9];
514620
Buffer buffer;
515621
Page page;
516622
HeapTuple tuple;
@@ -557,17 +663,21 @@ bt_metap(PG_FUNCTION_ARGS)
557663

558664
/*
559665
* Get values of extended metadata if available, use default values
560-
* otherwise.
666+
* otherwise. Note that we rely on the assumption that btm_allequalimage
667+
* is initialized to zero with indexes that were built on versions prior
668+
* to Postgres 13 (just like _bt_metaversion()).
561669
*/
562670
if (metad->btm_version >= BTREE_NOVAC_VERSION)
563671
{
564672
values[j++] = psprintf("%u", metad->btm_oldest_btpo_xact);
565673
values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
674+
values[j++] = metad->btm_allequalimage ? "t" : "f";
566675
}
567676
else
568677
{
569678
values[j++] = "0";
570679
values[j++] = "-1";
680+
values[j++] = "f";
571681
}
572682

573683
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),

contrib/pageinspect/expected/btree.out

+7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ fastroot | 1
1212
fastlevel | 0
1313
oldest_xact | 0
1414
last_cleanup_num_tuples | -1
15+
allequalimage | t
1516

1617
SELECT * FROM bt_page_stats('test1_a_idx', 0);
1718
ERROR: block 0 is a meta page
@@ -41,6 +42,9 @@ itemlen | 16
4142
nulls | f
4243
vars | f
4344
data | 01 00 00 00 00 00 00 01
45+
dead | f
46+
htid | (0,1)
47+
tids |
4448

4549
SELECT * FROM bt_page_items('test1_a_idx', 2);
4650
ERROR: block number out of range
@@ -54,6 +58,9 @@ itemlen | 16
5458
nulls | f
5559
vars | f
5660
data | 01 00 00 00 00 00 00 01
61+
dead | f
62+
htid | (0,1)
63+
tids |
5764

5865
SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2));
5966
ERROR: block number 2 is out of range for relation "test1_a_idx"

contrib/pageinspect/pageinspect--1.7--1.8.sql

+53
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,56 @@ CREATE FUNCTION heap_tuple_infomask_flags(
1414
RETURNS record
1515
AS 'MODULE_PATHNAME', 'heap_tuple_infomask_flags'
1616
LANGUAGE C STRICT PARALLEL SAFE;
17+
18+
--
19+
-- bt_metap()
-- Drop the previous definition and re-create it so that the result row
-- carries a ninth OUT column, allequalimage (boolean), after
-- last_cleanup_num_tuples.
20+
--
21+
DROP FUNCTION bt_metap(text);
22+
CREATE FUNCTION bt_metap(IN relname text,
23+
OUT magic int4,
24+
OUT version int4,
25+
OUT root int4,
26+
OUT level int4,
27+
OUT fastroot int4,
28+
OUT fastlevel int4,
29+
OUT oldest_xact int4,
30+
OUT last_cleanup_num_tuples real,
31+
OUT allequalimage boolean)
32+
AS 'MODULE_PATHNAME', 'bt_metap'
33+
LANGUAGE C STRICT PARALLEL SAFE;
34+
35+
--
36+
-- bt_page_items(text, int4)
-- Drop the previous definition and re-create it with three OUT columns
-- appended after "data": dead (boolean), htid (tid) and tids (tid[]).
-- Bound to the C symbol bt_page_items.
37+
--
38+
DROP FUNCTION bt_page_items(text, int4);
39+
CREATE FUNCTION bt_page_items(IN relname text, IN blkno int4,
40+
OUT itemoffset smallint,
41+
OUT ctid tid,
42+
OUT itemlen smallint,
43+
OUT nulls bool,
44+
OUT vars bool,
45+
OUT data text,
46+
OUT dead boolean,
47+
OUT htid tid,
48+
OUT tids tid[])
49+
RETURNS SETOF record
50+
AS 'MODULE_PATHNAME', 'bt_page_items'
51+
LANGUAGE C STRICT PARALLEL SAFE;
52+
53+
--
54+
-- bt_page_items(bytea)
-- Variant that takes a raw page image instead of a relation name and block
-- number. Dropped and re-created with the same three appended OUT columns:
-- dead (boolean), htid (tid) and tids (tid[]).
-- Bound to the C symbol bt_page_items_bytea.
55+
--
56+
DROP FUNCTION bt_page_items(bytea);
57+
CREATE FUNCTION bt_page_items(IN page bytea,
58+
OUT itemoffset smallint,
59+
OUT ctid tid,
60+
OUT itemlen smallint,
61+
OUT nulls bool,
62+
OUT vars bool,
63+
OUT data text,
64+
OUT dead boolean,
65+
OUT htid tid,
66+
OUT tids tid[])
67+
RETURNS SETOF record
68+
AS 'MODULE_PATHNAME', 'bt_page_items_bytea'
69+
LANGUAGE C STRICT PARALLEL SAFE;

0 commit comments

Comments
 (0)