diff options
author | Teodor Sigaev | 2017-12-22 10:33:16 +0000 |
---|---|---|
committer | Teodor Sigaev | 2017-12-22 10:33:16 +0000 |
commit | 854823fa334cb826eed50da751801d0693b10173 (patch) | |
tree | 437b74de241ede698bc1f9734f40829ee648b93e | |
parent | 9373baa0f764392c504df034afd2f6b178c29491 (diff) |
Add optional compression method to SP-GiST
Patch allows to have different types of column and value stored in leaf tuples
of SP-GiST. The main application of feature is to transform complex column type
to simple indexed type or for truncating too long value, transformation could
be lossy. Simple example: polygons are converted to their bounding boxes,
this opclass follows.
Authors: me, Heikki Linnakangas, Alexander Korotkov, Nikita Glukhov
Reviewed-By: all authors + Darafei Praliaskouski
Discussions:
https://www.postgresql.org/message-id/[email protected]
https://www.postgresql.org/message-id/flat/[email protected]#[email protected]
-rw-r--r-- | doc/src/sgml/spgist.sgml | 92 | ||||
-rw-r--r-- | src/backend/access/spgist/spgdoinsert.c | 37 | ||||
-rw-r--r-- | src/backend/access/spgist/spgscan.c | 6 | ||||
-rw-r--r-- | src/backend/access/spgist/spgutils.c | 21 | ||||
-rw-r--r-- | src/backend/access/spgist/spgvalidate.c | 50 | ||||
-rw-r--r-- | src/include/access/spgist.h | 5 | ||||
-rw-r--r-- | src/include/access/spgist_private.h | 8 |
7 files changed, 182 insertions, 37 deletions
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml index 139c8ed8f7..b4a8be476e 100644 --- a/doc/src/sgml/spgist.sgml +++ b/doc/src/sgml/spgist.sgml @@ -240,20 +240,22 @@ <para> There are five user-defined methods that an index operator class for - <acronym>SP-GiST</acronym> must provide. All five follow the convention - of accepting two <type>internal</type> arguments, the first of which is a - pointer to a C struct containing input values for the support method, - while the second argument is a pointer to a C struct where output values - must be placed. Four of the methods just return <type>void</type>, since - all their results appear in the output struct; but + <acronym>SP-GiST</acronym> must provide, and one is optional. All five + mandatory methods follow the convention of accepting two <type>internal</type> + arguments, the first of which is a pointer to a C struct containing input + values for the support method, while the second argument is a pointer to a + C struct where output values must be placed. Four of the mandatory methods just + return <type>void</type>, since all their results appear in the output struct; but <function>leaf_consistent</function> additionally returns a <type>boolean</type> result. The methods must not modify any fields of their input structs. In all cases, the output struct is initialized to zeroes before calling the - user-defined method. + user-defined method. Optional sixth method <function>compress</function> + accepts datum to be indexed as the only argument and returns value suitable + for physical storage in leaf tuple. </para> <para> - The five user-defined methods are: + The five mandatory user-defined methods are: </para> <variablelist> @@ -283,6 +285,7 @@ typedef struct spgConfigOut { Oid prefixType; /* Data type of inner-tuple prefixes */ Oid labelType; /* Data type of inner-tuple node labels */ + Oid leafType; /* Data type of leaf-tuple values */ bool canReturnData; /* Opclass can reconstruct original data */ bool longValuesOK; /* Opclass can cope with values > 1 page */ } spgConfigOut; @@ -305,6 +308,22 @@ typedef struct spgConfigOut class is capable of segmenting long values by repeated suffixing (see <xref linkend="spgist-limits"/>). </para> + + <para> + <structfield>leafType</structfield> is typically the same as + <structfield>attType</structfield>. For the reasons of backward + compatibility, method <function>config</function> can + leave <structfield>leafType</structfield> uninitialized; that would + give the same effect as setting <structfield>leafType</structfield> equal + to <structfield>attType</structfield>. When <structfield>attType</structfield> + and <structfield>leafType</structfield> are different, then optional + method <function>compress</function> must be provided. + Method <function>compress</function> is responsible + for transformation of datums to be indexed from <structfield>attType</structfield> + to <structfield>leafType</structfield>. + Note: both consistent functions will get <structfield>scankeys</structfield> + unchanged, without transformation using <function>compress</function>. + </para> </listitem> </varlistentry> @@ -380,10 +399,16 @@ typedef struct spgChooseOut } spgChooseOut; </programlisting> - <structfield>datum</structfield> is the original datum that was to be inserted - into the index. - <structfield>leafDatum</structfield> is initially the same as - <structfield>datum</structfield>, but can change at lower levels of the tree + <structfield>datum</structfield> is the original datum of + <structname>spgConfigIn</structname>.<structfield>attType</structfield> + type that was to be inserted into the index. + <structfield>leafDatum</structfield> is a value of + <structname>spgConfigOut</structname>.<structfield>leafType</structfield> + type which is initially an result of method + <function>compress</function> applied to <structfield>datum</structfield> + when method <function>compress</function> is provided, or same value as + <structfield>datum</structfield> otherwise. + <structfield>leafDatum</structfield> can change at lower levels of the tree if the <function>choose</function> or <function>picksplit</function> methods change it. When the insertion search reaches a leaf page, the current value of <structfield>leafDatum</structfield> is what will be stored @@ -418,7 +443,7 @@ typedef struct spgChooseOut Set <structfield>levelAdd</structfield> to the increment in <structfield>level</structfield> caused by descending through that node, or leave it as zero if the operator class does not use levels. - Set <structfield>restDatum</structfield> to equal <structfield>datum</structfield> + Set <structfield>restDatum</structfield> to equal <structfield>leafDatum</structfield> if the operator class does not modify datums from one level to the next, or otherwise set it to the modified value to be used as <structfield>leafDatum</structfield> at the next level. @@ -509,7 +534,9 @@ typedef struct spgPickSplitOut </programlisting> <structfield>nTuples</structfield> is the number of leaf tuples provided. - <structfield>datums</structfield> is an array of their datum values. + <structfield>datums</structfield> is an array of their datum values of + <structname>spgConfigOut</structname>.<structfield>leafType</structfield> + type. <structfield>level</structfield> is the current level that all the leaf tuples share, which will become the level of the new inner tuple. </para> @@ -624,7 +651,8 @@ typedef struct spgInnerConsistentOut <structfield>reconstructedValue</structfield> is the value reconstructed for the parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the <function>inner_consistent</function> function did not provide a value at the - parent level. + parent level. <structfield>reconstructedValue</structfield> is always of + <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type. <structfield>traversalValue</structfield> is a pointer to any traverse data passed down from the previous call of <function>inner_consistent</function> on the parent index tuple, or NULL at the root level. @@ -659,6 +687,7 @@ typedef struct spgInnerConsistentOut necessarily so, so an array is used.) If value reconstruction is needed, set <structfield>reconstructedValues</structfield> to an array of the values + of <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type reconstructed for each child node to be visited; otherwise, leave <structfield>reconstructedValues</structfield> as NULL. If it is desired to pass down additional out-of-band information @@ -730,7 +759,8 @@ typedef struct spgLeafConsistentOut <structfield>reconstructedValue</structfield> is the value reconstructed for the parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the <function>inner_consistent</function> function did not provide a value at the - parent level. + parent level. <structfield>reconstructedValue</structfield> is always of + <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type. <structfield>traversalValue</structfield> is a pointer to any traverse data passed down from the previous call of <function>inner_consistent</function> on the parent index tuple, or NULL at the root level. @@ -739,16 +769,18 @@ typedef struct spgLeafConsistentOut <structfield>returnData</structfield> is <literal>true</literal> if reconstructed data is required for this query; this will only be so if the <function>config</function> function asserted <structfield>canReturnData</structfield>. - <structfield>leafDatum</structfield> is the key value stored in the current - leaf tuple. + <structfield>leafDatum</structfield> is the key value of + <structname>spgConfigOut</structname>.<structfield>leafType</structfield> + stored in the current leaf tuple. </para> <para> The function must return <literal>true</literal> if the leaf tuple matches the query, or <literal>false</literal> if not. In the <literal>true</literal> case, if <structfield>returnData</structfield> is <literal>true</literal> then - <structfield>leafValue</structfield> must be set to the value originally supplied - to be indexed for this leaf tuple. Also, + <structfield>leafValue</structfield> must be set to the value of + <structname>spgConfigIn</structname>.<structfield>attType</structfield> type + originally supplied to be indexed for this leaf tuple. Also, <structfield>recheck</structfield> may be set to <literal>true</literal> if the match is uncertain and so the operator(s) must be re-applied to the actual heap tuple to verify the match. @@ -757,6 +789,26 @@ typedef struct spgLeafConsistentOut </varlistentry> </variablelist> + <para> + The optional user-defined method is: + </para> + + <variablelist> + <varlistentry> + <term><function>Datum compress(Datum in)</function></term> + <listitem> + <para> + Converts the data item into a format suitable for physical storage in + a leaf tuple of index page. It accepts + <structname>spgConfigIn</structname>.<structfield>attType</structfield> + value and return + <structname>spgConfigOut</structname>.<structfield>leafType</structfield> + value. Output value should not be toasted. + </para> + </listitem> + </varlistentry> + </variablelist> + <para> All the SP-GiST support methods are normally called in a short-lived memory context; that is, <varname>CurrentMemoryContext</varname> will be reset diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index a5f4c4059c..a8cb8c7bdc 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -1906,14 +1906,37 @@ spgdoinsert(Relation index, SpGistState *state, procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC); /* - * Since we don't use index_form_tuple in this AM, we have to make sure + * Prepare the leaf datum to insert. + * + * If an optional "compress" method is provided, then call it to form + * the leaf datum from the input datum. Otherwise store the input datum as + * is. Since we don't use index_form_tuple in this AM, we have to make sure * value to be inserted is not toasted; FormIndexDatum doesn't guarantee - * that. + * that. But we assume the "compress" method to return an untoasted value. */ - if (!isnull && state->attType.attlen == -1) - datum = PointerGetDatum(PG_DETOAST_DATUM(datum)); + if (!isnull) + { + if (OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC))) + { + FmgrInfo *compressProcinfo = NULL; + + compressProcinfo = index_getprocinfo(index, 1, SPGIST_COMPRESS_PROC); + leafDatum = FunctionCall1Coll(compressProcinfo, + index->rd_indcollation[0], + datum); + } + else + { + Assert(state->attLeafType.type == state->attType.type); - leafDatum = datum; + if (state->attType.attlen == -1) + leafDatum = PointerGetDatum(PG_DETOAST_DATUM(datum)); + else + leafDatum = datum; + } + } + else + leafDatum = (Datum) 0; /* * Compute space needed for a leaf tuple containing the given datum. @@ -1923,7 +1946,7 @@ spgdoinsert(Relation index, SpGistState *state, */ if (!isnull) leafSize = SGLTHDRSZ + sizeof(ItemIdData) + - SpGistGetTypeSize(&state->attType, leafDatum); + SpGistGetTypeSize(&state->attLeafType, leafDatum); else leafSize = SGDTSIZE + sizeof(ItemIdData); @@ -2138,7 +2161,7 @@ spgdoinsert(Relation index, SpGistState *state, { leafDatum = out.result.matchNode.restDatum; leafSize = SGLTHDRSZ + sizeof(ItemIdData) + - SpGistGetTypeSize(&state->attType, leafDatum); + SpGistGetTypeSize(&state->attLeafType, leafDatum); } /* diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 7965b5846d..c64a174143 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -40,7 +40,7 @@ typedef struct ScanStackEntry static void freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry) { - if (!so->state.attType.attbyval && + if (!so->state.attLeafType.attbyval && DatumGetPointer(stackEntry->reconstructedValue) != NULL) pfree(DatumGetPointer(stackEntry->reconstructedValue)); if (stackEntry->traversalValue) @@ -527,8 +527,8 @@ redirect: if (out.reconstructedValues) newEntry->reconstructedValue = datumCopy(out.reconstructedValues[i], - so->state.attType.attbyval, - so->state.attType.attlen); + so->state.attLeafType.attbyval, + so->state.attLeafType.attlen); else newEntry->reconstructedValue = (Datum) 0; diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index bd5301f383..e571f0cce0 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -125,6 +125,22 @@ spgGetCache(Relation index) /* Get the information we need about each relevant datatype */ fillTypeDesc(&cache->attType, atttype); + + if (OidIsValid(cache->config.leafType) && + cache->config.leafType != atttype) + { + if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("compress method must not defined when leaf type is different from input type"))); + + fillTypeDesc(&cache->attLeafType, cache->config.leafType); + } + else + { + cache->attLeafType = cache->attType; + } + fillTypeDesc(&cache->attPrefixType, cache->config.prefixType); fillTypeDesc(&cache->attLabelType, cache->config.labelType); @@ -164,6 +180,7 @@ initSpGistState(SpGistState *state, Relation index) state->config = cache->config; state->attType = cache->attType; + state->attLeafType = cache->attLeafType; state->attPrefixType = cache->attPrefixType; state->attLabelType = cache->attLabelType; @@ -618,7 +635,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr, /* compute space needed (note result is already maxaligned) */ size = SGLTHDRSZ; if (!isnull) - size += SpGistGetTypeSize(&state->attType, datum); + size += SpGistGetTypeSize(&state->attLeafType, datum); /* * Ensure that we can replace the tuple with a dead tuple later. This @@ -634,7 +651,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr, tup->nextOffset = InvalidOffsetNumber; tup->heapPtr = *heapPtr; if (!isnull) - memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum); + memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum); return tup; } diff --git a/src/backend/access/spgist/spgvalidate.c b/src/backend/access/spgist/spgvalidate.c index 157cf2a028..440b3ce917 100644 --- a/src/backend/access/spgist/spgvalidate.c +++ b/src/backend/access/spgist/spgvalidate.c @@ -22,6 +22,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" #include "utils/regproc.h" #include "utils/syscache.h" @@ -52,6 +53,10 @@ spgvalidate(Oid opclassoid) OpFamilyOpFuncGroup *opclassgroup; int i; ListCell *lc; + spgConfigIn configIn; + spgConfigOut configOut; + Oid configOutLefttype = InvalidOid; + Oid configOutRighttype = InvalidOid; /* Fetch opclass information */ classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid)); @@ -74,6 +79,7 @@ spgvalidate(Oid opclassoid) /* Fetch all operators and support functions of the opfamily */ oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid)); proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid)); + grouplist = identify_opfamily_groups(oprlist, proclist); /* Check individual support functions */ for (i = 0; i < proclist->n_members; i++) @@ -100,6 +106,40 @@ spgvalidate(Oid opclassoid) switch (procform->amprocnum) { case SPGIST_CONFIG_PROC: + ok = check_amproc_signature(procform->amproc, VOIDOID, true, + 2, 2, INTERNALOID, INTERNALOID); + configIn.attType = procform->amproclefttype; + memset(&configOut, 0, sizeof(configOut)); + + OidFunctionCall2(procform->amproc, + PointerGetDatum(&configIn), + PointerGetDatum(&configOut)); + + configOutLefttype = procform->amproclefttype; + configOutRighttype = procform->amprocrighttype; + + /* + * When leaf and attribute types are the same, compress function + * is not required and we set corresponding bit in functionset + * for later group consistency check. + */ + if (!OidIsValid(configOut.leafType) || + configOut.leafType == configIn.attType) + { + foreach(lc, grouplist) + { + OpFamilyOpFuncGroup *group = lfirst(lc); + + if (group->lefttype == procform->amproclefttype && + group->righttype == procform->amprocrighttype) + { + group->functionset |= + ((uint64) 1) << SPGIST_COMPRESS_PROC; + break; + } + } + } + break; case SPGIST_CHOOSE_PROC: case SPGIST_PICKSPLIT_PROC: case SPGIST_INNER_CONSISTENT_PROC: @@ -110,6 +150,15 @@ spgvalidate(Oid opclassoid) ok = check_amproc_signature(procform->amproc, BOOLOID, true, 2, 2, INTERNALOID, INTERNALOID); break; + case SPGIST_COMPRESS_PROC: + if (configOutLefttype != procform->amproclefttype || + configOutRighttype != procform->amprocrighttype) + ok = false; + else + ok = check_amproc_signature(procform->amproc, + configOut.leafType, true, + 1, 1, procform->amproclefttype); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -178,7 +227,6 @@ spgvalidate(Oid opclassoid) } /* Now check for inconsistent groups of operators/functions */ - grouplist = identify_opfamily_groups(oprlist, proclist); opclassgroup = NULL; foreach(lc, grouplist) { diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h index d1bc396e6d..06b1d88e5a 100644 --- a/src/include/access/spgist.h +++ b/src/include/access/spgist.h @@ -30,7 +30,9 @@ #define SPGIST_PICKSPLIT_PROC 3 #define SPGIST_INNER_CONSISTENT_PROC 4 #define SPGIST_LEAF_CONSISTENT_PROC 5 -#define SPGISTNProc 5 +#define SPGIST_COMPRESS_PROC 6 +#define SPGISTNRequiredProc 5 +#define SPGISTNProc 6 /* * Argument structs for spg_config method @@ -44,6 +46,7 @@ typedef struct spgConfigOut { Oid prefixType; /* Data type of inner-tuple prefixes */ Oid labelType; /* Data type of inner-tuple node labels */ + Oid leafType; /* Data type of leaf-tuple values */ bool canReturnData; /* Opclass can reconstruct original data */ bool longValuesOK; /* Opclass can cope with values > 1 page */ } spgConfigOut; diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h index 1c4b321b6c..e55de9dc54 100644 --- a/src/include/access/spgist_private.h +++ b/src/include/access/spgist_private.h @@ -119,7 +119,8 @@ typedef struct SpGistState { spgConfigOut config; /* filled in by opclass config method */ - SpGistTypeDesc attType; /* type of input data and leaf values */ + SpGistTypeDesc attType; /* type of values to be indexed/restored */ + SpGistTypeDesc attLeafType; /* type of leaf-tuple values */ SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */ SpGistTypeDesc attLabelType; /* type of node label values */ @@ -178,7 +179,8 @@ typedef struct SpGistCache { spgConfigOut config; /* filled in by opclass config method */ - SpGistTypeDesc attType; /* type of input data and leaf values */ + SpGistTypeDesc attType; /* type of values to be indexed/restored */ + SpGistTypeDesc attLeafType; /* type of leaf-tuple values */ SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */ SpGistTypeDesc attLabelType; /* type of node label values */ @@ -300,7 +302,7 @@ typedef SpGistLeafTupleData *SpGistLeafTuple; #define SGLTHDRSZ MAXALIGN(sizeof(SpGistLeafTupleData)) #define SGLTDATAPTR(x) (((char *) (x)) + SGLTHDRSZ) -#define SGLTDATUM(x, s) ((s)->attType.attbyval ? \ +#define SGLTDATUM(x, s) ((s)->attLeafType.attbyval ? \ *(Datum *) SGLTDATAPTR(x) : \ PointerGetDatum(SGLTDATAPTR(x))) |